initial (iset) integer-set library based on chicken implementation

This commit is contained in:
Alex Shinn 2012-06-07 01:36:08 +09:00
parent 25247ee4b5
commit 638558ebe0
8 changed files with 684 additions and 0 deletions

77
lib/chibi/iset.sld Normal file
View file

@ -0,0 +1,77 @@
;; base.scm - base integer set operations
;; Copyright (c) 2004-2012 Alex Shinn. All rights reserved.
;; BSD-style license: http://synthcode.com/license.txt
;; An integer set (iset) is a set of exact integers optimized for
;; minimal space usage and fast membership lookup. General set
;; operations are provided based on the character set operations found
;; in SRFI-14.
;;
;; Creating isets:
;;
;; (make-iset) ; an empty integer set
;; (make-iset n) ; a set of the single integer N
;; (make-iset n m) ; a set of the range of all integers from N-M inclusive
;;
;; The following procedures are provided as direct analogs of the
;; SRFI-14 procedures, accepting and returning isets and integers in
;; place of char-sets and characters:
;;
;; Creating isets:
;;
;; (iset-copy is) ; a new copy of IS
;; (iset n ...) ; an iset containing the elements N...
;; (list->iset ls [base-is]) ; an iset containing all the integers in
;; ; list LS, union BASE-IS if provided
;; (list->iset! ls base-is) ; same as above, allowed but not required to
;; ; modify base-is
;;
;; Querying isets:
;;
;; (iset-size is) ; return the # of elements in IS
;; (iset-contains? is n) ; test N for membership in IS
;; (iset->list is) ; returns a list of all integers in IS
;;
;; Predicates:
;;
;; (iset? obj) ; #t iff obj is an integer set
;; (iset= is ...) ; #t iff all arguments are equivalent integer sets
;; (iset<= is ...) ; #t iff the arguments are monotonically increasing sets
;; (iset>= is ...) ; #t iff the arguments are monotonically decreasing sets
;;
;; Cursors:
;;
;; (iset-cursor iset)
;; (iset-ref iset cursor)
;; (iset-cursor-next iset cursor)
;; (end-of-iset? iset)
;;
;; Set operations:
;;
;; (iset-adjoin is n ...) ; char-set-adjoin
;; (iset-delete is n ...) ; char-set-delete
;;
;; (iset-adjoin! is n ...) ; char-set-adjoin!
;; (iset-delete! is n ...) ; char-set-delete!
;;
;; (iset-union is1 ...) ; char-set-union
;; (iset-intersection is1 ...) ; char-set-intersection
;; (iset-difference is1 is2 ...) ; char-set-difference
;;
;; (iset-union! is1 ...) ; char-set-union!
;; (iset-intersection! is1 ...) ; char-set-intersection!
;; (iset-difference! is1 is2 ...) ; char-set-difference!
(define-library (chibi iset)
(import (chibi iset base)
(chibi iset iterators)
(chibi iset constructors))
(export
%make-iset make-iset iset? iset-contains? Integer-Set
iset iset-copy list->iset list->iset! iset-map
iset-adjoin iset-adjoin! iset-delete iset-delete!
iset-union iset-union! iset-intersection iset-intersection!
iset-difference iset-difference!
iset-empty? iset-fold iset-fold-node iset-for-each iset-for-each-node
iset-map iset->list iset-size
iset-cursor iset-cursor? iset-cursor-next iset-ref end-of-iset?))

32
lib/chibi/iset/base.scm Normal file
View file

@ -0,0 +1,32 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The most basic set interface. We provide a data type, low-level
;; constructor, and membership test. This allows libraries to provide
;; an iset API without needing to include the full iset library.
(define-record-type Integer-Set
(%make-iset start end bits left right)
iset?
(start iset-start iset-start-set!)
(end iset-end iset-end-set!)
(bits iset-bits iset-bits-set!)
(left iset-left iset-left-set!)
(right iset-right iset-right-set!))
(define (make-iset . opt)
(if (null? opt)
(%make-iset 0 0 0 #f #f)
(let ((end (if (pair? (cdr opt)) (cadr opt) (car opt))))
(%make-iset (car opt) end #f #f #f))))
(define (iset-contains? iset n)
(let lp ((is iset))
(let ((start (iset-start is)))
(if (< n start)
(let ((left (iset-left is))) (and left (lp left)))
(let ((end (iset-end is)))
(if (> n end)
(let ((right (iset-right is))) (and right (lp right)))
(let ((bits (iset-bits is)))
(or (not bits)
(bit-set? (- n start) bits)))))))))

8
lib/chibi/iset/base.sld Normal file
View file

@ -0,0 +1,8 @@
(define-library (chibi iset base)
(import (scheme) (srfi 9) (srfi 33))
(include "base.scm")
(export
%make-iset make-iset iset? iset-contains? Integer-Set
iset-start iset-end iset-bits iset-left iset-right
iset-start-set! iset-end-set! iset-bits-set! iset-left-set! iset-right-set!))

View file

@ -0,0 +1,321 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Utilities for constructing and joining isets.
(define bits-thresh 128) ; within 128 we join into a bitmap
(define bits-max 512) ; don't make bitmaps larger than this
(define (bit-set n index)
(bitwise-ior n (arithmetic-shift 1 index)))
(define (bit-clear n index)
(if (bit-set? index n)
(- n (arithmetic-shift 1 index))
n))
(define (iset . args)
(list->iset args))
(define (list->iset! ls iset)
(for-each (lambda (i) (iset-adjoin1! iset i)) ls)
iset)
(define (list->iset ls . opt)
(list->iset! ls (if (pair? opt) (iset-copy (car opt)) (make-iset))))
(define (iset-copy iset)
(and iset
(%make-iset
(iset-start iset)
(iset-end iset)
(iset-bits iset)
(iset-copy (iset-left iset))
(iset-copy (iset-right iset)))))
(define (iset-copy-node iset)
(%make-iset (iset-start iset) (iset-end iset) (iset-bits iset) #f #f))
(define (iset-set-node! a b)
(iset-start-set! a (iset-start b))
(iset-end-set! a (iset-end b))
(iset-bits-set! a (iset-bits b)))
(define (iset-max-end iset)
(cond ((iset-right iset) => iset-max-end)
(else (iset-end iset))))
(define (iset-min-start iset)
(cond ((iset-left iset) => iset-min-start)
(else (iset-start iset))))
(define (iset-insert-left! iset new)
(let ((left (iset-left iset)))
(if (and left (< (iset-end new) (iset-start left)))
(iset-right-set! new left)
(iset-left-set! new left)))
(iset-left-set! iset new))
(define (iset-insert-right! iset new)
(let ((right (iset-right iset)))
(if (and right (< (iset-end new) (iset-start right)))
(iset-right-set! new right)
(iset-left-set! new right)))
(iset-right-set! iset new))
(define (range->bits start end)
(- (arithmetic-shift 1 (+ 1 (- end start))) 1))
(define (iset-squash-bits! iset)
(let ((bits (iset-bits iset)))
(if (= bits (range->bits (iset-start iset) (iset-end iset)))
(iset-bits-set! iset #f))))
(define (iset-adjoin1! iset n)
(cond
((iset-empty? iset)
(iset-start-set! iset n)
(iset-end-set! iset n)
(iset-bits-set! iset #f))
(else
(let ((start (iset-start iset))
(end (iset-end iset))
(bits (iset-bits iset)))
(cond
((< n start)
(let ((s-diff (- start n)))
(if (let* ((left (iset-left iset))
(m-end (and left (iset-max-end left))))
(and m-end
(or (< n m-end)
(< (- n m-end) s-diff))))
(iset-adjoin1! (iset-left iset) n)
(cond
((and (< s-diff bits-thresh)
(< (- end n) bits-max))
(iset-start-set! iset n)
(let ((bits2 (arithmetic-shift (or bits (range->bits start end))
s-diff)))
(iset-bits-set! iset (+ bits2 1))
(iset-squash-bits! iset)))
(else (iset-insert-left! iset (make-iset n)))))))
((> n end)
(let ((e-diff (- n end)))
(if (let* ((right (iset-right iset))
(m-start (and right (iset-min-start right))))
(and m-start
(or (> n m-start)
(> (- n m-start) e-diff))))
(iset-adjoin1! (iset-right iset) n)
(cond
((and (< e-diff bits-thresh)
(< (- n start) bits-max))
(iset-end-set! iset n)
(iset-bits-set! iset (bit-set (or bits (range->bits start end))
(- n start)))
(iset-squash-bits! iset))
(else (iset-insert-right! iset (make-iset n)))))))
(bits
(iset-bits-set! iset (bit-set (iset-bits iset) (- n start)))
(iset-squash-bits! iset)))))))
(define (iset-adjoin-node! a b)
(cond
((iset-empty? a)
(iset-start-set! a (iset-start b))
(iset-end-set! a (iset-end b))
(iset-bits-set! a (iset-bits b)))
((not (iset-empty? b))
(let ((a-start (iset-start a))
(a-end (iset-end a))
(a-bits (iset-bits a))
(b-start (iset-start b))
(b-end (iset-end b))
(b-bits (iset-bits b)))
(cond
;; aaaa...
;; ...bbbb
((< b-end a-start)
(let ((near-diff (- a-start b-end))
(start-diff (- a-start b-start))
(far-diff (- a-end b-start)))
(if (let* ((left (iset-left a))
(m-end (and left (iset-max-end left))))
(and m-end
(or (< b-end m-end)
(< (- b-end m-end) near-diff))))
(iset-adjoin-node! (iset-left a) b)
(cond
((and (< near-diff bits-thresh)
(< far-diff bits-max))
(let ((bits (arithmetic-shift
(or a-bits (range->bits a-start a-end))
start-diff))
(lo-bits (or b-bits (range->bits b-start b-end))))
(iset-start-set! a b-start)
(iset-bits-set! a (bitwise-ior bits lo-bits))
(iset-squash-bits! a)))
(else (iset-insert-left! a (iset-copy-node b)))))))
;; ...aaaa
;; bbbb...
((> b-start a-end)
(let ((near-diff (- b-start a-end))
(start-diff (- b-start a-start))
(far-diff (- b-end a-start)))
(if (let* ((right (iset-right a))
(m-start (and right (iset-min-start right))))
(and m-start
(or (> b-start m-start)
(> (- b-start m-start) near-diff))))
(iset-adjoin-node! (iset-right a) b)
(cond
((and (< near-diff bits-thresh)
(< far-diff bits-max))
(iset-end-set! a b-end)
(iset-bits-set!
a
(bitwise-ior
(or a-bits (range->bits a-start a-end))
(arithmetic-shift
(or b-bits (range->bits b-start b-end))
start-diff)))
(iset-squash-bits! a))
(else (iset-insert-right! a (iset-copy-node b)))))))
;; aaaa...
;; bbbb...
((> b-start a-start)
(iset-end-set! a (max a-end b-end))
(cond
((or a-bits b-bits)
(iset-bits-set!
a
(bitwise-ior
(or a-bits (range->bits a-start a-end))
(arithmetic-shift
(or b-bits (range->bits b-start b-end))
(- b-start a-start))))
(iset-squash-bits! a))))
;; aaaa...
;; bbbb...
((< b-start a-start)
(iset-start-set! a b-start)
(iset-end-set! a (max a-end b-end))
(cond
((or a-bits b-bits)
(iset-bits-set!
a
(bitwise-ior
(arithmetic-shift
(or a-bits (range->bits a-start a-end))
(- a-start b-start))
(or b-bits (range->bits b-start b-end))))
(iset-squash-bits! a))))
;; aaaa...
;; bbbb...
(else
(iset-end-set! a (max a-end b-end))
(cond
((or a-bits b-bits)
(iset-bits-set!
a
(bitwise-ior
(or a-bits (range->bits a-start a-end))
(or b-bits (range->bits b-start b-end))))
(iset-squash-bits! a)))))))))
(define (iset-adjoin! iset . ls)
(list->iset! ls iset))
(define (iset-adjoin iset . ls)
(list->iset ls iset))
;; delete directly in this node
(define (%iset-delete1! iset n)
(let ((start (iset-start iset))
(end (iset-end iset))
(bits (iset-bits iset)))
(cond
(bits
(iset-bits-set! iset (bit-clear bits (- n start))))
((= n start)
(if (= n end)
(iset-bits-set! iset 0)
(iset-start-set! iset (+ n 1))))
((= n end)
(iset-end-set! iset (- n 1)))
(else
(iset-end-set! iset (- n 1))
(iset-insert-right! iset (make-iset (+ n 1) end))))))
(define (iset-delete1! iset n)
(let lp ((is iset))
(let ((start (iset-start is)))
(if (< n start)
(let ((left (iset-left is)))
(if left (lp left)))
(let ((end (iset-end is)))
(if (> n end)
(let ((right (iset-right is)))
(if right (lp right)))
(%iset-delete1! is n)))))))
(define (iset-delete! iset . args)
(for-each (lambda (i) (iset-delete1! iset i)) args)
iset)
(define (iset-delete iset . args)
(apply iset-delete! (iset-copy iset) args))
(define (iset-map proc iset)
(iset-fold (lambda (i is) (iset-adjoin! is i)) (make-iset) iset))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; High-level set operations.
;;
;; Union is optimized to work at the node level. Intersection and
;; difference iterate over individual elements and so have a lot of
;; room for improvement, at the expense of the complexity of
;; iset-adjoin-node!.
(define (iset-union2! a b)
(iset-for-each-node
(lambda (is)
(iset-adjoin-node! a is))
b))
(define (iset-union! . args)
(let* ((a (and (pair? args) (car args)))
(b (and (pair? args) (pair? (cdr args)) (cadr args))))
(cond
(b
(iset-union2! a b)
(apply iset-union! a (cddr args)))
(a a)
(else (make-iset)))))
(define (iset-union . args)
(if (null? args)
(make-iset)
(apply iset-union! (iset-copy (car args)) (cdr args))))
(define (iset-intersection! a . args)
(let ((b (and (pair? args) (car args))))
(cond
(b
(iset-for-each
(lambda (i) (if (not (iset-contains? b i)) (iset-delete1! a i)))
a)
(apply iset-intersection! a (cdr args)))
(else a))))
(define (iset-intersection a . args)
(apply iset-intersection! (iset-copy a) args))
(define (iset-difference! a . args)
(if (null? args)
a
(begin
(iset-for-each (lambda (i) (iset-delete1! a i)) (car args))
(apply iset-difference! a (cdr args)))))
(define (iset-difference a . args)
(apply iset-difference! (iset-copy a) args))

View file

@ -0,0 +1,9 @@
(define-library (chibi iset constructors)
(import (scheme) (srfi 33) (chibi iset base) (chibi iset iterators))
(include "constructors.scm")
(export
iset iset-copy list->iset list->iset! iset-map
iset-adjoin iset-adjoin! iset-delete iset-delete!
iset-union iset-union! iset-intersection iset-intersection!
iset-difference iset-difference!))

View file

@ -0,0 +1,169 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Cursors
(define (iset-empty? iset)
(and (iset? iset)
(cond ((iset-bits iset) => zero?) (else #f))
(let ((l (iset-left iset))) (or (not l) (iset-empty? l)))
(let ((r (iset-right iset))) (or (not r) (iset-empty? r)))))
(define-record-type Iset-Cursor
(make-iset-cursor node pos stack)
iset-cursor?
(node iset-cursor-node iset-cursor-node-set!)
(pos iset-cursor-pos iset-cursor-pos-set!)
(stack iset-cursor-stack iset-cursor-stack-set!))
;; Create a new iset cursor pointing to the first element of iset,
;; with an optional stack argument.
(define (iset-cursor iset . o)
(iset-cursor-advance
(make-iset-cursor iset
(or (iset-bits iset) (iset-start iset))
(if (pair? o) (car o) '()))))
;; Continue to the next node in the search stack.
(define (iset-cursor-pop cur)
(let ((node (iset-cursor-node cur))
(stack (iset-cursor-stack cur)))
(cond
((iset-left node)
(iset-cursor
(iset-left node)
(if (iset-right node) (cons (iset-right node) stack) stack)))
((iset-right node)
(iset-cursor (iset-right node) stack))
((pair? stack)
(iset-cursor (car stack) (cdr stack)))
(else
cur))))
;; Advance to the next node+pos that can be referenced if at the end
;; of this node's range.
(define (iset-cursor-advance cur)
(let ((node (iset-cursor-node cur))
(pos (iset-cursor-pos cur)))
(if (if (iset-bits node) (zero? pos) (> pos (iset-end node)))
(iset-cursor-pop cur)
cur)))
(define (iset-cursor-next iset cur)
(iset-cursor-advance
(let ((node (iset-cursor-node cur))
(pos (iset-cursor-pos cur))
(stack (iset-cursor-stack cur)))
(let ((pos (if (iset-bits node) (bitwise-and pos (- pos 1)) (+ pos 1))))
(make-iset-cursor node pos stack)))))
(define (iset-ref iset cur)
(let ((node (iset-cursor-node cur))
(pos (iset-cursor-pos cur)))
(cond
((iset-bits node)
(if (zero? pos)
(error "cursor reference past end of iset")
(+ (iset-start node)
(integer-length (- pos (bitwise-and pos (- pos 1))))
-1)))
(else
(if (> pos (iset-end node))
(error "cursor reference past end of iset")
pos)))))
(define (end-of-iset? cur)
(let ((node (iset-cursor-node cur)))
(and (if (iset-bits node)
(zero? (iset-cursor-pos cur))
(> (iset-cursor-pos cur) (iset-end node)))
(not (iset-left node))
(not (iset-right node))
(null? (iset-cursor-stack cur)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Equality
(define (iset2= is1 is2)
(let lp ((cur1 (iset-cursor is1))
(cur2 (iset-cursor is2)))
(cond ((end-of-iset? cur1) (end-of-iset? cur2))
((end-of-iset? cur2) #f)
((= (iset-ref is1 cur1) (iset-ref is2 cur2))
(lp (iset-cursor-next is1 cur1) (iset-cursor-next is2 cur2)))
(else #f))))
(define (iset2<= is1 is2)
(let lp ((cur1 (iset-cursor is1))
(cur2 (iset-cursor is2)))
(cond ((end-of-iset? cur1))
((end-of-iset? cur2) #f)
(else
(let ((i1 (iset-ref is1 cur1))
(i2 (iset-ref is1 cur2)))
(cond ((< i1 i2)
(lp (iset-cursor-next is1 cur1) cur2))
((= i1 i2)
(lp (iset-cursor-next is1 cur1)
(iset-cursor-next is2 cur2)))
(else
#f)))))))
(define (iset= . o)
(or (null? o)
(let lp ((a (car o)) (ls (cdr o)))
(or (null? ls) (and (iset2= a (car ls)) (lp (car ls) (cdr ls)))))))
(define (iset<= . o)
(or (null? o)
(let lp ((a (car o)) (ls (cdr o)))
(or (null? ls) (and (iset2<= a (car ls)) (lp (car ls) (cdr ls)))))))
(define (iset>= . o)
(apply iset<= (reverse o)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Folding
(define (iset-fold-node kons knil iset)
(let lp ((is iset) (acc knil))
(let* ((left (iset-left is))
(acc (kons is (if left (lp left acc) acc)))
(right (iset-right is)))
(if right (lp right acc) acc))))
(define (iset-fold kons knil iset)
(iset-fold-node
(lambda (is acc)
(let ((start (iset-start is))
(end (iset-end is))
(bits (iset-bits is)))
(if bits
(let ((limit (+ 1 (- end start))))
(do ((n1 bits n2)
(n2 (bitwise-and bits (- bits 1)) (bitwise-and n2 (- n2 1)))
(acc acc (kons (+ start (integer-length (- n1 n2)) -1) acc)))
((zero? n1) acc)))
(do ((i start (+ i 1))
(acc acc (kons i acc)))
((> i end) acc)))))
knil
iset))
(define (iset-for-each-node proc iset)
(iset-fold-node (lambda (node acc) (proc node)) #f iset))
(define (iset-for-each proc iset)
(iset-fold (lambda (i acc) (proc i)) #f iset))
(define (iset->list iset)
(reverse (iset-fold cons '() iset)))
(define (iset-size iset)
(iset-fold-node
(lambda (is acc)
(let ((bits (iset-bits is)))
(+ acc (if bits
(bit-count bits)
(+ 1 (- (iset-end is) (iset-start is)))))))
0
iset))

View file

@ -0,0 +1,9 @@
(define-library (chibi iset iterators)
(import (scheme) (srfi 9) (srfi 33) (chibi iset base))
(include "iterators.scm")
(export
iset-empty? iset-fold iset-fold-node iset-for-each iset-for-each-node
iset->list iset-size
;; low-level cursors
iset-cursor iset-cursor? iset-cursor-next iset-ref end-of-iset?))

59
tests/iset-tests.scm Normal file
View file

@ -0,0 +1,59 @@
(cond-expand
(modules (import (chibi iset) (srfi 1) (chibi test)))
(else #f))
(test-begin "iset")
(let ((tests
`((() (+ 99) (u 3 50) (? 99))
(() (u 1) (u 1000) (u -1000) (u 3) (u -1))
((17 29) (u 7 29))
((2 3 4) (u 1 2 3 4 5))
((1 2 3 4 5) (u 2 3 4))
((0) (z #f) (- 0) (z))
((0 1 2) (- 1) (- 2) (? 0))
((1 2 3 1000 2000) (u 1 4))
((1 2 3 1000 1005))
((1 128 127))
((129 2 127))
((1 -128 -126))
)))
(for-each
(lambda (tst)
(let* ((ls (car tst))
(is (list->iset ls)))
;; initial creation and sanity checks
(test-assert (lset= equal? ls (iset->list is)))
(test (length ls) (iset-size is))
(for-each
(lambda (x) (test-assert (iset-contains? is x)))
ls)
(test (iset-contains? is 42) (member 42 ls))
;; additional operations
(for-each
(lambda (op)
(case (car op)
((+)
(iset-adjoin! is (cadr op))
(test-assert (iset-contains? is (cadr op))))
((-)
(iset-delete! is (cadr op))
(test-assert (not (iset-contains? is (cadr op)))))
((?)
(test (if (pair? (cddr op)) (car (cddr op)) #t)
(iset-contains? is (cadr op))))
((d)
(set! is (iset-difference is (list->iset (cdr op))))
(for-each (lambda (x) (test-assert (iset-contains? is x))) (cdr op)))
((i) (set! is (iset-intersection is (list->iset (cdr op)))))
((s) (test (iset-size is) (cadr op)))
((u)
(set! is (iset-union is (list->iset (cdr op))))
(for-each (lambda (x) (test-assert (iset-contains? is x))) (cdr op)))
((z) (test (iset-empty? is) (if (pair? (cdr op)) (cadr op) #t)))
(else (error "unknown operation" (car op)))))
(cdr tst))))
tests))
(test-end)