fix iset-intersection on large trees in the first argument (issue #606)

This commit is contained in:
Alex Shinn 2020-01-27 21:52:17 +08:00
parent 5e3d2284ed
commit 6a2ed9cdb4
3 changed files with 24 additions and 26 deletions

View file

@ -123,6 +123,11 @@
((1 2 3 4 1001 1002 1003 1004 2001 2003 2004) ((1 2 3 4 1001 1002 1003 1004 2001 2003 2004)
(i 1 2 3 4 1001 1004 1005 2000 2001) (i 1 2 3 4 1001 1004 1005 2000 2001)
(= 1 2 3 4 1001 1004 2001)) (= 1 2 3 4 1001 1004 2001))
((0 1 2 3 4 5 6 7 8 9
101 102 103 104 105
1000 1001 1002 1003 1004 1005 1006 1007 1008 1009)
(i 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120)
(= 101 102 103 104 105))
;; difference ;; difference
((1 2 3 4 5) (d 1) (!? 0) (? 2 3 4 5) (!? 6)) ((1 2 3 4 5) (d 1) (!? 0) (? 2 3 4 5) (!? 6))
((1 2 3 4 5) (d 1 2) (!? 0) (? 3 4 5) (!? 6)) ((1 2 3 4 5) (d 1 2) (!? 0) (? 3 4 5) (!? 6))

View file

@ -262,11 +262,6 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; High-level set operations. ;; High-level set operations.
;;
;; Union is optimized to work at the node level. Intersection and
;; difference iterate over individual elements and so have a lot of
;; room for improvement, at the expense of the complexity of
;; iset-adjoin-node!.
(define (iset-union2! a b) (define (iset-union2! a b)
(iset-for-each-node (iset-for-each-node
@ -295,25 +290,23 @@
(define (iset-intersection2! a b) (define (iset-intersection2! a b)
(let lp ((nodes-a (iset->node-list a)) (let lp ((nodes-a (iset->node-list a))
(nodes-b (iset->node-list b))) (nodes-b (iset->node-list b))
(res '()))
(cond (cond
((null? nodes-a) ((or (null? nodes-a) (null? nodes-b))
a) (let ((is (iset)))
((null? nodes-b) (for-each (lambda (x) (iset-adjoin-node! is x)) res)
(iset-bits-set! (car nodes-a) 0) is))
(iset-right-set! (car nodes-a) #f)
a)
((> (iset-start (car nodes-b)) (iset-end (car nodes-a))) ((> (iset-start (car nodes-b)) (iset-end (car nodes-a)))
(iset-bits-set! (car nodes-a) 0) (lp (cdr nodes-a) nodes-b res))
(lp (cdr nodes-a) nodes-b))
((> (iset-start (car nodes-a)) (iset-end (car nodes-b))) ((> (iset-start (car nodes-a)) (iset-end (car nodes-b)))
(lp nodes-a (cdr nodes-b))) (lp nodes-a (cdr nodes-b) res))
(else (else
(let* ((a (car nodes-a)) (let* ((a (car nodes-a))
(b (car nodes-b)) (b (car nodes-b))
(a-ls (iset-node-split a (iset-start b) (iset-end b))) (a-ls (iset-node-split a (iset-start b) (iset-end b)))
(overlap (cadr a-ls)) (overlap (cadr a-ls))
(right (car (cddr a-ls))) (a-right (car (cddr a-ls)))
(b-ls (iset-node-split b (iset-start overlap) (iset-end overlap))) (b-ls (iset-node-split b (iset-start overlap) (iset-end overlap)))
(b-overlap (cadr b-ls)) (b-overlap (cadr b-ls))
(b-right (car (cddr b-ls)))) (b-right (car (cddr b-ls))))
@ -325,18 +318,16 @@
(b-bits (iset-bits b-overlap))) (b-bits (iset-bits b-overlap)))
(iset-bits-set! a (bitwise-and a-bits b-bits))) (iset-bits-set! a (bitwise-and a-bits b-bits)))
(iset-bits-set! a (iset-bits overlap))) (iset-bits-set! a (iset-bits overlap)))
(if right (lp (if a-right (cons a-right (cdr nodes-a)) (cdr nodes-a))
(iset-insert-right! a right)) (if b-right (cons b-right (cdr nodes-b)) (cdr nodes-b))
(lp (if right (cons right (cdr nodes-a)) (cdr nodes-a)) (cons a res)))))))
(if b-right (cons b-right (cdr nodes-b)) (cdr nodes-b))))))))
(define (iset-intersection! a . args) (define (iset-intersection! a . args)
(let ((b (and (pair? args) (car args)))) (let lp ((a a) (ls args))
(cond (if (null? ls)
(b a
(iset-intersection2! a b) (lp (iset-intersection2! a (car ls))
(apply iset-intersection! a (cdr args))) (cdr ls)))))
(else a))))
;;> Returns an iset containing all integers which occur in \var{a} and ;;> Returns an iset containing all integers which occur in \var{a} and
;;> every of the isets \var{args}. If no \var{args} are present ;;> every of the isets \var{args}. If no \var{args} are present

View file

@ -130,6 +130,8 @@
(test 10 (char-set-size (test 10 (char-set-size
(char-set-intersection char-set:ascii char-set:digit))) (char-set-intersection char-set:ascii char-set:digit)))
(test 10 (char-set-size
(char-set-intersection char-set:digit char-set:ascii)))
(test 5 (char-set-count vowel? char-set:ascii)) (test 5 (char-set-count vowel? char-set:ascii))