From 6a2ed9cdb4fb87f78287b5dd4f5a894c891e0b87 Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Mon, 27 Jan 2020 21:52:17 +0800 Subject: [PATCH] fix iset-intersection on large trees in the first argument (issue #606) --- lib/chibi/iset-test.sld | 5 ++++ lib/chibi/iset/constructors.scm | 43 +++++++++++++-------------------- lib/srfi/14/test.sld | 2 ++ 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/lib/chibi/iset-test.sld b/lib/chibi/iset-test.sld index faed5853..2eb7e3ae 100644 --- a/lib/chibi/iset-test.sld +++ b/lib/chibi/iset-test.sld @@ -123,6 +123,11 @@ ((1 2 3 4 1001 1002 1003 1004 2001 2003 2004) (i 1 2 3 4 1001 1004 1005 2000 2001) (= 1 2 3 4 1001 1004 2001)) + ((0 1 2 3 4 5 6 7 8 9 + 101 102 103 104 105 + 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009) + (i 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120) + (= 101 102 103 104 105)) ;; difference ((1 2 3 4 5) (d 1) (!? 0) (? 2 3 4 5) (!? 6)) ((1 2 3 4 5) (d 1 2) (!? 0) (? 3 4 5) (!? 6)) diff --git a/lib/chibi/iset/constructors.scm b/lib/chibi/iset/constructors.scm index 51eabb57..8b514251 100644 --- a/lib/chibi/iset/constructors.scm +++ b/lib/chibi/iset/constructors.scm @@ -262,11 +262,6 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; High-level set operations. -;; -;; Union is optimized to work at the node level. Intersection and -;; difference iterate over individual elements and so have a lot of -;; room for improvement, at the expense of the complexity of -;; iset-adjoin-node!. (define (iset-union2! a b) (iset-for-each-node @@ -295,25 +290,23 @@ (define (iset-intersection2! a b) (let lp ((nodes-a (iset->node-list a)) - (nodes-b (iset->node-list b))) + (nodes-b (iset->node-list b)) + (res '())) (cond - ((null? nodes-a) - a) - ((null? nodes-b) - (iset-bits-set! (car nodes-a) 0) - (iset-right-set! (car nodes-a) #f) - a) + ((or (null? nodes-a) (null? nodes-b)) + (let ((is (iset))) + (for-each (lambda (x) (iset-adjoin-node! is x)) res) + is)) ((> (iset-start (car nodes-b)) (iset-end (car nodes-a))) - (iset-bits-set! (car nodes-a) 0) - (lp (cdr nodes-a) nodes-b)) + (lp (cdr nodes-a) nodes-b res)) ((> (iset-start (car nodes-a)) (iset-end (car nodes-b))) - (lp nodes-a (cdr nodes-b))) + (lp nodes-a (cdr nodes-b) res)) (else (let* ((a (car nodes-a)) (b (car nodes-b)) (a-ls (iset-node-split a (iset-start b) (iset-end b))) (overlap (cadr a-ls)) - (right (car (cddr a-ls))) + (a-right (car (cddr a-ls))) (b-ls (iset-node-split b (iset-start overlap) (iset-end overlap))) (b-overlap (cadr b-ls)) (b-right (car (cddr b-ls)))) @@ -325,18 +318,16 @@ (b-bits (iset-bits b-overlap))) (iset-bits-set! a (bitwise-and a-bits b-bits))) (iset-bits-set! a (iset-bits overlap))) - (if right - (iset-insert-right! a right)) - (lp (if right (cons right (cdr nodes-a)) (cdr nodes-a)) - (if b-right (cons b-right (cdr nodes-b)) (cdr nodes-b)))))))) + (lp (if a-right (cons a-right (cdr nodes-a)) (cdr nodes-a)) + (if b-right (cons b-right (cdr nodes-b)) (cdr nodes-b)) + (cons a res))))))) (define (iset-intersection! a . args) - (let ((b (and (pair? args) (car args)))) - (cond - (b - (iset-intersection2! a b) - (apply iset-intersection! a (cdr args))) - (else a)))) + (let lp ((a a) (ls args)) + (if (null? ls) + a + (lp (iset-intersection2! a (car ls)) + (cdr ls))))) ;;> Returns an iset containing all integers which occur in \var{a} and ;;> every of the isets \var{args}. If no \var{args} are present diff --git a/lib/srfi/14/test.sld b/lib/srfi/14/test.sld index 83839066..c0c9c484 100644 --- a/lib/srfi/14/test.sld +++ b/lib/srfi/14/test.sld @@ -130,6 +130,8 @@ (test 10 (char-set-size (char-set-intersection char-set:ascii char-set:digit))) + (test 10 (char-set-size + (char-set-intersection char-set:digit char-set:ascii))) (test 5 (char-set-count vowel? char-set:ascii))