From 06f0cc0225662e14ffa624130f00bcbceb63d330 Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Tue, 23 May 2023 22:03:19 +0900 Subject: [PATCH] Add iset-rank/select. --- lib/chibi/iset-test.sld | 15 +++++++- lib/chibi/iset.sld | 3 +- lib/chibi/iset/iterators.scm | 75 +++++++++++++++++++++++++++++++++--- lib/chibi/iset/iterators.sld | 2 + lib/chibi/loop.sld | 2 +- lib/chibi/loop/loop.scm | 20 ++++++++++ 6 files changed, 109 insertions(+), 8 deletions(-) diff --git a/lib/chibi/iset-test.sld b/lib/chibi/iset-test.sld index 2eb7e3ae..f2888f61 100644 --- a/lib/chibi/iset-test.sld +++ b/lib/chibi/iset-test.sld @@ -241,6 +241,19 @@ (let ((a (%make-iset 65 90 #f #f (%make-iset 97 122 #f #f #f))) (b (list->iset '(45 46 95 126)))) (test-assert (iset-contains? (iset-union a b) 119)) - (test-assert (iset-contains? (iset-union b a) 119))) + (test-assert (iset-contains? (iset-union b a) 119))) + + (let* ((elts '(0 1 5 27 42 113 114 256)) + (is (list->iset elts))) + (test (iota (length elts)) + (map (lambda (elt) (iset-rank is elt)) elts)) + (test elts + (map (lambda (i) (iset-select is i)) (iota (length elts))))) + + (let* ((elts '(903 595 694 581 91 628 648 152 188 29 347 876 381 945 508 890 816 654 871 228 200 397 116 952 60 878 361 205 691 318 87 998 35 886 580 787 856 535 964 133 245 314 711 598 180 984 458 235 599 692 568 1 740 514 995 930 625 638 881 997 412 151 195 512 857 948 956 750 896 813 988 40 85 426 740 83 294 249 235 45 20 784 837 640 56 519 211 780 771 684 408 510 677 773 574 114 537 934 477 136)) + (is (list->iset elts))) + (test elts + (map (lambda (i) (iset-select is i)) + (map (lambda (elt) (iset-rank is elt)) elts)))) (test-end)))) diff --git a/lib/chibi/iset.sld b/lib/chibi/iset.sld index e88a2492..bc76f70e 100644 --- a/lib/chibi/iset.sld +++ b/lib/chibi/iset.sld @@ -19,4 +19,5 @@ iset-difference iset-difference! iset-empty? iset-fold iset-fold-node iset-for-each iset-for-each-node iset-map iset->list iset-size iset= iset<= iset>= - iset-cursor iset-cursor? iset-cursor-next iset-ref end-of-iset?)) + iset-cursor iset-cursor? iset-cursor-next iset-ref end-of-iset? + iset-rank iset-select)) diff --git a/lib/chibi/iset/iterators.scm b/lib/chibi/iset/iterators.scm index 8a98324b..07548986 100644 --- a/lib/chibi/iset/iterators.scm +++ b/lib/chibi/iset/iterators.scm @@ -95,6 +95,75 @@ (not (iset-right node)) (null? (iset-cursor-stack cur))))) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Rank/Select operations, acting directly on isets without an +;; optimized data structure. + +(define (iset-node-size iset) + (if (iset-bits iset) + (bit-count (iset-bits iset)) + (+ 1 (- (iset-end iset) (iset-start iset))))) + +;; Number of bits set in i below index n. +(define (bit-rank i n) + (bit-count (bitwise-and i (- (arithmetic-shift 1 n) 1)))) + +;;> Returns the rank (i.e. index within the iset) of the given +;;> element, a number in [0, size). This can be used to compress an +;;> integer set to a minimal consecutive set of integets. Can also be +;;> thought of as the number of elements in iset smaller than element. +(define (iset-rank iset element) + (let lp ((iset iset) (count 0)) + (cond + ((< element (iset-start iset)) + (if (iset-left iset) + (lp (iset-left iset) count) + (error "integer not in iset" iset element))) + ((> element (iset-end iset)) + (if (iset-right iset) + (lp (iset-right iset) + (+ count + (cond ((iset-left iset) => iset-size) (else 0)) + (iset-node-size iset))) + (error "integer not in iset" iset element))) + ((iset-bits iset) + (+ count + (cond ((iset-left iset) => iset-size) (else 0)) + (bit-rank (iset-bits iset) + (- element (iset-start iset))))) + (else + (+ count + (cond ((iset-left iset) => iset-size) (else 0)) + (integer-length (- element (iset-start iset)))))))) + +(define (nth-set-bit i n) + ;; TODO: optimize + (if (zero? n) + (first-set-bit i) + (nth-set-bit (bitwise-and i (- i 1)) (- n 1)))) + +;;> Selects the index-th element of iset starting at 0. The inverse +;;> operation of \scheme{iset-rank}. +(define (iset-select iset index) + (let lp ((iset iset) (index index) (stack '())) + (if (and iset (iset-left iset)) + (lp (iset-left iset) index (cons iset stack)) + (let ((iset (if iset iset (car stack))) + (stack (if iset stack (cdr stack)))) + (let ((node-size (iset-node-size iset))) + (cond + ((and (< index node-size) (iset-bits iset)) + (+ (iset-start iset) + (nth-set-bit (iset-bits iset) index))) + ((< index node-size) + (+ (iset-start iset) index)) + ((iset-right iset) + (lp (iset-right iset) (- index node-size) stack)) + ((pair? stack) + (lp #f (- index node-size) stack)) + (else + (error "iset index out of range" iset index)))))))) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Equality @@ -201,10 +270,6 @@ (define (iset-size iset) (iset-fold-node - (lambda (is acc) - (let ((bits (iset-bits is))) - (+ acc (if bits - (bit-count bits) - (+ 1 (- (iset-end is) (iset-start is))))))) + (lambda (is acc) (+ acc (iset-node-size is))) 0 iset)) diff --git a/lib/chibi/iset/iterators.sld b/lib/chibi/iset/iterators.sld index 8eb4b8a0..19cdad44 100644 --- a/lib/chibi/iset/iterators.sld +++ b/lib/chibi/iset/iterators.sld @@ -12,5 +12,7 @@ (export iset-empty? iset-fold iset-fold-node iset-for-each iset-for-each-node iset->list iset-size iset= iset<= iset>= + ;; rank/select + iset-rank iset-select ;; low-level cursors iset-cursor iset-cursor? iset-cursor-next iset-ref end-of-iset?)) diff --git a/lib/chibi/loop.sld b/lib/chibi/loop.sld index 1bf5094e..8dbd6736 100644 --- a/lib/chibi/loop.sld +++ b/lib/chibi/loop.sld @@ -2,7 +2,7 @@ (define-library (chibi loop) (export loop for in-list in-lists in-port in-file up-from down-from listing listing-reverse appending appending-reverse - summing multiplying in-string in-string-reverse + summing multiplying in-string in-string-reverse in-substrings in-vector in-vector-reverse) (import (chibi)) (include "loop/loop.scm")) diff --git a/lib/chibi/loop/loop.scm b/lib/chibi/loop/loop.scm index 118199c1..9189a33f 100644 --- a/lib/chibi/loop/loop.scm +++ b/lib/chibi/loop/loop.scm @@ -268,6 +268,26 @@ . rest)) )) +;;> \macro{(for substr (in-substrings k str))} + +(define (string-cursor-forward str cursor n) + (if (positive? n) + (string-cursor-forward str (string-cursor-next str cursor) (- n 1)) + cursor)) + +(define-syntax in-substrings + (syntax-rules () + ((in-substrings ((ch) (k str)) next . rest) + (next ((tmp str) (end (string-cursor-end tmp))) + ((sc1 (string-cursor-start tmp) + (string-cursor-next tmp sc1)) + (sc2 (string-cursor-forward tmp (string-cursor-start tmp) k) + (string-cursor-next tmp sc2))) + ((string-cursor>? sc2 end)) + ((ch (substring-cursor tmp sc1 sc2))) + () + . rest)))) + ;;> \macro{(for ch (in-port [input-port [reader [eof?]]]))} (define-syntax in-port