From 0f84fac70dc859709a4c9042dd39a17c1418faf6 Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Sat, 26 Aug 2017 20:34:56 +0900 Subject: [PATCH] adding (srfi 14) --- lib/chibi/char-set/extras.scm | 20 +-- lib/chibi/iset/iterators.scm | 12 +- lib/srfi/14.sld | 103 ++++++++++++++++ lib/srfi/14/test.sld | 221 ++++++++++++++++++++++++++++++++++ tests/lib-tests.scm | 2 + tools/optimize-char-sets.scm | 2 +- 6 files changed, 344 insertions(+), 16 deletions(-) create mode 100644 lib/srfi/14.sld create mode 100644 lib/srfi/14/test.sld diff --git a/lib/chibi/char-set/extras.scm b/lib/chibi/char-set/extras.scm index 96846e42..c2c6866d 100644 --- a/lib/chibi/char-set/extras.scm +++ b/lib/chibi/char-set/extras.scm @@ -2,9 +2,11 @@ (define (char-set . args) (list->char-set args)) -;; This is a mistake in the SRFI-14 design - end should be inclusive. -(define (ucs-range->char-set start end) - (make-iset start (- end 1))) +(define (ucs-range->char-set start end . o) + (let ((res (make-iset start (- end 1)))) + (if (and (pair? o) (pair? (cdr o))) + (iset-union res (cadr o)) + res))) (define char-set-copy iset-copy) @@ -16,8 +18,8 @@ (define (char-set-for-each proc cset) (iset-for-each (lambda (i) (proc (integer->char i))) cset)) -(define (list->char-set ls) - (list->iset (map char->integer ls))) +(define (list->char-set ls . o) + (apply list->iset (map char->integer ls) o)) (define (char-set->list cset) (map integer->char (iset->list cset))) @@ -26,10 +28,10 @@ (define (char-set->string cset) (list->string (char-set->list cset))) -(define (char-set-adjoin! cset ch) - (iset-adjoin! cset (char->integer ch))) -(define (char-set-adjoin cset ch) - (iset-adjoin cset (char->integer ch))) +(define (char-set-adjoin! cset . o) + (apply iset-adjoin! cset (map char->integer o))) +(define (char-set-adjoin cset . o) + (apply iset-adjoin cset (map char->integer o))) (define char-set-union iset-union) (define char-set-union! iset-union!) diff --git a/lib/chibi/iset/iterators.scm b/lib/chibi/iset/iterators.scm index bd43f99c..8a98324b 100644 --- a/lib/chibi/iset/iterators.scm +++ b/lib/chibi/iset/iterators.scm @@ -84,10 +84,10 @@ (error "cursor reference past end of iset") pos))))) -;;> Returns true iff \var{cur} is at the end of \var{iset}, such that +;;> Returns true iff \var{cur} is at the end of iset, such that ;;> \scheme{iset-ref} is no longer valid. -(define (end-of-iset? iset cur) +(define (end-of-iset? cur) (let ((node (iset-cursor-node cur))) (and (if (iset-bits node) (zero? (iset-cursor-pos cur)) @@ -101,8 +101,8 @@ (define (iset2= is1 is2) (let lp ((cur1 (iset-cursor is1)) (cur2 (iset-cursor is2))) - (cond ((end-of-iset? is1 cur1) (end-of-iset? is2 cur2)) - ((end-of-iset? is2 cur2) #f) + (cond ((end-of-iset? cur1) (end-of-iset? cur2)) + ((end-of-iset? cur2) #f) ((= (iset-ref is1 cur1) (iset-ref is2 cur2)) (lp (iset-cursor-next is1 cur1) (iset-cursor-next is2 cur2))) (else @@ -111,8 +111,8 @@ (define (iset2<= is1 is2) (let lp ((cur1 (iset-cursor is1)) (cur2 (iset-cursor is2))) - (cond ((end-of-iset? is1 cur1)) - ((end-of-iset? is2 cur2) #f) + (cond ((end-of-iset? cur1)) + ((end-of-iset? cur2) #f) (else (let ((i1 (iset-ref is1 cur1)) (i2 (iset-ref is1 cur2))) diff --git a/lib/srfi/14.sld b/lib/srfi/14.sld new file mode 100644 index 00000000..7d896362 --- /dev/null +++ b/lib/srfi/14.sld @@ -0,0 +1,103 @@ + +(define-library (srfi 14) + (export + char-set? char-set= char-set<= + char-set-hash + char-set-cursor char-set-ref char-set-cursor-next end-of-char-set? + char-set-fold char-set-unfold char-set-unfold! + char-set-for-each char-set-map + char-set-copy char-set + + list->char-set string->char-set + list->char-set! string->char-set! + + char-set-filter ucs-range->char-set ->char-set + char-set-filter! ucs-range->char-set! + + char-set->list char-set->string + + char-set-size char-set-count char-set-contains? + char-set-every char-set-any + + char-set-adjoin char-set-delete + char-set-adjoin! char-set-delete! + + char-set-complement char-set-union char-set-intersection + char-set-complement! char-set-union! char-set-intersection! + + char-set-difference char-set-xor char-set-diff+intersection + char-set-difference! char-set-xor! char-set-diff+intersection! + + char-set:lower-case char-set:upper-case char-set:title-case + char-set:letter char-set:digit char-set:letter+digit + char-set:graphic char-set:printing char-set:whitespace + char-set:iso-control char-set:punctuation char-set:symbol + char-set:hex-digit char-set:blank char-set:ascii + char-set:empty char-set:full) + (import (scheme base) + (chibi char-set) + (chibi char-set full) + (chibi iset) + (only (srfi 125) hash)) + (begin + (define char-set= iset=) + (define char-set<= iset<=) + (define char-set-hash hash) + (define char-set-cursor iset-cursor) + (define char-set-cursor-next iset-cursor-next) + (define (char-set-ref cs cur) (integer->char (iset-ref cs cur))) + (define end-of-char-set? end-of-iset?) + (define (char-set-fold kons knil cs) + (iset-fold (lambda (i x) (kons (integer->char i) x)) knil cs)) + (define (char-set-unfold! p f g seed cs) + (let lp ((seed seed) (cs cs)) + (if (p seed) + cs + (lp (g seed) (char-set-adjoin! cs (f seed)))))) + (define (char-set-unfold p f g seed . o) + (let ((cs (if (pair? o) (char-set-copy (car o)) (char-set)))) + (char-set-unfold! p f g seed cs))) + (define (char-set-map proc cs) + (iset-map (lambda (i) (char->integer (proc (integer->char i)))) cs)) + (define list->char-set! list->char-set) + (define string->char-set! string->char-set) + (define ucs-range->char-set! ucs-range->char-set) + (define (->char-set x) + (cond ((char? x) (char-set x)) + ((pair? x) (list->char-set x)) + ((string? x) (string->char-set x)) + (else x))) + (define (char-set-delete cs . o) + (apply iset-delete cs (map char->integer o))) + (define (char-set-delete! cs . o) + (apply iset-delete! cs (map char->integer o))) + (define char-set-complement! char-set-complement) + (define (char-set-filter pred cs . o) + (char-set-fold + (lambda (ch res) (if (pred ch) (char-set-adjoin! res ch) res)) + (if (pair? o) (char-set-copy (car o)) (char-set)) + cs)) + (define char-set-filter! char-set-filter) + (define (char-set-count pred cs) + (char-set-fold (lambda (ch i) (if (pred ch) (+ i 1) i)) 0 cs)) + (define (char-set-any pred cs) + (let lp ((cur (char-set-cursor cs))) + (if (end-of-char-set? cur) + #f + (or (pred (char-set-ref cs cur)) + (lp (char-set-cursor-next cs cur)))))) + (define (char-set-every pred cs) + (not (char-set-any (lambda (ch) (not (pred ch))) cs))) + (define (char-set-xor2 cs1 cs2) + (char-set-union (char-set-difference cs1 cs2) + (char-set-difference cs2 cs1))) + (define (char-set-xor . o) + (cond + ((null? o) (char-set)) + ((null? (cdr o)) (char-set-copy (car o))) + (else (apply char-set-xor (char-set-xor2 (car o) (cadr o)) (cddr o))))) + (define char-set-xor! char-set-xor) + (define (char-set-diff+intersection . o) + (values (apply char-set-difference o) + (apply char-set-intersection o))) + (define char-set-diff+intersection! char-set-diff+intersection))) diff --git a/lib/srfi/14/test.sld b/lib/srfi/14/test.sld new file mode 100644 index 00000000..83839066 --- /dev/null +++ b/lib/srfi/14/test.sld @@ -0,0 +1,221 @@ + +(define-library (srfi 14 test) + (import (scheme base) (scheme char) (srfi 14) (chibi test)) + (export run-tests) + (begin + (define-syntax test-cs + (syntax-rules () + ((test-cs . o) + (test-equal char-set= . o)))) + (define (vowel? c) (member c '(#\a #\e #\i #\o #\u))) + (define (run-tests) + (test-begin "srfi-14: char-sets") + (test-not (char-set? 5)) + + (test-assert (char-set? (char-set #\a #\e #\i #\o #\u))) + + (test-assert (char-set=)) + (test-assert (char-set= (char-set))) + + (test-cs (char-set #\a #\e #\i #\o #\u) + (string->char-set "ioeauaiii")) + + (test-not (char-set= (char-set #\e #\i #\o #\u) + (string->char-set "ioeauaiii"))) + + (test-assert (char-set<=)) + (test-assert (char-set<= (char-set))) + + (test-assert (char-set<= (char-set #\a #\e #\i #\o #\u) + (string->char-set "ioeauaiii"))) + + (test-assert (char-set<= (char-set #\e #\i #\o #\u) + (string->char-set "ioeauaiii"))) + + (test-assert (<= 0 (char-set-hash char-set:graphic 100) 99)) + + (test 4 (char-set-fold (lambda (c i) (+ i 1)) 0 + (char-set #\e #\i #\o #\u #\e #\e))) + + (test-cs (string->char-set "eiaou2468013579999") + (char-set-unfold null? car cdr + '(#\a #\e #\i #\o #\u #\u #\u) + (char-set-intersection char-set:ascii + char-set:digit))) + + (test-cs (string->char-set "eiaou246801357999") + (char-set-unfold! null? car cdr '(#\a #\e #\i #\o #\u) + (string->char-set "0123456789"))) + + (test-not (char-set= (string->char-set "eiaou246801357") + (char-set-unfold! null? car cdr + '(#\a #\e #\i #\o #\u) + (string->char-set "0123456789")))) + + (let ((cs (string->char-set "0123456789"))) + (char-set-for-each (lambda (c) (set! cs (char-set-delete cs c))) + (string->char-set "02468000")) + (test-cs cs (string->char-set "97531"))) + + (test-not (let ((cs (string->char-set "0123456789"))) + (char-set-for-each (lambda (c) (set! cs (char-set-delete cs c))) + (string->char-set "02468")) + (char-set= cs (string->char-set "7531")))) + + (test-cs (string->char-set "IOUAEEEE") + (char-set-map char-upcase (string->char-set "aeiou"))) + + (test-not (char-set= (char-set-map char-upcase (string->char-set "aeiou")) + (string->char-set "OUAEEEE"))) + + (test-cs (string->char-set "aeiou") + (char-set-copy (string->char-set "aeiou"))) + + (test-cs (string->char-set "xy") (char-set #\x #\y)) + (test-not (char-set= (char-set #\x #\y #\z) (string->char-set "xy"))) + + (test-cs (string->char-set "xy") (list->char-set '(#\x #\y))) + (test-not (char-set= (string->char-set "axy") + (list->char-set '(#\x #\y)))) + + (test-cs (string->char-set "xy12345") + (list->char-set '(#\x #\y) (string->char-set "12345"))) + (test-not (char-set= (string->char-set "y12345") + (list->char-set '(#\x #\y) + (string->char-set "12345")))) + + (test-cs (string->char-set "xy12345") + (list->char-set! '(#\x #\y) (string->char-set "12345"))) + (test-not (char-set= (string->char-set "y12345") + (list->char-set! '(#\x #\y) + (string->char-set "12345")))) + + (test-cs (string->char-set "aeiou12345") + (char-set-filter vowel? + char-set:ascii + (string->char-set "12345"))) + (test-not (char-set= (string->char-set "aeou12345") + (char-set-filter vowel? + char-set:ascii + (string->char-set "12345")))) + + (test-cs (string->char-set "aeiou12345") + (char-set-filter! vowel? + char-set:ascii + (string->char-set "12345"))) + (test-not (char-set= (string->char-set "aeou12345") + (char-set-filter! vowel? + char-set:ascii + (string->char-set "12345")))) + + (test-cs (string->char-set "abcdef12345") + (ucs-range->char-set 97 103 #t (string->char-set "12345"))) + (test-not (char-set= + (string->char-set "abcef12345") + (ucs-range->char-set 97 103 #t (string->char-set "12345")))) + + (test-cs (string->char-set "abcdef12345") + (ucs-range->char-set! 97 103 #t (string->char-set "12345"))) + (test-not (char-set= + (string->char-set "abcef12345") + (ucs-range->char-set! 97 103 #t (string->char-set "12345")))) + + (test-assert (char-set= (->char-set #\x) + (->char-set "x") + (->char-set (char-set #\x)))) + + (test-not (char-set= (->char-set #\x) + (->char-set "y") + (->char-set (char-set #\x)))) + + (test 10 (char-set-size + (char-set-intersection char-set:ascii char-set:digit))) + + (test 5 (char-set-count vowel? char-set:ascii)) + + (test '(#\x) (char-set->list (char-set #\x))) + (test-not (equal? '(#\X) (char-set->list (char-set #\x)))) + + (test "x" (char-set->string (char-set #\x))) + (test-not (equal? "X" (char-set->string (char-set #\x)))) + + (test-assert (char-set-contains? (->char-set "xyz") #\x)) + (test-not (char-set-contains? (->char-set "xyz") #\a)) + + (test-assert (char-set-every char-lower-case? (->char-set "abcd"))) + (test-not (char-set-every char-lower-case? (->char-set "abcD"))) + (test-assert (char-set-any char-lower-case? (->char-set "abcd"))) + (test-not (char-set-any char-lower-case? (->char-set "ABCD"))) + + (test-cs (->char-set "ABCD") + (let ((cs (->char-set "abcd"))) + (let lp ((cur (char-set-cursor cs)) (ans '())) + (if (end-of-char-set? cur) (list->char-set ans) + (lp (char-set-cursor-next cs cur) + (cons (char-upcase (char-set-ref cs cur)) ans)))))) + + + (test-cs (->char-set "123xa") + (char-set-adjoin (->char-set "123") #\x #\a)) + (test-not (char-set= (char-set-adjoin (->char-set "123") #\x #\a) + (->char-set "123x"))) + (test-cs (->char-set "123xa") + (char-set-adjoin! (->char-set "123") #\x #\a)) + (test-not (char-set= (char-set-adjoin! (->char-set "123") #\x #\a) + (->char-set "123x"))) + + (test-cs (->char-set "13") + (char-set-delete (->char-set "123") #\2 #\a #\2)) + (test-not (char-set= (char-set-delete (->char-set "123") #\2 #\a #\2) + (->char-set "13a"))) + (test-cs (->char-set "13") + (char-set-delete! (->char-set "123") #\2 #\a #\2)) + (test-not (char-set= (char-set-delete! (->char-set "123") #\2 #\a #\2) + (->char-set "13a"))) + + (test-cs (->char-set "abcdefABCDEF") + (char-set-intersection char-set:hex-digit + (char-set-complement char-set:digit))) + (test-cs (->char-set "abcdefABCDEF") + (char-set-intersection! + (char-set-complement! (->char-set "0123456789")) + char-set:hex-digit)) + + (test-cs (->char-set "abcdefABCDEFghijkl0123456789") + (char-set-union char-set:hex-digit + (->char-set "abcdefghijkl"))) + (test-cs (->char-set "abcdefABCDEFghijkl0123456789") + (char-set-union! (->char-set "abcdefghijkl") + char-set:hex-digit)) + + (test-cs (->char-set "ghijklmn") + (char-set-difference (->char-set "abcdefghijklmn") + char-set:hex-digit)) + (test-cs (->char-set "ghijklmn") + (char-set-difference! (->char-set "abcdefghijklmn") + char-set:hex-digit)) + + (test-cs (->char-set "abcdefABCDEF") + (char-set-xor (->char-set "0123456789") + char-set:hex-digit)) + (test-cs (->char-set "abcdefABCDEF") + (char-set-xor! (->char-set "0123456789") + char-set:hex-digit)) + + (call-with-values + (lambda () + (char-set-diff+intersection char-set:hex-digit + char-set:letter)) + (lambda (d i) + (test-cs d (->char-set "0123456789")) + (test-cs i (->char-set "abcdefABCDEF")))) + + (call-with-values + (lambda () + (char-set-diff+intersection! (char-set-copy char-set:hex-digit) + (char-set-copy char-set:letter))) + (lambda (d i) + (test-cs d (->char-set "0123456789")) + (test-cs i (->char-set "abcdefABCDEF")))) + + (test-end)))) diff --git a/tests/lib-tests.scm b/tests/lib-tests.scm index b96c1e13..7b80622e 100644 --- a/tests/lib-tests.scm +++ b/tests/lib-tests.scm @@ -3,6 +3,7 @@ (chibi test) (rename (srfi 1 test) (run-tests run-srfi-1-tests)) (rename (srfi 2 test) (run-tests run-srfi-2-tests)) + (rename (srfi 14 test) (run-tests run-srfi-14-tests)) (rename (srfi 16 test) (run-tests run-srfi-16-tests)) (rename (srfi 18 test) (run-tests run-srfi-18-tests)) (rename (srfi 26 test) (run-tests run-srfi-26-tests)) @@ -54,6 +55,7 @@ (run-srfi-1-tests) (run-srfi-2-tests) +(run-srfi-14-tests) (run-srfi-16-tests) (run-srfi-18-tests) (run-srfi-26-tests) diff --git a/tools/optimize-char-sets.scm b/tools/optimize-char-sets.scm index 79b208ca..d020bf1b 100644 --- a/tools/optimize-char-sets.scm +++ b/tools/optimize-char-sets.scm @@ -48,7 +48,7 @@ (do ((cur (iset-cursor value) (iset-cursor-next value cur)) (res '() (cons (iset-ref value cur) res))) - ((end-of-iset? value cur) (reverse res))))) + ((end-of-iset? cur) (reverse res))))) (error "error in iset cursors")) (display " computing intersection\n" (current-error-port)) (let* ((iset1 (if ascii?