diff --git a/.hgignore b/.hgignore index 9d217d26..babe41d2 100644 --- a/.hgignore +++ b/.hgignore @@ -17,4 +17,5 @@ junk* gc gc6.8 chibi-scheme +chibi-scheme-static include/chibi/install.h diff --git a/Makefile b/Makefile index e62212b5..7015597b 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ ifeq ($(PLATFORM),macosx) SO = .dylib EXE = CLIBFLAGS = -dynamiclib -STATICFLAGS = -static-libgcc +STATICFLAGS = -static-libgcc -DUSE_DL=0 else ifeq ($(PLATFORM),mingw) SO = .dll @@ -44,7 +44,7 @@ else SO = .so EXE = CLIBFLAGS = -fPIC -shared -STATICFLAGS = -static +STATICFLAGS = -static -DUSE_DL=0 endif endif @@ -113,9 +113,12 @@ test-basic: chibi-scheme$(EXE) fi; \ done -test-numbers: chibi-scheme$(EXE) +test-numbers: all LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./chibi-scheme$(EXE) tests/numeric-tests.scm +test-match: all + LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./chibi-scheme$(EXE) tests/match-tests.scm + test: all LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH ./chibi-scheme$(EXE) tests/r5rs-tests.scm diff --git a/lib/chibi/match/match.scm b/lib/chibi/match/match.scm index 6557ac0e..6a6407b5 100644 --- a/lib/chibi/match/match.scm +++ b/lib/chibi/match/match.scm @@ -21,10 +21,14 @@ ;; http://groups.google.com/group/comp.lang.scheme/msg/0941234de7112ffd ;; and is still available at ;; http://synthcode.com/scheme/match-simple.scm -;; A variant of this file which uses COND-EXPAND in a few places can -;; be found at +;; It's just 80 lines for the core MATCH, and an extra 40 lines for +;; MATCH-LET, MATCH-LAMBDA and other syntactic sugar. +;; +;; A variant of this file which uses COND-EXPAND in a few places for +;; performance can be found at ;; http://synthcode.com/scheme/match-cond-expand.scm ;; +;; 2009/11/25 - adding `***' tree search patterns ;; 2008/03/20 - fixing bug where (a ...) matched non-lists ;; 2008/03/15 - removing redundant check in vector patterns ;; 2008/03/06 - you can use `...' portably now (thanks to Taylor Campbell) @@ -121,7 +125,7 @@ ;; pattern so far. (define-syntax match-two - (syntax-rules (_ ___ quote quasiquote ? $ = and or not set! get!) + (syntax-rules (_ ___ *** quote quasiquote ? $ = and or not set! get!) ((match-two v () g s (sk ...) fk i) (if (null? v) (sk ... i) fk)) ((match-two v (quote p) g s (sk ...) fk i) @@ -135,10 +139,7 @@ ((match-two v (or p) g s sk fk i) (match-one v p g s sk fk i)) ((match-two v (or p ...) g s sk fk i) - (match-extract-vars (or p ...) - (match-gen-or v (p ...) g s sk fk i) - i - ())) + (match-extract-vars (or p ...) (match-gen-or v (p ...) g s sk fk i) i ())) ((match-two v (not p) g s (sk ...) fk i) (match-one v p g s (match-drop-ids fk) (sk ... i) i)) ((match-two v (get! getter) g s (sk ...) fk i) @@ -154,17 +155,21 @@ (match-extract-vars p (match-gen-ellipses v p r g s sk fk i) i ())) ((match-two v (p) g s sk fk i) (if (and (pair? v) (null? (cdr v))) - (let ((w (car v))) - (match-one w p (car v) (set-car! v) sk fk i)) - fk)) + (let ((w (car v))) + (match-one w p (car v) (set-car! v) sk fk i)) + fk)) + ((match-two v (p *** q) g s sk fk i) + (match-extract-vars p (match-gen-search v p q g s sk fk i) i ())) + ((match-two v (p *** . q) g s sk fk i) + (match-syntax-error "invalid use of ***" (p *** . q))) ((match-two v (p . q) g s sk fk i) (if (pair? v) - (let ((w (car v)) (x (cdr v))) - (match-one w p (car v) (set-car! v) - (match-one x q (cdr v) (set-cdr! v) sk fk) - fk - i)) - fk)) + (let ((w (car v)) (x (cdr v))) + (match-one w p (car v) (set-car! v) + (match-one x q (cdr v) (set-cdr! v) sk fk) + fk + i)) + fk)) ((match-two v #(p ...) g s sk fk i) (match-vector v 0 () (p ...) sk fk i)) ((match-two v _ g s (sk ...) fk i) (sk ... i)) @@ -234,10 +239,12 @@ (syntax-rules () ((_ expr ids ...) expr))) -;; Generating OR clauses just involves binding the success -;; continuation into a thunk which takes the identifiers common to -;; each OR clause, and trying each clause, calling the thunk as soon -;; as we succeed. +;; To expand an OR group we try each clause in succession, passing the +;; first that succeeds to the success continuation. On failure for +;; any clause, we just try the next clause, finally resorting to the +;; failure continuation fk if all clauses fail. The only trick is +;; that we want to unify the identifiers, so that the success +;; continuation can refer to a variable from any of the OR clauses. (define-syntax match-gen-or (syntax-rules () @@ -262,16 +269,19 @@ ;; We match a pattern (p ...) by matching the pattern p in a loop on ;; each element of the variable, accumulating the bound ids into lists. -;; Look at the body - it's just a named let loop, matching each -;; element in turn to the same pattern. This illustrates the -;; simplicity of this generative-style pattern matching. It would be -;; just as easy to implement a tree searching pattern. +;; Look at the body of the simple case - it's just a named let loop, +;; matching each element in turn to the same pattern. The only trick +;; is that we want to keep track of the lists of each extracted id, so +;; when the loop recurses we cons the ids onto their respective list +;; variables, and on success we bind the ids (what the user input and +;; expects to see in the success body) to the reversed accumulated +;; list IDs. (define-syntax match-gen-ellipses (syntax-rules () ((_ v p () g s (sk ...) fk i ((id id-ls) ...)) (match-check-identifier p - ;; simplest case equivalent to ( . p), just bind the list + ;; simplest case equivalent to (p ...), just bind the list (let ((p v)) (if (list? p) (sk ... i) @@ -288,11 +298,12 @@ fk i))) (else fk))))) - ((_ v p (r ...) g s (sk ...) fk i ((id id-ls) ...)) - ;; general case, trailing patterns to match + ((_ v p r g s (sk ...) fk i ((id id-ls) ...)) + ;; general case, trailing patterns to match, keep track of the + ;; remaining list length so we don't need any backtracking (match-verify-no-ellipses - (r ...) - (let* ((tail-len (length '(r ...))) + r + (let* ((tail-len (length 'r)) (ls v) (len (length ls))) (if (< len tail-len) @@ -301,7 +312,7 @@ (cond ((= n tail-len) (let ((id (reverse id-ls)) ...) - (match-one ls (r ...) #f #f (sk ... i) fk i))) + (match-one ls r #f #f (sk ... i) fk i))) ((pair? ls) (let ((w (car ls))) (match-one w p (car ls) (set-car! ls) @@ -310,8 +321,73 @@ fk i))) (else - fk))))))) - )) + fk))))))))) + +;; Matching a tree search pattern is only slightly more complicated. +;; Here we allow patterns of the form +;; +;; (x *** y) +;; +;; to represent the pattern y located somewhere in a tree where the +;; path from the current object to y can be seen as a list of the form +;; (X ...). Y can immediately match the current object in which case +;; the path is the empty list. In a sense it's a 2-dimensional +;; version of the ... pattern. +;; +;; As a common case the pattern (_ *** y) can be used to search for Y +;; anywhere in a tree, regardless of the path used. +;; +;; To implement the search, we use two recursive procedures. TRY +;; attempts to match Y once, and on success it calls the normal SK on +;; the accumulated list ids as in MATCH-GEN-ELLIPSES. On failure, we +;; call NEXT which first checks if the current value is a list +;; beginning with X, then calls TRY on each remaining element of the +;; list. Since TRY will recursively call NEXT again on failure, this +;; effects a full depth-first search. +;; +;; The failure continuation throughout is a jump to the next step in +;; the tree search, initialized with the original failure continuation +;; FK. + +(define-syntax match-gen-search + (syntax-rules () + ((match-gen-search v p q g s sk fk i ((id id-ls) ...)) + (letrec ((try (lambda (w fail id-ls ...) + (match-one w q g s + (match-drop-ids + (let ((id (reverse id-ls)) ...) + sk)) + (next w fail id-ls ...) i))) + (next (lambda (w fail id-ls ...) + (if (not (pair? w)) + (fail) + (let ((u (car w))) + (match-one + u p (car w) (set-car! w) + (match-drop-ids + ;; accumulate the head variables from + ;; the p pattern, and loop over the tail + (let ((id-ls (cons id id-ls)) ...) + (let lp ((ls (cdr w))) + (if (pair? ls) + (try (car ls) + (lambda () (lp (cdr ls))) + id-ls ...) + (fail))))) + (fail) i)))))) + ;; the initial id-ls binding here is a dummy to get the right + ;; number of '()s + (let ((id-ls '()) ...) + (try v (lambda () fk) id-ls ...)))))) + +;; This is just a safety check. Although unlike syntax-rules we allow +;; trailing patterns after an ellipses, we explicitly disable multiple +;; ellipses at the same level. This is because in the general case +;; such patterns are exponential in the number of ellipses, and we +;; don't want to make it easy to construct very expensive operations +;; with simple looking patterns. For example, it would be O(n^2) for +;; patterns like (a ... b ...) because we must consider every trailing +;; element for every possible break for the leading "a ...". (define-syntax match-verify-no-ellipses (syntax-rules () @@ -321,7 +397,9 @@ (match-syntax-error "multiple ellipse patterns not allowed at same level") (match-verify-no-ellipses y sk))) - ((_ x sk) sk) + ((_ () sk) sk) + ((_ x sk) + (match-syntax-error "dotted tail not allowed after ellipse" x)) )) ;; Vector patterns are just more of the same, with the slight @@ -332,10 +410,10 @@ (syntax-rules (___) ((_ v n pats (p q) sk fk i) (match-check-ellipse q - (match-vector-ellipses v n pats p sk fk i) + (match-gen-vector-ellipses v n pats p sk fk i) (match-vector-two v n pats (p q) sk fk i))) ((_ v n pats (p ___) sk fk i) - (match-vector-ellipses v n pats p sk fk i)) + (match-gen-vector-ellipses v n pats p sk fk i)) ((_ . x) (match-vector-two . x)))) @@ -366,7 +444,7 @@ ;; With a vector ellipse pattern we first check to see if the vector ;; length is at least the required length. -(define-syntax match-vector-ellipses +(define-syntax match-gen-vector-ellipses (syntax-rules () ((_ v n ((pat index) ...) p sk fk i) (if (vector? v) @@ -396,13 +474,18 @@ ;; Extract all identifiers in a pattern. A little more complicated ;; than just looking for symbols, we need to ignore special keywords -;; and not pattern forms (such as the predicate expression in ? -;; patterns). +;; and non-pattern forms (such as the predicate expression in ? +;; patterns), and also ignore previously bound identifiers. +;; +;; Calls the continuation with all new vars as a list of the form +;; ((orig-var tmp-name) ...), where tmp-name can be used to uniquely +;; pair with the original variable (e.g. it's used in the ellipse +;; generation for list variables). ;; ;; (match-extract-vars pattern continuation (ids ...) (new-vars ...)) (define-syntax match-extract-vars - (syntax-rules (_ ___ ? $ = quote quasiquote and or not get! set!) + (syntax-rules (_ ___ *** ? $ = quote quasiquote and or not get! set!) ((match-extract-vars (? pred . p) k i v) (match-extract-vars p k i v)) ((match-extract-vars ($ rec . p) k i v) @@ -432,6 +515,7 @@ (match-extract-vars (p ...) k i v)) ((match-extract-vars _ (k ...) i v) (k ... v)) ((match-extract-vars ___ (k ...) i v) (k ... v)) + ((match-extract-vars *** (k ...) i v) (k ... v)) ;; This is the main part, the only place where we might add a new ;; var if it's an unbound symbol. ((match-extract-vars p (k ...) (i ...) v) @@ -518,8 +602,7 @@ (match-let/helper let (v ... (tmp expr)) (p ... (#(a ...) tmp)) rest . body)) ((_ let (v ...) (p ...) ((a expr) . rest) . body) - (match-let/helper let (v ... (a expr)) (p ...) rest . body)) - )) + (match-let/helper let (v ... (a expr)) (p ...) rest . body)))) (define-syntax match-named-let (syntax-rules () @@ -585,5 +668,4 @@ ((sym? x sk fk) sk) ;; otherwise x is a non-symbol datum ((sym? y sk fk) fk)))) - (sym? abracadabra success-k failure-k))) - )) + (sym? abracadabra success-k failure-k))))) diff --git a/tests/match-tests.scm b/tests/match-tests.scm new file mode 100644 index 00000000..a223e729 --- /dev/null +++ b/tests/match-tests.scm @@ -0,0 +1,196 @@ + +(import (chibi match)) + +(define *tests-run* 0) +(define *tests-passed* 0) + +(define-syntax test + (syntax-rules () + ((test name expr expect) + (begin + (set! *tests-run* (+ *tests-run* 1)) + (let ((str (call-with-output-string (lambda (out) (display name out)))) + (res expr)) + (display str) + (write-char #\space) + (display (make-string (max 0 (- 72 (string-length str))) #\.)) + (flush-output) + (cond + ((equal? res expect) + (set! *tests-passed* (+ *tests-passed* 1)) + (display " [PASS]\n")) + (else + (display " [FAIL]\n") + (display " expected ") (write expect) + (display " but got ") (write res) (newline)))))))) + +(define (test-report) + (write *tests-passed*) + (display " out of ") + (write *tests-run*) + (display " passed (") + (write (* (/ *tests-passed* *tests-run*) 100)) + (display "%)") + (newline)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; run tests + +(test "any" (match 'any (_ 'ok)) 'ok) +(test "symbol" (match 'ok (x x)) 'ok) +(test "number" (match 28 (28 'ok)) 'ok) +(test "string" (match "good" ("bad" 'fail) ("good" 'ok)) 'ok) +(test "literal symbol" (match 'good ('bad 'fail) ('good 'ok)) 'ok) +(test "null" (match '() (() 'ok)) 'ok) +(test "pair" (match '(ok) ((x) x)) 'ok) +(test "vector" (match '#(ok) (#(x) x)) 'ok) +(test "any doubled" (match '(1 2) ((_ _) 'ok)) 'ok) +(test "and empty" (match '(o k) ((and) 'ok)) 'ok) +(test "and single" (match 'ok ((and x) x)) 'ok) +(test "and double" (match 'ok ((and (? symbol?) y) 'ok)) 'ok) +(test "or empty" (match '(o k) ((or) 'fail) (else 'ok)) 'ok) +(test "or single" (match 'ok ((or x) 'ok)) 'ok) +(test "or double" (match 'ok ((or (? symbol? y) y) y)) 'ok) +(test "not" (match 28 ((not (a . b)) 'ok)) 'ok) +(test "pred" (match 28 ((? number?) 'ok)) 'ok) +(test "named pred" (match 28 ((? number? x) (+ x 1))) 29) + +(test "duplicate symbols pass" (match '(ok . ok) ((x . x) x)) 'ok) +(test "duplicate symbols fail" (match '(ok . bad) ((x . x) 'bad) (else 'ok)) 'ok) +(test "duplicate symbols samth" (match '(ok . ok) ((x . 'bad) x) (('ok . x) x)) 'ok) + +(test "ellipses" + (match '((a . 1) (b . 2) (c . 3)) + (((x . y) ___) (list x y))) + '((a b c) (1 2 3))) + +(test "real ellipses" + (match '((a . 1) (b . 2) (c . 3)) + (((x . y) ...) (list x y))) + '((a b c) (1 2 3))) + +(test "vector ellipses" + (match '#(1 2 3 (a . 1) (b . 2) (c . 3)) + (#(a b c (hd . tl) ...) (list a b c hd tl))) + '(1 2 3 (a b c) (1 2 3))) + +(test "pred ellipses" + (match '(1 2 3) + (((? odd? n) ___) n) + (((? number? n) ___) n)) + '(1 2 3)) + +(test "failure continuation" + (match '(1 2) + ((a . b) (=> next) (if (even? a) 'fail (next))) + ((a . b) 'ok)) + 'ok) + +(test "let" + (match-let ((x 'ok) (y '(o k))) + y) + '(o k)) + +(test "let*" + (match-let* ((x 'f) (y 'o) ((z w) (list y x))) + (list x y z w)) + '(f o o f)) + +(test "getter car" + (match '(1 . 2) (((get! a) . b) (list (a) b))) + '(1 2)) + +(test "getter cdr" + (match '(1 . 2) ((a . (get! b)) (list a (b)))) + '(1 2)) + +(test "getter vector" + (match '#(1 2 3) (#((get! a) b c) (list (a) b c))) + '(1 2 3)) + +(test "setter car" + (let ((x (cons 1 2))) + (match x (((set! a) . b) (a 3))) + x) + '(3 . 2)) + +(test "setter cdr" + (let ((x (cons 1 2))) + (match x ((a . (set! b)) (b 3))) + x) + '(1 . 3)) + +(test "setter vector" + (let ((x (vector 1 2 3))) + (match x (#(a (set! b) c) (b 0))) + x) + '#(1 0 3)) + +(test "single tail" + (match '((a . 1) (b . 2) (c . 3)) + (((x . y) ... last) (list x y last))) + '((a b) (1 2) (c . 3))) + +(test "single tail 2" + (match '((a . 1) (b . 2) 3) + (((x . y) ... last) (list x y last))) + '((a b) (1 2) 3)) + +(test "multiple tail" + (match '((a . 1) (b . 2) (c . 3) (d . 4) (e . 5)) + (((x . y) ... u v w) (list x y u v w))) + '((a b) (1 2) (c . 3) (d . 4) (e . 5))) + +(test "Riastradh quasiquote" + (match '(1 2 3) (`(1 ,b ,c) (list b c))) + '(2 3)) + +(test "trivial tree search" + (match '(1 2 3) ((_ *** (a b c)) (list a b c))) + '(1 2 3)) + +(test "simple tree search" + (match '(x (1 2 3)) ((_ *** (a b c)) (list a b c))) + '(1 2 3)) + +(test "deep tree search" + (match '(x (x (x (1 2 3)))) ((_ *** (a b c)) (list a b c))) + '(1 2 3)) + +(test "non-tail tree search" + (match '(x (x (x a b c (1 2 3) d e f))) ((_ *** (a b c)) (list a b c))) + '(1 2 3)) + +(test "restricted tree search" + (match '(x (x (x a b c (1 2 3) d e f))) (('x *** (a b c)) (list a b c))) + '(1 2 3)) + +(test "fail restricted tree search" + (match '(x (y (x a b c (1 2 3) d e f))) + (('x *** (a b c)) (list a b c)) + (else #f)) + #f) + +(test "sxml tree search" + (match '(p (ul (li a (b c) (a (^ (href . "http://synthcode.com/")) "synthcode") d e f))) + (((or 'p 'ul 'li 'b) *** ('a ('^ attrs ...) text ...)) + (list attrs text)) + (else #f)) + '(((href . "http://synthcode.com/")) ("synthcode"))) + +(test "failed sxml tree search" + (match '(p (ol (li a (b c) (a (^ (href . "http://synthcode.com/")) "synthcode") d e f))) + (((or 'p 'ul 'li 'b) *** ('a ('^ attrs ...) text ...)) + (list attrs text)) + (else #f)) + #f) + +(test "collect tree search" + (match '(p (ul (li a (b c) (a (^ (href . "http://synthcode.com/")) "synthcode") d e f))) + (((and tag (or 'p 'ul 'li 'b)) *** ('a ('^ attrs ...) text ...)) + (list tag attrs text)) + (else #f)) + '((p ul li) ((href . "http://synthcode.com/")) ("synthcode"))) + +(test-report) +