From c21e53832c8606daa407358db691b9d66cf2ff79 Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Mon, 16 May 2011 00:49:35 -0700 Subject: [PATCH] including literate docs --- lib/chibi/match/match.scm | 274 +++++++++++++++++++++++++++++++++----- 1 file changed, 241 insertions(+), 33 deletions(-) mode change 100644 => 100755 lib/chibi/match/match.scm diff --git a/lib/chibi/match/match.scm b/lib/chibi/match/match.scm old mode 100644 new mode 100755 index dbb00167..d03dc50b --- a/lib/chibi/match/match.scm +++ b/lib/chibi/match/match.scm @@ -1,20 +1,203 @@ ;;;; match.scm -- portable hygienic pattern matcher +;;;; -*- coding: utf-8 -*- ;; ;; This code is written by Alex Shinn and placed in the ;; Public Domain. All warranties are disclaimed. -;; This is a full superset of the popular MATCH package by Andrew -;; Wright, written in fully portable SYNTAX-RULES (R5RS only, breaks -;; in R6RS SYNTAX-RULES), and thus preserving hygiene. +;;> @example-import[(srfi 9)] -;; This is a simple generative pattern matcher - each pattern is -;; expanded into the required tests, calling a failure continuation if -;; the tests fail. This makes the logic easy to follow and extend, -;; but produces sub-optimal code in cases where you have many similar -;; clauses due to repeating the same tests. Nonetheless a smart -;; compiler should be able to remove the redundant tests. For -;; MATCH-LET and DESTRUCTURING-BIND type uses there is no performance -;; hit. +;;> This is a full superset of the popular @hyperlink[ +;;> "http://www.cs.indiana.edu/scheme-repository/code.match.html"]{match} +;;> package by Andrew Wright, written in fully portable @scheme{syntax-rules} +;;> and thus preserving hygiene. + +;;> The most notable extensions are the ability to use @emph{non-linear} +;;> patterns - patterns in which the same identifier occurs multiple +;;> times, tail patterns after ellipsis, and the experimental tree patterns. + +;;> @subsubsection{Patterns} + +;;> Patterns are written to look like the printed representation of +;;> the objects they match. The basic usage is + +;;> @scheme{(match expr (pat body ...) ...)} + +;;> where the result of @var{expr} is matched against each pattern in +;;> turn, and the corresponding body is evaluated for the first to +;;> succeed. Thus, a list of three elements matches a list of three +;;> elements. + +;;> @example{(let ((ls (list 1 2 3))) (match ls ((1 2 3) #t)))} + +;;> If no patterns match an error is signalled. + +;;> Identifiers will match anything, and make the corresponding +;;> binding available in the body. + +;;> @example{(match (list 1 2 3) ((a b c) b))} + +;;> If the same identifier occurs multiple times, the first instance +;;> will match anything, but subsequent instances must match a value +;;> which is @scheme{equal?} to the first. + +;;> @example{(match (list 1 2 1) ((a a b) 1) ((a b a) 2))} + +;;> The special identifier @scheme{_} matches anything, no matter how +;;> many times it is used, and does not bind the result in the body. + +;;> @example{(match (list 1 2 1) ((_ _ b) 1) ((a b a) 2))} + +;;> To match a literal identifier (or list or any other literal), use +;;> @scheme{quote}. + +;;> @example{(match 'a ('b 1) ('a 2))} + +;;> Analogous to its normal usage in scheme, @scheme{quasiquote} can +;;> be used to quote a mostly literally matching object with selected +;;> parts unquoted. + +;;> @example|{(match (list 1 2 3) (`(1 ,b ,c) (list b c)))}| + +;;> Often you want to match any number of a repeated pattern. Inside +;;> a list pattern you can append @scheme{...} after an element to +;;> match zero or more of that pattern (like a regexp Kleene star). + +;;> @example{(match (list 1 2) ((1 2 3 ...) #t))} +;;> @example{(match (list 1 2 3) ((1 2 3 ...) #t))} +;;> @example{(match (list 1 2 3 3 3) ((1 2 3 ...) #t))} + +;;> Pattern variables matched inside the repeated pattern are bound to +;;> a list of each matching instance in the body. + +;;> @example{(match (list 1 2) ((a b c ...) c))} +;;> @example{(match (list 1 2 3) ((a b c ...) c))} +;;> @example{(match (list 1 2 3 4 5) ((a b c ...) c))} + +;;> More than one @scheme{...} may not be used in the same list, since +;;> this would require exponential backtracking in the general case. +;;> However, @scheme{...} need not be the final element in the list, +;;> and may be succeeded by a fixed number of patterns. + +;;> @example{(match (list 1 2 3 4) ((a b c ... d e) c))} +;;> @example{(match (list 1 2 3 4 5) ((a b c ... d e) c))} +;;> @example{(match (list 1 2 3 4 5 6 7) ((a b c ... d e) c))} + +;;> @scheme{___} is provided as an alias for @scheme{...} when it is +;;> inconvenient to use the ellipsis (as in a syntax-rules template). + +;;> The @scheme{..1} syntax is exactly like the @scheme{...} except +;;> that it matches one or more repetitions (like a regexp "+"). + +;;> @example{(match (list 1 2) ((a b c ..1) c))} +;;> @example{(match (list 1 2 3) ((a b c ..1) c))} + +;;> The boolean operators @scheme{and}, @scheme{or} and @scheme{not} +;;> can be used to group and negate patterns analogously to their +;;> Scheme counterparts. + +;;> The @scheme{and} operator ensures that all subpatterns match. +;;> This operator is often used with the idiom @scheme{(and x pat)} to +;;> bind @var{x} to the entire value that matches @var{pat} +;;> (c.f. "as-patterns" in ML or Haskell). Another common use is in +;;> conjunction with @scheme{not} patterns to match a general case +;;> with certain exceptions. + +;;> @example{(match 1 ((and) #t))} +;;> @example{(match 1 ((and x) x))} +;;> @example{(match 1 ((and x 1) x))} + +;;> The @scheme{or} operator ensures that at least one subpattern +;;> matches. If the same identifier occurs in different subpatterns, +;;> it is matched independently. All identifiers from all subpatterns +;;> are bound if the @scheme{or} operator matches, but the binding is +;;> only defined for identifiers from the subpattern which matched. + +;;> @example{(match 1 ((or) #t) (else #f))} +;;> @example{(match 1 ((or x) x))} +;;> @example{(match 1 ((or x 2) x))} + +;;> The @scheme{not} operator succeeds if the given pattern doesn't +;;> match. None of the identifiers used are available in the body. + +;;> @example{(match 1 ((not 2) #t))} + +;;> The more general operator @scheme{?} can be used to provide a +;;> predicate. The usage is @scheme{(? predicate pat ...)} where +;;> @var{predicate} is a Scheme expression evaluating to a predicate +;;> called on the value to match, and any optional patterns after the +;;> predicate are then matched as in an @scheme{and} pattern. + +;;> @example{(match 1 ((? odd? x) x))} + +;;> The field operator @scheme{=} is used to extract an arbitrary +;;> field and match against it. It is useful for more complex or +;;> conditional destructuring that can't be more directly expressed in +;;> the pattern syntax. The usage is @scheme{(= field pat)}, where +;;> @var{field} can be any expression, and should result in a +;;> procedure of one argument, which is applied to the value to match +;;> to generate a new value to match against @var{pat}. + +;;> Thus the pattern @scheme{(and (= car x) (= cdr y))} is equivalent +;;> to @scheme{(x . y)}, except it will result in an immediate error +;;> if the value isn't a pair. + +;;> @example{(match '(1 . 2) ((= car x) x))} +;;> @example{(match 4 ((= sqrt x) x))} + +;;> The record operator @scheme{$} is used as a concise way to match +;;> records defined by SRFI-9 (or SRFI-99). The usage is +;;> @scheme{($ rtd field ...)}, where @var{rtd} should be the record +;;> type descriptor specified as the first argument to +;;> @scheme{define-record-type}, and each @var{field} is a subpattern +;;> matched against the fields of the record in order. Not all fields +;;> must be present. + +;;> @example{ +;;> (let () +;;> (define-record-type employee +;;> (make-employee name title) +;;> employee? +;;> (name get-name) +;;> (title get-title)) +;;> (match (make-employee "Bob" "Doctor") +;;> (($ employee n t) (list t n)))) +;;> } + +;;> The @scheme{set!} and @scheme{get!} operators are used to bind an +;;> identifier to the setter and getter of a field, respectively. The +;;> setter is a procedure of one argument, which mutates the field to +;;> that argument. The getter is a procedure of no arguments which +;;> returns the current value of the field. + +;;> @example{(let ((x (cons 1 2))) (match x ((1 . (set! s)) (s 3) x)))} +;;> @example{(match '(1 . 2) ((1 . (get! g)) (g)))} + +;;> The new operator @scheme{***} can be used to search a tree for +;;> subpatterns. A pattern of the form @scheme{(x *** y)} represents +;;> the subpattern @var{y} located somewhere in a tree where the path +;;> from the current object to @var{y} can be seen as a list of the +;;> form @scheme{(x ...)}. @var{y} can immediately match the current +;;> object in which case the path is the empty list. In a sense it's +;;> a 2-dimensional version of the @scheme{...} pattern. + +;;> As a common case the pattern @scheme{(_ *** y)} can be used to +;;> search for @var{y} anywhere in a tree, regardless of the path +;;> used. + +;;> @example{(match '(a (a (a b))) ((x *** 'b) x))} +;;> @example{(match '(a (b) (c (d e) (f g))) ((x *** 'g) x))} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Notes + +;; The implementation is a simple generative pattern matcher - each +;; pattern is expanded into the required tests, calling a failure +;; continuation if the tests fail. This makes the logic easy to +;; follow and extend, but produces sub-optimal code in cases where you +;; have many similar clauses due to repeating the same tests. +;; Nonetheless a smart compiler should be able to remove the redundant +;; tests. For MATCH-LET and DESTRUCTURING-BIND type uses there is no +;; performance hit. ;; The original version was written on 2006/11/29 and described in the ;; following Usenet post: @@ -52,6 +235,21 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;> @subsubsection{Syntax} + +;;> @subsubsubsection{(match expr (pattern . body) ...)@br{} +;;> (match expr (pattern (=> failure) . body) ...)} + +;;> The result of @var{expr} is matched against each @var{pattern} in +;;> turn, according to the pattern rules described in the previous +;;> section, until the the first @var{pattern} matches. When a match is +;;> found, the corresponding @var{body}s are evaluated in order, +;;> and the result of the last expression is returned as the result +;;> of the entire @scheme{match}. If a @var{failure} is provided, +;;> then it is bound to a procedure of no arguments which continues, +;;> processing at the next @var{pattern}. If no @var{pattern} matches, +;;> an error is signalled. + ;; The basic interface. MATCH just performs some basic syntax ;; validation, binds the match expression to a temporary variable `v', ;; and passes it on to MATCH-NEXT. It's a constant throughout the @@ -361,21 +559,7 @@ ((_ x sk) (match-syntax-error "dotted tail not allowed after ellipse" x)))) -;; Matching a tree search pattern is only slightly more complicated. -;; Here we allow patterns of the form -;; -;; (x *** y) -;; -;; to represent the pattern y located somewhere in a tree where the -;; path from the current object to y can be seen as a list of the form -;; (X ...). Y can immediately match the current object in which case -;; the path is the empty list. In a sense it's a 2-dimensional -;; version of the ... pattern. -;; -;; As a common case the pattern (_ *** y) can be used to search for Y -;; anywhere in a tree, regardless of the path used. -;; -;; To implement the search, we use two recursive procedures. TRY +;; To implement the tree search, we use two recursive procedures. TRY ;; attempts to match Y once, and on success it calls the normal SK on ;; the accumulated list ids as in MATCH-GEN-ELLIPSES. On failure, we ;; call NEXT which first checks if the current value is a list @@ -593,24 +777,42 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Gimme some sugar baby. +;;> Shortcut for @scheme{lambda} + @scheme{match}. Creates a +;;> procedure of one argument, and matches that argument against each +;;> clause. + (define-syntax match-lambda (syntax-rules () - ((_ clause ...) (lambda (expr) (match expr clause ...))))) + ((_ (pattern . body) ...) (lambda (expr) (match expr (pattern . body) ...))))) + +;;> Similar to @scheme{match-lambda}. Creates a procedure of any +;;> number of arguments, and matches the argument list against each +;;> clause. (define-syntax match-lambda* (syntax-rules () - ((_ clause ...) (lambda expr (match expr clause ...))))) + ((_ (pattern . body) ...) (lambda expr (match expr (pattern . body) ...))))) + +;;> Matches each var to the corresponding expression, and evaluates +;;> the body with all match variables in scope. Raises an error if +;;> any of the expressions fail to match. Syntax analogous to named +;;> let can also be used for recursive functions which match on their +;;> arguments as in @scheme{match-lambda*}. (define-syntax match-let (syntax-rules () - ((_ (vars ...) . body) - (match-let/helper let () () (vars ...) . body)) - ((_ loop . rest) - (match-named-let loop () . rest)))) + ((_ ((var value) ...) . body) + (match-let/helper let () () ((var value) ...) . body)) + ((_ loop ((var init) ...) . body) + (match-named-let loop ((var init) ...) . body)))) + +;;> Similar to @scheme{match-let}, but analogously to @scheme{letrec} +;;> matches and binds the variables with all match variables in scope. (define-syntax match-letrec (syntax-rules () - ((_ vars . body) (match-let/helper letrec () () vars . body)))) + ((_ ((var value) ...) . body) + (match-let/helper letrec () () ((var value) ...) . body)))) (define-syntax match-let/helper (syntax-rules () @@ -638,6 +840,12 @@ ((_ loop (v ...) ((pat expr) . rest) . body) (match-named-let loop (v ... (pat expr tmp)) rest . body)))) +;;> @subsubsubsection{(match-let* ((var value) ...) body ...)} + +;;> Similar to @scheme{match-let}, but analogously to @scheme{let*} +;;> matches and binds the variables in sequence, with preceding match +;;> variables in scope. + (define-syntax match-let* (syntax-rules () ((_ () . body)