Improving regexp docs.

This commit is contained in:
Alex Shinn 2015-05-03 08:22:38 +09:00
parent b424922862
commit f71d3e18bd
2 changed files with 54 additions and 1 deletions

View file

@ -1,5 +1,5 @@
;; regexp.scm -- simple non-backtracking NFA implementation
;; Copyright (c) 2013 Alex Shinn. All rights reserved.
;; Copyright (c) 2013-2015 Alex Shinn. All rights reserved.
;; BSD-style license: http://synthcode.com/license.txt
;;; An rx represents a start state and meta-info such as the number
@ -903,6 +903,19 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Utilities
;;> The fundamental regexp matching iterator. Repeatedly searches
;;> \var{str} for the regexp \var{re} so long as a match can be found.
;;> On each successful match, applies \scheme{(\var{kons} \var{i}
;;> \var{regexp-match} \var{str} \var{acc})} where \var{i} is the
;;> index since the last match (beginning with
;;> \var{start}), \var{regexp-match} is the resulting match, and
;;> \var{acc} is the result of the previous \var{kons} application,
;;> beginning with \var{knil}. When no more matches can be found,
;;> calls \var{finish} with the same arguments, except that
;;> \var{regexp-match} is \scheme{#f}.
;;>
;;> By default \var{finish} just returns \var{acc}.
(define (regexp-fold rx kons knil str . o)
(let* ((rx (regexp rx))
(finish (if (pair? o) (car o) (lambda (from md str acc) acc)))
@ -924,6 +937,9 @@
(else
(finish (string-offset->index str from) #f str acc))))))
;;> Extracts all non-empty substrings of \var{str} which match
;;> \var{re} between \var{start} and \var{end} as a list of strings.
(define (regexp-extract rx str . o)
(apply regexp-fold
rx
@ -935,6 +951,9 @@
(lambda (from md str a) (reverse a))
o))
;;> Splits \var{str} into a list of strings separated by matches of
;;> \var{re}.
(define (regexp-split rx str . o)
;; start and end in indices passed to regexp-fold
(let ((start (if (pair? o) (car o) 0))
@ -951,6 +970,17 @@
start
end)))
;;> Partitions \var{str} into a list of non-empty strings
;;> matching \var{re}, interspersed with the unmatched portions
;;> of the string. The first and every odd element is an unmatched
;;> substring, which will be the empty string if \var{re} matches
;;> at the beginning of the string or end of the previous match. The
;;> second and every even element will be a substring matching
;;> \var{re}. If the final match ends at the end of the string,
;;> no trailing empty string will be included. Thus, in the
;;> degenerate case where \var{str} is the empty string, the
;;> result is \scheme{("")}.
(define (regexp-partition rx str . o)
(let ((start (if (pair? o) (car o) 0))
(end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
@ -963,6 +993,23 @@
a))
(reverse (regexp-fold rx kons '() str final start end))))
;;> Returns a new string replacing the \var{count}th match of \var{re}
;;> in \var{str} with \var{subst}, where the zero-indexed \var{count}
;;> defaults to zero (i.e. the first match). If there are not
;;> \var{count} matches, returns the selected substring unmodified.
;;> \var{subst} can be a string, an integer or symbol indicating the
;;> contents of a numbered or named submatch of \var{re}, \scheme{'pre}
;;> for the substring to the left of the match, or \scheme{'post} for
;;> the substring to the right of the match.
;;> The optional parameters \var{start} and \var{end} restrict both
;;> the matching and the substitution to the given indices, such that
;;> the result is equivalent to omitting these parameters and
;;> replacing on \scheme{(substring str start end)}. As a convenience,
;;> a value of \scheme{#f} for \var{end} is equivalent to
;;> \scheme{(string-length str)}.
(define (regexp-replace rx str subst . o)
(let* ((start (if (and (pair? o) (car o)) (car o) 0))
(o (if (pair? o) (cdr o) '()))
@ -987,6 +1034,9 @@
(regexp-match-submatch-end m 0)
(string-index->offset str end))))))))))))
;;> Equivalent to \scheme{regexp-replace}, but replaces all occurrences
;;> of \var{re} in \var{str}.
(define (regexp-replace-all rx str subst . o)
(regexp-fold
rx

View file

@ -1,4 +1,7 @@
;;> A regular expression engine implementing SRFI 115 using a
;;> non-backtracking Thompson NFA algorithm.
(define-library (chibi regexp)
(export regexp regexp? valid-sre? rx regexp->sre char-set->sre
regexp-matches regexp-matches? regexp-search