mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-19 05:39:18 +02:00
Improving regexp docs.
This commit is contained in:
parent
b424922862
commit
f71d3e18bd
2 changed files with 54 additions and 1 deletions
|
@ -1,5 +1,5 @@
|
||||||
;; regexp.scm -- simple non-backtracking NFA implementation
|
;; regexp.scm -- simple non-backtracking NFA implementation
|
||||||
;; Copyright (c) 2013 Alex Shinn. All rights reserved.
|
;; Copyright (c) 2013-2015 Alex Shinn. All rights reserved.
|
||||||
;; BSD-style license: http://synthcode.com/license.txt
|
;; BSD-style license: http://synthcode.com/license.txt
|
||||||
|
|
||||||
;;; An rx represents a start state and meta-info such as the number
|
;;; An rx represents a start state and meta-info such as the number
|
||||||
|
@ -903,6 +903,19 @@
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; Utilities
|
;; Utilities
|
||||||
|
|
||||||
|
;;> The fundamental regexp matching iterator. Repeatedly searches
|
||||||
|
;;> \var{str} for the regexp \var{re} so long as a match can be found.
|
||||||
|
;;> On each successful match, applies \scheme{(\var{kons} \var{i}
|
||||||
|
;;> \var{regexp-match} \var{str} \var{acc})} where \var{i} is the
|
||||||
|
;;> index since the last match (beginning with
|
||||||
|
;;> \var{start}), \var{regexp-match} is the resulting match, and
|
||||||
|
;;> \var{acc} is the result of the previous \var{kons} application,
|
||||||
|
;;> beginning with \var{knil}. When no more matches can be found,
|
||||||
|
;;> calls \var{finish} with the same arguments, except that
|
||||||
|
;;> \var{regexp-match} is \scheme{#f}.
|
||||||
|
;;>
|
||||||
|
;;> By default \var{finish} just returns \var{acc}.
|
||||||
|
|
||||||
(define (regexp-fold rx kons knil str . o)
|
(define (regexp-fold rx kons knil str . o)
|
||||||
(let* ((rx (regexp rx))
|
(let* ((rx (regexp rx))
|
||||||
(finish (if (pair? o) (car o) (lambda (from md str acc) acc)))
|
(finish (if (pair? o) (car o) (lambda (from md str acc) acc)))
|
||||||
|
@ -924,6 +937,9 @@
|
||||||
(else
|
(else
|
||||||
(finish (string-offset->index str from) #f str acc))))))
|
(finish (string-offset->index str from) #f str acc))))))
|
||||||
|
|
||||||
|
;;> Extracts all non-empty substrings of \var{str} which match
|
||||||
|
;;> \var{re} between \var{start} and \var{end} as a list of strings.
|
||||||
|
|
||||||
(define (regexp-extract rx str . o)
|
(define (regexp-extract rx str . o)
|
||||||
(apply regexp-fold
|
(apply regexp-fold
|
||||||
rx
|
rx
|
||||||
|
@ -935,6 +951,9 @@
|
||||||
(lambda (from md str a) (reverse a))
|
(lambda (from md str a) (reverse a))
|
||||||
o))
|
o))
|
||||||
|
|
||||||
|
;;> Splits \var{str} into a list of strings separated by matches of
|
||||||
|
;;> \var{re}.
|
||||||
|
|
||||||
(define (regexp-split rx str . o)
|
(define (regexp-split rx str . o)
|
||||||
;; start and end in indices passed to regexp-fold
|
;; start and end in indices passed to regexp-fold
|
||||||
(let ((start (if (pair? o) (car o) 0))
|
(let ((start (if (pair? o) (car o) 0))
|
||||||
|
@ -951,6 +970,17 @@
|
||||||
start
|
start
|
||||||
end)))
|
end)))
|
||||||
|
|
||||||
|
;;> Partitions \var{str} into a list of non-empty strings
|
||||||
|
;;> matching \var{re}, interspersed with the unmatched portions
|
||||||
|
;;> of the string. The first and every odd element is an unmatched
|
||||||
|
;;> substring, which will be the empty string if \var{re} matches
|
||||||
|
;;> at the beginning of the string or end of the previous match. The
|
||||||
|
;;> second and every even element will be a substring matching
|
||||||
|
;;> \var{re}. If the final match ends at the end of the string,
|
||||||
|
;;> no trailing empty string will be included. Thus, in the
|
||||||
|
;;> degenerate case where \var{str} is the empty string, the
|
||||||
|
;;> result is \scheme{("")}.
|
||||||
|
|
||||||
(define (regexp-partition rx str . o)
|
(define (regexp-partition rx str . o)
|
||||||
(let ((start (if (pair? o) (car o) 0))
|
(let ((start (if (pair? o) (car o) 0))
|
||||||
(end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
|
(end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
|
||||||
|
@ -963,6 +993,23 @@
|
||||||
a))
|
a))
|
||||||
(reverse (regexp-fold rx kons '() str final start end))))
|
(reverse (regexp-fold rx kons '() str final start end))))
|
||||||
|
|
||||||
|
;;> Returns a new string replacing the \var{count}th match of \var{re}
|
||||||
|
;;> in \var{str} with \var{subst}, where the zero-indexed \var{count}
|
||||||
|
;;> defaults to zero (i.e. the first match). If there is no
|
||||||
|
;;> \var{count}th match, returns the selected substring unmodified.
|
||||||
|
|
||||||
|
;;> \var{subst} can be a string, an integer or symbol indicating the
|
||||||
|
;;> contents of a numbered or named submatch of \var{re}, \scheme{'pre}
|
||||||
|
;;> for the substring to the left of the match, or \scheme{'post} for
|
||||||
|
;;> the substring to the right of the match.
|
||||||
|
|
||||||
|
;;> The optional parameters \var{start} and \var{end} restrict both
|
||||||
|
;;> the matching and the substitution to the given indices, such that
|
||||||
|
;;> the result is equivalent to omitting these parameters and
|
||||||
|
;;> replacing on \scheme{(substring str start end)}. As a convenience,
|
||||||
|
;;> a value of \scheme{#f} for \var{end} is equivalent to
|
||||||
|
;;> \scheme{(string-length str)}.
|
||||||
|
|
||||||
(define (regexp-replace rx str subst . o)
|
(define (regexp-replace rx str subst . o)
|
||||||
(let* ((start (if (and (pair? o) (car o)) (car o) 0))
|
(let* ((start (if (and (pair? o) (car o)) (car o) 0))
|
||||||
(o (if (pair? o) (cdr o) '()))
|
(o (if (pair? o) (cdr o) '()))
|
||||||
|
@ -987,6 +1034,9 @@
|
||||||
(regexp-match-submatch-end m 0)
|
(regexp-match-submatch-end m 0)
|
||||||
(string-index->offset str end))))))))))))
|
(string-index->offset str end))))))))))))
|
||||||
|
|
||||||
|
;;> Equivalent to \var{regexp-replace}, but replaces all occurrences
|
||||||
|
;;> of \var{re} in \var{str}.
|
||||||
|
|
||||||
(define (regexp-replace-all rx str subst . o)
|
(define (regexp-replace-all rx str subst . o)
|
||||||
(regexp-fold
|
(regexp-fold
|
||||||
rx
|
rx
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
|
|
||||||
|
;;> A regular expression engine implementing SRFI 115 using a
|
||||||
|
;;> non-backtracking Thompson NFA algorithm.
|
||||||
|
|
||||||
(define-library (chibi regexp)
|
(define-library (chibi regexp)
|
||||||
(export regexp regexp? valid-sre? rx regexp->sre char-set->sre
|
(export regexp regexp? valid-sre? rx regexp->sre char-set->sre
|
||||||
regexp-matches regexp-matches? regexp-search
|
regexp-matches regexp-matches? regexp-search
|
||||||
|
|
Loading…
Add table
Reference in a new issue