Renaming rx-match to regexp-match, and regexp-match to regexp-matches.

This commit is contained in:
Alex Shinn 2013-11-09 16:53:51 +09:00
parent d2bd4d6d44
commit 5fe299d4fc
3 changed files with 114 additions and 114 deletions

View file

@ -75,65 +75,65 @@
;; A record holding the current match data - essentially a wrapper ;; A record holding the current match data - essentially a wrapper
;; around a vector, plus a reference to the RX for meta-info. ;; around a vector, plus a reference to the RX for meta-info.
(define-record-type Rx-Match (define-record-type Regexp-Match
(%make-rx-match matches rx string) (%make-regexp-match matches rx string)
rx-match? regexp-match?
(matches rx-match-matches rx-match-matches-set!) (matches regexp-match-matches regexp-match-matches-set!)
(rx rx-match-rx) (rx regexp-match-rx)
(string rx-match-string)) (string regexp-match-string))
(define (rx-match-rules md) (define (regexp-match-rules md)
(rx-rules (rx-match-rx md))) (rx-rules (regexp-match-rx md)))
(define (rx-match-names md) (define (regexp-match-names md)
(rx-names (rx-match-rx md))) (rx-names (regexp-match-rx md)))
(define (make-rx-match len rx str) (define (make-regexp-match len rx str)
(%make-rx-match (make-vector len #f) rx str)) (%make-regexp-match (make-vector len #f) rx str))
(define (make-rx-match-for-rx rx str) (define (make-regexp-match-for-rx rx str)
(make-rx-match (rx-num-save-indexes rx) rx str)) (make-regexp-match (rx-num-save-indexes rx) rx str))
(define (rx-match-num-matches md) (define (regexp-match-num-matches md)
(vector-length (rx-match-matches md))) (vector-length (regexp-match-matches md)))
(define (rx-match-name-offset md name) (define (regexp-match-name-offset md name)
(cond ((assq name (rx-match-names md)) => cdr) (cond ((assq name (regexp-match-names md)) => cdr)
(else (error "unknown match name" md name)))) (else (error "unknown match name" md name))))
(define (rx-match-ref md n) (define (regexp-match-ref md n)
(vector-ref (rx-match-matches md) (vector-ref (regexp-match-matches md)
(if (integer? n) (if (integer? n)
n n
(rx-match-name-offset md n)))) (regexp-match-name-offset md n))))
(define (rx-match-set! md n val) (define (regexp-match-set! md n val)
(vector-set! (rx-match-matches md) n val)) (vector-set! (regexp-match-matches md) n val))
(define (copy-rx-match md) (define (copy-regexp-match md)
(let* ((src (rx-match-matches md)) (let* ((src (regexp-match-matches md))
(len (vector-length src)) (len (vector-length src))
(dst (make-vector len #f))) (dst (make-vector len #f)))
(do ((i 0 (+ i 1))) (do ((i 0 (+ i 1)))
((= i len) ((= i len)
(%make-rx-match dst (rx-match-rx md) (rx-match-string md))) (%make-regexp-match dst (regexp-match-rx md) (regexp-match-string md)))
(vector-set! dst i (vector-ref src i))))) (vector-set! dst i (vector-ref src i)))))
;;> Returns the matching result for the given named or indexed ;;> Returns the matching result for the given named or indexed
;;> submatch \var{n}, possibly as a list for a submatch-list, or ;;> submatch \var{n}, possibly as a list for a submatch-list, or
;;> \scheme{#f} if not matched. ;;> \scheme{#f} if not matched.
(define (rx-match-submatch/list md n) (define (regexp-match-submatch/list md n)
(let ((n (if (integer? n) n (rx-match-name-offset md n)))) (let ((n (if (integer? n) n (regexp-match-name-offset md n))))
(cond (cond
((>= n (vector-length (rx-match-rules md))) ((>= n (vector-length (regexp-match-rules md)))
#f) #f)
(else (else
(let ((rule (vector-ref (rx-match-rules md) n))) (let ((rule (vector-ref (regexp-match-rules md) n)))
(cond (cond
((pair? rule) ((pair? rule)
(let ((start (rx-match-ref md (car rule))) (let ((start (regexp-match-ref md (car rule)))
(end (rx-match-ref md (cdr rule))) (end (regexp-match-ref md (cdr rule)))
(str (rx-match-string md))) (str (regexp-match-string md)))
(and start end (substring-cursor str start end)))) (and start end (substring-cursor str start end))))
(else (else
(let ((res (rx-match-ref md rule))) (let ((res (regexp-match-ref md rule)))
(if (pair? res) (if (pair? res)
(reverse res) (reverse res)
res))))))))) res)))))))))
@ -141,18 +141,18 @@
;;> Returns the matching substring for the given named or indexed ;;> Returns the matching substring for the given named or indexed
;;> submatch \var{n}, or \scheme{#f} if not matched. ;;> submatch \var{n}, or \scheme{#f} if not matched.
(define (rx-match-submatch md n) (define (regexp-match-submatch md n)
(let ((res (rx-match-submatch/list md n))) (let ((res (regexp-match-submatch/list md n)))
(if (pair? res) (car res) res))) (if (pair? res) (car res) res)))
(define (rx-match-submatch-start+end md n) (define (regexp-match-submatch-start+end md n)
(let ((n (if (string-cursor? n) n (rx-match-name-offset md n)))) (let ((n (if (string-cursor? n) n (regexp-match-name-offset md n))))
(and (< n (vector-length (rx-match-rules md))) (and (< n (vector-length (regexp-match-rules md)))
(let ((rule (vector-ref (rx-match-rules md) n))) (let ((rule (vector-ref (regexp-match-rules md) n)))
(if (pair? rule) (if (pair? rule)
(let ((start (rx-match-ref md (car rule))) (let ((start (regexp-match-ref md (car rule)))
(end (rx-match-ref md (cdr rule))) (end (regexp-match-ref md (cdr rule)))
(str (rx-match-string md))) (str (regexp-match-string md)))
(and start end (and start end
(cons (string-offset->index str start) (cons (string-offset->index str start)
(string-offset->index str end)))) (string-offset->index str end))))
@ -161,16 +161,16 @@
;;> Returns the start index for the given named or indexed submatch ;;> Returns the start index for the given named or indexed submatch
;;> \var{n}, or \scheme{#f} if not matched. ;;> \var{n}, or \scheme{#f} if not matched.
(define (rx-match-submatch-start md n) (define (regexp-match-submatch-start md n)
(cond ((rx-match-submatch-start+end md n) => car) (else #f))) (cond ((regexp-match-submatch-start+end md n) => car) (else #f)))
;;> Returns the end index for the given named or indexed submatch ;;> Returns the end index for the given named or indexed submatch
;;> \var{n}, or \scheme{#f} if not matched. ;;> \var{n}, or \scheme{#f} if not matched.
(define (rx-match-submatch-end md n) (define (regexp-match-submatch-end md n)
(cond ((rx-match-submatch-start+end md n) => cdr) (else #f))) (cond ((regexp-match-submatch-start+end md n) => cdr) (else #f)))
(define (rx-match-convert recurse? md str) (define (regexp-match-convert recurse? md str)
(cond (cond
((vector? md) ((vector? md)
(let lp ((i 0) (res '())) (let lp ((i 0) (res '()))
@ -185,45 +185,45 @@
res))) res)))
(else (else
(lp (+ i 1) (lp (+ i 1)
(cons (rx-match-convert recurse? (vector-ref md i) str) (cons (regexp-match-convert recurse? (vector-ref md i) str)
res)))))) res))))))
((list? md) ((list? md)
(if recurse? (if recurse?
(map (lambda (x) (rx-match-convert recurse? x str)) (reverse md)) (map (lambda (x) (regexp-match-convert recurse? x str)) (reverse md))
(rx-match-convert recurse? (car md) str))) (regexp-match-convert recurse? (car md) str)))
((and (pair? md) (string-cursor? (car md)) (string-cursor? (cdr md))) ((and (pair? md) (string-cursor? (car md)) (string-cursor? (cdr md)))
(substring-cursor str (car md) (cdr md))) (substring-cursor str (car md) (cdr md)))
((rx-match? md) ((regexp-match? md)
(rx-match-convert recurse? (rx-match-matches md) (rx-match-string md))) (regexp-match-convert recurse? (regexp-match-matches md) (regexp-match-string md)))
(else (else
md))) md)))
;;> Convert an rx-match result to a list of submatches, beginning ;;> Convert an regexp-match result to a list of submatches, beginning
;;> with the full match, using \scheme{#f} for unmatched submatches. ;;> with the full match, using \scheme{#f} for unmatched submatches.
(define (rx-match->list md) (define (regexp-match->list md)
(rx-match-convert #f md #f)) (regexp-match-convert #f md #f))
;;> Convert an rx-match result to a forest of submatches, beginning ;;> Convert an regexp-match result to a forest of submatches, beginning
;;> with the full match, using \scheme{#f} for unmatched submatches. ;;> with the full match, using \scheme{#f} for unmatched submatches.
(define (rx-match->sexp md) (define (regexp-match->sexp md)
(rx-match-convert #t md #f)) (regexp-match-convert #t md #f))
;; Collect results from a list match. ;; Collect results from a list match.
(define (match-collect md spec) (define (match-collect md spec)
(define (match-extract md n) (define (match-extract md n)
(let* ((vec (rx-match-matches md)) (let* ((vec (regexp-match-matches md))
(rules (rx-match-rules md)) (rules (regexp-match-rules md))
(n-rule (vector-ref rules n)) (n-rule (vector-ref rules n))
(rule (vector-ref rules n-rule))) (rule (vector-ref rules n-rule)))
(if (pair? rule) (if (pair? rule)
(let ((start (rx-match-ref md (car rule))) (let ((start (regexp-match-ref md (car rule)))
(end (rx-match-ref md (cdr rule)))) (end (regexp-match-ref md (cdr rule))))
(and start end (cons start end))) (and start end (cons start end)))
(rx-match-ref md rule)))) (regexp-match-ref md rule))))
(let ((end (cadr spec)) (let ((end (cadr spec))
(vec (rx-match-matches md))) (vec (regexp-match-matches md)))
(let lp ((i (+ 1 (car spec))) (let lp ((i (+ 1 (car spec)))
(ls '())) (ls '()))
(if (>= i end) (if (>= i end)
@ -237,41 +237,41 @@
(state searcher-state searcher-state-set!) (state searcher-state searcher-state-set!)
(matches searcher-matches searcher-matches-set!)) (matches searcher-matches searcher-matches-set!))
;; Merge two rx-matches, preferring the leftmost-longest of their ;; Merge two regexp-matches, preferring the leftmost-longest of their
;; matches. ;; matches.
(define (rx-match>=? m1 m2) (define (regexp-match>=? m1 m2)
(let ((end (- (vector-length (rx-match-matches m1)) 1))) (let ((end (- (vector-length (regexp-match-matches m1)) 1)))
(let lp ((i 0)) (let lp ((i 0))
(cond (cond
((>= i end) ((>= i end)
#t) #t)
((and (eqv? (rx-match-ref m1 i) (rx-match-ref m2 i)) ((and (eqv? (regexp-match-ref m1 i) (regexp-match-ref m2 i))
(eqv? (rx-match-ref m1 (+ i 1)) (rx-match-ref m2 (+ i 1)))) (eqv? (regexp-match-ref m1 (+ i 1)) (regexp-match-ref m2 (+ i 1))))
(lp (+ i 2))) (lp (+ i 2)))
((and (string-cursor? (rx-match-ref m2 i)) ((and (string-cursor? (regexp-match-ref m2 i))
(string-cursor? (rx-match-ref m2 (+ i 1))) (string-cursor? (regexp-match-ref m2 (+ i 1)))
(or (not (string-cursor? (rx-match-ref m1 i))) (or (not (string-cursor? (regexp-match-ref m1 i)))
(not (string-cursor? (rx-match-ref m1 (+ i 1)))) (not (string-cursor? (regexp-match-ref m1 (+ i 1))))
(string-cursor<? (rx-match-ref m2 i) (rx-match-ref m1 i)) (string-cursor<? (regexp-match-ref m2 i) (regexp-match-ref m1 i))
(and (string-cursor=? (rx-match-ref m2 i) (rx-match-ref m1 i)) (and (string-cursor=? (regexp-match-ref m2 i) (regexp-match-ref m1 i))
(string-cursor>? (rx-match-ref m2 (+ i 1)) (string-cursor>? (regexp-match-ref m2 (+ i 1))
(rx-match-ref m1 (+ i 1)))))) (regexp-match-ref m1 (+ i 1))))))
#f) #f)
(else (else
#t))))) #t)))))
(define (rx-match-max m1 m2) (define (regexp-match-max m1 m2)
(if (rx-match>=? m1 m2) m1 m2)) (if (regexp-match>=? m1 m2) m1 m2))
;; Merge match data from sr2 into sr1, preferring the leftmost-longest ;; Merge match data from sr2 into sr1, preferring the leftmost-longest
;; match in the event of a conflict. ;; match in the event of a conflict.
(define (searcher-merge! sr1 sr2) (define (searcher-merge! sr1 sr2)
(let ((m (rx-match-max (searcher-matches sr1) (searcher-matches sr2)))) (let ((m (regexp-match-max (searcher-matches sr1) (searcher-matches sr2))))
(searcher-matches-set! sr1 m))) (searcher-matches-set! sr1 m)))
(define (searcher-max sr1 sr2) (define (searcher-max sr1 sr2)
(if (or (not (searcher? sr2)) (if (or (not (searcher? sr2))
(rx-match>=? (searcher-matches sr1) (searcher-matches sr2))) (regexp-match>=? (searcher-matches sr1) (searcher-matches sr2)))
sr1 sr1
sr2)) sr2))
@ -303,7 +303,7 @@
(list->posse args)) (list->posse args))
(define (make-start-searcher rx str) (define (make-start-searcher rx str)
(make-searcher (rx-start-state rx) (make-rx-match-for-rx rx str))) (make-searcher (rx-start-state rx) (make-regexp-match-for-rx rx str)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Execution ;; Execution
@ -345,12 +345,12 @@
(cond (cond
((pair? index) ((pair? index)
;; Submatch list, accumulate and push. ;; Submatch list, accumulate and push.
(let* ((prev (rx-match-ref matches (car index))) (let* ((prev (regexp-match-ref matches (car index)))
(new (cons (match-collect matches (cdr index)) (new (cons (match-collect matches (cdr index))
(if (pair? prev) prev '())))) (if (pair? prev) prev '()))))
(rx-match-set! matches (car index) new))) (regexp-match-set! matches (car index) new)))
(else (else
(rx-match-set! matches index i)))))) (regexp-match-set! matches index i))))))
;; Follow transitions. ;; Follow transitions.
(cond (cond
((state-accept? st) ((state-accept? st)
@ -374,11 +374,11 @@
(next2 (next2
(let ((sr2 (make-searcher (let ((sr2 (make-searcher
next2 next2
(copy-rx-match (searcher-matches sr))))) (copy-regexp-match (searcher-matches sr)))))
(advance! sr2))))))))) (advance! sr2)))))))))
;; Non-special, non-epsilon searcher, add to posse. ;; Non-special, non-epsilon searcher, add to posse.
((posse-ref new sr) ((posse-ref new sr)
;; Merge rx-match for existing searcher. ;; Merge regexp-match for existing searcher.
=> (lambda (sr-prev) (searcher-merge! sr-prev sr))) => (lambda (sr-prev) (searcher-merge! sr-prev sr)))
(else (else
;; Add new searcher. ;; Add new searcher.
@ -409,7 +409,7 @@
;; searching, return false. ;; searching, return false.
(and (searcher? (cdr accept)) (and (searcher? (cdr accept))
(let ((matches (searcher-matches (cdr accept)))) (let ((matches (searcher-matches (cdr accept))))
(and (or search? (>= (rx-match-ref matches 1) end)) (and (or search? (>= (regexp-match-ref matches 1) end))
(searcher-matches (cdr accept)))))) (searcher-matches (cdr accept))))))
(else (else
;; Otherwise advance normally. ;; Otherwise advance normally.
@ -438,14 +438,14 @@
;;> Match the given regexp or SRE against the entire string and return ;;> Match the given regexp or SRE against the entire string and return
;;> the match data on success. Returns \scheme{#f} on failure. ;;> the match data on success. Returns \scheme{#f} on failure.
(define (regexp-match rx str . o) (define (regexp-matches rx str . o)
(apply regexp-run #f rx str o)) (apply regexp-run #f rx str o))
;;> Match the given regexp or SRE against the entire string and return ;;> Match the given regexp or SRE against the entire string and return
;;> the \scheme{#t} on success. Returns \scheme{#f} on failure. ;;> the \scheme{#t} on success. Returns \scheme{#f} on failure.
(define (regexp-match? rx str . o) (define (regexp-matches? rx str . o)
(and (apply regexp-match rx str o) #t)) (and (apply regexp-matches rx str o) #t))
;;> Search for the given regexp or SRE within string and return ;;> Search for the given regexp or SRE within string and return
;;> the match data on success. Returns \scheme{#f} on failure. ;;> the match data on success. Returns \scheme{#f} on failure.
@ -803,7 +803,7 @@
(cond (cond
((and (string-cursor<? i end) (regexp-run-offsets #t rx str i end)) ((and (string-cursor<? i end) (regexp-run-offsets #t rx str i end))
=> (lambda (md) => (lambda (md)
(let ((j (rx-match-ref md 1))) (let ((j (regexp-match-ref md 1)))
(lp (if (and (string-cursor=? i j) (string-cursor<? j end)) (lp (if (and (string-cursor=? i j) (string-cursor<? j end))
(string-cursor-next str j) (string-cursor-next str j)
j) j)
@ -816,7 +816,7 @@
(apply regexp-fold (apply regexp-fold
rx rx
(lambda (from md str a) (lambda (from md str a)
(let ((s (rx-match-submatch md 0))) (let ((s (regexp-match-submatch md 0)))
(if (equal? s "") a (cons s a)))) (if (equal? s "") a (cons s a))))
'() '()
str str
@ -830,7 +830,7 @@
(regexp-fold (regexp-fold
rx rx
(lambda (from md str a) (lambda (from md str a)
(let ((i (rx-match-submatch-start md 0))) (let ((i (regexp-match-submatch-start md 0)))
(if (< from i) (cons (substring str from i) a) a))) (if (< from i) (cons (substring str from i) a) a)))
'() '()
str str
@ -848,11 +848,11 @@
(cons (cons
(substring-cursor str (substring-cursor str
(string-index->offset str start) (string-index->offset str start)
(rx-match-submatch-start m 0)) (regexp-match-submatch-start m 0))
(append (append
(reverse (regexp-apply-match m str subst)) (reverse (regexp-apply-match m str subst))
(list (substring-cursor str (list (substring-cursor str
(rx-match-submatch-end m 0) (regexp-match-submatch-end m 0)
(string-index->offset str end)))))) (string-index->offset str end))))))
str))) str)))
@ -860,7 +860,7 @@
(regexp-fold (regexp-fold
rx rx
(lambda (i m str acc) (lambda (i m str acc)
(let ((m-start (rx-match-submatch-start m 0))) (let ((m-start (regexp-match-submatch-start m 0)))
(append (regexp-apply-match m str subst) (append (regexp-apply-match m str subst)
(if (>= i m-start) (if (>= i m-start)
acc acc
@ -882,24 +882,24 @@
((not (pair? ls)) ((not (pair? ls))
(lp (list ls) res)) (lp (list ls) res))
((integer? (car ls)) ((integer? (car ls))
(lp (cdr ls) (cons (or (rx-match-submatch m (car ls)) "") res))) (lp (cdr ls) (cons (or (regexp-match-submatch m (car ls)) "") res)))
((procedure? (car ls)) ((procedure? (car ls))
(lp (cdr ls) (cons ((car ls) m) res))) (lp (cdr ls) (cons ((car ls) m) res)))
((symbol? (car ls)) ((symbol? (car ls))
(case (car ls) (case (car ls)
((pre) ((pre)
(lp (cdr ls) (lp (cdr ls)
(cons (substring-cursor str 0 (rx-match-submatch-start m 0)) (cons (substring-cursor str 0 (regexp-match-submatch-start m 0))
res))) res)))
((post) ((post)
(lp (cdr ls) (lp (cdr ls)
(cons (substring str (cons (substring str
(rx-match-submatch-end m 0) (regexp-match-submatch-end m 0)
(string-length str)) (string-length str))
res))) res)))
(else (else
(cond (cond
((assq (car ls) (rx-match-names m)) ((assq (car ls) (regexp-match-names m))
=> (lambda (x) (lp (cons (cdr x) (cdr ls)) res))) => (lambda (x) (lp (cons (cdr x) (cdr ls)) res)))
(else (else
(error "unknown match replacement" (car ls))))))) (error "unknown match replacement" (car ls)))))))

View file

@ -1,11 +1,11 @@
(define-library (chibi regexp) (define-library (chibi regexp)
(export regexp regexp? regexp-match regexp-match? regexp-search (export regexp regexp? regexp-matches regexp-matches? regexp-search
regexp-replace regexp-replace-all regexp-replace regexp-replace-all
regexp-fold regexp-extract regexp-split regexp-fold regexp-extract regexp-split
rx-match? rx-match-num-matches regexp-match? regexp-match-num-matches
rx-match-submatch rx-match-submatch/list regexp-match-submatch regexp-match-submatch/list
rx-match->list rx-match->sexp) regexp-match->list regexp-match->sexp)
(import (srfi 33) (srfi 69)) (import (srfi 33) (srfi 69))
;; Chibi's char-set library is more factored than SRFI-14. ;; Chibi's char-set library is more factored than SRFI-14.
(cond-expand (cond-expand

View file

@ -2,27 +2,27 @@
(import (chibi) (chibi regexp) (chibi regexp pcre) (import (chibi) (chibi regexp) (chibi regexp pcre)
(chibi string) (chibi io) (chibi match) (chibi test)) (chibi string) (chibi io) (chibi match) (chibi test))
(define (regexp-match->sexp rx str . o) (define (maybe-match->sexp rx str . o)
(let ((res (apply regexp-match rx str o))) (let ((res (apply regexp-matches rx str o)))
(and res (rx-match->sexp res)))) (and res (regexp-match->sexp res))))
(define-syntax test-re (define-syntax test-re
(syntax-rules () (syntax-rules ()
((test-re res rx str start end) ((test-re res rx str start end)
(test res (regexp-match->sexp rx str start end))) (test res (maybe-match->sexp rx str start end)))
((test-re res rx str start) ((test-re res rx str start)
(test-re res rx str start (string-length str))) (test-re res rx str start (string-length str)))
((test-re res rx str) ((test-re res rx str)
(test-re res rx str 0)))) (test-re res rx str 0))))
(define (regexp-search->sexp rx str . o) (define (maybe-search->sexp rx str . o)
(let ((res (apply regexp-search rx str o))) (let ((res (apply regexp-search rx str o)))
(and res (rx-match->sexp res)))) (and res (regexp-match->sexp res))))
(define-syntax test-re-search (define-syntax test-re-search
(syntax-rules () (syntax-rules ()
((test-re-search res rx str start end) ((test-re-search res rx str start end)
(test res (regexp-search->sexp rx str start end))) (test res (maybe-search->sexp rx str start end)))
((test-re-search res rx str start) ((test-re-search res rx str start)
(test-re-search res rx str start (string-length str))) (test-re-search res rx str start (string-length str)))
((test-re-search res rx str) ((test-re-search res rx str)
@ -168,7 +168,7 @@
(define (subst-matches matches input subst) (define (subst-matches matches input subst)
(define (submatch n) (define (submatch n)
(rx-match-submatch matches n)) (regexp-match-submatch matches n))
(and (and
matches matches
(call-with-output-string (call-with-output-string