From 614efb4c8b1727395bc0e72b749d048db30b5716 Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Mon, 19 May 2014 21:59:10 +0900 Subject: [PATCH] Adding w/nocapture. --- lib/chibi/regexp.scm | 61 +++++++++++++++++++++++++++--------------- tests/regexp-tests.scm | 12 +++++++++ 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/lib/chibi/regexp.scm b/lib/chibi/regexp.scm index f307b5da..620bcf0e 100644 --- a/lib/chibi/regexp.scm +++ b/lib/chibi/regexp.scm @@ -53,6 +53,7 @@ (define ~none 0) (define ~ci? 1) (define ~ascii? 2) +(define ~nocapture? 4) (define (flag-set? flags i) (= i (bitwise-and flags i))) (define (flag-join a b) (if b (bitwise-ior a b) a)) @@ -803,35 +804,51 @@ ((-> => submatch-named) ;; Named submatches just record the name for the current ;; match and rewrite as a non-named submatch. - (set! match-names - (cons (cons (cadr sre) (+ 1 current-match)) match-names)) - (->rx (cons 'submatch (cddr sre)) flags next)) + (cond + ((flag-set? flags ~nocapture?) + (->rx (cons 'seq (cddr sre)) flags next)) + (else + (set! match-names + (cons (cons (cadr sre) (+ 1 current-match)) match-names)) + (->rx (cons 'submatch (cddr sre)) flags next)))) ((*-> *=> submatch-named-list) - (set! match-names (cons (cons (cadr sre) current-match) match-names)) - (->rx (cons 'submatch-list (cddr sre)) flags next)) + (cond + ((flag-set? flags ~nocapture?) + (->rx (cons 'seq (cddr sre)) flags next)) + (else + (set! match-names (cons (cons (cadr sre) current-match) match-names)) + (->rx (cons 'submatch-list (cddr sre)) flags next)))) (($ submatch) ;; A submatch wraps next with an epsilon transition before ;; next, setting the start and end index on the result and ;; wrapped next respectively. - (let ((num current-match) - (index current-index)) - (set! current-match (+ current-match 1)) - (set! current-index (+ current-index 2)) - (set! match-rules `((,index . ,(+ index 1)) ,@match-rules)) - (make-submatch-state (cons 'seq (cdr sre)) flags next index))) + (cond + ((flag-set? flags ~nocapture?) + (->rx (cons 'seq (cdr sre)) flags next)) + (else + (let ((num current-match) + (index current-index)) + (set! current-match (+ current-match 1)) + (set! current-index (+ current-index 2)) + (set! match-rules `((,index . ,(+ index 1)) ,@match-rules)) + (make-submatch-state (cons 'seq (cdr sre)) flags next index))))) ((*$ submatch-list) ;; A submatch-list wraps a range of submatch results into a ;; single match value. - (let* ((num current-match) - (index current-index)) - (set! current-match (+ current-match 1)) - (set! current-index (+ current-index 1)) - (set! match-rules `(,index ,@match-rules)) - (let* ((n2 (make-epsilon-state next)) - (n1 (->rx (cons 'submatch (cdr sre)) flags n2))) - (state-match-set! n2 (list index num current-match)) - (state-match-rule-set! n2 'list) - n1))) + (cond + ((flag-set? flags ~nocapture?) + (->rx (cons 'seq (cdr sre)) flags next)) + (else + (let* ((num current-match) + (index current-index)) + (set! current-match (+ current-match 1)) + (set! current-index (+ current-index 1)) + (set! match-rules `(,index ,@match-rules)) + (let* ((n2 (make-epsilon-state next)) + (n1 (->rx (cons 'submatch (cdr sre)) flags n2))) + (state-match-set! n2 (list index num current-match)) + (state-match-rule-set! n2 'list) + n1))))) ((~ - & / complement difference and char-range char-set) (make-char-state (sre->char-set sre flags) ~none next)) ((word) @@ -852,6 +869,8 @@ (->rx `(: ,@(cdr sre)) (flag-clear flags ~ascii?) next)) ((w/ascii) (->rx `(: ,@(cdr sre)) (flag-join flags ~ascii?) next)) + ((w/nocapture) + (->rx `(: ,@(cdr sre)) (flag-join flags ~nocapture?) next)) (else (if (string? (car sre)) (make-char-state (sre->char-set sre flags) ~none next) diff --git a/tests/regexp-tests.scm b/tests/regexp-tests.scm index fea4ced2..f5e8eddc 100644 --- a/tests/regexp-tests.scm +++ b/tests/regexp-tests.scm @@ -146,6 +146,18 @@ '(: (* digit) ($ (* (/"af")))) "12345beef") +(let ((number '($ (+ digit)))) + (test '("555" "867" "5309") + (cdr + (regexp-match->list + (regexp-search `(: ,number "-" ,number "-" ,number) + "555-867-5309")))) + (test '("555" "5309") + (cdr + (regexp-match->list + (regexp-search `(: ,number "-" (w/nocapture ,number) "-" ,number) + "555-867-5309"))))) + (test-re '("12345BeeF" "BeeF") '(: (* digit) (w/nocase ($ (* (/"af"))))) "12345BeeF")