diff --git a/lib/chibi/regexp.scm b/lib/chibi/regexp.scm index 3ea8b84d..92008bc0 100644 --- a/lib/chibi/regexp.scm +++ b/lib/chibi/regexp.scm @@ -60,13 +60,12 @@ (define (flag-clear a b) (bitwise-and a (bitwise-not b))) (define (char-set-ci cset) - (let ((res (char-set))) - (char-set-for-each - (lambda (ch) - (char-set-adjoin! res (char-upcase ch)) - (char-set-adjoin! res (char-downcase ch))) - cset) - res)) + (char-set-fold + (lambda (ch res) + (char-set-adjoin! (char-set-adjoin! res (char-upcase ch)) + (char-downcase ch))) + (char-set) + cset)) (define (make-char-state ch flags next) (if (flag-set? flags ~ci?) @@ -557,6 +556,7 @@ ((numeric num digit) %char-set:digit) ((alphanumeric alphanum alnum) %char-set:letter+digit) ((punctuation punct) %char-set:punctuation) + ((symbol) %char-set:symbol) ((graphic graph) %char-set:graphic) ((word-constituent) %char-set:word-constituent) ((whitespace white space) %char-set:whitespace) @@ -579,6 +579,7 @@ ((numeric num digit) char-set:digit) ((alphanumeric alphanum alnum) char-set:letter+digit) ((punctuation punct) char-set:punctuation) + ((symbol) char-set:symbol) ((graphic graph) char-set:graphic) ((word-constituent) char-set:word-constituent) ((whitespace white space) char-set:whitespace) @@ -613,7 +614,8 @@ (or (string? (car sre)) (memq (car sre) '(char-set / char-range & and ~ complement - difference)) - (and (memq (car sre) '(|\|| or)) + (and (memq (car sre) + '(|\|| or w/case w/nocase w/unicode w/ascii)) (every char-set-sre? (cdr sre))))))) (define (valid-sre? x) @@ -622,32 +624,37 @@ (define (sre->char-set sre . o) (let ((flags (if (pair? o) (car o) ~none))) (define (->cs sre) (sre->char-set sre flags)) + (define (maybe-ci sre) + (if (flag-set? flags ~ci?) (char-set-ci sre) sre)) (cond ((lookup-char-set sre flags)) - ((char-set? sre) (char-set-ci sre)) - ((char? sre) (char-set-ci (char-set sre))) + ((char-set? sre) (maybe-ci sre)) + ((char? sre) (maybe-ci (char-set sre))) ((string? sre) (if (= 1 (string-length sre)) - (string->char-set sre) + (maybe-ci (string->char-set sre)) (error "only single char strings can be char-sets"))) ((pair? sre) (if (string? (car sre)) - (string->char-set (car sre)) + (maybe-ci (string->char-set (car sre))) (case (car sre) - ((char-set) (string->char-set (cadr sre))) + ((char-set) (maybe-ci (string->char-set (cadr sre)))) ((/ char-range) (->cs `(or ,@(map (lambda (x) - (char-set-ci - (ucs-range->char-set - (char->integer (car x)) - (+ 1 (char->integer (cdr x)))))) + (ucs-range->char-set + (char->integer (car x)) + (+ 1 (char->integer (cdr x))))) (sre-flatten-ranges (cdr sre)))))) ((& and) (apply char-set-intersection (map ->cs (cdr sre)))) ((|\|| or) (apply char-set-union (map ->cs (cdr sre)))) ((~ complement) (char-set-complement (->cs `(or ,@(cdr sre))))) ((- difference) (char-set-difference (->cs (cadr sre)) (->cs `(or ,@(cddr sre))))) + ((w/case) (sre->char-set (cadr sre) (flag-clear flags ~ci?))) + ((w/nocase) (sre->char-set (cadr sre) (flag-join flags ~ci?))) + ((w/ascii) (sre->char-set (cadr sre) (flag-join flags ~ascii?))) + ((w/unicode) (sre->char-set (cadr sre) (flag-clear flags ~ascii?))) (else (error "invalid sre char-set" sre))))) (else (error "invalid sre char-set" sre))))) @@ -932,6 +939,18 @@ start end))) +(define (regexp-partition rx str . o) + (let ((start (if (pair? o) (car o) 0)) + (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str)))) + (define (kons from md str a) + (let ((left (substring str from (regexp-match-submatch-start md 0)))) + (cons (regexp-match-submatch md 0) (cons left a)))) + (define (final from md str a) + (if (or (< from end) (null? a)) + (cons (substring str from end) a) + a)) + (reverse (regexp-fold rx kons '() str final start end)))) + (define (regexp-replace rx str subst . o) (let* ((start (if (and (pair? o) (car o)) (car o) 0)) (o (if (pair? o) (cdr o) '())) diff --git a/lib/chibi/regexp.sld b/lib/chibi/regexp.sld index 148d6085..d1554bd9 100644 --- a/lib/chibi/regexp.sld +++ b/lib/chibi/regexp.sld @@ -3,7 +3,7 @@ (export regexp regexp? valid-sre? rx regexp->sre char-set->sre regexp-matches regexp-matches? regexp-search regexp-replace regexp-replace-all - regexp-fold regexp-extract regexp-split + regexp-fold regexp-extract regexp-split regexp-partition regexp-match? regexp-match-count regexp-match-submatch regexp-match-submatch/list regexp-match-submatch-start regexp-match-submatch-end @@ -43,6 +43,8 @@ (char-set-intersection char-set:ascii char-set:letter+digit)) (define %char-set:punctuation (char-set-intersection char-set:ascii char-set:punctuation)) + (define %char-set:punctuation + (char-set-intersection char-set:ascii char-set:symbol)) (define %char-set:graphic (char-set-intersection char-set:ascii char-set:graphic)) (define %char-set:whitespace @@ -69,7 +71,7 @@ (define (string-start-arg s o) (if (pair? o) (string-index->offset s (car o)) 0)) (define (string-end-arg s o) - (if (pair? o) (string-index->offset (car o)) (string-length s))) + (if (pair? o) (string-index->offset s (car o)) (string-length s))) (define string-cursor=? =) (define string-cursor