Fixing SRFI 115 bug reports from Will Clinger.

Adding regexp-partition.
This commit is contained in:
Alex Shinn 2015-03-02 23:47:32 +09:00
parent 327685359c
commit 390308fe0b
2 changed files with 40 additions and 19 deletions

View file

@ -60,13 +60,12 @@
(define (flag-clear a b) (bitwise-and a (bitwise-not b)))
(define (char-set-ci cset)
(let ((res (char-set)))
(char-set-for-each
(lambda (ch)
(char-set-adjoin! res (char-upcase ch))
(char-set-adjoin! res (char-downcase ch)))
cset)
res))
(char-set-fold
(lambda (ch res)
(char-set-adjoin! (char-set-adjoin! res (char-upcase ch))
(char-downcase ch)))
(char-set)
cset))
(define (make-char-state ch flags next)
(if (flag-set? flags ~ci?)
@ -557,6 +556,7 @@
((numeric num digit) %char-set:digit)
((alphanumeric alphanum alnum) %char-set:letter+digit)
((punctuation punct) %char-set:punctuation)
((symbol) %char-set:symbol)
((graphic graph) %char-set:graphic)
((word-constituent) %char-set:word-constituent)
((whitespace white space) %char-set:whitespace)
@ -579,6 +579,7 @@
((numeric num digit) char-set:digit)
((alphanumeric alphanum alnum) char-set:letter+digit)
((punctuation punct) char-set:punctuation)
((symbol) char-set:symbol)
((graphic graph) char-set:graphic)
((word-constituent) char-set:word-constituent)
((whitespace white space) char-set:whitespace)
@ -613,7 +614,8 @@
(or (string? (car sre))
(memq (car sre)
'(char-set / char-range & and ~ complement - difference))
(and (memq (car sre) '(|\|| or))
(and (memq (car sre)
'(|\|| or w/case w/nocase w/unicode w/ascii))
(every char-set-sre? (cdr sre)))))))
(define (valid-sre? x)
@ -622,32 +624,37 @@
(define (sre->char-set sre . o)
(let ((flags (if (pair? o) (car o) ~none)))
(define (->cs sre) (sre->char-set sre flags))
(define (maybe-ci sre)
(if (flag-set? flags ~ci?) (char-set-ci sre) sre))
(cond
((lookup-char-set sre flags))
((char-set? sre) (char-set-ci sre))
((char? sre) (char-set-ci (char-set sre)))
((char-set? sre) (maybe-ci sre))
((char? sre) (maybe-ci (char-set sre)))
((string? sre)
(if (= 1 (string-length sre))
(string->char-set sre)
(maybe-ci (string->char-set sre))
(error "only single char strings can be char-sets")))
((pair? sre)
(if (string? (car sre))
(string->char-set (car sre))
(maybe-ci (string->char-set (car sre)))
(case (car sre)
((char-set) (string->char-set (cadr sre)))
((char-set) (maybe-ci (string->char-set (cadr sre))))
((/ char-range)
(->cs
`(or ,@(map (lambda (x)
(char-set-ci
(ucs-range->char-set
(char->integer (car x))
(+ 1 (char->integer (cdr x))))))
(+ 1 (char->integer (cdr x)))))
(sre-flatten-ranges (cdr sre))))))
((& and) (apply char-set-intersection (map ->cs (cdr sre))))
((|\|| or) (apply char-set-union (map ->cs (cdr sre))))
((~ complement) (char-set-complement (->cs `(or ,@(cdr sre)))))
((- difference) (char-set-difference (->cs (cadr sre))
(->cs `(or ,@(cddr sre)))))
((w/case) (sre->char-set (cadr sre) (flag-clear flags ~ci?)))
((w/nocase) (sre->char-set (cadr sre) (flag-join flags ~ci?)))
((w/ascii) (sre->char-set (cadr sre) (flag-join flags ~ascii?)))
((w/unicode) (sre->char-set (cadr sre) (flag-clear flags ~ascii?)))
(else (error "invalid sre char-set" sre)))))
(else (error "invalid sre char-set" sre)))))
@ -932,6 +939,18 @@
start
end)))
(define (regexp-partition rx str . o)
(let ((start (if (pair? o) (car o) 0))
(end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
(define (kons from md str a)
(let ((left (substring str from (regexp-match-submatch-start md 0))))
(cons (regexp-match-submatch md 0) (cons left a))))
(define (final from md str a)
(if (or (< from end) (null? a))
(cons (substring str from end) a)
a))
(reverse (regexp-fold rx kons '() str final start end))))
(define (regexp-replace rx str subst . o)
(let* ((start (if (and (pair? o) (car o)) (car o) 0))
(o (if (pair? o) (cdr o) '()))

View file

@ -3,7 +3,7 @@
(export regexp regexp? valid-sre? rx regexp->sre char-set->sre
regexp-matches regexp-matches? regexp-search
regexp-replace regexp-replace-all
regexp-fold regexp-extract regexp-split
regexp-fold regexp-extract regexp-split regexp-partition
regexp-match? regexp-match-count
regexp-match-submatch regexp-match-submatch/list
regexp-match-submatch-start regexp-match-submatch-end
@ -43,6 +43,8 @@
(char-set-intersection char-set:ascii char-set:letter+digit))
(define %char-set:punctuation
(char-set-intersection char-set:ascii char-set:punctuation))
(define %char-set:punctuation
(char-set-intersection char-set:ascii char-set:symbol))
(define %char-set:graphic
(char-set-intersection char-set:ascii char-set:graphic))
(define %char-set:whitespace
@ -69,7 +71,7 @@
(define (string-start-arg s o)
(if (pair? o) (string-index->offset s (car o)) 0))
(define (string-end-arg s o)
(if (pair? o) (string-index->offset (car o)) (string-length s)))
(if (pair? o) (string-index->offset s (car o)) (string-length s)))
(define string-cursor=? =)
(define string-cursor<? <)
(define string-cursor<=? <=)