mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-19 05:39:18 +02:00
Fixing SRFI 115 bug reports from Will Clinger.
Adding regexp-partition.
This commit is contained in:
parent
327685359c
commit
390308fe0b
2 changed files with 40 additions and 19 deletions
|
@ -60,13 +60,12 @@
|
|||
(define (flag-clear a b) (bitwise-and a (bitwise-not b)))
|
||||
|
||||
(define (char-set-ci cset)
|
||||
(let ((res (char-set)))
|
||||
(char-set-for-each
|
||||
(lambda (ch)
|
||||
(char-set-adjoin! res (char-upcase ch))
|
||||
(char-set-adjoin! res (char-downcase ch)))
|
||||
cset)
|
||||
res))
|
||||
(char-set-fold
|
||||
(lambda (ch res)
|
||||
(char-set-adjoin! (char-set-adjoin! res (char-upcase ch))
|
||||
(char-downcase ch)))
|
||||
(char-set)
|
||||
cset))
|
||||
|
||||
(define (make-char-state ch flags next)
|
||||
(if (flag-set? flags ~ci?)
|
||||
|
@ -557,6 +556,7 @@
|
|||
((numeric num digit) %char-set:digit)
|
||||
((alphanumeric alphanum alnum) %char-set:letter+digit)
|
||||
((punctuation punct) %char-set:punctuation)
|
||||
((symbol) %char-set:symbol)
|
||||
((graphic graph) %char-set:graphic)
|
||||
((word-constituent) %char-set:word-constituent)
|
||||
((whitespace white space) %char-set:whitespace)
|
||||
|
@ -579,6 +579,7 @@
|
|||
((numeric num digit) char-set:digit)
|
||||
((alphanumeric alphanum alnum) char-set:letter+digit)
|
||||
((punctuation punct) char-set:punctuation)
|
||||
((symbol) char-set:symbol)
|
||||
((graphic graph) char-set:graphic)
|
||||
((word-constituent) char-set:word-constituent)
|
||||
((whitespace white space) char-set:whitespace)
|
||||
|
@ -613,7 +614,8 @@
|
|||
(or (string? (car sre))
|
||||
(memq (car sre)
|
||||
'(char-set / char-range & and ~ complement - difference))
|
||||
(and (memq (car sre) '(|\|| or))
|
||||
(and (memq (car sre)
|
||||
'(|\|| or w/case w/nocase w/unicode w/ascii))
|
||||
(every char-set-sre? (cdr sre)))))))
|
||||
|
||||
(define (valid-sre? x)
|
||||
|
@ -622,32 +624,37 @@
|
|||
(define (sre->char-set sre . o)
|
||||
(let ((flags (if (pair? o) (car o) ~none)))
|
||||
(define (->cs sre) (sre->char-set sre flags))
|
||||
(define (maybe-ci sre)
|
||||
(if (flag-set? flags ~ci?) (char-set-ci sre) sre))
|
||||
(cond
|
||||
((lookup-char-set sre flags))
|
||||
((char-set? sre) (char-set-ci sre))
|
||||
((char? sre) (char-set-ci (char-set sre)))
|
||||
((char-set? sre) (maybe-ci sre))
|
||||
((char? sre) (maybe-ci (char-set sre)))
|
||||
((string? sre)
|
||||
(if (= 1 (string-length sre))
|
||||
(string->char-set sre)
|
||||
(maybe-ci (string->char-set sre))
|
||||
(error "only single char strings can be char-sets")))
|
||||
((pair? sre)
|
||||
(if (string? (car sre))
|
||||
(string->char-set (car sre))
|
||||
(maybe-ci (string->char-set (car sre)))
|
||||
(case (car sre)
|
||||
((char-set) (string->char-set (cadr sre)))
|
||||
((char-set) (maybe-ci (string->char-set (cadr sre))))
|
||||
((/ char-range)
|
||||
(->cs
|
||||
`(or ,@(map (lambda (x)
|
||||
(char-set-ci
|
||||
(ucs-range->char-set
|
||||
(char->integer (car x))
|
||||
(+ 1 (char->integer (cdr x))))))
|
||||
(ucs-range->char-set
|
||||
(char->integer (car x))
|
||||
(+ 1 (char->integer (cdr x)))))
|
||||
(sre-flatten-ranges (cdr sre))))))
|
||||
((& and) (apply char-set-intersection (map ->cs (cdr sre))))
|
||||
((|\|| or) (apply char-set-union (map ->cs (cdr sre))))
|
||||
((~ complement) (char-set-complement (->cs `(or ,@(cdr sre)))))
|
||||
((- difference) (char-set-difference (->cs (cadr sre))
|
||||
(->cs `(or ,@(cddr sre)))))
|
||||
((w/case) (sre->char-set (cadr sre) (flag-clear flags ~ci?)))
|
||||
((w/nocase) (sre->char-set (cadr sre) (flag-join flags ~ci?)))
|
||||
((w/ascii) (sre->char-set (cadr sre) (flag-join flags ~ascii?)))
|
||||
((w/unicode) (sre->char-set (cadr sre) (flag-clear flags ~ascii?)))
|
||||
(else (error "invalid sre char-set" sre)))))
|
||||
(else (error "invalid sre char-set" sre)))))
|
||||
|
||||
|
@ -932,6 +939,18 @@
|
|||
start
|
||||
end)))
|
||||
|
||||
(define (regexp-partition rx str . o)
|
||||
(let ((start (if (pair? o) (car o) 0))
|
||||
(end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
|
||||
(define (kons from md str a)
|
||||
(let ((left (substring str from (regexp-match-submatch-start md 0))))
|
||||
(cons (regexp-match-submatch md 0) (cons left a))))
|
||||
(define (final from md str a)
|
||||
(if (or (< from end) (null? a))
|
||||
(cons (substring str from end) a)
|
||||
a))
|
||||
(reverse (regexp-fold rx kons '() str final start end))))
|
||||
|
||||
(define (regexp-replace rx str subst . o)
|
||||
(let* ((start (if (and (pair? o) (car o)) (car o) 0))
|
||||
(o (if (pair? o) (cdr o) '()))
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
(export regexp regexp? valid-sre? rx regexp->sre char-set->sre
|
||||
regexp-matches regexp-matches? regexp-search
|
||||
regexp-replace regexp-replace-all
|
||||
regexp-fold regexp-extract regexp-split
|
||||
regexp-fold regexp-extract regexp-split regexp-partition
|
||||
regexp-match? regexp-match-count
|
||||
regexp-match-submatch regexp-match-submatch/list
|
||||
regexp-match-submatch-start regexp-match-submatch-end
|
||||
|
@ -43,6 +43,8 @@
|
|||
(char-set-intersection char-set:ascii char-set:letter+digit))
|
||||
(define %char-set:punctuation
|
||||
(char-set-intersection char-set:ascii char-set:punctuation))
|
||||
(define %char-set:punctuation
|
||||
(char-set-intersection char-set:ascii char-set:symbol))
|
||||
(define %char-set:graphic
|
||||
(char-set-intersection char-set:ascii char-set:graphic))
|
||||
(define %char-set:whitespace
|
||||
|
@ -69,7 +71,7 @@
|
|||
(define (string-start-arg s o)
|
||||
(if (pair? o) (string-index->offset s (car o)) 0))
|
||||
(define (string-end-arg s o)
|
||||
(if (pair? o) (string-index->offset (car o)) (string-length s)))
|
||||
(if (pair? o) (string-index->offset s (car o)) (string-length s)))
|
||||
(define string-cursor=? =)
|
||||
(define string-cursor<? <)
|
||||
(define string-cursor<=? <=)
|
||||
|
|
Loading…
Add table
Reference in a new issue