Adding support for w/ascii.

This commit is contained in:
Alex Shinn 2013-11-16 08:12:50 +09:00
parent 5fe299d4fc
commit 80c2f3f02c
3 changed files with 89 additions and 27 deletions

View file

@ -45,6 +45,7 @@
(define ~none 0) (define ~none 0)
(define ~ci? 1) (define ~ci? 1)
(define ~ascii? 2)
(define (flag-set? flags i) (= i (bitwise-and flags i))) (define (flag-set? flags i) (= i (bitwise-and flags i)))
(define (flag-join a b) (if b (bitwise-ior a b) a)) (define (flag-join a b) (if b (bitwise-ior a b) a))
@ -194,7 +195,8 @@
((and (pair? md) (string-cursor? (car md)) (string-cursor? (cdr md))) ((and (pair? md) (string-cursor? (car md)) (string-cursor? (cdr md)))
(substring-cursor str (car md) (cdr md))) (substring-cursor str (car md) (cdr md)))
((regexp-match? md) ((regexp-match? md)
(regexp-match-convert recurse? (regexp-match-matches md) (regexp-match-string md))) (regexp-match-convert
recurse? (regexp-match-matches md) (regexp-match-string md)))
(else (else
md))) md)))
@ -245,15 +247,19 @@
(cond (cond
((>= i end) ((>= i end)
#t) #t)
((and (eqv? (regexp-match-ref m1 i) (regexp-match-ref m2 i)) ((and (eqv? (regexp-match-ref m1 i)
(eqv? (regexp-match-ref m1 (+ i 1)) (regexp-match-ref m2 (+ i 1)))) (regexp-match-ref m2 i))
(eqv? (regexp-match-ref m1 (+ i 1))
(regexp-match-ref m2 (+ i 1))))
(lp (+ i 2))) (lp (+ i 2)))
((and (string-cursor? (regexp-match-ref m2 i)) ((and (string-cursor? (regexp-match-ref m2 i))
(string-cursor? (regexp-match-ref m2 (+ i 1))) (string-cursor? (regexp-match-ref m2 (+ i 1)))
(or (not (string-cursor? (regexp-match-ref m1 i))) (or (not (string-cursor? (regexp-match-ref m1 i)))
(not (string-cursor? (regexp-match-ref m1 (+ i 1)))) (not (string-cursor? (regexp-match-ref m1 (+ i 1))))
(string-cursor<? (regexp-match-ref m2 i) (regexp-match-ref m1 i)) (string-cursor<? (regexp-match-ref m2 i)
(and (string-cursor=? (regexp-match-ref m2 i) (regexp-match-ref m1 i)) (regexp-match-ref m1 i))
(and (string-cursor=? (regexp-match-ref m2 i)
(regexp-match-ref m1 i))
(string-cursor>? (regexp-match-ref m2 (+ i 1)) (string-cursor>? (regexp-match-ref m2 (+ i 1))
(regexp-match-ref m1 (+ i 1)))))) (regexp-match-ref m1 (+ i 1))))))
#f) #f)
@ -469,6 +475,8 @@
(define char-set:control (ucs-range->char-set 0 32)) (define char-set:control (ucs-range->char-set 0 32))
(define char-set:word-constituent (define char-set:word-constituent
(char-set-union char-set:letter char-set:digit (char-set #\_))) (char-set-union char-set:letter char-set:digit (char-set #\_)))
(define %char-set:word-constituent
(char-set-union %char-set:letter %char-set:digit (char-set #\_)))
(define (char-word-constituent? ch) (define (char-word-constituent? ch)
(char-set-contains? char-set:word-constituent ch)) (char-set-contains? char-set:word-constituent ch))
@ -529,26 +537,47 @@
(match/bog str i2 ch2 start end matches))))) (match/bog str i2 ch2 start end matches)))))
(define (lookup-char-set name flags) (define (lookup-char-set name flags)
(case name (cond
((any) char-set:full) ((flag-set? flags ~ascii?)
((nonl) char-set:nonl) (case name
((lower-case lower) ((any) char-set:full)
(if (flag-set? flags ~ci?) char-set:letter char-set:lower-case)) ((nonl) char-set:nonl)
((upper-case upper) ((lower-case lower)
(if (flag-set? flags ~ci?) char-set:letter char-set:upper-case)) (if (flag-set? flags ~ci?) %char-set:letter %char-set:lower-case))
((alphabetic alpha) char-set:letter) ((upper-case upper)
((numeric num digit) char-set:digit) (if (flag-set? flags ~ci?) %char-set:letter %char-set:upper-case))
((alphanumeric alphanum alnum) char-set:letter+digit) ((alphabetic alpha) %char-set:letter)
((punctuation punct) char-set:punctuation) ((numeric num digit) %char-set:digit)
((graphic graph) char-set:graphic) ((alphanumeric alphanum alnum) %char-set:letter+digit)
((word-constituent) char-set:word-constituent) ((punctuation punct) %char-set:punctuation)
((whitespace white space) char-set:whitespace) ((graphic graph) %char-set:graphic)
((printing print) char-set:printing) ((word-constituent) %char-set:word-constituent)
((control cntrl) char-set:control) ((whitespace white space) %char-set:whitespace)
((hex-digit xdigit hex) char-set:hex-digit) ((printing print) %char-set:printing)
((blank) char-set:blank) ((control cntrl) %char-set:iso-control)
((ascii) char-set:ascii) ((hex-digit xdigit hex) char-set:hex-digit)
(else #f))) ((ascii) char-set:ascii)
(else #f)))
(else
(case name
((any) char-set:full)
((nonl) char-set:nonl)
((lower-case lower)
(if (flag-set? flags ~ci?) char-set:letter char-set:lower-case))
((upper-case upper)
(if (flag-set? flags ~ci?) char-set:letter char-set:upper-case))
((alphabetic alpha) char-set:letter)
((numeric num digit) char-set:digit)
((alphanumeric alphanum alnum) char-set:letter+digit)
((punctuation punct) char-set:punctuation)
((graphic graph) char-set:graphic)
((word-constituent) char-set:word-constituent)
((whitespace white space) char-set:whitespace)
((printing print) char-set:printing)
((control cntrl) char-set:control)
((hex-digit xdigit hex) char-set:hex-digit)
((ascii) char-set:ascii)
(else #f)))))
(define (sre-flatten-ranges orig-ls) (define (sre-flatten-ranges orig-ls)
(let lp ((ls orig-ls) (res '())) (let lp ((ls orig-ls) (res '()))
@ -777,6 +806,10 @@
(->rx `(: ,@(cdr sre)) (flag-clear flags ~ci?) next)) (->rx `(: ,@(cdr sre)) (flag-clear flags ~ci?) next))
((w/nocase) ((w/nocase)
(->rx `(: ,@(cdr sre)) (flag-join flags ~ci?) next)) (->rx `(: ,@(cdr sre)) (flag-join flags ~ci?) next))
((w/unicode)
(->rx `(: ,@(cdr sre)) (flag-clear flags ~ascii?) next))
((w/ascii)
(->rx `(: ,@(cdr sre)) (flag-join flags ~ascii?) next))
(else (else
(if (string? (car sre)) (if (string? (car sre))
(make-char-state (sre->char-set sre flags) ~none next) (make-char-state (sre->char-set sre flags) ~none next)

View file

@ -10,9 +10,32 @@
;; Chibi's char-set library is more factored than SRFI-14. ;; Chibi's char-set library is more factored than SRFI-14.
(cond-expand (cond-expand
(chibi (chibi
(import (chibi) (srfi 9) (chibi char-set) (chibi char-set full))) (import (chibi) (srfi 9) (chibi char-set)
(chibi char-set full)
(prefix (chibi char-set ascii) %)))
(else (else
(import (scheme base) (srfi 14)))) (import (scheme base) (srfi 14))
(begin
(define %char-set:letter
(char-set-intersection char-set:ascii char-set:letter))
(define %char-set:lower-case
(char-set-intersection char-set:ascii char-set:lower-case))
(define %char-set:upper-case
(char-set-intersection char-set:ascii char-set:upper-case))
(define %char-set:digit
(char-set-intersection char-set:ascii char-set:digit))
(define %char-set:letter+digit
(char-set-intersection char-set:ascii char-set:letter+digit))
(define %char-set:punctuation
(char-set-intersection char-set:ascii char-set:punctuation))
(define %char-set:graphic
(char-set-intersection char-set:ascii char-set:graphic))
(define %char-set:whitespace
(char-set-intersection char-set:ascii char-set:whitespace))
(define %char-set:printing
(char-set-intersection char-set:ascii char-set:printing))
(define %char-set:iso-control
(char-set-intersection char-set:ascii char-set:iso-control)))))
(import (chibi char-set boundary)) (import (chibi char-set boundary))
;; Use string-cursors where available. ;; Use string-cursors where available.
(begin (begin

View file

@ -143,6 +143,12 @@
(test-re #f '(* lower) "abcD") (test-re #f '(* lower) "abcD")
(test-re '("abcD") '(w/nocase (* lower)) "abcD") (test-re '("abcD") '(w/nocase (* lower)) "abcD")
(test-re '("кириллица") '(* alpha) "кириллица")
(test-re #f '(w/ascii (* alpha)) "кириллица")
(test-re '("") '(* digit) "")
(test-re #f '(w/ascii (* digit)) "")
(test-re '("한") 'grapheme "한") (test-re '("한") 'grapheme "한")
(test-re '("글") 'grapheme "글") (test-re '("글") 'grapheme "글")