mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-07-08 13:37:35 +02:00
Adding support for w/ascii.
This commit is contained in:
parent
5fe299d4fc
commit
80c2f3f02c
3 changed files with 89 additions and 27 deletions
|
@ -45,6 +45,7 @@
|
||||||
|
|
||||||
(define ~none 0)
|
(define ~none 0)
|
||||||
(define ~ci? 1)
|
(define ~ci? 1)
|
||||||
|
(define ~ascii? 2)
|
||||||
|
|
||||||
(define (flag-set? flags i) (= i (bitwise-and flags i)))
|
(define (flag-set? flags i) (= i (bitwise-and flags i)))
|
||||||
(define (flag-join a b) (if b (bitwise-ior a b) a))
|
(define (flag-join a b) (if b (bitwise-ior a b) a))
|
||||||
|
@ -194,7 +195,8 @@
|
||||||
((and (pair? md) (string-cursor? (car md)) (string-cursor? (cdr md)))
|
((and (pair? md) (string-cursor? (car md)) (string-cursor? (cdr md)))
|
||||||
(substring-cursor str (car md) (cdr md)))
|
(substring-cursor str (car md) (cdr md)))
|
||||||
((regexp-match? md)
|
((regexp-match? md)
|
||||||
(regexp-match-convert recurse? (regexp-match-matches md) (regexp-match-string md)))
|
(regexp-match-convert
|
||||||
|
recurse? (regexp-match-matches md) (regexp-match-string md)))
|
||||||
(else
|
(else
|
||||||
md)))
|
md)))
|
||||||
|
|
||||||
|
@ -245,15 +247,19 @@
|
||||||
(cond
|
(cond
|
||||||
((>= i end)
|
((>= i end)
|
||||||
#t)
|
#t)
|
||||||
((and (eqv? (regexp-match-ref m1 i) (regexp-match-ref m2 i))
|
((and (eqv? (regexp-match-ref m1 i)
|
||||||
(eqv? (regexp-match-ref m1 (+ i 1)) (regexp-match-ref m2 (+ i 1))))
|
(regexp-match-ref m2 i))
|
||||||
|
(eqv? (regexp-match-ref m1 (+ i 1))
|
||||||
|
(regexp-match-ref m2 (+ i 1))))
|
||||||
(lp (+ i 2)))
|
(lp (+ i 2)))
|
||||||
((and (string-cursor? (regexp-match-ref m2 i))
|
((and (string-cursor? (regexp-match-ref m2 i))
|
||||||
(string-cursor? (regexp-match-ref m2 (+ i 1)))
|
(string-cursor? (regexp-match-ref m2 (+ i 1)))
|
||||||
(or (not (string-cursor? (regexp-match-ref m1 i)))
|
(or (not (string-cursor? (regexp-match-ref m1 i)))
|
||||||
(not (string-cursor? (regexp-match-ref m1 (+ i 1))))
|
(not (string-cursor? (regexp-match-ref m1 (+ i 1))))
|
||||||
(string-cursor<? (regexp-match-ref m2 i) (regexp-match-ref m1 i))
|
(string-cursor<? (regexp-match-ref m2 i)
|
||||||
(and (string-cursor=? (regexp-match-ref m2 i) (regexp-match-ref m1 i))
|
(regexp-match-ref m1 i))
|
||||||
|
(and (string-cursor=? (regexp-match-ref m2 i)
|
||||||
|
(regexp-match-ref m1 i))
|
||||||
(string-cursor>? (regexp-match-ref m2 (+ i 1))
|
(string-cursor>? (regexp-match-ref m2 (+ i 1))
|
||||||
(regexp-match-ref m1 (+ i 1))))))
|
(regexp-match-ref m1 (+ i 1))))))
|
||||||
#f)
|
#f)
|
||||||
|
@ -469,6 +475,8 @@
|
||||||
(define char-set:control (ucs-range->char-set 0 32))
|
(define char-set:control (ucs-range->char-set 0 32))
|
||||||
(define char-set:word-constituent
|
(define char-set:word-constituent
|
||||||
(char-set-union char-set:letter char-set:digit (char-set #\_)))
|
(char-set-union char-set:letter char-set:digit (char-set #\_)))
|
||||||
|
(define %char-set:word-constituent
|
||||||
|
(char-set-union %char-set:letter %char-set:digit (char-set #\_)))
|
||||||
(define (char-word-constituent? ch)
|
(define (char-word-constituent? ch)
|
||||||
(char-set-contains? char-set:word-constituent ch))
|
(char-set-contains? char-set:word-constituent ch))
|
||||||
|
|
||||||
|
@ -529,6 +537,28 @@
|
||||||
(match/bog str i2 ch2 start end matches)))))
|
(match/bog str i2 ch2 start end matches)))))
|
||||||
|
|
||||||
(define (lookup-char-set name flags)
|
(define (lookup-char-set name flags)
|
||||||
|
(cond
|
||||||
|
((flag-set? flags ~ascii?)
|
||||||
|
(case name
|
||||||
|
((any) char-set:full)
|
||||||
|
((nonl) char-set:nonl)
|
||||||
|
((lower-case lower)
|
||||||
|
(if (flag-set? flags ~ci?) %char-set:letter %char-set:lower-case))
|
||||||
|
((upper-case upper)
|
||||||
|
(if (flag-set? flags ~ci?) %char-set:letter %char-set:upper-case))
|
||||||
|
((alphabetic alpha) %char-set:letter)
|
||||||
|
((numeric num digit) %char-set:digit)
|
||||||
|
((alphanumeric alphanum alnum) %char-set:letter+digit)
|
||||||
|
((punctuation punct) %char-set:punctuation)
|
||||||
|
((graphic graph) %char-set:graphic)
|
||||||
|
((word-constituent) %char-set:word-constituent)
|
||||||
|
((whitespace white space) %char-set:whitespace)
|
||||||
|
((printing print) %char-set:printing)
|
||||||
|
((control cntrl) %char-set:iso-control)
|
||||||
|
((hex-digit xdigit hex) char-set:hex-digit)
|
||||||
|
((ascii) char-set:ascii)
|
||||||
|
(else #f)))
|
||||||
|
(else
|
||||||
(case name
|
(case name
|
||||||
((any) char-set:full)
|
((any) char-set:full)
|
||||||
((nonl) char-set:nonl)
|
((nonl) char-set:nonl)
|
||||||
|
@ -546,9 +576,8 @@
|
||||||
((printing print) char-set:printing)
|
((printing print) char-set:printing)
|
||||||
((control cntrl) char-set:control)
|
((control cntrl) char-set:control)
|
||||||
((hex-digit xdigit hex) char-set:hex-digit)
|
((hex-digit xdigit hex) char-set:hex-digit)
|
||||||
((blank) char-set:blank)
|
|
||||||
((ascii) char-set:ascii)
|
((ascii) char-set:ascii)
|
||||||
(else #f)))
|
(else #f)))))
|
||||||
|
|
||||||
(define (sre-flatten-ranges orig-ls)
|
(define (sre-flatten-ranges orig-ls)
|
||||||
(let lp ((ls orig-ls) (res '()))
|
(let lp ((ls orig-ls) (res '()))
|
||||||
|
@ -777,6 +806,10 @@
|
||||||
(->rx `(: ,@(cdr sre)) (flag-clear flags ~ci?) next))
|
(->rx `(: ,@(cdr sre)) (flag-clear flags ~ci?) next))
|
||||||
((w/nocase)
|
((w/nocase)
|
||||||
(->rx `(: ,@(cdr sre)) (flag-join flags ~ci?) next))
|
(->rx `(: ,@(cdr sre)) (flag-join flags ~ci?) next))
|
||||||
|
((w/unicode)
|
||||||
|
(->rx `(: ,@(cdr sre)) (flag-clear flags ~ascii?) next))
|
||||||
|
((w/ascii)
|
||||||
|
(->rx `(: ,@(cdr sre)) (flag-join flags ~ascii?) next))
|
||||||
(else
|
(else
|
||||||
(if (string? (car sre))
|
(if (string? (car sre))
|
||||||
(make-char-state (sre->char-set sre flags) ~none next)
|
(make-char-state (sre->char-set sre flags) ~none next)
|
||||||
|
|
|
@ -10,9 +10,32 @@
|
||||||
;; Chibi's char-set library is more factored than SRFI-14.
|
;; Chibi's char-set library is more factored than SRFI-14.
|
||||||
(cond-expand
|
(cond-expand
|
||||||
(chibi
|
(chibi
|
||||||
(import (chibi) (srfi 9) (chibi char-set) (chibi char-set full)))
|
(import (chibi) (srfi 9) (chibi char-set)
|
||||||
|
(chibi char-set full)
|
||||||
|
(prefix (chibi char-set ascii) %)))
|
||||||
(else
|
(else
|
||||||
(import (scheme base) (srfi 14))))
|
(import (scheme base) (srfi 14))
|
||||||
|
(begin
|
||||||
|
(define %char-set:letter
|
||||||
|
(char-set-intersection char-set:ascii char-set:letter))
|
||||||
|
(define %char-set:lower-case
|
||||||
|
(char-set-intersection char-set:ascii char-set:lower-case))
|
||||||
|
(define %char-set:upper-case
|
||||||
|
(char-set-intersection char-set:ascii char-set:upper-case))
|
||||||
|
(define %char-set:digit
|
||||||
|
(char-set-intersection char-set:ascii char-set:digit))
|
||||||
|
(define %char-set:letter+digit
|
||||||
|
(char-set-intersection char-set:ascii char-set:letter+digit))
|
||||||
|
(define %char-set:punctuation
|
||||||
|
(char-set-intersection char-set:ascii char-set:punctuation))
|
||||||
|
(define %char-set:graphic
|
||||||
|
(char-set-intersection char-set:ascii char-set:graphic))
|
||||||
|
(define %char-set:whitespace
|
||||||
|
(char-set-intersection char-set:ascii char-set:whitespace))
|
||||||
|
(define %char-set:printing
|
||||||
|
(char-set-intersection char-set:ascii char-set:printing))
|
||||||
|
(define %char-set:iso-control
|
||||||
|
(char-set-intersection char-set:ascii char-set:iso-control)))))
|
||||||
(import (chibi char-set boundary))
|
(import (chibi char-set boundary))
|
||||||
;; Use string-cursors where available.
|
;; Use string-cursors where available.
|
||||||
(begin
|
(begin
|
||||||
|
|
|
@ -143,6 +143,12 @@
|
||||||
(test-re #f '(* lower) "abcD")
|
(test-re #f '(* lower) "abcD")
|
||||||
(test-re '("abcD") '(w/nocase (* lower)) "abcD")
|
(test-re '("abcD") '(w/nocase (* lower)) "abcD")
|
||||||
|
|
||||||
|
(test-re '("кириллица") '(* alpha) "кириллица")
|
||||||
|
(test-re #f '(w/ascii (* alpha)) "кириллица")
|
||||||
|
|
||||||
|
(test-re '("12345") '(* digit) "12345")
|
||||||
|
(test-re #f '(w/ascii (* digit)) "12345")
|
||||||
|
|
||||||
(test-re '("한") 'grapheme "한")
|
(test-re '("한") 'grapheme "한")
|
||||||
(test-re '("글") 'grapheme "글")
|
(test-re '("글") 'grapheme "글")
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue