diff --git a/lib/chibi/regexp.scm b/lib/chibi/regexp.scm index c9553f0a..f307b5da 100644 --- a/lib/chibi/regexp.scm +++ b/lib/chibi/regexp.scm @@ -492,6 +492,21 @@ (char-set-union %char-set:letter %char-set:digit (char-set #\_))) (define (char-word-constituent? ch) (char-set-contains? char-set:word-constituent ch)) +(define char-set:title-case + (char-set-union + (ucs-range->char-set #x1F88 #x1F90) + (ucs-range->char-set #x1F98 #x1FA0) + (ucs-range->char-set #x1FA8 #x1FB0) + (char-set #\x01C5 #\x01C8 #\x01CB #\x01F2 #\x1FBC #\x1FCC #\x1FFC))) +(define get-char-set:cased + (let ((char-set:cased #f)) + (lambda () + (if (not char-set:cased) + (set! char-set:cased + (char-set-union char-set:upper-case + char-set:lower-case + char-set:title-case))) + char-set:cased))) (define (match/bos str i ch start end matches) (string-cursor=? i start)) @@ -537,6 +552,8 @@ (if (flag-set? flags ~ci?) %char-set:letter %char-set:lower-case)) ((upper-case upper) (if (flag-set? flags ~ci?) %char-set:letter %char-set:upper-case)) + ((title-case title) + (if (flag-set? flags ~ci?) %char-set:letter (char-set))) ((alphabetic alpha) %char-set:letter) ((numeric num digit) %char-set:digit) ((alphanumeric alphanum alnum) %char-set:letter+digit) @@ -554,9 +571,11 @@ ((any) char-set:full) ((nonl) char-set:nonl) ((lower-case lower) - (if (flag-set? flags ~ci?) char-set:letter char-set:lower-case)) + (if (flag-set? flags ~ci?) (get-char-set:cased) char-set:lower-case)) ((upper-case upper) - (if (flag-set? flags ~ci?) char-set:letter char-set:upper-case)) + (if (flag-set? flags ~ci?) (get-char-set:cased) char-set:upper-case)) + ((title-case title) + (if (flag-set? flags ~ci?) (get-char-set:cased) char-set:title-case)) ((alphabetic alpha) char-set:letter) ((numeric num digit) char-set:digit) ((alphanumeric alphanum alnum) char-set:letter+digit) diff --git a/tests/regexp-tests.scm b/tests/regexp-tests.scm index 6396c20f..fea4ced2 100644 --- a/tests/regexp-tests.scm +++ b/tests/regexp-tests.scm @@ -152,6 +152,10 @@ (test-re #f '(* lower) "abcD") (test-re '("abcD") '(w/nocase (* lower)) "abcD") +(test-re '("σζ") '(* lower) "σζ") +(test-re '("Σ") '(* upper) "Σ") +(test-re '("\x01C5;") '(* title) "\x01C5;") +(test-re '("σζ\x01C5;") '(w/nocase (* lower)) "σζ\x01C5;") (test-re '("кириллица") '(* alpha) "кириллица") (test-re #f '(w/ascii (* alpha)) "кириллица")