allow zero-or-more (*) combining characters in the 'grapheme SRE rather than one-or-more (+)

This commit is contained in:
Alex Shinn 2018-09-18 23:21:19 +08:00
parent 7830ca1654
commit a7584ae647
2 changed files with 8 additions and 1 deletion

View file

@@ -205,6 +205,13 @@
(test-re '("한") '(: bog grapheme eog) "한") (test-re '("한") '(: bog grapheme eog) "한")
(test-re #f '(: "ᄒ" bog grapheme eog "ᆫ") "한") (test-re #f '(: "ᄒ" bog grapheme eog "ᆫ") "한")
(test '("a" "b" "c") (regexp-extract 'grapheme "abc"))
(test '("a" " " "b" " " "c") (regexp-extract 'grapheme "a b c"))
(test '("a" "\n" "b" "\r\n" "c") (regexp-extract 'grapheme "a\nb\r\nc"))
(test '("a\x0300;" "b\x0301;\x0302;" "c\x0303;\x0304;\x0305;")
(regexp-extract 'grapheme "a\x0300;b\x0301;\x0302;c\x0303;\x0304;\x0305;"))
(test '("한" "글") (regexp-extract 'grapheme "한글"))
(test '("123" "456" "789") (regexp-extract '(+ digit) "abc123def456ghi789")) (test '("123" "456" "789") (regexp-extract '(+ digit) "abc123def456ghi789"))
(test '("123" "456" "789") (regexp-extract '(* digit) "abc123def456ghi789")) (test '("123" "456" "789") (regexp-extract '(* digit) "abc123def456ghi789"))
(test '("abc" "def" "ghi" "") (regexp-split '(+ digit) "abc123def456ghi789")) (test '("abc" "def" "ghi" "") (regexp-split '(+ digit) "abc123def456ghi789"))

View file

@@ -783,7 +783,7 @@
(+ ,char-set:regional-indicator) (+ ,char-set:regional-indicator)
(: "\r\n") (: "\r\n")
(: (~ control ("\r\n")) (: (~ control ("\r\n"))
(+ ,char-set:extend-or-spacing-mark)) (* ,char-set:extend-or-spacing-mark))
control) control)
flags flags
next)) next))