Simplifying bog/eog matches.

This commit is contained in:
Alex Shinn 2013-11-30 00:41:36 +09:00
parent dc4bab73d2
commit 1abee0b788
2 changed files with 12 additions and 29 deletions

View file

@ -510,38 +510,16 @@
(char-word-constituent?
(string-cursor-ref str (string-cursor-prev str i)))))
;; match/bog: predicate for a beginning-of-grapheme position at cursor `i`
;; in `str`, where `ch` is the character the caller associates with `i`.
;;
;; NOTE(review): this is a diff hunk rendered without +/- markers, so two
;; versions of the body are interleaved below:
;;   * the long form opens with a bare `(and` followed by the bounds check
;;     and decides via a `cond` on `ch0`, the character just before `i`;
;;   * the short form opens with `(and (string-cursor<? i end)` and, when
;;     `i` is not `start`, delegates to `match/eog` at the previous cursor.
;; Both forms require `i` to be before `end` and accept `i` = `start`.
;;
;; NOTE(review): in the long form several Hangul branches re-test `ch0` in
;; their result expression (e.g. the hangul-l branch negates an `or` that
;; contains the very test that selected the branch, so it always yields #f;
;; likewise the hangul-t branch). Presumably `ch` was intended -- confirm.
(define (match/bog str i ch start end matches)
(and
(string-cursor<? i end)
(and (string-cursor<? i end)
(or (string-cursor=? i start)
(let ((ch0 (string-cursor-ref str (string-cursor-prev str i))))
(cond
((eqv? ch0 #\return)
(not (eqv? ch #\newline)))
((char-set-contains? char-set:control ch0))
((char-set-contains? char-set:regional-indicator ch0)
(not (char-set-contains? char-set:regional-indicator ch)))
((char-set-contains? char-set:hangul-l ch0)
(not (or (char-set-contains? char-set:hangul-l ch0)
(char-set-contains? char-set:hangul-lv ch0)
(char-set-contains? char-set:hangul-lvt ch0)
(char-set-contains? char-set:hangul-v ch0)
(char-set-contains? char-set:hangul-t ch0))))
((or (char-set-contains? char-set:hangul-lv ch0)
(char-set-contains? char-set:hangul-v ch0))
(not (or (char-set-contains? char-set:hangul-v ch0)
(char-set-contains? char-set:hangul-t ch0))))
((char-set-contains? char-set:hangul-t ch0)
(not (char-set-contains? char-set:hangul-t ch0)))
((char-set-contains? char-set:hangul-lvt ch0)
(not (char-set-contains? char-set:hangul-t ch0)))
(else
(not (char-set-contains? char-set:extend-or-spacing-mark ch))))))))
(match/eog str (string-cursor-prev str i) ch start end matches))))
;; match/eog: predicate for an end-of-grapheme position at cursor `i` in
;; `str`. Requires `i` to be strictly after `start`, and succeeds
;; immediately when `i` is at or past `end`.
;;
;; NOTE(review): this is a diff hunk rendered without +/- markers, so two
;; alternative tails for the `or` are interleaved below:
;;   * the `let*` form advances to the next cursor and asks `match/bog`
;;     whether a grapheme begins there;
;;   * the `let` form runs `regexp-search` with `re:grapheme` over `str`,
;;     passing `i` and `end` through `string-offset->index`, and succeeds
;;     when a match exists whose submatch-0 end is `string-cursor<=?` `i`.
;; NOTE(review): the `let` form compares the value returned by
;; `regexp-match-submatch-end` against the cursor `i`; this assumes both
;; are in the same cursor representation -- TODO confirm.
(define (match/eog str i ch start end matches)
(and (string-cursor>? i start)
(or (string-cursor>=? i end)
(let* ((i2 (string-cursor-next str i))
(ch2 (string-cursor-ref str i2)))
(match/bog str i2 ch2 start end matches)))))
(let ((m (regexp-search re:grapheme str
(string-offset->index str i)
(string-offset->index str end))))
(and m (string-cursor<=? (regexp-match-submatch-end m 0) i))))))
(define (lookup-char-set name flags)
(cond
@ -961,3 +939,5 @@
(error "unknown match replacement" (car ls)))))))
(else
(lp (cdr ls) (cons (car ls) res))))))
;; Regexp object built once from the SRE symbol 'grapheme; it is the
;; pattern passed to `regexp-search` by the end-of-grapheme matcher.
(define re:grapheme (regexp 'grapheme))

View file

@ -152,6 +152,9 @@
;; Grapheme and bog/eog cases, followed by regexp-extract / regexp-split
;; cases on a mixed letters-and-digits string.
(test-re '("한") 'grapheme "한")
(test-re '("글") 'grapheme "글")
(test-re '("한") '(: bog grapheme eog) "한")
;; Expected result is #f: presumably bog/eog must not match between the
;; literal jamo "ᄒ" and "ᆫ" inside the single syllable -- confirm against
;; how the subject string is encoded (precomposed vs. decomposed).
(test-re #f '(: "ᄒ" bog grapheme eog "ᆫ") "한")
(test '("123" "456" "789") (regexp-extract '(+ digit) "abc123def456ghi789"))
(test '("123" "456" "789") (regexp-extract '(* digit) "abc123def456ghi789"))
(test '("abc" "def" "ghi") (regexp-split '(+ digit) "abc123def456ghi789"))