fixing empty match handling in regexp-split/partition

This commit is contained in:
Alex Shinn 2017-10-11 22:59:55 +09:00
parent b2cdeba142
commit 768a37c7a0
2 changed files with 30 additions and 12 deletions

View file

@ -208,11 +208,19 @@
(test '("123" "456" "789") (regexp-extract '(+ digit) "abc123def456ghi789")) (test '("123" "456" "789") (regexp-extract '(+ digit) "abc123def456ghi789"))
(test '("123" "456" "789") (regexp-extract '(* digit) "abc123def456ghi789")) (test '("123" "456" "789") (regexp-extract '(* digit) "abc123def456ghi789"))
(test '("abc" "def" "ghi" "") (regexp-split '(+ digit) "abc123def456ghi789")) (test '("abc" "def" "ghi" "") (regexp-split '(+ digit) "abc123def456ghi789"))
;; (test '("a" "b" "c" "d" "e" "f" "g" "h" "i") (test '("abc" "def" "ghi" "")
;; (regexp-split '(* digit) "abc123def456ghi789")) (regexp-split '(* digit) "abc123def456ghi789"))
(test '("a" "b") (regexp-split '(+ whitespace) "a b")) (test '("a" "b") (regexp-split '(+ whitespace) "a b"))
(test '("a" "" "b")
(regexp-split '(",;") "a,,b"))
(test '("a" "" "b" "") (test '("a" "" "b" "")
(regexp-split '(",;") "a,,b,")) (regexp-split '(",;") "a,,b,"))
(test '("")
(regexp-partition '(* digit) ""))
(test '("abc" "123" "def" "456" "ghi")
(regexp-partition '(* digit) "abc123def456ghi"))
(test '("abc" "123" "def" "456" "ghi" "789")
(regexp-partition '(* digit) "abc123def456ghi789"))
(test '("한" "글") (test '("한" "글")
(regexp-extract (regexp-extract

View file

@ -1033,12 +1033,16 @@
(regexp-fold (regexp-fold
rx rx
(lambda (from md str a) (lambda (from md str a)
(let ((i (regexp-match-submatch-start md 0))) (let ((i (regexp-match-submatch-start md 0))
(if (eqv? i 0) a (cons (substring str from i) a)))) (j (regexp-match-submatch-end md 0)))
'() (if (eqv? i j)
a
(cons j
(cons (substring str (car a) i) (cdr a))))))
(cons start '())
str str
(lambda (from md str a) (lambda (from md str a)
(reverse (cons (substring str from end) a))) (reverse (cons (substring str (car a) end) (cdr a))))
start start
end))) end)))
@ -1057,13 +1061,19 @@
(let ((start (if (pair? o) (car o) 0)) (let ((start (if (pair? o) (car o) 0))
(end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str)))) (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
(define (kons from md str a) (define (kons from md str a)
(let ((left (substring str from (regexp-match-submatch-start md 0)))) (let ((i (regexp-match-submatch-start md 0))
(cons (regexp-match-submatch md 0) (cons left a)))) (j (regexp-match-submatch-end md 0)))
(if (eqv? i j)
a
(let ((left (substring str (car a) i)))
(cons j
(cons (regexp-match-submatch md 0)
(cons left (cdr a))))))))
(define (final from md str a) (define (final from md str a)
(if (or (< from end) (null? a)) (if (or (< from end) (null? (cdr a)))
(cons (substring str from end) a) (cons (substring str (car a) end) (cdr a))
a)) (cdr a)))
(reverse (regexp-fold rx kons '() str final start end)))) (reverse (regexp-fold rx kons (cons start '()) str final start end))))
;;> Returns a new string replacing the \var{count}th match of \var{re} ;;> Returns a new string replacing the \var{count}th match of \var{re}
;;> in \var{str} with \var{subst}, where the zero-indexed \var{count} ;;> in \var{str} with \var{subst}, where the zero-indexed \var{count}