add proper grammar support to srfi 130 string-split

This commit is contained in:
Alex Shinn 2021-04-02 13:51:02 +09:00
parent d80589144d
commit 8b27ce9726
2 changed files with 137 additions and 8 deletions

View file

@ -252,6 +252,8 @@
(let* ((delim-len (string-length delim))
(grammar (if (pair? o) (car o) 'infix))
(o (if (pair? o) (cdr o) '()))
;; default to an arbitrary limit guaranteed to be more than
;; the maximum number of matches
(limit (or (and (pair? o) (car o)) (string-length str)))
(o (if (pair? o) (cdr o) '()))
(start (cursor-arg str
@ -261,17 +263,37 @@
(string-cursor-end str)))))
(if (and (eq? grammar 'strict-infix) (string-cursor>=? start end))
(error "string-split 'strict-infix called on an empty string"))
(let lp ((sc start) (i 0) (res '()))
(let lp ((sc start) (found? #f) (i 1) (res '()))
(cond
((string-cursor>=? sc end)
(reverse res))
((and (< i limit) (string-contains str delim sc end))
(if (and found? (not (eq? 'suffix grammar)))
(reverse (cons "" res))
(reverse res)))
((string-contains str delim sc end)
=> (lambda (sc2)
(lp (string-cursor-forward str sc2 delim-len)
(+ i 1)
(cons (substring-cursor str sc sc2) res))))
(let ((sc3 (string-cursor-forward str sc2 delim-len)))
(cond
((>= i limit)
(let* ((res (if (equal? "" delim)
res
(cons (substring-cursor str sc sc2) res)))
(res (if (and (string-cursor=? sc3 end)
(eq? 'suffix grammar))
res
(cons (substring-cursor str sc3 end) res))))
(lp end #f i res)))
((equal? "" delim)
(lp (string-cursor-forward str sc2 1)
#f
(+ i 1)
(cons (string (string-cursor-ref str sc2)) res)))
((and (string-cursor=? sc2 start) (eq? 'prefix grammar))
(lp sc3 #t (+ i 1) res))
(else
(lp sc3 #t (+ i 1)
(cons (substring-cursor str sc sc2) res)))))))
(else
(lp end i (cons (substring-cursor str sc end) res)))))))
(lp end #f i (cons (substring-cursor str sc end) res)))))))
(define (string-filter pred str . o)
(let ((out (open-output-string)))

View file

@ -343,6 +343,8 @@
(test '("foo" "bar" "baz")
(string-split "foo:bar:baz" ":"))
(test '("foo" "bar" "baz" "")
(string-split "foo:bar:baz:" ":"))
(test '("foo" "bar" "baz")
(string-split "foo:bar:baz:" ":" 'suffix))
(test '("foo" "bar:baz:")
@ -352,8 +354,113 @@
(test '() (string-split "" ":"))
(test '() (string-split "" ":" 'suffix))
(test '("") (string-split ":" ":" 'suffix))
(test '("foo" "bar" "baz")
(string-split ":foo:bar:baz" ":" 'prefix))
;;; Regression tests: check that reported bugs have been fixed
;; from SRFI 130 test suite
(test '() (string-split "" ""))
(test '("a" "b" "c") (string-split "abc" ""))
(test '("too" "" "much" "" "data")
(string-split "too much data" " "))
(test '("" "there" "ya" "go" "")
(string-split "***there***ya***go***" "***"))
(test '() (string-split "" "" 'infix))
(test '("a" "b" "c") (string-split "abc" "" 'infix))
(test '("too" "" "much" "" "data")
(string-split "too much data" " " 'infix))
(test '("" "there" "ya" "go" "")
(string-split "***there***ya***go***" "***" 'infix))
(test-error (string-split "" "" 'strict-infix))
(test '("a" "b" "c") (string-split "abc" "" 'strict-infix))
(test '("too" "" "much" "" "data")
(string-split "too much data" " " 'strict-infix))
(test '("" "there" "ya" "go" "")
(string-split "***there***ya***go***" "***" 'strict-infix))
(test '() (string-split "" "" 'prefix))
(test '("a" "b" "c") (string-split "abc" "" 'prefix))
(test '("too" "" "much" "" "data")
(string-split "too much data" " " 'prefix))
(test '("there" "ya" "go" "")
(string-split "***there***ya***go***" "***" 'prefix))
(test '() (string-split "" "" 'suffix))
(test '("a" "b" "c") (string-split "abc" "" 'suffix))
(test '("too" "" "much" "" "data")
(string-split "too much data" " " 'suffix))
(test '("" "there" "ya" "go")
(string-split "***there***ya***go***" "***" 'suffix))
(test '() (string-split "" "" 'infix #f))
(test '("a" "b" "c") (string-split "abc" "" 'infix #f))
(test '("too" "" "much" "" "data")
(string-split "too much data" " " 'infix #f))
(test '("" "there" "ya" "go" "")
(string-split "***there***ya***go***" "***" 'infix #f))
(test '("a" "b" "c") (string-split "abc" "" 'strict-infix #f))
(test '("too" "" "much" "" "data")
(string-split "too much data" " " 'strict-infix #f))
(test '("" "there" "ya" "go" "")
(string-split "***there***ya***go***" "***" 'strict-infix #f))
(test '() (string-split "" "" 'prefix #f))
(test '("a" "b" "c") (string-split "abc" "" 'prefix #f))
(test '("too" "" "much" "" "data")
(string-split "too much data" " " 'prefix #f))
(test '("there" "ya" "go" "")
(string-split "***there***ya***go***" "***" 'prefix #f))
(test '() (string-split "" "" 'suffix #f))
(test '("a" "b" "c") (string-split "abc" "" 'suffix #f))
(test '("too" "" "much" "" "data")
(string-split "too much data" " " 'suffix #f))
(test '("" "there" "ya" "go")
(string-split "***there***ya***go***" "***" 'suffix #f))
(test-error (string-split "" "" 'strict-infix 3))
(test '("a" "b" "c") (string-split "abc" "" 'strict-infix 3))
(test '("too" "" "much" " data")
(string-split "too much data" " " 'strict-infix 3))
(test '("" "there" "ya" "go***")
(string-split "***there***ya***go***" "***" 'strict-infix 3))
(test '() (string-split "" "" 'prefix 3))
(test '("a" "b" "c") (string-split "abc" "" 'prefix 3))
(test '("too" "" "much" " data")
(string-split "too much data" " " 'prefix 3))
(test '("there" "ya" "go***")
(string-split "***there***ya***go***" "***" 'prefix 3))
(test '() (string-split "" "" 'suffix 3))
(test '("a" "b" "c") (string-split "abc" "" 'suffix 3))
(test '("too" "" "much" " data")
(string-split "too much data" " " 'suffix 3))
(test '("" "there" "ya" "go***")
(string-split "***there***ya***go***" "***" 'suffix 3))
(test-error (string-split "" "" 'strict-infix 3 0))
(test '("b" "c") (string-split "abc" "" 'strict-infix 3 1))
(test '("oo" "" "much" " data")
(string-split "too much data" " " 'strict-infix 3 1))
(test '("**there" "ya" "go" "")
(string-split "***there***ya***go***" "***" 'strict-infix 3 1))
(test '() (string-split "" "" 'prefix 3 0))
(test '("b" "c") (string-split "abc" "" 'prefix 3 1))
(test '("oo" "" "much" " data")
(string-split "too much data" " " 'prefix 3 1))
(test '("**there" "ya" "go" "")
(string-split "***there***ya***go***" "***" 'prefix 3 1))
(test '() (string-split "" "" 'suffix 3 0))
(test '("b" "c") (string-split "abc" "" 'suffix 3 1))
(test '("oo" "" "much" " data")
(string-split "too much data" " " 'suffix 3 1))
(test '("**there" "ya" "go")
(string-split "***there***ya***go***" "***" 'suffix 3 1))
(test-error (string-split "" "" 'strict-infix 3 0 0))
(test '("b") (string-split "abc" "" 'strict-infix 3 1 2))
(test '("oo" "" "much" " ")
(string-split "too much data" " " 'strict-infix 3 1 11))
(test '() (string-split "" "" 'prefix 3 0 0))
(test '("b") (string-split "abc" "" 'prefix 3 1 2))
(test '("oo" "" "much" " ")
(string-split "too much data" " " 'prefix 3 1 11))
(test '() (string-split "" "" 'suffix 3 0 0))
(test '("b") (string-split "abc" "" 'suffix 3 1 2))
(test '("oo" "" "much" " ")
(string-split "too much data" " " 'suffix 3 1 11))
;;; Regression tests: check that reported bugs have been fixed
;; From: Matthias Radestock <matthias@sorted.org>
;; Date: Wed, 10 Dec 2003 21:05:22 +0100