From 8b27ce97265e5028c61b2386a86a2c43c1cfba0d Mon Sep 17 00:00:00 2001
From: Alex Shinn <alexshinn@gmail.com>
Date: Fri, 2 Apr 2021 13:51:02 +0900
Subject: [PATCH] add proper grammar support to srfi 130 string-split

---
 lib/srfi/130.scm      |  36 +++++++++++---
 lib/srfi/130/test.sld | 109 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 137 insertions(+), 8 deletions(-)

diff --git a/lib/srfi/130.scm b/lib/srfi/130.scm
index 34462bfc..e528be8a 100644
--- a/lib/srfi/130.scm
+++ b/lib/srfi/130.scm
@@ -252,6 +252,8 @@
   (let* ((delim-len (string-length delim))
          (grammar (if (pair? o) (car o) 'infix))
          (o (if (pair? o) (cdr o) '()))
+         ;; default to an arbitrary limit guaranteed to be more than
+         ;; the maximum number of matches
          (limit (or (and (pair? o) (car o)) (string-length str)))
          (o (if (pair? o) (cdr o) '()))
          (start (cursor-arg str
@@ -261,17 +263,37 @@
                                   (string-cursor-end str)))))
     (if (and (eq? grammar 'strict-infix) (string-cursor>=? start end))
         (error "string-split 'strict-infix called on an empty string"))
-    (let lp ((sc start) (i 0) (res '()))
+    (let lp ((sc start) (found? #f) (i 1) (res '()))
       (cond
        ((string-cursor>=? sc end)
-        (reverse res))
-       ((and (< i limit) (string-contains str delim sc end))
+        (if (and found? (not (eq? 'suffix grammar)))
+            (reverse (cons "" res))
+            (reverse res)))
+       ((string-contains str delim sc end)
         => (lambda (sc2)
-             (lp (string-cursor-forward str sc2 delim-len)
-                 (+ i 1)
-                 (cons (substring-cursor str sc sc2) res))))
+             (let ((sc3 (string-cursor-forward str sc2 delim-len)))
+               (cond
+                ((>= i limit)
+                 (let* ((res (if (equal? "" delim)
+                                 res
+                                 (cons (substring-cursor str sc sc2) res)))
+                        (res (if (and (string-cursor=? sc3 end)
+                                      (eq? 'suffix grammar))
+                                 res
+                                 (cons (substring-cursor str sc3 end) res))))
+                   (lp end #f i res)))
+                ((equal? "" delim)
+                 (lp (string-cursor-forward str sc2 1)
+                     #f
+                     (+ i 1)
+                     (cons (string (string-cursor-ref str sc2)) res)))
+                ((and (string-cursor=? sc2 start) (eq? 'prefix grammar))
+                 (lp sc3 #t (+ i 1) res))
+                (else
+                 (lp sc3 #t (+ i 1)
+                     (cons (substring-cursor str sc sc2) res)))))))
        (else
-        (lp end i (cons (substring-cursor str sc end) res)))))))
+        (lp end #f i (cons (substring-cursor str sc end) res)))))))
 
 (define (string-filter pred str . o)
   (let ((out (open-output-string)))
diff --git a/lib/srfi/130/test.sld b/lib/srfi/130/test.sld
index bfe99813..6c7cdf7b 100644
--- a/lib/srfi/130/test.sld
+++ b/lib/srfi/130/test.sld
@@ -343,6 +343,8 @@
 
       (test '("foo" "bar" "baz")
           (string-split "foo:bar:baz" ":"))
+      (test '("foo" "bar" "baz" "")
+          (string-split "foo:bar:baz:" ":"))
       (test '("foo" "bar" "baz")
           (string-split "foo:bar:baz:" ":" 'suffix))
       (test '("foo" "bar:baz:")
@@ -352,8 +354,113 @@
       (test '() (string-split "" ":"))
       (test '() (string-split "" ":" 'suffix))
       (test '("") (string-split ":" ":" 'suffix))
+      (test '("foo" "bar" "baz")
+          (string-split ":foo:bar:baz" ":" 'prefix))
 
-      ;;; Regression tests: check that reported bugs have been fixed
+      ;; from SRFI 130 test suite
+      (test '() (string-split "" ""))
+      (test '("a" "b" "c") (string-split "abc" ""))
+      (test '("too" "" "much" "" "data")
+          (string-split "too  much  data" " "))
+      (test '("" "there" "ya" "go" "")
+          (string-split "***there***ya***go***" "***"))
+      (test '() (string-split "" "" 'infix))
+      (test '("a" "b" "c") (string-split "abc" "" 'infix))
+      (test '("too" "" "much" "" "data")
+          (string-split "too  much  data" " " 'infix))
+      (test '("" "there" "ya" "go" "")
+          (string-split "***there***ya***go***" "***" 'infix))
+      (test-error (string-split "" "" 'strict-infix))
+      (test '("a" "b" "c") (string-split "abc" "" 'strict-infix))
+      (test '("too" "" "much" "" "data")
+          (string-split "too  much  data" " " 'strict-infix))
+      (test '("" "there" "ya" "go" "")
+          (string-split "***there***ya***go***" "***" 'strict-infix))
+      (test '() (string-split "" "" 'prefix))
+      (test '("a" "b" "c") (string-split "abc" "" 'prefix))
+      (test '("too" "" "much" "" "data")
+          (string-split "too  much  data" " " 'prefix))
+      (test '("there" "ya" "go" "")
+          (string-split "***there***ya***go***" "***" 'prefix))
+      (test '() (string-split "" "" 'suffix))
+      (test '("a" "b" "c") (string-split "abc" "" 'suffix))
+      (test '("too" "" "much" "" "data")
+          (string-split "too  much  data" " " 'suffix))
+      (test '("" "there" "ya" "go")
+          (string-split "***there***ya***go***" "***" 'suffix))
+      (test '() (string-split "" "" 'infix #f))
+      (test '("a" "b" "c") (string-split "abc" "" 'infix #f))
+      (test '("too" "" "much" "" "data")
+          (string-split "too  much  data" " " 'infix #f))
+      (test '("" "there" "ya" "go" "")
+          (string-split "***there***ya***go***" "***" 'infix #f))
+      (test '("a" "b" "c") (string-split "abc" "" 'strict-infix #f))
+      (test '("too" "" "much" "" "data")
+          (string-split "too  much  data" " " 'strict-infix #f))
+      (test '("" "there" "ya" "go" "")
+          (string-split "***there***ya***go***" "***" 'strict-infix #f))
+      (test '() (string-split "" "" 'prefix #f))
+      (test '("a" "b" "c") (string-split "abc" "" 'prefix #f))
+      (test '("too" "" "much" "" "data")
+          (string-split "too  much  data" " " 'prefix #f))
+      (test '("there" "ya" "go" "")
+          (string-split "***there***ya***go***" "***" 'prefix #f))
+      (test '() (string-split "" "" 'suffix #f))
+      (test '("a" "b" "c") (string-split "abc" "" 'suffix #f))
+      (test '("too" "" "much" "" "data")
+          (string-split "too  much  data" " " 'suffix #f))
+      (test '("" "there" "ya" "go")
+          (string-split "***there***ya***go***" "***" 'suffix #f))
+      (test-error (string-split "" "" 'strict-infix 3))
+      (test '("a" "b" "c") (string-split "abc" "" 'strict-infix 3))
+      (test '("too" "" "much" " data")
+          (string-split "too  much  data" " " 'strict-infix 3))
+      (test '("" "there" "ya" "go***")
+          (string-split "***there***ya***go***" "***" 'strict-infix 3))
+      (test '() (string-split "" "" 'prefix 3))
+      (test '("a" "b" "c") (string-split "abc" "" 'prefix 3))
+      (test '("too" "" "much" " data")
+          (string-split "too  much  data" " " 'prefix 3))
+      (test '("there" "ya" "go***")
+          (string-split "***there***ya***go***" "***" 'prefix 3))
+      (test '() (string-split "" "" 'suffix 3))
+      (test '("a" "b" "c") (string-split "abc" "" 'suffix 3))
+      (test '("too" "" "much" " data")
+          (string-split "too  much  data" " " 'suffix 3))
+      (test '("" "there" "ya" "go***")
+          (string-split "***there***ya***go***" "***" 'suffix 3))
+      (test-error (string-split "" "" 'strict-infix 3 0))
+      (test '("b" "c") (string-split "abc" "" 'strict-infix 3 1))
+      (test '("oo" "" "much" " data")
+          (string-split "too  much  data" " " 'strict-infix 3 1))
+      (test '("**there" "ya" "go" "")
+          (string-split "***there***ya***go***" "***" 'strict-infix 3 1))
+      (test '() (string-split "" "" 'prefix 3 0))
+      (test '("b" "c") (string-split "abc" "" 'prefix 3 1))
+      (test '("oo" "" "much" " data")
+          (string-split "too  much  data" " " 'prefix 3 1))
+      (test '("**there" "ya" "go" "")
+          (string-split "***there***ya***go***" "***" 'prefix 3 1))
+      (test '() (string-split "" "" 'suffix 3 0))
+      (test '("b" "c") (string-split "abc" "" 'suffix 3 1))
+      (test '("oo" "" "much" " data")
+          (string-split "too  much  data" " " 'suffix 3 1))
+      (test '("**there" "ya" "go")
+          (string-split "***there***ya***go***" "***" 'suffix 3 1))
+      (test-error (string-split "" "" 'strict-infix 3 0 0))
+      (test '("b") (string-split "abc" "" 'strict-infix 3 1 2))
+      (test '("oo" "" "much" " ")
+          (string-split "too  much  data" " " 'strict-infix 3 1 11))
+      (test '() (string-split "" "" 'prefix 3 0 0))
+      (test '("b") (string-split "abc" "" 'prefix 3 1 2))
+      (test '("oo" "" "much" " ")
+          (string-split "too  much  data" " " 'prefix 3 1 11))
+      (test '() (string-split "" "" 'suffix 3 0 0))
+      (test '("b") (string-split "abc" "" 'suffix 3 1 2))
+      (test '("oo" "" "much" " ")
+          (string-split "too  much  data" " " 'suffix 3 1 11))
+
+;;; Regression tests: check that reported bugs have been fixed
 
       ;; From: Matthias Radestock <matthias@sorted.org>
       ;; Date: Wed, 10 Dec 2003 21:05:22 +0100