From 53c7dfd71ebd0f2a3c0a31643af004eeec15ac96 Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Thu, 10 Jul 2014 22:59:12 +0900 Subject: [PATCH] When annotating regexp states with the leftmost longest match, prefer the leftmost even if the end of either match has not yet been completed. If two matches start on the same state, prefer an uncompleted end to a completed one. Fixes issue #229. --- lib/chibi/regexp.scm | 13 +++++++------ tests/regexp-tests.scm | 5 +++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/lib/chibi/regexp.scm b/lib/chibi/regexp.scm index 1440a86a..40ef0070 100644 --- a/lib/chibi/regexp.scm +++ b/lib/chibi/regexp.scm @@ -267,15 +267,16 @@ (regexp-match-ref m2 (+ i 1)))) (lp (+ i 2))) ((and (string-cursor? (regexp-match-ref m2 i)) - (string-cursor? (regexp-match-ref m2 (+ i 1))) (or (not (string-cursor? (regexp-match-ref m1 i))) - (not (string-cursor? (regexp-match-ref m1 (+ i 1)))) (string-cursor? (regexp-match-ref m2 (+ i 1)) - (regexp-match-ref m1 (+ i 1)))))) + (and + (string-cursor=? (regexp-match-ref m2 i) + (regexp-match-ref m1 i)) + (or (not (string-cursor? (regexp-match-ref m2 (+ i 1)))) + (and (string-cursor? (regexp-match-ref m1 (+ i 1))) + (string-cursor>? (regexp-match-ref m2 (+ i 1)) + (regexp-match-ref m1 (+ i 1)))))))) #f) (else #t))))) diff --git a/tests/regexp-tests.scm b/tests/regexp-tests.scm index 72a034b0..ef21d0e3 100644 --- a/tests/regexp-tests.scm +++ b/tests/regexp-tests.scm @@ -72,6 +72,11 @@ (regexp-matches '(or (-> foo "ab") (-> foo "cd")) "cd") 'foo)) +;; non-deterministic case from issue #229 +(let* ((elapsed '(: (** 1 2 num) ":" num num (? ":" num num))) + (span (rx ,elapsed "-" ,elapsed))) + (test-re-search '("1:45:02-2:06:13") span " 1:45:02-2:06:13 ")) + (test-re '("ababc" "abab") '(: bos ($ (* "ab")) "c") "ababc")