mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-19 13:49:17 +02:00
They can be close()d explicitly with close-file-descriptor, and will close() on gc, but only explicitly closing the last port on them will close the fileno. Notably needed for network sockets where we open separate input and output ports on the same socket.
235 lines
7.1 KiB
Scheme
235 lines
7.1 KiB
Scheme
|
||
(import (chibi) (chibi regexp) (chibi regexp pcre)
|
||
(chibi string) (chibi io) (chibi match) (chibi test))
|
||
|
||
(define (maybe-match->sexp rx str . o)
|
||
(let ((res (apply regexp-matches rx str o)))
|
||
(and res (regexp-match->sexp res))))
|
||
|
||
(define-syntax test-re
|
||
(syntax-rules ()
|
||
((test-re res rx str start end)
|
||
(test res (maybe-match->sexp rx str start end)))
|
||
((test-re res rx str start)
|
||
(test-re res rx str start (string-length str)))
|
||
((test-re res rx str)
|
||
(test-re res rx str 0))))
|
||
|
||
(define (maybe-search->sexp rx str . o)
|
||
(let ((res (apply regexp-search rx str o)))
|
||
(and res (regexp-match->sexp res))))
|
||
|
||
(define-syntax test-re-search
|
||
(syntax-rules ()
|
||
((test-re-search res rx str start end)
|
||
(test res (maybe-search->sexp rx str start end)))
|
||
((test-re-search res rx str start)
|
||
(test-re-search res rx str start (string-length str)))
|
||
((test-re-search res rx str)
|
||
(test-re-search res rx str 0))))
|
||
|
||
(test-begin "regexp")
|
||
|
||
(test-re '("ababc" "abab")
|
||
'(: ($ (* "ab")) "c")
|
||
"ababc")
|
||
|
||
(test-re '("ababc" "abab")
|
||
'(: ($ (* "ab")) "c")
|
||
"xababc"
|
||
1)
|
||
|
||
(test-re-search '("y") '(: "y") "xy")
|
||
|
||
(test-re-search '("ababc" "abab")
|
||
'(: ($ (* "ab")) "c")
|
||
"xababc")
|
||
|
||
(test-re #f
|
||
'(: (* any) ($ "foo" (* any)) ($ "bar" (* any)))
|
||
"fooxbafba")
|
||
|
||
(test-re '("fooxbarfbar" "fooxbarf" "bar")
|
||
'(: (* any) ($ "foo" (* any)) ($ "bar" (* any)))
|
||
"fooxbarfbar")
|
||
|
||
(test-re '("abcd" "abcd")
|
||
'($ (* (or "ab" "cd")))
|
||
"abcd")
|
||
|
||
;; first match is a list of ab's, second match is the last (temporary) cd
|
||
(test-re '("abcdc" (("ab") ("cd")) "cd")
|
||
'(: (* (*$ (or "ab" "cd"))) "c")
|
||
"abcdc")
|
||
|
||
(test-re '("ababc" "abab")
|
||
'(: bos ($ (* "ab")) "c")
|
||
"ababc")
|
||
(test-re '("ababc" "abab")
|
||
'(: ($ (* "ab")) "c" eos)
|
||
"ababc")
|
||
(test-re '("ababc" "abab")
|
||
'(: bos ($ (* "ab")) "c" eos)
|
||
"ababc")
|
||
(test-re #f
|
||
'(: bos ($ (* "ab")) eos "c")
|
||
"ababc")
|
||
(test-re #f
|
||
'(: ($ (* "ab")) bos "c" eos)
|
||
"ababc")
|
||
|
||
(test-re '("ababc" "abab")
|
||
'(: bol ($ (* "ab")) "c")
|
||
"ababc")
|
||
(test-re '("ababc" "abab")
|
||
'(: ($ (* "ab")) "c" eol)
|
||
"ababc")
|
||
(test-re '("ababc" "abab")
|
||
'(: bol ($ (* "ab")) "c" eol)
|
||
"ababc")
|
||
(test-re #f
|
||
'(: bol ($ (* "ab")) eol "c")
|
||
"ababc")
|
||
(test-re #f
|
||
'(: ($ (* "ab")) bol "c" eol)
|
||
"ababc")
|
||
(test-re '("\nabc\n" "abc")
|
||
'(: (* #\newline) bol ($ (* alpha)) eol (* #\newline))
|
||
"\nabc\n")
|
||
(test-re #f
|
||
'(: (* #\newline) bol ($ (* alpha)) eol (* #\newline))
|
||
"\n'abc\n")
|
||
(test-re #f
|
||
'(: (* #\newline) bol ($ (* alpha)) eol (* #\newline))
|
||
"\nabc.\n")
|
||
|
||
(test-re '("ababc" "abab")
|
||
'(: bow ($ (* "ab")) "c")
|
||
"ababc")
|
||
(test-re '("ababc" "abab")
|
||
'(: ($ (* "ab")) "c" eow)
|
||
"ababc")
|
||
(test-re '("ababc" "abab")
|
||
'(: bow ($ (* "ab")) "c" eow)
|
||
"ababc")
|
||
(test-re #f
|
||
'(: bow ($ (* "ab")) eow "c")
|
||
"ababc")
|
||
(test-re #f
|
||
'(: ($ (* "ab")) bow "c" eow)
|
||
"ababc")
|
||
(test-re '(" abc " "abc")
|
||
'(: (* space) bow ($ (* alpha)) eow (* space))
|
||
" abc ")
|
||
(test-re #f
|
||
'(: (* space) bow ($ (* alpha)) eow (* space))
|
||
" 'abc ")
|
||
(test-re #f
|
||
'(: (* space) bow ($ (* alpha)) eow (* space))
|
||
" abc. ")
|
||
|
||
(test-re '("beef")
|
||
'(* (/"af"))
|
||
"beef")
|
||
|
||
(test-re '("12345beef" "beef")
|
||
'(: (* digit) ($ (* (/"af"))))
|
||
"12345beef")
|
||
|
||
(test-re '("12345BeeF" "BeeF")
|
||
'(: (* digit) (w/nocase ($ (* (/"af")))))
|
||
"12345BeeF")
|
||
|
||
(test-re #f '(* lower) "abcD")
|
||
(test-re '("abcD") '(w/nocase (* lower)) "abcD")
|
||
|
||
(test-re '("кириллица") '(* alpha) "кириллица")
|
||
(test-re #f '(w/ascii (* alpha)) "кириллица")
|
||
|
||
(test-re '("12345") '(* digit) "12345")
|
||
(test-re #f '(w/ascii (* digit)) "12345")
|
||
|
||
(test-re '("한") 'grapheme "한")
|
||
(test-re '("글") 'grapheme "글")
|
||
|
||
(test-re '("한") '(: bog grapheme eog) "한")
|
||
(test-re #f '(: "ᄒ" bog grapheme eog "ᆫ") "한")
|
||
|
||
(test '("123" "456" "789") (regexp-extract '(+ digit) "abc123def456ghi789"))
|
||
(test '("123" "456" "789") (regexp-extract '(* digit) "abc123def456ghi789"))
|
||
(test '("abc" "def" "ghi") (regexp-split '(+ digit) "abc123def456ghi789"))
|
||
(test '("a" "b" "c" "d" "e" "f" "g" "h" "i")
|
||
(regexp-split '(* digit) "abc123def456ghi789"))
|
||
(test '("a" "b") (regexp-split '(+ whitespace) "a b"))
|
||
(test '("한" "글")
|
||
(regexp-extract
|
||
'grapheme
|
||
(utf8->string '#u8(#xe1 #x84 #x92 #xe1 #x85 #xa1 #xe1 #x86 #xab
|
||
#xe1 #x84 #x80 #xe1 #x85 #xb3 #xe1 #x86 #xaf))))
|
||
|
||
(test "abc def" (regexp-replace '(+ space) "abc \t\n def" " "))
|
||
(test " abc-abc"
|
||
(regexp-replace '(: ($ (+ alpha)) ":" (* space)) " abc: " '(1 "-" 1)))
|
||
(test " abc- abc"
|
||
(regexp-replace '(: ($ (+ alpha)) ":" (* space)) " abc: " '(1 "-" pre 1)))
|
||
|
||
(test " abc d ef " (regexp-replace-all '(+ space) " abc \t\n d ef " " "))
|
||
|
||
(define (subst-matches matches input subst)
|
||
(define (submatch n)
|
||
(regexp-match-submatch matches n))
|
||
(and
|
||
matches
|
||
(call-with-output-string
|
||
(lambda (out)
|
||
(call-with-input-string subst
|
||
(lambda (in)
|
||
(let lp ()
|
||
(let ((c (read-char in)))
|
||
(cond
|
||
((not (eof-object? c))
|
||
(case c
|
||
((#\&)
|
||
(display (or (submatch 0) "") out))
|
||
((#\\)
|
||
(let ((c (read-char in)))
|
||
(if (char-numeric? c)
|
||
(let lp ((res (list c)))
|
||
(if (and (char? (peek-char in))
|
||
(char-numeric? (peek-char in)))
|
||
(lp (cons (read-char in) res))
|
||
(display
|
||
(or (submatch (string->number
|
||
(list->string (reverse res))))
|
||
"")
|
||
out)))
|
||
(write-char c out))))
|
||
(else
|
||
(write-char c out)))
|
||
(lp)))))))))))
|
||
|
||
(define (test-pcre line)
|
||
(match (string-split line #\tab)
|
||
((pattern input result subst output)
|
||
(let ((name (string-append pattern " " input " " result " " subst)))
|
||
(cond
|
||
((equal? "c" result)
|
||
(test-error name (regexp-search (pcre->sre pattern) input)))
|
||
((equal? "n" result)
|
||
(test-assert name (not (regexp-search (pcre->sre pattern) input))))
|
||
(else
|
||
(test name output
|
||
(subst-matches (regexp-search (pcre->sre pattern) input)
|
||
input
|
||
subst))))))
|
||
(else
|
||
(error "invalid regex test line" line))))
|
||
|
||
(test-group "pcre"
|
||
(call-with-input-file "tests/re-tests.txt"
|
||
(lambda (in)
|
||
(for-each
|
||
(lambda (line) (test-pcre line))
|
||
(port->list read-line in)))))
|
||
|
||
(test-end)
|