chibi-scheme/lib/chibi/string.scm
2016-03-29 22:25:09 +09:00

342 lines
12 KiB
Scheme

;; strings.scm -- cursor-oriented string library
;; Copyright (c) 2012-2015 Alex Shinn. All rights reserved.
;; BSD-style license: http://synthcode.com/license.txt
;;> \section{High-level API}
;;> The procedures below are similar to those in SRFI 13 or other
;;> string libraries, except instead of receiving and returning
;;> character indexes they use opaque string cursors.
;;> \procedure{(string-null? str)}
;;> Returns true iff \var{str} is equal to the empty string \scheme{""}.
(define (string-null? str)
(equal? str ""))
(define (make-char-predicate x)
(cond ((procedure? x) x)
((char? x) (lambda (ch) (eq? ch x)))
((char-set? x) (lambda (ch) (char-set-contains? x ch)))
(else (error "invalid character predicate" x))))
(define (complement pred) (lambda (x) (not (pred x))))
;;> Returns true iff \var{check} is true for any character in
;;> \var{str}. \var{check} can be a procedure, char (to test for
;;> \scheme{char=?} equivalence) or char-set (to test for
;;> \var{char-set-contains?}). Always returns false if \var{str} is
;;> empty.
(define (string-any check str)
(let ((pred (make-char-predicate check))
(end (string-cursor-end str)))
(and (string-cursor>? end (string-cursor-start str))
(let lp ((i (string-cursor-start str)))
(let ((i2 (string-cursor-next str i))
(ch (string-cursor-ref str i)))
(if (string-cursor>=? i2 end)
(pred ch) ;; tail call
(or (pred ch) (lp i2))))))))
;;> Returns true iff \var{check} is true for every character in
;;> \var{str}. \var{check} can be a procedure, char or char-set as in
;;> \scheme{string-any}. Always returns true if \var{str} is empty.
(define (string-every check str)
(not (string-any (complement (make-char-predicate check)) str)))
;;> Returns a cursor pointing to the first position from the left in
;;> string for which \var{check} is true. \var{check} can be a
;;> procedure, char or char-set as in \scheme{string-any}. The
;;> optional cursors \var{start} and \var{end} can specify a substring
;;> to search, and default to the whole string. Returns a cursor just
;;> past the end of \var{str} if no character matches.
(define (string-find str check . o)
(let ((pred (make-char-predicate check))
(end (if (and (pair? o) (pair? (cdr o)))
(cadr o)
(string-cursor-end str))))
(let lp ((i (if (pair? o) (car o) (string-cursor-start str))))
(cond ((string-cursor>=? i end) end)
((pred (string-cursor-ref str i)) i)
(else (lp (string-cursor-next str i)))))))
;;> As above, ignoring the position and returning true iff any
;;> character matches.
(define (string-find? str check . o)
(let ((start (if (pair? o) (car o) (string-cursor-start str)))
(end (if (and (pair? o) (pair? (cdr o)))
(cadr o)
(string-cursor-end str))))
(< (string-find str check start end) end)))
;;> As \scheme{string-find}, but returns the position of the first
;;> character from the right of \var{str}. If no character matches,
;;> returns a string cursor pointing just before \var{start}.
(define (string-find-right str check . o)
(let ((pred (make-char-predicate check))
(start (if (pair? o) (car o) (string-cursor-start str))))
(let lp ((i (if (and (pair? o) (pair? (cdr o)))
(cadr o)
(string-cursor-end str))))
(let ((i2 (string-cursor-prev str i)))
(cond ((string-cursor<? i2 start) start)
((pred (string-cursor-ref str i2)) i)
(else (lp i2)))))))
;;> As \scheme{string-find}, but inverts the check, returning the
;;> position of the first character which doesn't match.
(define (string-skip str check . o)
(apply string-find str (complement (make-char-predicate check)) o))
;;> As \scheme{string-find-right}, but inverts the check, returning
;;> the position of the first character which doesn't match.
(define (string-skip-right str check . o)
(apply string-find-right str (complement (make-char-predicate check)) o))
;;> \procedure{(string-join list-of-strings [separator])}
;;>
;;> Concatenates the \var{list-of-strings} and return the result as a
;;> single string. If \var{separator} is provided it is inserted
;;> between each pair of strings.
(define string-join string-concatenate)
;;> Split \var{str} into a list of substrings separated by \var{pred},
;;> which defaults to \scheme{#\\space}. Multiple adjacent characters
;;> which satisy \var{pred} will result in empty strings in the list.
;;> If the optional \var{limit} is provided, splits into at most that
;;> many substrings starting from the left.
(define (string-split str . o)
(let ((pred (make-char-predicate (if (pair? o) (car o) #\space)))
(limit (if (and (pair? o) (pair? (cdr o)))
(cadr o)
(+ 1 (string-size str))))
(start (string-cursor-start str))
(end (string-cursor-end str)))
(if (string-cursor>=? start end)
'()
(let lp ((i start) (n 1) (res '()))
(cond
((>= n limit)
(reverse (cons (substring-cursor str i) res)))
(else
(let* ((j (string-find str pred i))
(res (cons (substring-cursor str i j) res)))
(if (string-cursor>=? j end)
(reverse res)
(lp (string-cursor-next str j) (+ n 1) res)))))))))
;;> Returns a copy of the string \var{str} with all characters
;;> matching \var{pred} (default \scheme{#\\space}) removed from the
;;> left.
(define (string-trim-left str . o)
(let ((pred (make-char-predicate (if (pair? o) (car o) #\space))))
(substring-cursor str (string-skip str pred))))
;;> Returns a copy of the string \var{str} with all characters
;;> matching \var{pred} (default \scheme{#\\space}) removed from the
;;> right.
(define (string-trim-right str . o)
(let ((pred (make-char-predicate (if (pair? o) (car o) #\space))))
(substring-cursor str
(string-cursor-start str)
(string-skip-right str pred))))
;;> Returns a copy of the string \var{str} with all characters
;;> matching \var{pred} (default \scheme{#\\space}) removed from both
;;> sides.
(define (string-trim str . o)
(let* ((pred (if (pair? o) (car o) #\space))
(left (string-skip str pred))
(right (string-skip-right str pred)))
(if (string-cursor>=? left right)
""
(substring-cursor str left right))))
(define (string-mismatch prefix str)
(let ((end1 (string-cursor-end prefix))
(end2 (string-cursor-end str)))
(let lp ((i (string-cursor-start prefix))
(j (string-cursor-start str)))
(if (or (string-cursor>=? i end1)
(string-cursor>=? j end2)
(not (eq? (string-cursor-ref prefix i) (string-cursor-ref str j))))
j
(lp (string-cursor-next prefix i) (string-cursor-next str j))))))
(define (string-mismatch-right suffix str)
(let ((end1 (string-cursor-start suffix))
(end2 (string-cursor-start str)))
(let lp ((i (string-cursor-prev suffix (string-cursor-end suffix)))
(j (string-cursor-prev str (string-cursor-end str))))
(if (or (string-cursor<? i end1)
(string-cursor<? j end2)
(not (eq? (string-cursor-ref suffix i) (string-cursor-ref str j))))
j
(lp (string-cursor-prev suffix i) (string-cursor-prev str j))))))
;; TODO: These definitions are specific to the Chibi implementation of
;; cursors. Possibly the mismatch API should be modified to allow an
;; efficient portable definition.
;;> Returns true iff \var{prefix} is a prefix of \var{str}.
(define (string-prefix? prefix str)
(string-cursor=? (string-cursor-end prefix) (string-mismatch prefix str)))
;;> Returns true iff \var{suffix} is a suffix of \var{str}.
(define (string-suffix? suffix str)
(string-cursor=? (string-cursor-prev suffix (string-cursor-start suffix))
(string-cursor-backward
str
(string-mismatch-right suffix str)
(- (string-size str) (string-size suffix)))))
;;> The fundamental string iterator. Calls \var{kons} on each
;;> character of \var{str} and an accumulator, starting with
;;> \var{knil}. If multiple strings are provided, calls \var{kons} on
;;> the corresponding characters of all strings, with the accumulator
;;> as the final argument, and terminates when the shortest string
;;> runs out.
(define (string-fold kons knil str . los)
(if (null? los)
(let ((end (string-cursor-end str)))
(let lp ((i (string-cursor-start str)) (acc knil))
(if (string-cursor>=? i end)
acc
(lp (string-cursor-next str i)
(kons (string-cursor-ref str i) acc)))))
(let ((los (cons str los)))
(let lp ((is (map string-cursor-start los))
(acc knil))
(if (any (lambda (str i)
(string-cursor>=? i (string-cursor-end str)))
los is)
acc
(lp (map string-cursor-next los is)
(apply kons (append (map string-cursor-ref los is)
(list acc)))))))))
;;> Equivalent to \scheme{string-fold}, but iterates over \var{str}
;;> from right to left.
(define (string-fold-right kons knil str)
(let ((end (string-cursor-end str)))
(let lp ((i (string-cursor-start str)))
(if (string-cursor>=? i end)
knil
(kons (string-cursor-ref str i) (lp (string-cursor-next str i)))))))
;;> \procedure{(string-map proc str)}
;;>
;;> Returns a new string composed of applying the procedure \var{proc}
;;> to every character in \var{string}.
;;> \procedure{(string-for-each proc str)}
;;>
;;> Apply \var{proc} to every character in \var{str} in order and
;;> discard the result.
;;> \procedure{(string-count str check)}
;;>
;;> Count the number of characters in \var{str} for which \var{check}
;;> is true.
(define (string-count str check)
(let ((pred (make-char-predicate check)))
(string-fold (lambda (ch count) (if (pred ch) (+ count 1) count)) 0 str)))
;;> \procedure{(string-contain s1 s2)}
;;>
;;> Returns a cursor pointing to the first position in the string
;;> \var{s1} where \var{s2} occurs, or \scheme{#f} if there is no such
;;> match.
;;> \procedure{(mamke-string-searcher needle)}
;;>
;;> Partial application of \scheme{string-contain}. Return a
;;> procedure of one argument, a string, which runs
;;> \scheme{(string-contain str \var{needle})}.
(define (make-string-searcher needle)
(lambda (haystack) (string-contains haystack needle)))
;;> Return a copy of string \var{s} with all 26 upper-case ASCII
;;> characters mapped to their corresponding 26 lower-case ASCII
;;> characters.
(define (string-downcase-ascii s)
(call-with-output-string
(lambda (out)
(string-for-each (lambda (ch) (write-char (char-downcase ch) out)) s))))
;;> Return a copy of string \var{s} with all 26 lower-case ASCII
;;> characters mapped to their corresponding 26 upper-case ASCII
;;> characters.
(define (string-upcase-ascii s)
(call-with-output-string
(lambda (out)
(string-for-each (lambda (ch) (write-char (char-upcase ch) out)) s))))
;;> \section{Cursor API}
;;> \procedure{(substring-cursor str i [j])}
;;>
;;> Returns the substring of \var{str} between \var{i} (inclusive) and
;;> optional \var{j} (exclusive), which defaults to the end of the
;;> string.
;;> \procedure{(string-cursor-ref str i)}
;;>
;;> Returns the character of \var{str} at position \var{i}.
;;> \procedure{(string-cursor-start str)}
;;>
;;> Returns a string cursor pointing to the start of \var{str}.
;;> \procedure{(string-cursor-end str)}
;;>
;;> Returns a string cursor pointing just past the end of \var{str}.
;;> \procedure{(string-cursor-next str i)}
;;>
;;> Returns a string cursor to the character in \var{str} just after
;;> the cursor \var{i}.
;;> \procedure{(string-cursor-prev str i)}
;;>
;;> Returns a string cursor to the character in \var{str} just before
;;> the cursor \var{i}.
(define (string-cursor-forward str cursor n)
(if (zero? n)
cursor
(string-cursor-forward str (string-cursor-next str cursor) (- n 1))))
(define (string-cursor-backward str cursor n)
(if (zero? n)
cursor
(string-cursor-backward str (string-cursor-prev str cursor) (- n 1))))
;;> \procedure{(string-cursor<? i j)}
;;> \procedure{(string-cursor>? i j)}
;;> \procedure{(string-cursor=? i j)}
;;> \procedure{(string-cursor<=? i j)}
;;> \procedure{(string-cursor>=? i j)}
;;>
;;> String cursor comparators.
;;/