diff --git a/AUTHORS b/AUTHORS index 8f85548a..c5f67f1f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -18,6 +18,7 @@ The following distributed SRFIs use the reference implementations: (srfi 101) is adapted from David van Horn's implementation (srfi 134) is Shiro Kawai's implementation + (srfi 135) is Will Clinger's implementation The benchmarks are based on the Racket versions of the classic Gabriel benchmarks from diff --git a/doc/chibi.scrbl b/doc/chibi.scrbl index ea0b877d..9ada49f3 100755 --- a/doc/chibi.scrbl +++ b/doc/chibi.scrbl @@ -1211,6 +1211,7 @@ snow-fort): \item{\hyperlink["http://srfi.schemers.org/srfi-132/srfi-132.html"]{(srfi 132) - sort libraries}} \item{\hyperlink["http://srfi.schemers.org/srfi-133/srfi-133.html"]{(srfi 133) - vector library}} \item{\hyperlink["http://srfi.schemers.org/srfi-134/srfi-134.html"]{(srfi 134) - immutable deques}} +\item{\hyperlink["http://srfi.schemers.org/srfi-135/srfi-135.html"]{(srfi 135) - immutable texts}} \item{\hyperlink["http://srfi.schemers.org/srfi-139/srfi-139.html"]{(srfi 139) - syntax parameters}} \item{\hyperlink["http://srfi.schemers.org/srfi-141/srfi-141.html"]{(srfi 141) - integer division}} \item{\hyperlink["http://srfi.schemers.org/srfi-143/srfi-143.html"]{(srfi 143) - fixnums}} diff --git a/lib/srfi/135.scm b/lib/srfi/135.scm new file mode 100644 index 00000000..3602fb22 --- /dev/null +++ b/lib/srfi/135.scm @@ -0,0 +1,1819 @@ +;;; Copyright (C) William D Clinger (2016). All Rights Reserved. +;;; +;;; Permission is hereby granted, free of charge, to any person +;;; obtaining a copy of this software and associated documentation +;;; files (the "Software"), to deal in the Software without +;;; restriction, including without limitation the rights to use, +;;; copy, modify, merge, publish, distribute, sublicense, and/or +;;; sell copies of the Software, and to permit persons to whom the +;;; Software is furnished to do so, subject to the following +;;; conditions: +;;; +;;; The above copyright notice and this permission notice shall be +;;; included in all copies or substantial portions of the Software. +;;; +;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +;;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +;;; OTHER DEALINGS IN THE SOFTWARE. + +;;; FIXME: a lot of these procedures should do more error checking +;;; up front, instead of letting some other procedure deal with it. + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; The following procedures are not part of R7RS (small). +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (last-pair x) + (if (and (pair? x) (pair? (cdr x))) + (last-pair (cdr x)) + x)) + +;;; Returns first n elements of the list x. + +(define (list-take x n) + (let loop ((n n) + (x x) + (y '())) + (if (= n 0) + (reverse y) + (loop (- n 1) (cdr x) (cons (car x) y))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Some macros to make textual arguments and optional arguments +;;; less painful. +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define-syntax %textual->text + (syntax-rules () + ((_ x) + (if (string? x) + (string->text x) + x)) + ((_ x name arg ...) + (cond ((string? x) + (string->text x)) + ((text? x) + x) + (else + (complain name arg ...)))))) + +;;; Several procedures take a first argument that can be either +;;; a text or a string. They can be written as though the first +;;; argument is always a text: +;;; +;;; (define-textual (f textual args ...) ...) + +(define-syntax define-textual + (syntax-rules () + ((_ (f textual arg . args) expr1 expr2 ...) + (define (f textual arg . args) + (let ((textual (%textual->text textual 'f textual arg))) + expr1 expr2 ...))))) + +;;; Several procedures take optional start and end arguments +;;; that follow a textual argument. They can be written as +;;; though the textual argument is always a text, the start +;;; and end arguments are always provided, and the start and +;;; end arguments are always legal: +;;; +;;; (define-textual-start-end (f args ... textual start end) +;;; ...) + +(define-syntax define-textual-start-end + (syntax-rules () + ((_ (f args ... textual start end) expr1 expr2 ...) + (define f + ;; Don't change this to letrec or an internal definition, + ;; because recursive calls should call the version that checks. + (let ((f + (lambda (args ... textual start end) expr1 expr2 ...))) + (case-lambda + ((args ... textual) + (let ((text (%textual->text textual f args ... textual))) + (f args ... text 0 (%text-length text)))) + ((args ... textual start) + (let* ((text (%textual->text textual f args ... textual start)) + (n (%text-length text))) + (if (and (exact-integer? start) + (<= 0 start n)) + (f args ... text start n) + (complain 'f args ... textual start)))) + ((args ... textual start end) + (let* ((text (%textual->text textual f args ... textual start end)) + (n (%text-length text))) + (if (and (exact-integer? start) + (exact-integer? end) + (<= 0 start end n)) + (f args ... text start end) + (complain 'f args ... textual start end)))))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Predicates +;;; +;;; text? is defined by the kernel + +(define (textual? x) + (or (text? x) + (string? x))) + +(define (textual-null? txt) + (= 0 (textual-length txt))) + +(define-textual-start-end (textual-every pred textual start end) + (if (= start end) + #t + (let ((end-1 (- end 1))) + (let loop ((i start)) + (if (= i end-1) + (pred (%text-ref textual i)) + (and (pred (%text-ref textual i)) + (loop (+ i 1)))))))) + +(define-textual-start-end (textual-any pred textual start end) + (let loop ((i start)) + (if (= i end) + #f + (or (pred (%text-ref textual i)) + (loop (+ i 1)))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Constructors +;;; +;;; text-tabulate is defined by the kernel + +(define (make-text n c) + (text-tabulate (lambda (i) c) n)) + +(define (text . chars) + (string->text (list->string chars))) + +;;; These next two procedures take care to accumulate texts of +;;; the kernel's preferred size, N. + +(define text-unfold + (case-lambda + ((stop? mapper succ seed) + (text-unfold stop? mapper succ seed (text) (lambda (x) (text)))) + ((stop? mapper succ seed base) + (text-unfold stop? mapper succ seed base (lambda (x) (text)))) + ((stop? mapper succ seed base make-final) + (let* ((txt (%textual->text (if (char? base) (text base) base) + 'text-unfold + stop? mapper succ seed base make-final)) + (k (%text-length txt))) + (let loop ((k k) + (texts (list txt)) + (chars '()) + (seed seed)) + (cond ((>= k N) + (let* ((k/N (quotient k N)) + (k (- k (* k/N N))) + (texts (cons (reverse-list->text (list-tail chars k)) + texts)) + (chars (list-take chars k))) + (loop k texts chars seed))) + ((stop? seed) + (let* ((texts (if (null? chars) + texts + (cons (reverse-list->text chars) texts))) + (final (make-final seed)) + (final (cond ((char? final) (text final)) + ((string? final) (string->text final)) + ((text? final) final) + (else + (%bad-final 'text-unfold final))))) + (textual-concatenate-reverse texts final))) + (else + (let ((x (mapper seed))) + (cond ((char? x) + (loop (+ k 1) + texts + (cons x chars) + (succ seed))) + ((string? x) + (loop (+ k (string-length x)) + texts + (append (reverse (string->list x)) chars) + (succ seed))) + ((text? x) + (loop (+ k (%text-length x)) + texts + (append (reverse (textual->list x)) chars) + (succ seed))) + (else + (complain 'text-unfold + stop? mapper succ seed + base make-final))))))))))) + +(define text-unfold-right + (case-lambda + ((stop? mapper succ seed) + (text-unfold-right stop? mapper succ seed (text) (lambda (x) (text)))) + ((stop? mapper succ seed base) + (text-unfold-right stop? mapper succ seed base (lambda (x) (text)))) + ((stop? mapper succ seed base make-final) + (let* ((txt (%textual->text (if (char? base) (text base) base) + 'text-unfold-right + stop? mapper succ seed base make-final)) + (k (%text-length txt))) + (let loop ((k k) + (texts (list txt)) + (chars '()) + (seed seed)) + (cond ((>= k N) + (let* ((k/N (quotient k N)) + (k (- k (* k/N N))) + (texts (cons (list->text (list-tail chars k)) texts)) + (chars (list-take chars k))) + (loop k texts chars seed))) + ((stop? seed) + (let* ((texts (if (null? chars) + texts + (cons (list->text chars) texts))) + (final (make-final seed)) + (final (cond ((char? final) (text final)) + ((string? final) (string->text final)) + ((text? final) final) + (else + (%bad-final 'text-unfold-right + final))))) + (textual-concatenate (cons final texts)))) + (else + (let ((x (mapper seed))) + (cond ((char? x) + (loop (+ k 1) + texts + (cons x chars) + (succ seed))) + ((string? x) + (loop (+ k (string-length x)) + texts + (append (string->list x) chars) + (succ seed))) + ((text? x) + (loop (+ k (%text-length x)) + texts + (append (textual->list x) chars) + (succ seed))) + (else + (complain 'text-unfold-right + stop? mapper succ seed + base make-final))))))))))) + +(define (%bad-final name final) + (error (string-append (symbol->string name) + " : make-final returned illegal value : ") + final)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Conversion +;;; +;;; FIXME: a lot of these could be made more efficient, especially +;;; when a string is passed instead of a text. + +(define (textual->text x . rest) + (cond ((string? x) + (string->text x)) + ((text? x) + x) + ((null? rest) + (error "illegal argument passed to textual->text : " x)) + (else (apply error rest)))) + +(define textual->string + (case-lambda + ((txt) + (if (string? txt) + txt + (textual->string txt 0 (textual-length txt)))) + ((txt start) + (if (string? txt) + (substring txt start (string-length txt)) + (textual->string txt start (textual-length txt)))) + ((txt start end) + (let* ((txt (%textual->text txt 'textual->string txt start end)) + (n (- end start)) + (s (make-string n))) + (do ((i start (+ i 1))) + ((= i end) + s) + (string-set! s (- i start) (%text-ref txt i))))))) + +(define-textual-start-end (textual->vector txt start end) + (list->vector (string->list (textual->string (subtext txt start end))))) + +(define-textual-start-end (textual->list txt start end) + (string->list (textual->string (subtext txt start end)))) + +(define string->text + (case-lambda + ((s) + (%string->text s)) + ((s start) + (%string->text (substring s start (string-length s)))) + ((s start end) + (%string->text (substring s start end))))) + +(define (vector->text v . start/end) + (%string->text (list->string (apply vector->list v start/end)))) + +(define (list->text chars . start/end) + (apply string->text (list->string chars) start/end)) + +(define (reverse-list->text chars) + (string->text (list->string (reverse chars)))) + +;;; FIXME: if txt is a string, should just call string->utf8 + +(define-textual-start-end (textual->utf8 txt start end) + (string->utf8 (textual->string (subtext txt start end)))) + +(define-textual-start-end (textual->utf16 txt start end) + (%textual->utf16 txt start end #f)) + +(define-textual-start-end (textual->utf16be txt start end) + (%textual->utf16 txt start end 'big)) + +(define-textual-start-end (textual->utf16le txt start end) + (%textual->utf16 txt start end 'little)) + +;;; FIXME: should this check for illegal code points? + +(define (%textual->utf16 txt start end endianness) + (let* ((n (textual-fold (lambda (c n) + (cond ((< (char->integer c) #x10000) + (+ n 2)) + (else + (+ n 4)))) + 0 + txt start end)) + (n (if endianness n (+ n 2))) + (result (make-bytevector n 0)) + (hibits (case endianness + ((big) 0) + ((little) 1) + (else 0))) + (lobits (- 1 hibits))) + (if (not endianness) + (begin (bytevector-u8-set! result 0 #xfe) + (bytevector-u8-set! result 1 #xff))) + (let loop ((i start) + (j (if endianness 0 2))) + (if (= i end) + result + (let* ((c (text-ref txt i)) + (cp (char->integer c))) + (cond ((< cp #x10000) + (let* ((high (quotient cp 256)) + (low (- cp (* 256 high)))) + (bytevector-u8-set! result (+ j hibits) high) + (bytevector-u8-set! result (+ j lobits) low)) + (loop (+ i 1) (+ j 2))) + (else + (let* ((k (- cp #x10000)) + (high-surrogate (+ #xd800 (quotient k 1024))) + (low-surrogate (+ #xdc00 (remainder k 1024))) + (high0 (quotient high-surrogate 256)) + (low0 (- high-surrogate (* 256 high0))) + (high1 (quotient low-surrogate 256)) + (low1 (- low-surrogate (* 256 high1)))) + (bytevector-u8-set! result (+ j hibits) high0) + (bytevector-u8-set! result (+ j lobits) low0) + (bytevector-u8-set! result (+ j 2 hibits) high1) + (bytevector-u8-set! result (+ j 2 lobits) low1)) + (loop (+ i 1) (+ j 4))))))))) + +(define utf8->text + (case-lambda + ((bv) + (if (bytevector? bv) + (string->text (utf8->string bv)) + (complain 'utf8->text bv))) + ((bv start) + (if (and (bytevector? bv) + (exact-integer? start) + (<= 0 start (bytevector-length bv))) + (string->text (utf8->string bv start)) + (complain 'utf8->text bv start))) + ((bv start end) + (if (and (bytevector? bv) + (exact-integer? start) + (exact-integer? end) + (<= 0 start end (bytevector-length bv))) + (string->text (utf8->string bv start end)) + (complain 'utf8->text bv start end))))) + +(define utf16->text + (case-lambda + ((bv) + (if (bytevector? bv) + (%utf16->text bv 0 (bytevector-length bv) #f) + (complain 'utf16->text bv))) + ((bv start) + (if (and (bytevector? bv) + (exact-integer? start) + (<= 0 start (bytevector-length bv))) + (%utf16->text bv start (bytevector-length bv) #f) + (complain 'utf16->text bv start))) + ((bv start end) + (if (and (bytevector? bv) + (exact-integer? start) + (exact-integer? end) + (<= 0 start end (bytevector-length bv))) + (%utf16->text bv start end #f) + (complain 'utf16->text bv start end))))) + +(define utf16be->text + (case-lambda + ((bv) + (if (bytevector? bv) + (%utf16->text bv 0 (bytevector-length bv) 'big) + (complain 'utf16be->text bv))) + ((bv start) + (if (and (bytevector? bv) + (exact-integer? start) + (<= 0 start (bytevector-length bv))) + (%utf16->text bv start (bytevector-length bv) 'big) + (complain 'utf16be->text bv start))) + ((bv start end) + (if (and (bytevector? bv) + (exact-integer? start) + (exact-integer? end) + (<= 0 start end (bytevector-length bv))) + (%utf16->text bv start end 'big) + (complain 'utf16be->text bv start end))))) + +(define utf16le->text + (case-lambda + ((bv) + (if (bytevector? bv) + (%utf16->text bv 0 (bytevector-length bv) 'little) + (complain 'utf16le->text bv))) + ((bv start) + (if (and (bytevector? bv) + (exact-integer? start) + (even? start) + (<= 0 start (bytevector-length bv))) + (%utf16->text bv start (bytevector-length bv) 'little) + (complain 'utf16le->text bv start))) + ((bv start end) + (if (and (bytevector? bv) + (exact-integer? start) + (exact-integer? end) + (even? start) + (even? end) + (<= 0 start end (bytevector-length bv))) + (%utf16->text bv start end 'little) + (complain 'utf16le->text bv start end))))) + +(define (%utf16->text bv start end endianness) + (let* ((bom (and (not endianness) + (< start end) + (let ((byte0 (bytevector-u8-ref bv start)) + (byte1 (bytevector-u8-ref bv (+ start 1)))) + (cond ((and (= byte0 #xfe) (= byte1 #xff)) + 'big) + ((and (= byte1 #xfe) (= byte0 #xff)) + 'little) + (else #f))))) + (start (if bom (+ start 2) start)) + (endianness (or endianness bom 'big)) + (hibits (if (eq? endianness 'big) 0 1)) + (lobits (- 1 hibits))) + (text-unfold + (lambda (i) (>= i end)) + (lambda (i) + (let* ((high (bytevector-u8-ref bv (+ i hibits))) + (low (bytevector-u8-ref bv (+ i lobits))) + (cp (if (= high 0) low (+ (* 256 high) low)))) + (cond ((< cp #xd800) + (integer->char cp)) + ((and (< cp #xdc00) + (< (+ i 2) end)) + (let* ((i (+ i 2)) + (high (bytevector-u8-ref bv (+ i hibits))) + (low (bytevector-u8-ref bv (+ i lobits))) + (cp2 (if (= high 0) low (+ (* 256 high) low)))) + (cond ((<= #xdc00 cp2 #xdfff) + (integer->char + (+ #x10000 + (* 1024 (- cp #xd800)) + (- cp2 #xdc00)))) + (else + (%illegal-utf16 bv (- i 2) cp cp2))))) + ((< cp #x10000) + (integer->char cp)) + (else + (%illegal-utf16 bv i cp))))) + (lambda (i) + (let ((cp (+ (* 256 (bytevector-u8-ref bv (+ i hibits))) + (bytevector-u8-ref bv (+ i lobits))))) + (if (or (< cp #xd800) + (<= #xe000 cp #xffff)) + (+ i 2) + (+ i 4)))) + start))) + +(define (%illegal-utf16 bv i cp . rest) + (if (null? rest) + (error "illegal UTF-16: " bv i cp) + (error "illegal UTF-16: " bv i cp (car rest)))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Selection +;;; +;;; text-length, text-ref, and subtext are defined by the kernel + +(define (textual-length txt) + (cond ((string? txt) + (string-length txt)) + ((text? txt) + (%text-length txt)) + (else + (complain 'textual-length txt)))) + +(define (textual-ref txt i) + (cond ((string? txt) + (string-ref txt i)) + ((text? txt) + (%text-ref txt i)) + (else + (complain 'textual-ref txt)))) + +(define-textual (subtextual txt start end) + (subtext txt start end)) + +;;; FIXME: could be faster, but this procedure shouldn't be used much + +(define-textual-start-end (textual-copy text start end) + (string->text (textual->string text start end))) + +(define-textual (textual-take txt nchars) + (subtextual txt 0 nchars)) + +(define-textual (textual-drop txt nchars) + (subtextual txt nchars (%text-length txt))) + +(define-textual (textual-take-right txt nchars) + (let ((n (%text-length txt))) + (subtextual txt (- n nchars) n))) + +(define-textual (textual-drop-right txt nchars) + (let ((n (%text-length txt))) + (subtextual txt 0 (- n nchars)))) + +(define textual-pad + (case-lambda + ((txt len) + (let ((txt (%textual->text txt 'textual-pad txt len))) + (%text-pad txt len #\space 0 (%text-length txt)))) + ((txt len c) + (let ((txt (%textual->text txt 'textual-pad txt len c))) + (%text-pad txt len c 0 (%text-length txt)))) + ((txt len c start) + (let ((txt (%textual->text txt 'textual-pad txt len c start))) + (%text-pad txt len c start (%text-length txt)))) + ((txt len c start end) + (%text-pad (%textual->text txt 'textual-pad txt len c start end) + len c start end)))) + +(define (%text-pad txt len c start end) + (if (and (exact-integer? len) + (char? c) + (exact-integer? start) + (exact-integer? end) + (<= 0 len) + (<= 0 start end)) + (let* ((n (%text-length txt)) + (k (- end start))) + (cond ((not (<= end n)) + (complain 'textual-pad txt len c start end)) + ((= n k len) + txt) + ((= k len) + (if (= n k) + txt + (subtext txt start end))) + ((< k len) + (textual-append (make-text (- len k) c) + (if (= n k) + txt + (subtext txt start end)))) + (else + (subtext txt (- end len) end)))) + (complain 'textual-pad txt len c start end))) + +(define textual-pad-right + (case-lambda + ((txt len) + (let ((txt (%textual->text txt 'textual-pad-right txt len))) + (%text-pad-right txt len #\space 0 (%text-length txt)))) + ((txt len c) + (let ((txt (%textual->text txt 'textual-pad-right txt len c))) + (%text-pad-right txt len c 0 (%text-length txt)))) + ((txt len c start) + (let ((txt (%textual->text txt 'textual-pad-right txt len c start))) + (%text-pad-right txt len c start (%text-length txt)))) + ((txt len c start end) + (%text-pad-right (%textual->text txt + 'textual-pad-right txt len c start end) + len c start end)))) + +(define (%text-pad-right txt len c start end) + (if (and (exact-integer? len) + (char? c) + (exact-integer? start) + (exact-integer? end) + (<= 0 len) + (<= 0 start end)) + (let* ((n (%text-length txt)) + (k (- end start))) + (cond ((not (<= end n)) + (complain 'textual-pad-right txt len c start end)) + ((= n k len) + txt) + ((= k len) + (if (= n k) + txt + (subtext txt start end))) + ((< k len) + (textual-append (if (= n k) + txt + (subtext txt start end)) + (make-text (- len k) c))) + (else + (subtext txt start (+ start len))))) + (complain 'textual-pad-right txt len c start end))) + +(define textual-trim + (case-lambda + ((txt) + (textual-trim txt char-whitespace? 0)) + ((txt pred) + (textual-trim txt pred 0)) + ((txt pred start) + (let ((txt (%textual->text txt 'textual-trim txt pred start))) + (%text-trim txt pred start (%text-length txt)))) + ((txt pred start end) + (let ((txt (%textual->text txt 'textual-trim txt pred start end))) + (%text-trim txt pred start end))))) + +(define (%text-trim txt pred start end) + (if (and (procedure? pred) + (exact-integer? start) + (exact-integer? end) + (<= 0 start end (%text-length txt))) + (let loop ((i start)) + (cond ((= i end) + (text)) + ((pred (%text-ref txt i)) + (loop (+ i 1))) + (else + (subtext txt i end)))) + (complain 'textual-trim txt pred start end))) + +(define textual-trim-right + (case-lambda + ((txt) + (textual-trim-right txt char-whitespace? 0)) + ((txt pred) + (textual-trim-right txt pred 0)) + ((txt pred start) + (let ((txt (%textual->text txt 'textual-trim-right txt pred start))) + (%text-trim-right txt pred start (%text-length txt)))) + ((txt pred start end) + (let ((txt (%textual->text txt 'textual-trim-right txt pred start end))) + (%text-trim-right txt pred start end))))) + +(define (%text-trim-right txt pred start end) + (if (and (procedure? pred) + (exact-integer? start) + (exact-integer? end) + (<= 0 start end (%text-length txt))) + (let loop ((i (- end 1))) + (cond ((< i start) + (text)) + ((pred (%text-ref txt i)) + (loop (- i 1))) + (else + (subtext txt start (+ i 1))))) + (complain 'textual-trim-right txt pred start end))) + +(define textual-trim-both + (case-lambda + ((txt) + (textual-trim-both txt char-whitespace? 0)) + ((txt pred) + (textual-trim-both txt pred 0)) + ((txt pred start) + (let ((txt (%textual->text txt 'textual-trim-both txt pred start))) + (%text-trim-both txt pred start (%text-length txt)))) + ((txt pred start end) + (let ((txt (%textual->text txt 'textual-trim-both txt pred start end))) + (%text-trim-both txt pred start end))))) + +;;; This is efficient because subtext is fast. + +(define (%text-trim-both txt pred start end) + (if (and (procedure? pred) + (exact-integer? start) + (exact-integer? end) + (<= 0 start end (%text-length txt))) + (textual-trim (textual-trim-right txt pred start end) + pred) + (complain 'textual-trim-both txt pred start end))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Replacement + +(define textual-replace + (case-lambda + ((txt1 txt2 start1 end1 start2 end2) + (textual-append (subtextual txt1 0 start1) + (subtextual txt2 start2 end2) + (subtextual txt1 end1 (textual-length txt1)))) + ((txt1 txt2 start1 end1 start2) + (textual-append (subtextual txt1 0 start1) + (subtextual txt2 start2 (textual-length txt2)) + (subtextual txt1 end1 (textual-length txt1)))) + ((txt1 txt2 start1 end1) + (textual-append (subtextual txt1 0 start1) + txt2 + (subtextual txt1 end1 (textual-length txt1)))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Comparison + +(define (make-nary-comparison name binop0) + (let ((binop (lambda (a b) + (let ((a (%textual->text a name a b)) + (b (%textual->text b name a b))) + (binop0 a b))))) + (letrec ((loop (lambda (first rest) + (cond ((null? rest) + #t) + ((binop first (car rest)) + (loop (car rest) (cdr rest))) + (else + #f))))) + (lambda (a b . rest) + (if (null? rest) + (binop a b) + (and (binop a b) + (loop b rest))))))) + +(define textual=? + (make-nary-comparison 'textual=? + (lambda (a b) + (%text-compare a b =)))) + +(define textual? + (make-nary-comparison 'textual>? + (lambda (a b) + (%text-compare a b >)))) + +(define textual>=? + (make-nary-comparison 'textual>=? + (lambda (a b) + (%text-compare a b >=)))) + +(define textual-ci=? + (make-nary-comparison 'textual-ci=? + (lambda (a b) + (%text-compare-ci a b = string-ci=?)))) + +(define textual-ci? + (make-nary-comparison 'textual-ci>? + (lambda (a b) + (%text-compare-ci a b > string-ci>?)))) + +(define textual-ci>=? + (make-nary-comparison 'textual-ci>=? + (lambda (a b) + (%text-compare-ci a b >= string-ci>=?)))) + +;;; Compares texts a and b. +;;; Determines whether a is less than b (-1), equal (0), or +;;; greater than b (+1), computes the boolean result by +;;; calling make-boolean on that numerical value and 0. + +(define (%text-compare a b make-boolean) + (let* ((na (%text-length a)) + (nb (%text-length b)) + (n (if (<= na nb) na nb))) + (define (loop i) + (if (= i n) + (cond ((< na nb) (make-boolean -1 0)) + ((> na nb) (make-boolean +1 0)) + (else (make-boolean 0 0))) + (let ((ca (%text-ref a i)) + (cb (%text-ref b i))) + (cond ((char? ca cb) (make-boolean +1 0)) + (else (loop (+ i 1))))))) + (loop 0))) + +;;; Compares texts a and b, folding case. +;;; If either text contains non-ASCII characters, both are converted +;;; to strings and compared using string-pred. + +(define (%text-compare-ci a b make-boolean string-pred) + (let* ((na (%text-length a)) + (nb (%text-length b)) + (n (if (<= na nb) na nb))) + (define (loop i) + (if (= i n) + (cond ((< na nb) (make-boolean -1 0)) + ((> na nb) (make-boolean +1 0)) + (else (make-boolean 0 0))) + (let ((ca (%text-ref a i)) + (cb (%text-ref b i))) + (if (or (char>? ca #\delete) + (char>? cb #\delete)) + (string-pred (textual->string a) + (textual->string b)) + (let ((ca (char-foldcase ca)) + (cb (char-foldcase cb))) + (cond ((char? ca cb) (make-boolean +1 0)) + (else (loop (+ i 1))))))))) + (loop 0))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Prefixes & suffixes + +;;; FIXME: this is a prototype of how optional arguments should +;;; be handled. + +(define (%make-text-prefix/suffix-proc proc name) + (case-lambda + (() + (complain name)) + ((x) + (complain name x)) + ((t1 t2) + (let ((txt1 (%textual->text t1 name t1 t2)) + (txt2 (%textual->text t2 name t1 t2))) + (proc txt1 txt2 0 (%text-length txt1) 0 (%text-length txt2)))) + ((t1 t2 start1) + (let* ((txt1 (%textual->text t1 name t1 t2)) + (txt2 (%textual->text t2 name t1 t2)) + (n1 (%text-length txt1))) + (if (and (exact-integer? start1) + (<= 0 start1 n1)) + (proc txt1 txt2 start1 n1 0 (%text-length txt2)) + (complain name t1 t2 start1)))) + ((t1 t2 start1 end1) + (let* ((txt1 (%textual->text t1 name t1 t2)) + (txt2 (%textual->text t2 name t1 t2)) + (n1 (%text-length txt1))) + (if (and (exact-integer? start1) + (exact-integer? end1) + (<= 0 start1 end1 n1)) + (proc txt1 txt2 start1 end1 0 (%text-length txt2)) + (complain name t1 t2 start1 end1)))) + ((t1 t2 start1 end1 start2) + (let* ((txt1 (%textual->text t1 name t1 t2)) + (txt2 (%textual->text t2 name t1 t2)) + (n1 (%text-length txt1)) + (n2 (%text-length txt2))) + (if (and (exact-integer? start1) + (exact-integer? end1) + (exact-integer? start2) + (<= 0 start1 end1 n1) + (<= 0 start2 n2)) + (proc txt1 txt2 start1 end1 start2 n2) + (complain name t1 t2 start1 end1 start2)))) + ((t1 t2 start1 end1 start2 end2) + (let* ((txt1 (%textual->text t1 name t1 t2)) + (txt2 (%textual->text t2 name t1 t2)) + (n1 (%text-length txt1)) + (n2 (%text-length txt2))) + (if (and (exact-integer? start1) + (exact-integer? end1) + (exact-integer? start2) + (exact-integer? end2) + (<= 0 start1 end1 n1) + (<= 0 start2 end2 n2)) + (proc txt1 txt2 start1 end1 start2 end2) + (complain name t1 t2 start1 end1 start2 end2)))) + ((t1 t2 start1 end1 start2 end2 oops . rest) + (apply complain name t1 t2 start1 end1 start2 end2 oops rest)))) + +(define textual-prefix-length + (%make-text-prefix/suffix-proc + (lambda (txt1 txt2 start1 end1 start2 end2) + (%text-prefix-length txt1 txt2 start1 end1 start2 end2)) + 'textual-prefix-length)) + +(define textual-suffix-length + (%make-text-prefix/suffix-proc + (lambda (txt1 txt2 start1 end1 start2 end2) + (%text-suffix-length txt1 txt2 start1 end1 start2 end2)) + 'textual-suffix-length)) + +(define textual-prefix? + (%make-text-prefix/suffix-proc + (lambda (txt1 txt2 start1 end1 start2 end2) + (%text-prefix? txt1 txt2 start1 end1 start2 end2)) + 'textual-prefix?)) + +(define textual-suffix? + (%make-text-prefix/suffix-proc + (lambda (txt1 txt2 start1 end1 start2 end2) + (%text-suffix? txt1 txt2 start1 end1 start2 end2)) + 'textual-suffix?)) + +;;; All error checking has already been done. + +(define (%text-prefix-length txt1 txt2 start1 end1 start2 end2) + (let* ((k1 (- end1 start1)) + (k2 (- end2 start2)) + (k (min k1 k2)) + (end1 (+ start1 k))) + (let loop ((i start1) + (j start2)) + (cond ((= i end1) k) + ((char=? (%text-ref txt1 i) (%text-ref txt2 j)) + (loop (+ i 1) (+ j 1))) + (else (- i start1)))))) + +(define (%text-suffix-length txt1 txt2 start1 end1 start2 end2) + (let* ((k1 (- end1 start1)) + (k2 (- end2 start2)) + (k (min k1 k2)) + (start1 (- end1 k))) + (let loop ((i (- end1 1)) + (j (- end2 1))) + (cond ((< i start1) k) + ((char=? (%text-ref txt1 i) (%text-ref txt2 j)) + (loop (- i 1) (- j 1))) + (else (- end1 i 1)))))) + +(define (%text-prefix? txt1 txt2 start1 end1 start2 end2) + (let ((k1 (- end1 start1)) + (k2 (- end2 start2))) + (and (<= k1 k2) + (= k1 (%text-prefix-length txt1 txt2 start1 end1 start2 end2))))) + +(define (%text-suffix? txt1 txt2 start1 end1 start2 end2) + (let ((k1 (- end1 start1)) + (k2 (- end2 start2))) + (and (<= k1 k2) + (= k1 (%text-suffix-length txt1 txt2 start1 end1 start2 end2))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Searching + +(define-textual (textual-index txt pred . rest) + (let ((start (if (null? rest) 0 (car rest))) + (end (if (or (null? rest) (null? (cdr rest))) + (%text-length txt) + (car (cdr rest))))) + (if (and (procedure? pred) + (exact-integer? start) + (exact-integer? end) + (<= 0 start end (%text-length txt))) + (let loop ((i start)) + (cond ((= i end) + #f) + ((pred (%text-ref txt i)) + i) + (else + (loop (+ i 1))))) + (apply complain 'textual-index txt pred rest)))) + +(define-textual (textual-index-right txt pred . rest) + (let ((start (if (null? rest) 0 (car rest))) + (end (if (or (null? rest) (null? (cdr rest))) + (%text-length txt) + (car (cdr rest))))) + (if (and (procedure? pred) + (exact-integer? start) + (exact-integer? end) + (<= 0 start end (%text-length txt))) + (let loop ((i (- end 1))) + (cond ((< i start) + #f) + ((pred (%text-ref txt i)) + i) + (else + (loop (- i 1))))) + (apply complain 'textual-index-right txt pred rest)))) + +(define (textual-skip txt pred . rest) + (apply textual-index txt (lambda (x) (not (pred x))) rest)) + +(define (textual-skip-right txt pred . rest) + (apply textual-index-right txt (lambda (x) (not (pred x))) rest)) + +(define (textual-contains t1 t2 . rest0) + (let* ((txt1 (%textual->text t1 'textual-contains t1 t2)) + (txt2 (%textual->text t2 'textual-contains t1 t2)) + (rest rest0) + (start1 (if (null? rest) 0 (car rest))) + (rest (if (null? rest) rest (cdr rest))) + (end1 (if (null? rest) (%text-length txt1) (car rest))) + (rest (if (null? rest) rest (cdr rest))) + (start2 (if (null? rest) 0 (car rest))) + (rest (if (null? rest) rest (cdr rest))) + (end2 (if (null? rest) (%text-length txt2) (car rest))) + (rest (if (null? rest) rest (cdr rest)))) + (if (and (null? rest) + (exact-integer? start1) + (exact-integer? end1) + (exact-integer? start2) + (exact-integer? end2) + (<= 0 start1 end1 (%text-length txt1)) + (<= 0 start2 end2 (%text-length txt2))) + (%textual-contains txt1 txt2 start1 end1 start2 end2) + (apply complain 'textual-contains t1 t2 rest0)))) + +;;; No checking needed here. +;;; +;;; Naive search works well when +;;; txt1 is very short +;;; txt2 is very short +;;; txt2 is almost as long as txt1 +;;; Boyer-Moore-Horspool search works well when +;;; txt2 is very short +;;; txt1 is considerably longer than txt2, txt2 is not too short, +;;; and the rightmost character of txt2 is distinct from +;;; (in its low 8 bits) from several characters that precede it +;;; Rabin-Karp works reasonably well all the time, so is used when +;;; neither naive search nor Boyer-Moore-Horspool do well + +(define %threshold:short1 10) ; is txt1 shorter than this? +(define %threshold:short2 3) ; is txt2 shorter than this? +(define %threshold:longer 1) ; is txt1 at least this much longer? +(define %threshold:rightmost 2) ; are rightmost characters the same? + +(define (%textual-contains txt1 txt2 start1 end1 start2 end2) + (let ((n1 (- end1 start1)) + (n2 (- end2 start2))) + (cond ((< n1 %threshold:short1) + (%textual-contains:naive txt1 txt2 start1 end1 start2 end2)) + ((< (- n1 n2) %threshold:longer) + (%textual-contains:naive txt1 txt2 start1 end1 start2 end2)) + ((< n2 %threshold:short2) + (%textual-contains:boyer-moore txt1 txt2 start1 end1 start2 end2)) + ((and (> n2 %threshold:rightmost) + (let ((j (remainder (char->integer (text-ref txt2 (- end2 1))) + 128))) + (let loop ((i (- end2 %threshold:rightmost))) + (cond ((= i (- end2 1)) + #t) + ((= j + (remainder (char->integer (text-ref txt2 i)) + 128)) + #f) + (else + (loop (+ i 1))))))) + (%textual-contains:boyer-moore txt1 txt2 start1 end1 start2 end2)) + (else + (%textual-contains:rabin-karp txt1 txt2 start1 end1 start2 end2))))) + +(define (%textual-contains:naive txt1 txt2 start1 end1 start2 end2) + (let* ((n1 (- end1 start1)) + (n2 (- end2 start2)) + (lim1 (- end1 n2))) + (let loop ((i start1)) + (cond ((> i lim1) + #f) + ((textual-prefix? txt2 txt1 start2 end2 i end1) + i) + (else + (loop (+ i 1))))))) + +(define (%textual-contains:rabin-karp txt1 txt2 start1 end1 start2 end2) + (define (hash txt start end) + (do ((i start (+ i 1)) + (h 0 (+ h (char->integer (text-ref txt i))))) + ((= i end) + h))) + (let* ((n1 (- end1 start1)) + (n2 (- end2 start2)) + (lim1 (- end1 n2)) + (h1 (hash txt1 start1 (min (+ start1 n2) end1))) + (h2 (hash txt2 start2 end2))) + (let loop ((i start1) + (h1 h1)) + (cond ((> i lim1) + #f) + ((and (= h1 h2) + (textual-prefix? txt2 txt1 start2 end2 i end1)) + i) + ((= i lim1) + #f) + (else + (loop (+ i 1) + (+ (- h1 (char->integer (text-ref txt1 i))) + (char->integer (text-ref txt1 (+ i n2)))))))))) + +;;; This is actually the Boyer-Moore-Horspool algorithm, +;;; but the name is already pretty long. + +(define (%textual-contains:boyer-moore txt1 txt2 start1 end1 start2 end2) + (if (= start2 end2) + start1 + (let* ((n1 (- end1 start1)) + (n2 (- end2 start2)) + (lim1 (- end1 n2)) + (lastchar (text-ref txt2 (- end2 1))) + (lastj (remainder (char->integer lastchar) 128)) + (table (make-vector 128 n2))) + (do ((i 0 (+ i 1))) + ((>= i (- n2 1))) + (let* ((c (text-ref txt2 (+ i start2))) + (cp (char->integer c)) + (j (remainder cp 128))) + (vector-set! table j (- n2 i 1)))) + (let loop ((i start1)) + (if (>= i lim1) + (if (and (= i lim1) + (textual-prefix? txt2 txt1 start2 end2 i end1)) + i + #f) + (let* ((c (text-ref txt1 (+ i n2 -1))) + (cp (char->integer c)) + (j (remainder cp 128))) + (cond ((not (char=? c lastchar)) + (loop (+ i (vector-ref table j)))) + ((textual-prefix? txt2 txt1 start2 end2 i end1) + i) + (else + (loop (+ i (vector-ref table lastj))))))))))) + +;;; FIXME: no Rabin-Karp algorithm for now + +(define (textual-contains-right t1 t2 . rest0) + (let* ((txt1 (%textual->text t1 'textual-contains-right t1 t2)) + (txt2 (%textual->text t2 'textual-contains-right t1 t2)) + (rest rest0) + (start1 (if (null? rest) 0 (car rest))) + (rest (if (null? rest) rest (cdr rest))) + (end1 (if (null? rest) (%text-length txt1) (car rest))) + (rest (if (null? rest) rest (cdr rest))) + (start2 (if (null? rest) 0 (car rest))) + (rest (if (null? rest) rest (cdr rest))) + (end2 (if (null? rest) (%text-length txt2) (car rest))) + (rest (if (null? rest) rest (cdr rest)))) + (if (and (null? rest) + (exact-integer? start1) + (exact-integer? end1) + (exact-integer? start2) + (exact-integer? end2) + (<= 0 start1 end1 (%text-length txt1)) + (<= 0 start2 end2 (%text-length txt2))) + (%textual-contains-right txt1 txt2 start1 end1 start2 end2) + (apply complain 'textual-contains-right t1 t2 rest0)))) + +(define (%textual-contains-right txt1 txt2 start1 end1 start2 end2) + (let ((n1 (- end1 start1)) + (n2 (- end2 start2))) + (cond ((< n1 %threshold:short1) + (%textual-contains-right:naive + txt1 txt2 start1 end1 start2 end2)) + ((< (- n1 n2) %threshold:longer) + (%textual-contains-right:naive + txt1 txt2 start1 end1 start2 end2)) + ((< n2 %threshold:short2) + (%textual-contains-right:boyer-moore + txt1 txt2 start1 end1 start2 end2)) + (else + (%textual-contains-right:boyer-moore + txt1 txt2 start1 end1 start2 end2))))) + +(define (%textual-contains-right:naive txt1 txt2 start1 end1 start2 end2) + (let* ((n1 (- end1 start1)) + (n2 (- end2 start2)) + (lim1 (- end1 n2))) + (let loop ((i lim1)) + (cond ((< i start1) + #f) + ((textual-prefix? txt2 txt1 start2 end2 i end1) + i) + (else + (loop (- i 1))))))) + +;;; This is actually the Boyer-Moore-Horspool algorithm, +;;; but the name is already pretty long. + +(define (%textual-contains-right:boyer-moore txt1 txt2 start1 end1 start2 end2) + (if (= start2 end2) + end1 + (let* ((n1 (- end1 start1)) + (n2 (- end2 start2)) + (firstchar (text-ref txt2 0)) + (firstj (remainder (char->integer firstchar) 128)) + (table (make-vector 128 n2))) + (do ((i (- n2 1) (- i 1))) + ((<= i 0)) + (let* ((c (text-ref txt2 (+ i start2))) + (cp (char->integer c)) + (j (remainder cp 128))) + (vector-set! table j i))) + (let loop ((i (- end1 n2))) + (if (<= i start1) + (if (and (= i start1) + (textual-prefix? txt2 txt1 start2 end2 i end1)) + i + #f) + (let* ((c (text-ref txt1 i)) + (cp (char->integer c)) + (j (remainder cp 128))) + (cond ((not (char=? c firstchar)) + (loop (- i (vector-ref table j)))) + ((textual-prefix? txt2 txt1 start2 end2 i end1) + i) + (else + (loop (- i (vector-ref table firstj))))))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Case conversion + +;;; Two special cases: +;;; the given text can be returned as is +;;; the given text is entirely ASCII +;;; +;;; For all other cases, calls the corresponding procedures for strings. + +(define (textual-upcase txt) + (cond ((string? txt) + (string->text (string-upcase txt))) + ((text? txt) + (%text-upcase txt)) + (else + (complain 'textual-upcase txt)))) + +(define (textual-downcase txt) + (cond ((string? txt) + (string->text (string-downcase txt))) + ((text? txt) + (%text-downcase txt string-downcase)) + (else + (complain 'textual-downcase txt)))) + +(define (textual-foldcase txt) + (cond ((string? txt) + (string->text (string-foldcase txt))) + ((text? txt) + (%text-downcase txt string-foldcase)) + (else + (complain 'textual-foldcase txt)))) + +(define (textual-titlecase txt) + (cond ((string? txt) + (string->text (string-titlecase txt))) + ((text? txt) + (string->text + (string-titlecase (textual->string txt)))) + (else + (complain 'textual-titlecase txt)))) + +(define (%text-upcase txt) + (let* ((n (%text-length txt))) + + ;; So far, no conversion has been necessary. + + (define (fastest i) + (if (= i n) + txt + (let ((c (%text-ref txt i))) + (cond ((char>? c #\delete) + (textual-upcase (textual->string txt))) + ((char<=? #\a c #\z) + (fast i (list (subtext txt 0 i)) '())) + (else + (fastest (+ i 1))))))) + + ;; Conversions are necessary but it's been all-ASCII so far. + ;; The upcased text for characters with index < i is + ;; (text-concatenate (reverse (cons (list->text (reverse chars)) + ;; texts))) + + (define (fast i texts chars) + (cond ((= i n) + (if (null? chars) + (textual-concatenate-reverse texts) + (textual-concatenate-reverse texts + (reverse-list->text chars)))) + ((and (= 0 (remainder i N)) + (not (null? chars))) + (fast i (cons (reverse-list->text chars) texts) '())) + (else + (let ((c (%text-ref txt i))) + (cond ((char>? c #\delete) + (textual-append (textual-concatenate-reverse texts) + (reverse-list->text chars) + (string->text + (string-upcase (subtext txt i n))))) + ((char<=? #\a c #\z) + (fast (+ i 1) texts (cons (char-upcase c) chars))) + (else + (fast (+ i 1) texts (cons c chars)))))))) + + (fastest 0))) + +;;; The string-caser is either string-downcase or string-foldcase. +;;; For ASCII, down-casing and fold-casing are the same. + +(define (%text-downcase txt string-caser) + (let* ((n (%text-length txt))) + + ;; So far, no conversion has been necessary. + + (define (fastest i) + (if (= i n) + txt + (let ((c (%text-ref txt i))) + (cond ((char>? c #\delete) + (textual-downcase (textual->string txt))) + ((char<=? #\A c #\Z) + (fast i (list (subtext txt 0 i)) '())) + (else + (fastest (+ i 1))))))) + + ;; Conversions are necessary but it's been all-ASCII so far. + ;; The downcased text for characters with index < i is + ;; (textual-concatenate (reverse (cons (list->text (reverse chars)) + ;; texts))) + + (define (fast i texts chars) + (cond ((= i n) + (if (null? chars) + (textual-concatenate-reverse texts) + (textual-concatenate-reverse texts + (reverse-list->text chars)))) + ((and (= 0 (remainder i N)) + (not (null? chars))) + (fast i (cons (reverse-list->text chars) texts) '())) + (else + (let ((c (%text-ref txt i))) + (cond ((char>? c #\delete) + (textual-append (textual-concatenate-reverse texts) + (reverse-list->text chars) + (string->text + (string-caser (subtext txt i n))))) + ((char<=? #\A c #\Z) + (fast (+ i 1) texts (cons (char-downcase c) chars))) + (else + (fast (+ i 1) texts (cons c chars)))))))) + + (fastest 0))) + +;;; This is a fake version of string-titlecase, to be used only +;;; if there is no Unicode-conforming version available. + +(cond-expand + ((and (not (library (rnrs unicode))) + (not (library (srfi 129)))) + (define (%string-titlecase s) + (let* ((s (string-copy (string-foldcase s))) + (n (string-length s))) + (define (first-character-of-word! i) + (if (< i n) + (let ((c (string-ref s i))) + (if (char-whitespace? c) + (first-character-of-word! (+ i 1)) + (begin (string-set! s i (char-upcase c)) + (subsequent-character! (+ i 1))))))) + (define (subsequent-character! i) + (if (< i n) + (let ((c (string-ref s i))) + (if (char-whitespace? c) + (first-character-of-word! (+ i 1)) + (subsequent-character! (+ i 1)))))) + (first-character-of-word! 0) + s))) + (else)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Concatenation +;;; +;;; textual-concatenate is defined by the kernel + +(define (textual-append . texts) + (textual-concatenate texts)) + +(define textual-concatenate-reverse + (case-lambda + ((texts) + (textual-concatenate (reverse texts))) + ((texts final-textual) + (textual-concatenate-reverse (cons final-textual texts))) + ((texts final-textual end) + (textual-concatenate-reverse texts + (subtext + (%textual->text final-textual + 'textual-concatenate-reverse + texts final-textual end) + 0 end))))) + +(define textual-join + (case-lambda + ((textuals) + (textual-join textuals " " 'infix)) + ((textuals delimiter) + (textual-join textuals delimiter 'infix)) + ((textuals delimiter grammar) + (let* ((texts (map (lambda (t) (%textual->text t 'textual-join textuals)) + textuals)) + (delimiter (%textual->text delimiter + 'textual-join textuals delimiter))) + (if (memq grammar '(infix strict-infix prefix suffix)) + (if (null? texts) + (case grammar + ((strict-infix) + (complain 'textual-join textuals delimiter grammar)) + (else (text))) + (let loop ((rtxts (reverse texts)) + (texts (if (eq? grammar 'suffix) + (list delimiter) + '()))) + (cond ((null? rtxts) + (let ((texts (if (eq? grammar 'prefix) + texts + (cdr texts)))) + (textual-concatenate texts))) + (else + (loop (cdr rtxts) + (cons delimiter (cons (car rtxts) texts))))))) + (complain 'textual-join textuals delimiter grammar)))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Fold & map & friends + +(define-textual-start-end (textual-fold kons knil txt start end) + (if (procedure? kons) + (let loop ((knil knil) + (i start)) + (if (< i end) + (loop (kons (%text-ref txt i) knil) + (+ i 1)) + knil)) + (complain 'textual-fold kons knil txt start end))) + +(define-textual-start-end (textual-fold-right kons knil txt start end) + (if (procedure? kons) + (let loop ((knil knil) + (i (- end 1))) + (if (>= i start) + (loop (kons (%text-ref txt i) knil) + (- i 1)) + knil)) + (complain 'textual-fold-right kons knil txt start end))) + +(define textual-map + (case-lambda + ((proc txt) + (%textual-map1 proc txt)) + ((proc txt1 txt2 . rest) + (%textual-mapn proc (cons txt1 (cons txt2 rest)))))) + +(define (%textual-map1 proc txt) + (let ((txt (%textual->text txt 'textual-map proc txt))) + (if (procedure? proc) + (let ((n (%text-length txt))) + (let loop ((i 0) + (pieces '()) + (chars '()) + (k 0)) + (cond ((= i n) + (textual-concatenate + (reverse (%text-map-pieces pieces chars)))) + ((>= k N) + (loop i + (%text-map-pieces pieces chars) + '() + (remainder k N))) + (else + (let ((x (proc (%text-ref txt i)))) + (loop (+ i 1) + pieces + (cons x chars) + (+ k (cond ((char? x) 1) + ((string? x) (string-length x)) + ((text? x) (%text-length x)) + (else + (%textual-map-bad-result proc x)))))))))) + (complain 'textual-map proc txt)))) + +(define (%textual-mapn proc textuals) + (if (procedure? proc) + (let* ((texts (map (lambda (txt) + (%textual->text txt 'textual-map textuals)) + textuals)) + (n (apply min (map %text-length texts)))) + (let loop ((i 0) + (pieces '()) + (chars '()) + (k 0)) + (cond ((= i n) + (textual-concatenate + (reverse (%text-map-pieces pieces chars)))) + ((>= k N) + (loop i + (%text-map-pieces pieces chars) + '() + (remainder k N))) + (else + (let ((x (apply proc (%fetch-all texts i)))) + (loop (+ i 1) + pieces + (cons x chars) + (+ k (cond ((char? x) 1) + ((string? x) (string-length x)) + ((text? x) (%text-length x)) + (else + (%textual-map-bad-result proc x)))))))))) + (complain 'textual-map proc textuals))) + +(define (%textual-map-bad-result proc x) + (error "textual-map: proc returned non-character" x)) + +;;; Given a list of texts and a list of mixed characters/strings/texts, +;;; in reverse order, converts the second argument into a text and +;;; returns that text consed onto the first argument. + +(define (%text-map-pieces texts stuff) + (let loop ((revstuff stuff) + (stuff '()) + (n 0)) + (if (null? revstuff) + (let ((s (make-string n))) ; probably short + (let inner-loop ((stuff stuff) + (i 0)) + (if (null? stuff) + (cons (string->text s) texts) + (let ((x (car stuff))) + (cond ((char? x) + (string-set! s i x) + (inner-loop (cdr stuff) (+ i 1))) + ((string? x) + (string-copy! s i x) + (inner-loop (cdr stuff) (+ i (string-length x)))) + (else + (string-copy! s i (textual->string x)) + (inner-loop (cdr stuff) (+ i (text-length x))))))))) + (let* ((x (car revstuff)) + (revstuff (cdr revstuff)) + (stuff (cons x stuff))) + (loop revstuff + stuff + (+ n (cond ((char? x) 1) + ((string? x) (string-length x)) + (else (text-length x))))))))) + +(define textual-for-each + (case-lambda + ((proc txt) + (%textual-for-each1 proc txt)) + ((proc txt1 txt2 . rest) + (%textual-for-eachn proc (cons txt1 (cons txt2 rest)))))) + +(define (%textual-for-each1 proc txt) + (let ((txt (%textual->text txt 'textual-for-each proc txt))) + (if (procedure? proc) + (let ((n (%text-length txt))) + (let loop ((i 0)) + (if (< i n) + (begin (proc (%text-ref txt i)) + (loop (+ i 1)))))) + (complain 'textual-for-each proc txt)))) + +(define (%textual-for-eachn proc textuals) + (if (procedure? proc) + (let* ((texts (map (lambda (txt) + (%textual->text txt 'textual-map textuals)) + textuals)) + (n (apply min (map %text-length texts)))) + (let loop ((i 0)) + (if (< i n) + (begin (apply proc (%fetch-all texts i)) + (loop (+ i 1)))))) + (complain 'textual-for-each proc textuals))) + +(define (%fetch-all texts i) + (if (null? texts) + '() + (cons (%text-ref (car texts) i) + (%fetch-all (cdr texts) i)))) + +;;; FIXME: there's no reason to convert a string to a text here + +(define-textual-start-end (textual-map-index proc txt start end) + (if (procedure? proc) + (let ((n end)) + (let loop ((i start) + (pieces '()) + (chars '()) + (k 0)) + (cond ((= i n) + (textual-concatenate + (reverse (%text-map-pieces pieces chars)))) + ((>= k N) + (loop i + (%text-map-pieces pieces chars) + '() + (remainder k N))) + (else + (let ((x (proc i))) + (loop (+ i 1) + pieces + (cons x chars) + (+ k (cond ((char? x) 1) + ((string? x) (string-length x)) + ((text? x) (%text-length x)) + (else + (%textual-map-bad-result proc x)))))))))) + (complain 'textual-map-index proc txt))) + +;;; FIXME: there's no reason to convert a string to a text here + +(define-textual-start-end (textual-for-each-index proc txt start end) + (if (procedure? proc) + (let ((n end)) + (let loop ((i start)) + (if (< i n) + (begin (proc i) + (loop (+ i 1)))))) + (complain 'textual-for-each-index proc txt))) + +(define-textual (textual-count txt pred . rest) + (let ((start (if (null? rest) 0 (car rest))) + (end (if (or (null? rest) (null? (cdr rest))) + (%text-length txt) + (car (cdr rest))))) + (if (and (procedure? pred) + (or (null? rest) (null? (cdr rest)) (null? (cdr (cdr rest)))) + (exact-integer? start) + (exact-integer? end) + (<= 0 start end (%text-length txt))) + (textual-fold (lambda (c n) + (if (pred c) + (+ n 1) + n)) + 0 txt start end) + (complain 'textual-count txt pred start end)))) + +(define-textual-start-end (textual-filter pred txt start end) + (if (procedure? pred) + (textual-map (lambda (c) (if (pred c) c "")) + (subtext txt start end)) + (complain 'textual-filter pred txt start end))) + +;;; FIXME: checks arguments twice + +(define-textual-start-end (textual-remove pred txt start end) + (textual-filter (lambda (c) (not (pred c))) txt start end)) + +;;; FIXME: not linear-time unless string-set! is O(1) +;;; (but this is a pretty useless procedure anyway) + +(define-textual-start-end (textual-reverse txt start end) + (let* ((n (- end start)) + (s (make-string n))) + (do ((i start (+ i 1))) + ((= i end) + (string->text s)) + (string-set! s (- n (- i start) 1) (%text-ref txt i))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; Replication & splitting + +(define textual-replicate + (case-lambda + ((s from to start end) + (let ((s (%textual->text s 'textual-replicate s from to start end))) + (textual-replicate (subtext s start end) from to))) + ((s from to start) + (let ((s (%textual->text s 'textual-replicate s from to start))) + (textual-replicate (subtext s start (textual-length s)) from to))) + ((s0 from to) + (let* ((s (%textual->text s0 'textual-replicate s0 from to)) + (n (- to from)) + (len (%text-length s))) + (cond ((= n 0) + "") + ((or (< n 0) + (= len 0)) + (complain 'textual-replicate s from to)) + (else + (let* ((from (mod from len)) ; make from non-negative + (to (+ from n))) + (do ((replicates '() (cons s replicates)) + (replicates-length 0 (+ replicates-length len))) + ((>= replicates-length to) + (subtext (apply textual-append replicates) + from to)))))))))) + +(define textual-split + (case-lambda + ((s delimiter grammar limit start end) + (textual-split (subtextual s start end) delimiter grammar limit)) + ((s delimiter grammar limit start) + (textual-split (subtextual s start (textual-length s)) + delimiter grammar limit)) + ((s delimiter) + (textual-split s delimiter 'infix #f)) + ((s delimiter grammar) + (textual-split s delimiter grammar #f)) + ((s0 delimiter grammar limit) + (define (bad-arguments) + (complain 'textual-split s0 delimiter grammar limit)) + (let* ((s (%textual->text s0 'textual-split s0 delimiter grammar limit)) + (delimiter + (%textual->text delimiter + 'textual-split s0 delimiter grammar limit)) + (limit (or limit (%text-length s))) + (splits + (cond ((= 0 (%text-length delimiter)) + (%text-split-into-characters s limit)) + (else + (%text-split-using-word s delimiter limit))))) + (case grammar + ((infix strict-infix) + (if (= 0 (%text-length s)) + (if (eq? grammar 'infix) + '() + (bad-arguments)) + splits)) + ((prefix) + (if (and (pair? splits) + (= 0 (%text-length (car splits)))) + (cdr splits) + splits)) + ((suffix) + (if (and (pair? splits) + (= 0 (%text-length (car (last-pair splits))))) + (reverse (cdr (reverse splits))) + splits)) + (else + (bad-arguments))))))) + +(define (%text-split-into-characters s limit) + (let ((n (%text-length s))) + (cond ((> n (+ limit 1)) + (append (%text-split-into-characters (subtext s 0 limit) limit) + (list (subtext s limit n)))) + (else + (map text (textual->list s)))))) + +;;; FIXME: inefficient + +(define (%text-split-using-word txt sep limit) + (let loop ((i 0) + (limit limit) + (texts '())) + (if (= 0 limit) + (reverse (cons (subtext txt i (%text-length txt)) texts)) + (let ((i2 (textual-contains txt sep i))) + (if i2 + (loop (+ i2 (%text-length sep)) + (- limit 1) + (cons (subtext txt i i2) texts)) + (loop i 0 texts)))))) + +;;; eof diff --git a/lib/srfi/135.sld b/lib/srfi/135.sld new file mode 100644 index 00000000..f954538f --- /dev/null +++ b/lib/srfi/135.sld @@ -0,0 +1,223 @@ +;;; Copyright (C) William D Clinger (2016). +;;; +;;; Permission is hereby granted, free of charge, to any person +;;; obtaining a copy of this software and associated documentation +;;; files (the "Software"), to deal in the Software without +;;; restriction, including without limitation the rights to use, +;;; copy, modify, merge, publish, distribute, sublicense, and/or +;;; sell copies of the Software, and to permit persons to whom the +;;; Software is furnished to do so, subject to the following +;;; conditions: +;;; +;;; The above copyright notice and this permission notice shall be +;;; included in all copies or substantial portions of the Software. +;;; +;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +;;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +;;; OTHER DEALINGS IN THE SOFTWARE. + +(define-library (srfi 135) + + (export + + ;; Predicates + + text? textual? + textual-null? + textual-every textual-any + + ;; Constructors + + make-text text + text-tabulate + text-unfold text-unfold-right + + ;; Conversion + + textual->text + textual->string textual->vector textual->list + string->text vector->text list->text reverse-list->text + textual->utf8 textual->utf16be + textual->utf16 textual->utf16le + utf8->text utf16be->text + utf16->text utf16le->text + + ;; Selection + + text-length textual-length + text-ref textual-ref + subtext subtextual + textual-copy + textual-take textual-take-right + textual-drop textual-drop-right + textual-pad textual-pad-right + textual-trim textual-trim-right textual-trim-both + + ;; Replacement + + textual-replace + + ;; Comparison + + textual=? textual-ci=? + textual? textual-ci>? + textual<=? textual-ci<=? + textual>=? textual-ci>=? + + ;; Prefixes & suffixes + + textual-prefix-length textual-suffix-length + textual-prefix? textual-suffix? + + ;; Searching + + textual-index textual-index-right + textual-skip textual-skip-right + textual-contains textual-contains-right + + ;; Case conversion + + textual-upcase textual-downcase + textual-foldcase textual-titlecase + + ;; Concatenation + + textual-append textual-concatenate textual-concatenate-reverse + textual-join + + ;; Fold & map & friends + + textual-fold textual-fold-right + textual-map textual-for-each + textual-map-index textual-for-each-index + textual-count + textual-filter textual-remove +; textual-reverse + + ;; Replication & splitting + + textual-replicate textual-split + ) + + (import (scheme base) + (scheme case-lambda) + (scheme char) + (srfi 135 kernel8)) + + (cond-expand + ((library (rnrs unicode)) + (import (only (rnrs unicode) string-titlecase))) + ((library (srfi 129)) + (import (only (srfi 129) string-titlecase))) + (else + (begin + (define (string-titlecase s) + (%string-titlecase s))))) + + ;; textual-replicate needs a sensible mod procedure + + (cond-expand + ((library (rnrs base)) + (import (only (rnrs base) div mod))) + (else + (begin + + (define (assertion-violation procname msg . irritants) + (apply error msg irritants)) + + ;; Restricted to exact integers, which is all we need here. + + (define (div-and-mod x y) + (cond ((and (exact-integer? x) (exact-integer? y)) + (cond ((= y 0) + (error "mod: zero divisor" x y)) + ((>= x 0) + (values (quotient x y) (remainder x y))) + ((< y 0) + ; x < 0, y < 0 + (let* ((q (quotient x y)) + (r (- x (* q y)))) + (if (= r 0) + (values q 0) + (values (+ q 1) (- r y))))) + (else + ; x < 0, y > 0 + (let* ((q (quotient x y)) + (r (- x (* q y)))) + (if (= r 0) + (values q 0) + (values (- q 1) (+ r y))))))) + (else + (error "div or mod: illegal arguments" x y)))) + + (define (div x y) + (cond ((and (exact-integer? x) + (exact-integer? y) + (>= x 0)) + (quotient x y)) + (else + (call-with-values + (lambda () (div-and-mod x y)) + (lambda (q r) q))))) + + (define (mod x y) + (cond ((and (exact-integer? x) + (exact-integer? y) + (>= x 0)) + (remainder x y)) + (else + (call-with-values + (lambda () (div-and-mod x y)) + (lambda (q r) r)))))))) + + ;; To run texts-search-test.sps, change the (or) to (and). + + (cond-expand ((or) + (export + %textual-contains:naive + %textual-contains:rabin-karp + %textual-contains:boyer-moore + + %textual-contains-right:naive + %textual-contains-right:boyer-moore + )) + (else)) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + ;;; The recommended external syntax cannot be implemented portably. + ;;; Here is Larceny-specific code that generates the recommended + ;;; syntax for output, but does not make the changes necessary to + ;;; accept that recommended syntax for input or as a literal in + ;;; programs. + + (cond-expand (larceny + (import (scheme write) + (primitives rtd-printer-set!))) + (else)) + + (cond-expand (larceny + (begin + (define (text-write txt p) + (let* ((q (open-output-string)) + (s (begin (write (textual->string txt) q) + (get-output-string q)))) + (write-char (integer->char #x00ab) p) + (write-string (substring s 1 (- (string-length s) 1)) p) + (write-char (integer->char #x00bb) p))) + + (define ignored-result-from-rtd-printer-set! ; for R6RS + (rtd-printer-set! text-rtd text-write)))) + (else)) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + (include "135.scm")) + +;;; eof diff --git a/lib/srfi/135/kernel8.body.scm b/lib/srfi/135/kernel8.body.scm new file mode 100644 index 00000000..6385944f --- /dev/null +++ b/lib/srfi/135/kernel8.body.scm @@ -0,0 +1,518 @@ +;;; Copyright (C) William D Clinger (2016). +;;; +;;; Permission is hereby granted, free of charge, to any person +;;; obtaining a copy of this software and associated documentation +;;; files (the "Software"), to deal in the Software without +;;; restriction, including without limitation the rights to use, +;;; copy, modify, merge, publish, distribute, sublicense, and/or +;;; sell copies of the Software, and to permit persons to whom the +;;; Software is furnished to do so, subject to the following +;;; conditions: +;;; +;;; The above copyright notice and this permission notice shall be +;;; included in all copies or substantial portions of the Software. +;;; +;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +;;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +;;; OTHER DEALINGS IN THE SOFTWARE. + +;;; FIXME: these utilities should be in a separate file + +(define (complain name . args) + (apply error + (string-append (symbol->string name) ": illegal arguments") + args)) + +(define (list->bytevector bytes) + (let* ((n (length bytes)) + (bv (make-bytevector n))) + (do ((i 0 (+ i 1)) + (bytes bytes (cdr bytes))) + ((= i n)) + (bytevector-u8-set! bv i (car bytes))) + bv)) + +;;; 1-argument version for internal use + +(define (%string->text s) + (if (string? s) + (text-tabulate (lambda (i) (string-ref s i)) + (string-length s)) + (complain 'string->text s))) + +;;; A portable implementation can't rely on inlining, +;;; but it can rely on macros. + +(define N 128) + +(define (length&i0 len i0) + (+ (* N len) i0)) + +#; +(define (length&i0.length k) + (quotient k N)) + +#; +(define (length&i0.i0 k) + (remainder k N)) + +(define-syntax length&i0.length + (syntax-rules () + ((_ k) + (quotient k N)))) + +(define-syntax length&i0.i0 + (syntax-rules () + ((_ k) + (remainder k N)))) + +(define-record-type text-rtd + (new-text0 k chunks) + text? + (k text.k) + (chunks text.chunks)) + +(define (%new-text len i0 chunks) + (new-text0 (length&i0 len i0) chunks)) + +(define the-empty-text + (%new-text 0 0 (vector (make-bytevector 0)))) + +;;; text? is defined by the record definition above. + +(define (text-length txt) + (if (text? txt) + (length&i0.length (text.k txt)) + (error "text-length: not a text" txt))) + +(define (text-ref txt i) + (if (and (text? txt) + (exact-integer? i) + (<= 0 i)) + (let* ((k (text.k txt)) + (chunks (text.chunks txt)) + (len (length&i0.length k)) + (i0 (length&i0.i0 k)) + (i+i0 (+ i i0)) + (j (quotient i+i0 N)) + (ii (remainder i+i0 N))) + (if (< i len) + (let* ((sj (vector-ref chunks j)) + (sjn (bytevector-length sj))) + (if (if (< j (- (vector-length chunks) 1)) + (= sjn N) + (= sjn (remainder (+ i0 len) N))) + (integer->char (bytevector-u8-ref sj ii)) + (%utf8-ref sj ii))) + (error "text-ref: index out of range" txt i))) + (error "text-ref: illegal arguments" txt i))) + +;;; Non-checking versions for internal use. + +(define (%text-length txt) + (length&i0.length (text.k txt))) + +(define (%text-ref txt i) + (let* ((k (text.k txt)) + (chunks (text.chunks txt)) + (len (length&i0.length k)) + (i0 (length&i0.i0 k)) + (i+i0 (+ i i0)) + (j (quotient i+i0 N)) + (ii (remainder i+i0 N)) + (sj (vector-ref chunks j)) + (sjn (bytevector-length sj))) + (if (if (< j (- (vector-length chunks) 1)) + (= sjn N) + (= sjn (remainder (+ i0 len) N))) + (integer->char (bytevector-u8-ref sj ii)) + (%utf8-ref sj ii)))) + +;;; Returns character i of the UTF-8. + +(define (%utf8-ref bv i) + (let loop ((j 0) ; character index of (bytevector-u8-ref bv k) + (k 0)) ; byte index into bv + (if (= i j) + (%utf8-char-at bv k) + (let ((byte (bytevector-u8-ref bv k))) + (cond ((< byte 128) + (loop (+ j 1) (+ k 1))) + ((< byte #b11100000) + (loop (+ j 1) (+ k 2))) + ((< byte #b11110000) + (loop (+ j 1) (+ k 3))) + (else + (loop (+ j 1) (+ k 4)))))))) + +(define (%utf8-char-at bv k) + (let ((byte (bytevector-u8-ref bv k))) + (cond ((< byte 128) + (integer->char byte)) + ((< byte #b11100000) + (let* ((byte2 (bytevector-u8-ref bv (+ k 1))) + (bits1 (- byte #b11000000)) + (bits2 (- byte2 #b10000000)) + (cp (+ (* 64 bits1) bits2))) + (integer->char cp))) + ((< byte #b11110000) + (let* ((byte2 (bytevector-u8-ref bv (+ k 1))) + (byte3 (bytevector-u8-ref bv (+ k 2))) + (bits1 (- byte #b11100000)) + (bits2 (- byte2 #b10000000)) + (bits3 (- byte3 #b10000000)) + (cp (+ (* 64 64 bits1) (* 64 bits2) bits3))) + (integer->char cp))) + (else + (let* ((byte2 (bytevector-u8-ref bv (+ k 1))) + (byte3 (bytevector-u8-ref bv (+ k 2))) + (byte4 (bytevector-u8-ref bv (+ k 3))) + (bits1 (- byte #b11110000)) + (bits2 (- byte2 #b10000000)) + (bits3 (- byte3 #b10000000)) + (bits4 (- byte4 #b10000000)) + (cp (+ (* 64 64 64 bits1) + (* 64 64 bits2) + (* 64 bits3) + bits4))) + (integer->char cp)))))) + +;;; text-tabulate avoids side effects (in case proc returns more than once) + +(define (text-tabulate proc len) + (if (= 0 len) + the-empty-text + (let loop ((i len) ; highest index that's been tabulated + (chunks '()) + (bytes '())) + (cond ((= 0 i) + (%new-text len + 0 + (list->vector + (cons (list->bytevector bytes) + chunks)))) + ((and (= 0 (remainder i N)) + (not (null? bytes))) + (loop i + (cons (list->bytevector bytes) chunks) + '())) + (else + (let* ((i-1 (- i 1)) + (c (proc i-1))) + (if (char? c) + (let ((cp (char->integer c))) + (loop i-1 + chunks + (cond ((< cp #x0080) + (cons cp bytes)) + ((< cp #x0800) + (let* ((bits1 (quotient cp 64)) + (bits2 (remainder cp 64)) + (byte1 (+ bits1 #b11000000)) + (byte2 (+ bits2 #b10000000))) + (cons byte1 (cons byte2 bytes)))) + ((< cp #x10000) + (let* ((bits1 (quotient cp (* 64 64))) + (bits2 (quotient + (remainder cp (* 64 64)) + 64)) + (bits3 (remainder cp 64)) + (byte1 (+ bits1 #b11100000)) + (byte2 (+ bits2 #b10000000)) + (byte3 (+ bits3 #b10000000))) + (cons byte1 + (cons byte2 + (cons byte3 bytes))))) + (else + (let* ((bits1 (quotient cp (* 64 64 64))) + (bits2 (quotient + (remainder cp (* 64 64 64)) + (* 64 64))) + (bits3 (quotient + (remainder cp (* 64 64)) + 64)) + (bits4 (remainder cp 64)) + (byte1 (+ bits1 #b11110000)) + (byte2 (+ bits2 #b10000000)) + (byte3 (+ bits3 #b10000000)) + (byte4 (+ bits4 #b10000000))) + (cons byte1 + (cons byte2 + (cons byte3 + (cons byte4 + bytes))))))))) + (error "text-tabulate: proc returned a non-character" + proc len c)))))))) + +;;; FIXME: should the fast case do something different +;;; if the length of the result is sufficiently small? +;;; Probably not: splitting a 100-character text into +;;; 100 1-character texts should be fast, and programmers +;;; can now use text-copy instead if they're worried about it. +;;; +;;; subtext is now defined only for texts; use subtextual +;;; if the first argument might be a string. + +(define (subtext txt start end) + (cond ((and (text? txt) + (exact-integer? start) + (exact-integer? end) + (<= 0 start end)) + (%subtext txt start end)) +#; ((string? txt) + (%string->text (substring txt start end))) + (else + (complain 'subtext txt start end)))) + +(define (%subtext txt start end) + (let* ((k (text.k txt)) + (chunks (text.chunks txt)) + (len (length&i0.length k)) + (i0 (length&i0.i0 k)) + (i+i0 (+ start i0)) + (end+i0 (+ end i0)) + (len+i0 (+ len i0)) + (jstart (quotient i+i0 N)) + (jend (quotient end+i0 N)) + (jlen (quotient len N))) + (if (<= end len) + (cond ((= start end) + the-empty-text) + ((and (= 0 jstart) + (= jlen jend)) + ;; the fast case + (%new-text (- end start) i+i0 chunks)) + (else + (let* ((v (make-vector (+ 1 (- jend jstart))))) + (do ((j jstart (+ j 1))) + ((> j jend)) + (vector-set! v (- j jstart) (vector-ref chunks j))) + (%new-text (- end start) + (remainder i+i0 N) + v)))) + (error "subtext: end out of range" txt start end)))) + +;;; There are a lot of special cases that could be exploited here: +;;; share the characters of the longest text +;;; share the characters of the longest run of texts +;;; whose characters don't have to be copied +;;; if (text-length txt1) is a multiple of N, +;;; and txt2 starts at offset 0, +;;; then txt1 and txt2 can be concatenated +;;; without copying any of their characters +;;; +;;; That's a partial list. +;;; It would be easy to spend more time detecting special cases +;;; than would be saved on average. +;;; In the interest of simplicity and reliability, this code +;;; currently implements only two special cases: +;;; share the full chunks of the longest text +;;; provided +;;; it contains at least N characters +;;; it contains at least half the characters of the result +;;; its characters start at offset zero +;;; share the full chunks of the first text +;;; provided +;;; it contains at least N characters +;;; its characters start at offset zero + +(define (textual-concatenate texts) + (cond ((not (list? texts)) + (complain 'textual-concatenate texts)) + ((null? texts) the-empty-text) + ((null? (cdr texts)) + (let ((txt (car texts))) + (cond ((text? txt) txt) + ((string? txt) + (%string->text txt)) + (else (complain 'textual-concatenate texts))))) + (else + (let loop ((items (reverse texts)) + (real-texts '()) + (n 0) + (longest #f) + (longest-length 0)) + (cond ((null? items) + (%text-concatenate-n real-texts n longest longest-length)) + ((text? (car items)) + (let* ((txt (car items)) + (k (%text-length txt))) + (loop (cdr items) + (cons txt real-texts) + (+ n k) + (if (> k longest-length) txt longest) + (max k longest-length)))) + ((string? (car items)) + (loop (cons (%string->text (car items)) (cdr items)) + real-texts n longest longest-length)) + (else + (complain 'textual-concatenate texts))))))) + +;;; All of the texts are really texts. No strings. +;;; n is the length of the result. +;;; longest is #f or the longest of the texts, and +;;; longest-length is its length (or zero). + +(define (%text-concatenate-n texts n longest longest-length) + (if (and longest + (> longest-length N) + (< n (+ longest-length longest-length)) + (= 0 (length&i0.i0 (text.k longest)))) + (if (eq? longest (car texts)) + (%%text-concatenate-n texts n) + (let loop ((texts texts) + (front '()) + (front-length 0)) + (cond ((eq? longest (car texts)) + (%%text-concatenate-front + (reverse front) + (%%text-concatenate-n texts (- n front-length)) + front-length + n)) + (else + (let ((txt (car texts))) + (loop (cdr texts) + (cons txt front) + (+ front-length (%text-length txt)))))))) + (%%text-concatenate-n texts n))) + +;;; texts is a non-empty list of texts, with no strings. +;;; n is the length of the result. +;;; +;;; The text returned has a start index of zero. +;;; +;;; Special case: +;;; If the first text has a start index of zero, +;;; then its full chunks don't have to be copied. + +(define (%%text-concatenate-n texts n) + (if (= 0 n) + the-empty-text + (let* ((n/N (quotient n N)) + (m (remainder n N)) + (nchunks (+ n/N (if (= 0 m) 0 1))) + (chunks (make-vector nchunks 'bug-in-text-concatenate)) + (txt (car texts)) + (k (text.k txt)) + (len (length&i0.length k)) + (i0 (length&i0.i0 k))) + (if (and (> len N) + (= 0 i0)) + (let* ((j (quotient len N)) + (ti (* j N)) + (chunks0 (text.chunks txt))) + (do ((i 0 (+ i 1))) + ((= i j)) + (vector-set! chunks i (vector-ref chunks0 i))) + (%%text-concatenate-finish n 0 chunks j texts ti)) + (%%text-concatenate-finish n 0 chunks 0 texts 0))))) + +;;; All of the texts are really texts. No strings. +;;; The second argument is a text that starts at offset zero. +;;; k is the total length of the texts passed as first argument. +;;; n is the length of the result. +;;; +;;; Returns the texts concatenated with the second argument, +;;; without copying any chunks of the second argument. + +(define (%%text-concatenate-front texts txt k n) + (let* ((k/N (quotient k N)) + (mk (remainder k N)) + (i0 (if (= 0 mk) 0 (- N mk))) ; start offset for result + (kchunks (+ k/N (if (= 0 mk) 0 1))) ; number of new chunks + (n-k (- n k)) + (n-k/N (quotient n-k N)) + (m (remainder n-k N)) + (nchunks (+ kchunks + n-k/N + (if (= 0 m) 0 1))) + (chunks (make-vector nchunks 'bug-in-text-concatenate)) + (chunks2 (text.chunks txt))) + + ;; copy extant chunks + + (do ((i kchunks (+ i 1))) + ((= i nchunks)) + (vector-set! chunks i (vector-ref chunks2 (- i kchunks)))) + + (%%text-concatenate-finish n i0 chunks 0 texts 0))) + +;;; Given: +;;; +;;; n : the length of a text to be created +;;; i0 : start offset for the text to be created +;;; chunks : the vector of chunks for that new text +;;; j : vector index of first uninitialized chunk +;;; texts : a non-empty list of texts to be copied into the chunks +;;; ti : index of first uncopied character in the first text +;;; +;;; Creates new chunks as necessary, copies the texts into those chunks +;;; and returns a new text. +;;; The given texts may not fill the chunks because the chunks +;;; of some shared text may already have been copied into some +;;; tail of the chunks vector. + +(define (%%text-concatenate-finish n i0 chunks j texts ti) + (let loop ((texts (cdr texts)) + (txt (car texts)) + (j j) ; index into chunks + (k i0) ; index into (vector-ref chunks j) + (ti ti) ; index into txt + (bytes (make-list i0 0))) ; bytes being collected for next chunk + (cond ((= k N) + (let ((bv (list->bytevector (reverse bytes)))) + (vector-set! chunks j bv)) + (loop texts txt (+ j 1) 0 ti '())) + ((= ti (%text-length txt)) + (if (null? texts) + (begin (if (not (null? bytes)) + (let ((bv (list->bytevector (reverse bytes)))) + (vector-set! chunks j bv))) + (%new-text n i0 chunks)) + (loop (cdr texts) (car texts) j k 0 bytes))) + (else + (let* ((cp (char->integer (%text-ref txt ti))) + (bytes (cond ((< cp #x0080) + (cons cp bytes)) + ((< cp #x0800) + (let* ((bits1 (quotient cp 64)) + (bits2 (remainder cp 64)) + (byte1 (+ bits1 #b11000000)) + (byte2 (+ bits2 #b10000000))) + (cons byte2 (cons byte1 bytes)))) + ((< cp #x10000) + (let* ((bits1 (quotient cp (* 64 64))) + (bits2 (quotient + (remainder cp (* 64 64)) + 64)) + (bits3 (remainder cp 64)) + (byte1 (+ bits1 #b11100000)) + (byte2 (+ bits2 #b10000000)) + (byte3 (+ bits3 #b10000000))) + (cons byte3 + (cons byte2 + (cons byte1 bytes))))) + (else + (let* ((bits1 (quotient cp (* 64 64 64))) + (bits2 (quotient + (remainder cp (* 64 64 64)) + (* 64 64))) + (bits3 (quotient + (remainder cp (* 64 64)) + 64)) + (bits4 (remainder cp 64)) + (byte1 (+ bits1 #b11110000)) + (byte2 (+ bits2 #b10000000)) + (byte3 (+ bits3 #b10000000)) + (byte4 (+ bits4 #b10000000))) + (cons byte4 + (cons byte3 + (cons byte2 + (cons byte1 + bytes))))))))) + (loop texts txt j (+ k 1) (+ ti 1) bytes)))))) diff --git a/lib/srfi/135/kernel8.sld b/lib/srfi/135/kernel8.sld new file mode 100644 index 00000000..1fcbacb4 --- /dev/null +++ b/lib/srfi/135/kernel8.sld @@ -0,0 +1,56 @@ +;;; Copyright (C) William D Clinger (2016). +;;; +;;; Permission is hereby granted, free of charge, to any person +;;; obtaining a copy of this software and associated documentation +;;; files (the "Software"), to deal in the Software without +;;; restriction, including without limitation the rights to use, +;;; copy, modify, merge, publish, distribute, sublicense, and/or +;;; sell copies of the Software, and to permit persons to whom the +;;; Software is furnished to do so, subject to the following +;;; conditions: +;;; +;;; The above copyright notice and this permission notice shall be +;;; included in all copies or substantial portions of the Software. +;;; +;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +;;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +;;; OTHER DEALINGS IN THE SOFTWARE. + +(define-library (srfi 135 kernel8) + + (export + + ;; for internal use only + + complain ; for reporting illegal arguments + + text-rtd ; FIXME: for debugging only + %new-text ; FIXME: for debugging only + text.k text.chunks ; FIXME: for debugging only + + %text-length ; non-checking version + %text-ref ; non-checking version +; %subtext ; non-checking version + %string->text ; 1-argument version + + N ; preferred text size for pieces of long texts + the-empty-text ; there should be only one empty text + + ;; will be exported by (srfi 135) + + text? + text-tabulate + text-length + text-ref + subtext + textual-concatenate + ) + + (import (scheme base)) + + (include "kernel8.body.scm")) diff --git a/lib/srfi/135/test.sld b/lib/srfi/135/test.sld new file mode 100644 index 00000000..8075604a --- /dev/null +++ b/lib/srfi/135/test.sld @@ -0,0 +1,3007 @@ + +;; Adapted from the reference implementation test suite. + +;;; Copyright (C) William D Clinger (2016). +;;; +;;; Permission is hereby granted, free of charge, to any person +;;; obtaining a copy of this software and associated documentation +;;; files (the "Software"), to deal in the Software without +;;; restriction, including without limitation the rights to use, +;;; copy, modify, merge, publish, distribute, sublicense, and/or +;;; sell copies of the Software, and to permit persons to whom the +;;; Software is furnished to do so, subject to the following +;;; conditions: +;;; +;;; The above copyright notice and this permission notice shall be +;;; included in all copies or substantial portions of the Software. +;;; +;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +;;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +;;; OTHER DEALINGS IN THE SOFTWARE. + +(define-library (srfi 135 test) + (import (scheme base) + (scheme write) + (scheme char) + (srfi 135) + (chibi test)) + (export run-tests) + (begin + (define (run-tests) + ;; Help functions for testing. + (define (as-text . args) + (textual-concatenate + (map (lambda (x) + (cond ((text? x) x) + ((string? x) (string->text x)) + ((char? x) (text x)) + (else + (error "as-text: illegal argument" x)))) + args))) + (define (result=? str txt) + (and (text? txt) + (textual=? str txt))) + + (define-syntax test-text + (syntax-rules () + ((test-text expect expr) + (test-equal result=? expect expr)))) + + ;; Unicode is a strong motivation for immutable texts, so we ought + ;; to use at least some non-ASCII strings for testing. + ;; Some systems would blow up if this file were to contain non-ASCII + ;; characters, however, so we have to be careful here. + ;; + ;; FIXME: need more tests with really high code points + + (cond-expand + ((or sagittarius + chibi + full-unicode-strings) + (define ABC + (as-text + (list->string (map integer->char + '(#x3b1 #x3b2 #x3b3))))) + (define ABCDEF + (as-text + (list->string (map integer->char + '(#x0c0 #x062 #x0c7 #x064 #x0c9 #x066))))) + (define DEFABC + (as-text + (list->string (map integer->char + '(#x064 #x0c9 #x066 #x0c0 #x062 #x0c7))))) + (define eszett (integer->char #xDF)) + (define fuss (text #\F #\u eszett)) + (define chaos0 + (as-text + (list->string (map integer->char + '(#x39E #x391 #x39F #x3A3))))) + (define chaos1 + (as-text + (list->string (map integer->char + '(#x3BE #x3B1 #x3BF #x3C2))))) + (define chaos2 + (as-text + (list->string (map integer->char + '(#x3BE #x3B1 #x3BF #x3C3))))) + (define beyondBMP + (as-text + (list->string (map integer->char + '(#x61 #xc0 #x3bf + #x1d441 #x1d113 #x1d110 #x7a)))))) + (else + (define ABC (as-text "abc")) + (define ABCDEF (as-text "ABCdef")) + (define DEFABC (as-text "defabc")))) + + (test-begin "srfi-135: immutable texts") + + ;; Predicates + + (test-assert (text? (text))) + (test-assert (not (text? (string)))) + (test-not (text? #\a)) + (test-assert (textual? (text))) + (test-assert (textual? (string))) + (test-not (textual? #\a)) + (test-assert (textual-null? (text))) + (test-not (textual-null? ABC)) + + (test-assert (textual-every (lambda (c) (if (char? c) c #f)) + (text))) + + (test #\c (textual-every (lambda (c) (if (char? c) c #f)) + (as-text "abc"))) + + (test-not (textual-every (lambda (c) (if (char>? c #\b) c #f)) + (as-text "abc"))) + + (test #\c (textual-every (lambda (c) (if (char>? c #\b) c #f)) + (as-text "abc") 2)) + + (test-assert (textual-every (lambda (c) (if (char>? c #\b) c #f)) + (as-text "abc") 1 1)) + + (test-not (textual-any (lambda (c) (if (char? c) c #f)) + (text))) + + (test #\a (textual-any (lambda (c) (if (char? c) c #f)) + (as-text "abc"))) + + (test #\c (textual-any (lambda (c) (if (char>? c #\b) c #f)) + (as-text "abc"))) + + (test #\c (textual-any (lambda (c) (if (char>? c #\b) c #f)) + (as-text "abc") 2)) + + (test-not (textual-any (lambda (c) (if (char>? c #\b) c #f)) + (as-text "abc") 0 2)) + + (test-assert (textual-every (lambda (c) (if (char? c) c #f)) "")) + + (test #\c (textual-every (lambda (c) (if (char? c) c #f)) "abc")) + + (test-not (textual-every (lambda (c) (if (char>? c #\b) c #f)) "abc")) + + (test #\c (textual-every (lambda (c) (if (char>? c #\b) c #f)) "abc" 2)) + + (test-assert (textual-every (lambda (c) (if (char>? c #\b) c #f)) "abc" 1 1)) + + (test-not (textual-any (lambda (c) (if (char? c) c #f)) "")) + + (test-assert #\a (textual-any (lambda (c) (if (char? c) c #f)) "abc")) + + (test #\c (textual-any (lambda (c) (if (char>? c #\b) c #f)) "abc")) + + (test #\c (textual-any (lambda (c) (if (char>? c #\b) c #f)) "abc" 2)) + + (test-not (textual-any (lambda (c) (if (char>? c #\b) c #f)) "abc" 0 2)) + + ;; Constructors + + (test-text "" + (text-tabulate (lambda (i) + (integer->char (+ i (char->integer #\a)))) + 0)) + + (test-text "abc" + (text-tabulate (lambda (i) + (integer->char (+ i (char->integer #\a)))) + 3)) + + (test-text "abc" + (let ((p (open-input-string "abc"))) + (text-unfold eof-object? + values + (lambda (x) (read-char p)) + (read-char p)))) + + (test-text "" (text-unfold null? car cdr '())) + + (test-text "abc" + (text-unfold null? car cdr (string->list "abc"))) + + (test-text "def" + (text-unfold null? car cdr '() (string->text "def"))) + + (test-text "defabcG" + (text-unfold null? + car + cdr + (string->list "abc") + (string->text "def") + (lambda (x) (if (null? x) (text #\G) "")))) + + (test-text "" (text-unfold-right null? car cdr '())) + + (test-text "cba" + (text-unfold-right null? car cdr (string->list "abc"))) + + (test-text "def" + (text-unfold-right null? car cdr '() (string->text "def"))) + + (test-text "Gcbadef" + (text-unfold-right null? + car + cdr + (string->list "abc") + (string->text "def") + (lambda (x) (if (null? x) (text #\G) "")))) + + + (test-text "def" + (text-unfold null? car cdr '() "def")) + + (test-text "defabcG" + (text-unfold null? + car + cdr + (string->list "abc") + "def" + (lambda (x) (if (null? x) "G" "")))) + + (test-text "dabcG" + (text-unfold null? + car + cdr + (string->list "abc") + #\d + (lambda (x) (if (null? x) "G" "")))) + + (test-text (string-append "%=" + (make-string 200 #\*) + "A B C D E F G H I J K L M " + "N O P Q R S T U V W X Y Z " + (make-string (* 200 (- (char->integer #\a) + (char->integer #\Z) + 1)) + #\*) + "abcdefghijklmnopqrstuvwxyz" + " ") + (text-unfold (lambda (n) (char>? (integer->char n) #\z)) + (lambda (n) + (let ((c (integer->char n))) + (cond ((char<=? #\a c #\z) c) + ((char<=? #\A c #\Z) (text c #\space)) + (else (make-string 200 #\*))))) + (lambda (n) (+ n 1)) + (char->integer #\@) + "%=" + (lambda (n) #\space))) + + (test-text "def" + (text-unfold-right null? car cdr '() "def")) + + (test-text "Gcbadef" + (text-unfold-right null? + car + cdr + (string->list "abc") + "def" + (lambda (x) (if (null? x) "G" "")))) + + (test-text "Gcbad" + (text-unfold-right null? + car + cdr + (string->list "abc") + #\d + (lambda (x) (if (null? x) "G" "")))) + + (test-text (string-append " " + (list->string + (reverse + (string->list "abcdefghijklmnopqrstuvwxyz"))) + (make-string (* 200 (- (char->integer #\a) + (char->integer #\Z) + 1)) + #\*) + "Z Y X W V U T S R Q P O N " + "M L K J I H G F E D C B A " + (make-string 200 #\*) + "%=") + (text-unfold-right + (lambda (n) (char>? (integer->char n) #\z)) + (lambda (n) + (let ((c (integer->char n))) + (cond ((char<=? #\a c #\z) c) + ((char<=? #\A c #\Z) (text c #\space)) + (else (make-string 200 #\*))))) + (lambda (n) (+ n 1)) + (char->integer #\@) + "%=" + (lambda (n) #\space))) + + (test-text " The English alphabet: abcdefghijklmnopqrstuvwxyz " + (text-unfold-right (lambda (n) (< n (char->integer #\A))) + (lambda (n) + (char-downcase (integer->char n))) + (lambda (n) (- n 1)) + (char->integer #\Z) + #\space + (lambda (n) " The English alphabet: "))) + + + ;; Conversion + + (test-text "str" (textual->text "str")) + + (test-text "str" (textual->text (text #\s #\t #\r))) + + (test-text "str" (textual->text "str" "not a textual")) + + (test-text "str" (textual->text (text #\s #\t #\r) "bad textual")) + + + (test "" (textual->string (text))) + + (test "" (textual->string (text) 0)) + + (test "" (textual->string (text) 0 0)) + + (test "abc" (textual->string (text #\a #\b #\c))) + + (test "" (textual->string (text #\a #\b #\c) 3)) + + (test "bc" (textual->string (text #\a #\b #\c) 1 3)) + + + (test "" (textual->string "")) + + (test "" (textual->string "" 0)) + + (test "" (textual->string "" 0 0)) + + (test "abc" (textual->string "abc")) + + (test "" (textual->string "abc" 3)) + + (test "bc" (textual->string "abc" 1 3)) + + + (test '#() (textual->vector (text))) + + (test '#() (textual->vector (text) 0)) + + (test '#() (textual->vector (text) 0 0)) + + (test '#(#\a #\b #\c) (textual->vector (text #\a #\b #\c))) + + (test '#() (textual->vector (text #\a #\b #\c) 3)) + + (test '#(#\b #\c) (textual->vector (text #\a #\b #\c) 1 3)) + + + (test '#() (textual->vector "")) + + (test '#() (textual->vector "" 0)) + + (test '#() (textual->vector "" 0 0)) + + (test '#(#\a #\b #\c) (textual->vector "abc")) + + (test '#() (textual->vector "abc" 3)) + + (test '#(#\b #\c) (textual->vector "abc" 1 3)) + + + (test '() (textual->list (text))) + + (test '() (textual->list (text) 0)) + + (test '() (textual->list (text) 0 0)) + + (test '(#\a #\b #\c) (textual->list (text #\a #\b #\c))) + + (test '() (textual->list (text #\a #\b #\c) 3)) + + (test '(#\b #\c) (textual->list (text #\a #\b #\c) 1 3)) + + + (test '() (textual->list "")) + + (test '() (textual->list "" 0)) + + (test '() (textual->list "" 0 0)) + + (test '(#\a #\b #\c) (textual->list "abc")) + + (test '() (textual->list "abc" 3)) + + (test '(#\b #\c) (textual->list "abc" 1 3)) + + + (test-text "" (string->text "")) + + (test-text "" (string->text "" 0)) + + (test-text "" (string->text "" 0 0)) + + (test-text "abc" (string->text "abc")) + + (test-text "bc" (string->text "abc" 1)) + + (test-text "" (string->text "abc" 3)) + + (test-text "b" (string->text "abc" 1 2)) + + (test-text "bc" (string->text "abc" 1 3)) + + + (test-text "" (vector->text '#())) + + (test-text "" (vector->text '#() 0)) + + (test-text "" (vector->text '#() 0 0)) + + (test-text "abc" (vector->text '#(#\a #\b #\c))) + + (test-text "bc" (vector->text '#(#\a #\b #\c) 1)) + + (test-text "" (vector->text '#(#\a #\b #\c) 3)) + + (test-text "b" (vector->text '#(#\a #\b #\c) 1 2)) + + (test-text "bc" (vector->text '#(#\a #\b #\c) 1 3)) + + + (test-text "" (list->text '())) + + (test-text "" (list->text '() 0)) + + (test-text "" (list->text '() 0 0)) + + (test-text "abc" (list->text '(#\a #\b #\c))) + + (test-text "bc" (list->text '(#\a #\b #\c) 1)) + + (test-text "" (list->text '(#\a #\b #\c) 3)) + + (test-text "b" (list->text '(#\a #\b #\c) 1 2)) + + (test-text "bc" (list->text '(#\a #\b #\c) 1 3)) + + + (test-text "" (reverse-list->text '())) + + (test-text "cba" (reverse-list->text '(#\a #\b #\c))) + + + (test '#u8(97 98 99) + (textual->utf8 (as-text "abc"))) + + (test '#u8(97 98 99) + (textual->utf8 "abc")) + + (test '#u8(97 98 99 121 121 121 122 122 122) + (textual->utf8 (as-text "xxxabcyyyzzz") 3)) + + (test '#u8(97 98 99 121 121 121 122 122 122) + (textual->utf8 "xxxabcyyyzzz" 3)) + + (test '#u8(97 98 99) + (textual->utf8 (as-text "xxxabcyyyzzz") 3 6)) + + (test '#u8(97 98 99) + (textual->utf8 "xxxabcyyyzzz" 3 6)) + + + (test '#u8(254 255 0 97 0 98 0 99) + (textual->utf16 (as-text "abc"))) + + (test '#u8(254 255 0 97 0 98 0 99) + (textual->utf16 "abc")) + + (test '#u8(254 255 0 97 0 98 0 99 0 121 0 121 0 121 0 122 0 122 0 122) + (textual->utf16 (as-text "xxxabcyyyzzz") 3)) + + (test '#u8(254 255 0 97 0 98 0 99 0 121 0 121 0 121 0 122 0 122 0 122) + (textual->utf16 "xxxabcyyyzzz" 3)) + + (test '#u8(254 255 0 97 0 98 0 99) + (textual->utf16 (as-text "xxxabcyyyzzz") 3 6)) + + (test '#u8(254 255 0 97 0 98 0 99) + (textual->utf16 "xxxabcyyyzzz" 3 6)) + + + (test '#u8(0 97 0 98 0 99) + (textual->utf16be (as-text "abc"))) + + (test '#u8(0 97 0 98 0 99) + (textual->utf16be "abc")) + + (test '#u8(0 97 0 98 0 99 0 121 0 121 0 121 0 122 0 122 0 122) + (textual->utf16be (as-text "xxxabcyyyzzz") 3)) + + (test '#u8(0 97 0 98 0 99 0 121 0 121 0 121 0 122 0 122 0 122) + (textual->utf16be "xxxabcyyyzzz" 3)) + + (test '#u8(0 97 0 98 0 99) + (textual->utf16be (as-text "xxxabcyyyzzz") 3 6)) + + (test '#u8(0 97 0 98 0 99) + (textual->utf16be "xxxabcyyyzzz" 3 6)) + + + (test '#u8(97 0 98 0 99 0) + (textual->utf16le (as-text "abc"))) + + (test '#u8(97 0 98 0 99 0) + (textual->utf16le "abc")) + + (test '#u8(97 0 98 0 99 0 121 0 121 0 121 0 122 0 122 0 122 0) + (textual->utf16le (as-text "xxxabcyyyzzz") 3)) + + (test '#u8(97 0 98 0 99 0 121 0 121 0 121 0 122 0 122 0 122 0) + (textual->utf16le "xxxabcyyyzzz" 3)) + + (test '#u8(97 0 98 0 99 0) + (textual->utf16le (as-text "xxxabcyyyzzz") 3 6)) + + (test '#u8(97 0 98 0 99 0) + (textual->utf16le "xxxabcyyyzzz" 3 6)) + + + (test-text "abc" + (utf8->text '#u8(97 98 99))) + + (test-text "abcyyyzzz" + (utf8->text '#u8(0 1 2 97 98 99 121 121 121 122 122 122) 3)) + + (test-text "abc" + (utf8->text '#u8(41 42 43 97 98 99 100 101 102) 3 6)) + + + (test-text "abc" + (utf16->text '#u8(254 255 0 97 0 98 0 99))) + + (test-text "abc" + (utf16->text '#u8(255 254 97 0 98 0 99 0))) + + (test-text "abc" + (utf16->text (textual->utf16 "abc") 2)) + + (test-text "bcdef" + (utf16->text (textual->utf16 "abcdef") 4)) + + (test-text "bcd" + (utf16->text (textual->utf16 "abcdef") 4 10)) + + + (test-text "abc" + (utf16be->text '#u8(0 97 0 98 0 99))) + + (test-text "bc" + (utf16be->text (textual->utf16be "abc") 2)) + + (test-text "bcd" + (utf16be->text (textual->utf16be "abcdef") 2 8)) + + + (test-text "abc" + (utf16le->text '#u8(97 0 98 0 99 0))) + + (test-text "bc" + (utf16le->text (textual->utf16le "abc") 2)) + + (test-text "bcd" + (utf16le->text (textual->utf16le "abcdef") 2 8)) + + + (cond-expand + ((or sagittarius + chibi + full-unicode-strings) + + (test + '#u8(97 195 128 206 191 + 240 157 145 129 240 157 132 147 240 157 132 144 122) + (textual->utf8 beyondBMP)) + + (let ((bv (textual->utf16 beyondBMP))) + (test-assert + (or (equal? bv + '#u8(254 255 0 97 0 192 3 191 + 216 53 220 65 216 52 221 19 216 52 221 16 0 122)) + (equal? bv + '#u8(255 254 97 0 192 0 191 3 + 53 216 65 220 52 216 19 221 52 216 16 221 122 0))))) + + (test + '#u8(0 97 0 192 3 191 216 53 220 65 216 52 221 19 216 52 221 16 0 122) + (textual->utf16be beyondBMP)) + + (test + '#u8(97 0 192 0 191 3 53 216 65 220 52 216 19 221 52 216 16 221 122 0) + (textual->utf16le beyondBMP)) + + (test-assert + (textual=? + beyondBMP + (utf8->text + '#u8(97 195 128 206 191 + 240 157 145 129 240 157 132 147 240 157 132 144 122)))) + + (test-assert (textual=? beyondBMP (utf16->text (textual->utf16 beyondBMP)))) + + (test-assert (textual=? beyondBMP + (utf16->text (textual->utf16 beyondBMP) 2))) + + (test-assert (textual=? beyondBMP (utf16be->text (textual->utf16be beyondBMP)))) + + (test-assert (textual=? beyondBMP (utf16le->text (textual->utf16le beyondBMP)))) + + (test-assert (result=? (string-append (string (integer->char #xfeff)) "abc") + (utf16be->text '#u8(254 255 0 97 0 98 0 99)))) + + (test-assert (result=? (string-append (string (integer->char #xfeff)) "abc") + (utf16le->text '#u8(255 254 97 0 98 0 99 0)))) + ) + + (else)) + + ;; Selection + + (test 0 (text-length (text))) + + (test 6 (text-length ABCDEF)) + + (test 1234 (text-length (make-text 1234 (text-ref ABC 0)))) + + + (test #\a (text-ref (text #\a #\b #\c) 0)) + + (test #\c (text-ref (text #\a #\b #\c) 2)) + + (test (string-ref (textual->string ABCDEF) 3) + (text-ref ABCDEF 3)) + + + (test 0 (textual-length (text))) + + (test 6 (textual-length ABCDEF)) + + (test 1234 (textual-length (make-text 1234 (text-ref ABC 0)))) + + + (test #\a (textual-ref (text #\a #\b #\c) 0)) + + (test #\c (textual-ref (text #\a #\b #\c) 2)) + + (test (string-ref (textual->string ABCDEF) 3) + (textual-ref ABCDEF 3)) + + + (test-text "" + (subtext (text) 0 0)) + + (test-text "" + (subtext (string->text "abcdef") 0 0)) + + (test-text "" + (subtext (string->text "abcdef") 4 4)) + + (test-text "" + (subtext (string->text "abcdef") 6 6)) + + (test-text "abcd" + (subtext (string->text "abcdef") 0 4)) + + (test-text "cde" + (subtext (string->text "abcdef") 2 5)) + + (test-text "cdef" + (subtext (string->text "abcdef") 2 6)) + + (test-text "abcdef" + (subtext (string->text "abcdef") 0 6)) + + + (test-text "" + (subtextual (text) 0 0)) + + (test-text "" + (subtextual (string->text "abcdef") 0 0)) + + (test-text "" + (subtextual (string->text "abcdef") 4 4)) + + (test-text "" + (subtextual (string->text "abcdef") 6 6)) + + (test-text "abcd" + (subtextual (string->text "abcdef") 0 4)) + + (test-text "cde" + (subtextual (string->text "abcdef") 2 5)) + + (test-text "cdef" + (subtextual (string->text "abcdef") 2 6)) + + (test-text "abcdef" + (subtextual (string->text "abcdef") 0 6)) + + + (test-text "" + (subtextual "" 0 0)) + + (test-text "" + (subtextual "abcdef" 0 0)) + + (test-text "" + (subtextual "abcdef" 4 4)) + + (test-text "" + (subtextual "abcdef" 6 6)) + + (test-text "abcd" + (subtextual "abcdef" 0 4)) + + (test-text "cde" + (subtextual "abcdef" 2 5)) + + (test-text "cdef" + (subtextual "abcdef" 2 6)) + + (test-text "abcdef" + (subtextual "abcdef" 0 6)) + + + (test-text "" + (textual-copy (text))) + + (test-assert (let* ((txt (string->text "abcdef")) + (copy (textual-copy txt))) + (and (result=? "abcdef" + copy) + (not (eqv? txt copy))))) + + + (test-text "" + (textual-copy "")) + + (test-text "abcdef" + (textual-copy "abcdef")) + + + (test-text "" + (textual-copy (text) 0)) + + (test-text "abcdef" + (textual-copy (string->text "abcdef") 0)) + + (test-text "ef" + (textual-copy (string->text "abcdef") 4)) + + (test-text "" + (textual-copy (string->text "abcdef") 6)) + + + (test-text "" + (textual-copy "" 0)) + + (test-text "abcdef" + (textual-copy "abcdef" 0)) + + (test-text "ef" + (textual-copy "abcdef" 4)) + + (test-text "" + (textual-copy "abcdef" 6)) + + + (test-text "" + (textual-copy (text) 0 0)) + + (test-text "" + (textual-copy (string->text "abcdef") 0 0)) + + (test-text "" + (textual-copy (string->text "abcdef") 4 4)) + + (test-text "" + (textual-copy (string->text "abcdef") 6 6)) + + (test-text "abcd" + (textual-copy (string->text "abcdef") 0 4)) + + (test-text "cde" + (textual-copy (string->text "abcdef") 2 5)) + + (test-text "cdef" + (textual-copy (string->text "abcdef") 2 6)) + + (test-text "abcdef" + (textual-copy (string->text "abcdef") 0 6)) + + + (test-text "" + (textual-copy "" 0 0)) + + (test-text "" + (textual-copy "abcdef" 0 0)) + + (test-text "" + (textual-copy "abcdef" 4 4)) + + (test-text "" + (textual-copy "abcdef" 6 6)) + + (test-text "abcd" + (textual-copy "abcdef" 0 4)) + + (test-text "cde" + (textual-copy "abcdef" 2 5)) + + (test-text "cdef" + (textual-copy "abcdef" 2 6)) + + (test-text "abcdef" + (textual-copy "abcdef" 0 6)) + + + (test-text "" (textual-take (text) 0)) + + (test-text "" (textual-take (string->text "abcdef") 0)) + + (test-text "ab" (textual-take (string->text "abcdef") 2)) + + (test-text "" (textual-drop (string->text "") 0)) + + (test-text "abcdef" (textual-drop (string->text "abcdef") 0)) + + (test-text "cdef" (textual-drop (string->text "abcdef") 2)) + + (test-text "" (textual-take-right (text) 0)) + + (test-text "" (textual-take-right (string->text "abcdef") 0)) + + (test-text "ef" (textual-take-right (string->text "abcdef") 2)) + + (test-text "" (textual-drop-right (text) 0)) + + (test-text "abcdef" + (textual-drop-right (string->text "abcdef") 0)) + + (test-text "abcd" + (textual-drop-right (string->text "abcdef") 2)) + + + (test-text "" (textual-take "" 0)) + + (test-text "" (textual-take "abcdef" 0)) + + (test-text "ab" (textual-take "abcdef" 2)) + + (test-text "" (textual-drop "" 0)) + + (test-text "abcdef" (textual-drop "abcdef" 0)) + + (test-text "cdef" (textual-drop "abcdef" 2)) + + (test-text "" (textual-take-right "" 0)) + + (test-text "" (textual-take-right "abcdef" 0)) + + (test-text "ef" (textual-take-right "abcdef" 2)) + + (test-text "" (textual-drop-right "" 0)) + + (test-text "abcdef" (textual-drop-right "abcdef" 0)) + + (test-text "abcd" (textual-drop-right "abcdef" 2)) + + + (test-text "" + (textual-pad (string->text "") 0)) + + (test-text " " + (textual-pad (string->text "") 5)) + + (test-text " 325" + (textual-pad (string->text "325") 5)) + + (test-text "71325" + (textual-pad (string->text "71325") 5)) + + (test-text "71325" + (textual-pad (string->text "8871325") 5)) + + (test-text "" + (textual-pad (string->text "") 0 #\*)) + + (test-text "*****" + (textual-pad (string->text "") 5 #\*)) + + (test-text "**325" + (textual-pad (string->text "325") 5 #\*)) + + (test-text "71325" + (textual-pad (string->text "71325") 5 #\*)) + + (test-text "71325" + (textual-pad (string->text "8871325") 5 #\*)) + + (test-text "" + (textual-pad (string->text "") 0 #\* 0)) + + (test-text "*****" + (textual-pad (string->text "") 5 #\* 0)) + + (test-text "**325" + (textual-pad (string->text "325") 5 #\* 0)) + + (test-text "71325" + (textual-pad (string->text "71325") 5 #\* 0)) + + (test-text "71325" + (textual-pad (string->text "8871325") 5 #\* 0)) + + (test-text "***25" + (textual-pad (string->text "325") 5 #\* 1)) + + (test-text "*1325" + (textual-pad (string->text "71325") 5 #\* 1)) + + (test-text "71325" + (textual-pad (string->text "8871325") 5 #\* 1)) + + (test-text "" + (textual-pad (string->text "") 0 #\* 0 0)) + + (test-text "*****" + (textual-pad (string->text "") 5 #\* 0 0)) + + (test-text "**325" + (textual-pad (string->text "325") 5 #\* 0 3)) + + (test-text "**713" + (textual-pad (string->text "71325") 5 #\* 0 3)) + + (test-text "**887" + (textual-pad (string->text "8871325") 5 #\* 0 3)) + + (test-text "***25" + (textual-pad (string->text "325") 5 #\* 1 3)) + + (test-text "**132" + (textual-pad (string->text "71325") 5 #\* 1 4)) + + (test-text "*8713" + (textual-pad (string->text "8871325") 5 #\* 1 5)) + + (test-text "" + (textual-pad-right (string->text "") 0)) + + (test-text " " + (textual-pad-right (string->text "") 5)) + + (test-text "325 " + (textual-pad-right (string->text "325") 5)) + + (test-text "71325" + (textual-pad-right (string->text "71325") 5)) + + (test-text "88713" + (textual-pad-right (string->text "8871325") 5)) + + (test-text "" + (textual-pad-right (string->text "") 0 #\*)) + + (test-text "*****" + (textual-pad-right (string->text "") 5 #\*)) + + (test-text "325**" + (textual-pad-right (string->text "325") 5 #\*)) + + (test-text "71325" + (textual-pad-right (string->text "71325") 5 #\*)) + + (test-text "88713" + (textual-pad-right (string->text "8871325") 5 #\*)) + + (test-text "" + (textual-pad-right (string->text "") 0 #\* 0)) + + (test-text "*****" + (textual-pad-right (string->text "") 5 #\* 0)) + + (test-text "325**" + (textual-pad-right (string->text "325") 5 #\* 0)) + + (test-text "71325" + (textual-pad-right (string->text "71325") 5 #\* 0)) + + (test-text "88713" + (textual-pad-right (string->text "8871325") 5 #\* 0)) + + (test-text "25***" + (textual-pad-right (string->text "325") 5 #\* 1)) + + (test-text "1325*" + (textual-pad-right (string->text "71325") 5 #\* 1)) + + (test-text "87132" + (textual-pad-right (string->text "8871325") 5 #\* 1)) + + (test-text "" + (textual-pad-right (string->text "") 0 #\* 0 0)) + + (test-text "*****" + (textual-pad-right (string->text "") 5 #\* 0 0)) + + (test-text "325**" + (textual-pad-right (string->text "325") 5 #\* 0 3)) + + (test-text "713**" + (textual-pad-right (string->text "71325") 5 #\* 0 3)) + + (test-text "887**" + + (textual-pad-right (string->text "8871325") 5 #\* 0 3)) + + (test-text "25***" + (textual-pad-right (string->text "325") 5 #\* 1 3)) + + (test-text "132**" + (textual-pad-right (string->text "71325") 5 #\* 1 4)) + + (test-text "8713*" + + (textual-pad-right (string->text "8871325") 5 #\* 1 5)) + + + (test-text "" (textual-pad "" 0)) + + (test-text " " (textual-pad "" 5)) + + (test-text " 325" (textual-pad "325" 5)) + + (test-text "71325" (textual-pad "71325" 5)) + + (test-text "71325" (textual-pad "8871325" 5)) + + (test-text "" (textual-pad "" 0 #\*)) + + (test-text "*****" (textual-pad "" 5 #\*)) + + (test-text "**325" (textual-pad "325" 5 #\*)) + + (test-text "71325" (textual-pad "71325" 5 #\*)) + + (test-text "71325" (textual-pad "8871325" 5 #\*)) + + (test-text "" (textual-pad "" 0 #\* 0)) + + (test-text "*****" (textual-pad "" 5 #\* 0)) + + (test-text "**325" (textual-pad "325" 5 #\* 0)) + + (test-text "71325" (textual-pad "71325" 5 #\* 0)) + + (test-text "71325" (textual-pad "8871325" 5 #\* 0)) + + (test-text "***25" (textual-pad "325" 5 #\* 1)) + + (test-text "*1325" (textual-pad "71325" 5 #\* 1)) + + (test-text "71325" (textual-pad "8871325" 5 #\* 1)) + + (test-text "" (textual-pad "" 0 #\* 0 0)) + + (test-text "*****" (textual-pad "" 5 #\* 0 0)) + + (test-text "**325" (textual-pad "325" 5 #\* 0 3)) + + (test-text "**713" (textual-pad "71325" 5 #\* 0 3)) + + (test-text "**887" (textual-pad "8871325" 5 #\* 0 3)) + + (test-text "***25" (textual-pad "325" 5 #\* 1 3)) + + (test-text "**132" (textual-pad "71325" 5 #\* 1 4)) + + (test-text "*8713" (textual-pad "8871325" 5 #\* 1 5)) + + (test-text "" (textual-pad-right "" 0)) + + (test-text " " (textual-pad-right "" 5)) + + (test-text "325 " (textual-pad-right "325" 5)) + + (test-text "71325" (textual-pad-right "71325" 5)) + + (test-text "88713" (textual-pad-right "8871325" 5)) + + (test-text "" (textual-pad-right "" 0 #\*)) + + (test-text "*****" (textual-pad-right "" 5 #\*)) + + (test-text "325**" (textual-pad-right "325" 5 #\*)) + + (test-text "71325" (textual-pad-right "71325" 5 #\*)) + + (test-text "88713" (textual-pad-right "8871325" 5 #\*)) + + (test-text "" (textual-pad-right "" 0 #\* 0)) + + (test-text "*****" (textual-pad-right "" 5 #\* 0)) + + (test-text "325**" (textual-pad-right "325" 5 #\* 0)) + + (test-text "71325" (textual-pad-right "71325" 5 #\* 0)) + + (test-text "88713" (textual-pad-right "8871325" 5 #\* 0)) + + (test-text "25***" (textual-pad-right "325" 5 #\* 1)) + + (test-text "1325*" (textual-pad-right "71325" 5 #\* 1)) + + (test-text "87132" (textual-pad-right "8871325" 5 #\* 1)) + + (test-text "" (textual-pad-right "" 0 #\* 0 0)) + + (test-text "*****" (textual-pad-right "" 5 #\* 0 0)) + + (test-text "325**" (textual-pad-right "325" 5 #\* 0 3)) + + (test-text "713**" (textual-pad-right "71325" 5 #\* 0 3)) + + (test-text "887**" (textual-pad-right "8871325" 5 #\* 0 3)) + + (test-text "25***" (textual-pad-right "325" 5 #\* 1 3)) + + (test-text "132**" (textual-pad-right "71325" 5 #\* 1 4)) + + (test-text "8713*" (textual-pad-right "8871325" 5 #\* 1 5)) + + + (test-text "" + (textual-trim (string->text ""))) + + (test-text "a b c " + (textual-trim (string->text " a b c "))) + + (test-text "" + (textual-trim (string->text "") char-whitespace?)) + + (test-text "a b c " + (textual-trim (string->text " a b c ") char-whitespace?)) + + (test-text "" + (textual-trim (string->text " a b c ") char?)) + + (test-text "" + (textual-trim (string->text "") char-whitespace? 0)) + + (test-text "a b c " + (textual-trim (string->text " a b c ") char-whitespace? 0)) + + (test-text "" + (textual-trim (string->text " a b c ") char? 0)) + + (test-text "b c " + (textual-trim (string->text " a b c ") char-whitespace? 3)) + + (test-text "" + (textual-trim (string->text " a b c ") char? 3)) + + (test-text "" + (textual-trim (string->text " a b c ") char? 0 11)) + + (test-text "b c " + (textual-trim (string->text " a b c ") + char-whitespace? 3 11)) + + (test-text "" + (textual-trim (string->text " a b c ") char? 3 11)) + + (test-text "" + (textual-trim (string->text " a b c ") char? 0 8)) + + (test-text "b " + (textual-trim (string->text " a b c ") + char-whitespace? 3 8)) + + (test-text "" + (textual-trim (string->text " a b c ") char? 3 8)) + + + (test-text "" + (textual-trim-right (string->text ""))) + + (test-text " a b c" + (textual-trim-right (string->text " a b c "))) + + (test-text "" + (textual-trim-right (string->text "") char-whitespace?)) + + (test-text " a b c" + (textual-trim-right (string->text " a b c ") + char-whitespace?)) + + (test-text "" + (textual-trim-right (string->text " a b c ") char?)) + + (test-text "" + (textual-trim-right (string->text "") char-whitespace? 0)) + + (test-text " a b c" + (textual-trim-right (string->text " a b c ") + char-whitespace? 0)) + + (test-text "" + (textual-trim-right (string->text " a b c ") char? 0)) + + (test-text " b c" + (textual-trim-right (string->text " a b c ") + char-whitespace? 3)) + + (test-text "" + (textual-trim-right (string->text " a b c ") char? 3)) + + (test-text "" + (textual-trim-right (string->text " a b c ") char? 0 11)) + + (test-text " b c" + (textual-trim-right (string->text " a b c ") + char-whitespace? 3 11)) + + (test-text "" + (textual-trim-right (string->text " a b c ") char? 3 11)) + + (test-text "" + (textual-trim-right (string->text " a b c ") char? 0 8)) + + (test-text " b" + (textual-trim-right (string->text " a b c ") + char-whitespace? 3 8)) + + (test-text "" + (textual-trim-right (string->text " a b c ") char? 3 8)) + + + (test-text "" + (textual-trim-both (string->text ""))) + + (test-text "a b c" + (textual-trim-both (string->text " a b c "))) + + (test-text "" + (textual-trim-both (string->text "") char-whitespace?)) + + (test-text "a b c" + (textual-trim-both (string->text " a b c ") + char-whitespace?)) + + (test-text "" + (textual-trim-both (string->text " a b c ") char?)) + + (test-text "" + (textual-trim-both (string->text "") char-whitespace? 0)) + + (test-text "a b c" + (textual-trim-both (string->text " a b c ") + char-whitespace? 0)) + + (test-text "" + (textual-trim-both (string->text " a b c ") char? 0)) + + (test-text "b c" + (textual-trim-both (string->text " a b c ") + char-whitespace? 3)) + + (test-text "" + (textual-trim-both (string->text " a b c ") char? 3)) + + (test-text "" + (textual-trim-both (string->text " a b c ") char? 0 11)) + + (test-text "b c" + (textual-trim-both (string->text " a b c ") + char-whitespace? 3 11)) + + (test-text "" + (textual-trim-both (string->text " a b c ") char? 3 11)) + + (test-text "" + (textual-trim-both (string->text " a b c ") char? 0 8)) + + (test-text "b" + (textual-trim-both (string->text " a b c ") + char-whitespace? 3 8)) + + (test-text "" + (textual-trim-both (string->text " a b c ") char? 3 8)) + + + (test-text "" + (textual-trim "")) + + (test-text "a b c " + (textual-trim " a b c ")) + + (test-text "" + (textual-trim "" char-whitespace?)) + + (test-text "a b c " + (textual-trim " a b c " char-whitespace?)) + + (test-text "" + (textual-trim " a b c " char?)) + + (test-text "" + (textual-trim "" char-whitespace? 0)) + + (test-text "a b c " + (textual-trim " a b c " char-whitespace? 0)) + + (test-text "" + (textual-trim " a b c " char? 0)) + + (test-text "b c " + (textual-trim " a b c " char-whitespace? 3)) + + (test-text "" + (textual-trim " a b c " char? 3)) + + (test-text "" + (textual-trim " a b c " char? 0 11)) + + (test-text "b c " + (textual-trim " a b c " char-whitespace? 3 11)) + + (test-text "" + (textual-trim " a b c " char? 3 11)) + + (test-text "" + (textual-trim " a b c " char? 0 8)) + + (test-text "b " + (textual-trim " a b c " char-whitespace? 3 8)) + + (test-text "" + (textual-trim " a b c " char? 3 8)) + + + (test-text "" + (textual-trim-right "")) + + (test-text " a b c" + (textual-trim-right " a b c ")) + + (test-text "" + (textual-trim-right "" char-whitespace?)) + + (test-text " a b c" + (textual-trim-right " a b c " char-whitespace?)) + + (test-text "" + (textual-trim-right " a b c " char?)) + + (test-text "" + (textual-trim-right "" char-whitespace? 0)) + + (test-text " a b c" + (textual-trim-right " a b c " char-whitespace? 0)) + + (test-text "" + (textual-trim-right " a b c " char? 0)) + + (test-text " b c" + (textual-trim-right " a b c " char-whitespace? 3)) + + (test-text "" + (textual-trim-right " a b c " char? 3)) + + (test-text "" + (textual-trim-right " a b c " char? 0 11)) + + (test-text " b c" + (textual-trim-right " a b c " char-whitespace? 3 11)) + + (test-text "" + (textual-trim-right " a b c " char? 3 11)) + + (test-text "" + (textual-trim-right " a b c " char? 0 8)) + + (test-text " b" + (textual-trim-right " a b c " char-whitespace? 3 8)) + + (test-text "" + (textual-trim-right " a b c " char? 3 8)) + + + (test-text "" + (textual-trim-both "")) + + (test-text "a b c" + (textual-trim-both " a b c ")) + + (test-text "" + (textual-trim-both "" char-whitespace?)) + + (test-text "a b c" + (textual-trim-both " a b c " char-whitespace?)) + + (test-text "" + (textual-trim-both " a b c " char?)) + + (test-text "" + (textual-trim-both "" char-whitespace? 0)) + + (test-text "a b c" + (textual-trim-both " a b c " char-whitespace? 0)) + + (test-text "" + (textual-trim-both " a b c " char? 0)) + + (test-text "b c" + (textual-trim-both " a b c " char-whitespace? 3)) + + (test-text "" + (textual-trim-both " a b c " char? 3)) + + (test-text "" + (textual-trim-both " a b c " char? 0 11)) + + (test-text "b c" + (textual-trim-both " a b c " char-whitespace? 3 11)) + + (test-text "" + (textual-trim-both " a b c " char? 3 11)) + + (test-text "" + (textual-trim-both " a b c " char? 0 8)) + + (test-text "b" + (textual-trim-both " a b c " char-whitespace? 3 8)) + + (test-text "" + (textual-trim-both " a b c " char? 3 8)) + + + ;; Replacement + + (test-text "It's lots of fun to code it up in Scheme." + (textual-replace (as-text "It's easy to code it up in Scheme.") + (as-text "lots of fun") + 5 9)) + + (test-text "The miserable perl programmer endured daily ridicule." + (textual-replace "The TCL programmer endured daily ridicule." + (as-text "another miserable perl drone") + 4 7 8 22)) + + (test-text "It's really easy to code it up in Scheme." + (textual-replace (as-text "It's easy to code it up in Scheme.") + "really " + 5 5)) + + (test-text "Runs in O(1) time." ; for texts (using sample implementations) + (textual-replace "Runs in O(n) time." (text #\1) 10 11)) + + ;; Comparison + ;; + ;; The comparison tests aren't perfectly black-box because the + ;; specification of these comparison procedures allows them to + ;; use an ordering other than the usual lexicographic ordering. + ;; The sample implementations use lexicographic ordering, however, + ;; and a test program that discourages implementations from using + ;; orderings that differ from the usual on such simple cases is + ;; probably doing a public service. + + (test #t (textual=? (as-text "Strasse") (as-text "Strasse"))) + + (test #t (textual=? "Strasse" (as-text "Strasse") "Strasse")) + + (test #f (textual? (as-text "z") "zz")) + (test #t (textual>? "z" (as-text "Z"))) + (test #f (textual>=? (as-text "z") "zz")) + (test #t (textual>=? "z" "Z")) + (test #t (textual>=? (as-text "z") (as-text "z"))) + + + (let* ((w "a") + (x "abc") + (y "def") + (z (text #\a #\b #\c))) + + (test #f (textual=? x y z)) + (test #t (textual=? x x z)) + (test #f (textual=? w x y)) + (test #f (textual=? y x w)) + + (test #f (textual? x y z)) + (test #f (textual>? x x z)) + (test #f (textual>? w x y)) + (test #t (textual>? y x w)) + + (test #f (textual<=? x y z)) + (test #t (textual<=? x x z)) + (test #t (textual<=? w x y)) + (test #f (textual<=? y x w)) + + (test #f (textual>=? x y z)) + (test #t (textual>=? x x z)) + (test #f (textual>=? w x y)) + (test #t (textual>=? y x w)) + + + (test #t (textual=? x x)) + (test #f (textual=? w x)) + (test #f (textual=? y x)) + + (test #f (textual? x x)) + (test #f (textual>? w x)) + (test #t (textual>? y x)) + + (test #t (textual<=? x x)) + (test #t (textual<=? w x)) + (test #f (textual<=? y x)) + + (test #t (textual>=? x x)) + (test #f (textual>=? w x)) + (test #t (textual>=? y x))) + + + (test #t (textual-ci? "a" "Z")) + (test #f (textual-ci>? "A" "z")) + (test #t (textual-ci>? "Z" "a")) + (test #t (textual-ci>? "z" "A")) + (test #f (textual-ci>? "z" "Z")) + (test #f (textual-ci>? "Z" "z")) + (test #t (textual-ci=? "z" "Z")) + (test #f (textual-ci=? "z" "a")) + (test #t (textual-ci<=? "a" "Z")) + (test #t (textual-ci<=? "A" "z")) + (test #f (textual-ci<=? "Z" "a")) + (test #f (textual-ci<=? "z" "A")) + (test #t (textual-ci<=? "z" "Z")) + (test #t (textual-ci<=? "Z" "z")) + (test #f (textual-ci>=? "a" "Z")) + (test #f (textual-ci>=? "A" "z")) + (test #t (textual-ci>=? "Z" "a")) + (test #t (textual-ci>=? "z" "A")) + (test #t (textual-ci>=? "z" "Z")) + (test #t (textual-ci>=? "Z" "z")) + + ;; The full-unicode feature doesn't imply full Unicode in strings, + ;; so these tests might fail even in a conforming implementation. + ;; Implementations that support full Unicode strings often have + ;; this feature, however, even though it isn't listed in the R7RS. + + (cond-expand + (full-unicode-strings + (test #f (textual=? ABCDEF DEFABC)) + (test #f (textual=? DEFABC ABCDEF)) + (test #t (textual=? DEFABC DEFABC)) + + (test #f (textual? ABCDEF DEFABC)) + (test #f (textual>? DEFABC ABCDEF)) + (test #f (textual>? DEFABC DEFABC)) + + (test #f (textual<=? ABCDEF DEFABC)) + (test #t (textual<=? DEFABC ABCDEF)) + (test #t (textual<=? DEFABC DEFABC)) + + (test #t (textual>=? ABCDEF DEFABC)) + (test #f (textual>=? DEFABC ABCDEF)) + (test #t (textual>=? DEFABC DEFABC)) + + (test #f (textual=? "Fuss" fuss)) + (test #f (textual=? "Fuss" "Fuss" fuss)) + (test #f (textual=? "Fuss" fuss "Fuss")) + (test #f (textual=? fuss "Fuss" "Fuss")) + (test #t (textual? "z" (as-text eszett))) + (test #t (textual>? (as-text eszett) "z")) + (test #f (textual>=? "z" (as-text eszett))) + (test #t (textual>=? (as-text eszett) "z")) + (test-assert (textual-ci=? fuss "Fuss")) + (test-assert (textual-ci=? fuss "FUSS")) + (test-assert (textual-ci=? chaos0 chaos1 chaos2))) + (else)) + + ;; Prefixes and suffixes + + (test 0 (textual-prefix-length ABC ABCDEF)) + + (test 0 (textual-prefix-length ABCDEF ABC)) + + (test 0 (textual-prefix-length ABCDEF DEFABC)) + + (test 6 (textual-prefix-length DEFABC DEFABC)) + + (test 6 (textual-prefix-length (textual->string DEFABC) DEFABC)) + + (test 6 (textual-prefix-length DEFABC (textual->string DEFABC))) + + (test 6 (textual-prefix-length (textual->string DEFABC) + (textual->string DEFABC))) + + (test 0 (textual-prefix-length (as-text "") (as-text ""))) + + (test 0 (textual-prefix-length (as-text "") (as-text "aabbccddee"))) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text ""))) + + (test 0 (textual-prefix-length (as-text "") (as-text "aabbccddee"))) + + (test 1 (textual-prefix-length (as-text "aisle") (as-text "aabbccddee"))) + + (test 0 (textual-prefix-length (as-text "bail") (as-text "aabbccddee"))) + + (test 4 (textual-prefix-length (as-text "prefix") (as-text "preface"))) + + (test 0 (textual-prefix-length (as-text "") (as-text "") 0)) + + (test 0 (textual-prefix-length (as-text "") (as-text "aabbccddee") 0)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "") 0)) + + (test 1 (textual-prefix-length (as-text "aisle") (as-text "aabbccddee") 0)) + + (test 0 (textual-prefix-length (as-text "bail") (as-text "aabbccddee") 0)) + + (test 4 (textual-prefix-length (as-text "prefix") (as-text "preface") 0)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "") 1)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "aabbccddee") 1)) + + (test 1 (textual-prefix-length (as-text "bail") (as-text "aabbccddee") 1)) + + (test 0 (textual-prefix-length (as-text "prefix") (as-text "preface") 1)) + + (test 0 (textual-prefix-length (as-text "") (as-text "") 0 0)) + + (test 0 (textual-prefix-length (as-text "") (as-text "aabbccddee") 0 0)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "") 0 4)) + + (test 1 (textual-prefix-length (as-text "aisle") (as-text "aabbccddee") 0 4)) + + (test 0 (textual-prefix-length (as-text "bail") (as-text "aabbccddee") 0 1)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "") 1 4)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "aabbccddee") 1 4)) + + (test 1 (textual-prefix-length (as-text "bail") (as-text "aabbccddee") 1 4)) + + (test 0 (textual-prefix-length (as-text "prefix") (as-text "preface") 1 5)) + + (test 0 (textual-prefix-length (as-text "") (as-text "") 0 0 0)) + + (test 0 (textual-prefix-length (as-text "") (as-text "aabbccddee") 0 0 0)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "") 0 4 0)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "aabbccddee") 0 4 2)) + + (test 1 (textual-prefix-length (as-text "bail") (as-text "aabbccddee") 0 1 2)) + + (test 0 (textual-prefix-length (as-text "prefix") (as-text "preface") 0 5 1)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "") 1 4 0)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "aabbccddee") 1 4 3)) + + (test 0 (textual-prefix-length (as-text "bail") (as-text "aabbccddee") 1 4 3)) + + (test 3 (textual-prefix-length (as-text "prefix") (as-text "preface") 1 5 1)) + + (test 0 (textual-prefix-length (as-text "") (as-text "") 0 0 0 0)) + + (test 0 (textual-prefix-length (as-text "") (as-text "aabbccddee") 0 0 0 0)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "") 0 4 0 0)) + + (test 0 (textual-prefix-length (as-text "aisle") "aabbccddee" 0 4 2 10)) + + (test 1 (textual-prefix-length (as-text "bail") (as-text "aabbccddee") 0 1 2 10)) + + (test 0 (textual-prefix-length (as-text "prefix") (as-text "preface") 0 5 1 6)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "") 1 4 0 0)) + + (test 0 (textual-prefix-length (as-text "aisle") (as-text "aabbccddee") 1 4 3 3)) + + (test 0 (textual-prefix-length (as-text "bail") (as-text "aabbccddee") 1 4 3 6)) + + (test 3 (textual-prefix-length (as-text "prefix") (as-text "preface") 1 5 1 7)) + + + (test 0 (textual-suffix-length ABC ABCDEF)) + + (test 0 (textual-suffix-length ABCDEF ABC)) + + (test 0 (textual-suffix-length ABCDEF DEFABC)) + + (test 6 (textual-suffix-length DEFABC DEFABC)) + + (test 6 (textual-suffix-length (textual->string DEFABC) DEFABC)) + + (test 6 (textual-suffix-length DEFABC (textual->string DEFABC))) + + (test 6 (textual-suffix-length (textual->string DEFABC) (textual->string DEFABC))) + + (test 0 (textual-suffix-length (as-text "") (as-text ""))) + + (test 0 (textual-suffix-length (as-text "") (as-text "aabbccddee"))) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text ""))) + + (test 0 (textual-suffix-length (as-text "") (as-text "aabbccddee"))) + + (test 1 (textual-suffix-length (as-text "aisle") (as-text "aabbccddee"))) + + (test 0 (textual-suffix-length (as-text "bail") (as-text "aabbccddee"))) + + (test 3 (textual-suffix-length (as-text "place") (as-text "preface"))) + + (test 0 (textual-suffix-length (as-text "") (as-text "") 0)) + + (test 0 (textual-suffix-length (as-text "") (as-text "aabbccddee") 0)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "") 0)) + + (test 1 (textual-suffix-length (as-text "aisle") (as-text "aabbccddee") 0)) + + (test 0 (textual-suffix-length (as-text "bail") (as-text "aabbccddee") 0)) + + (test 3 (textual-suffix-length (as-text "place") (as-text "preface") 0)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "") 1)) + + (test 1 (textual-suffix-length (as-text "aisle") (as-text "aabbccddee") 1)) + + (test 0 (textual-suffix-length (as-text "bail") (as-text "aabbccddee") 1)) + + (test 3 (textual-suffix-length (as-text "place") (as-text "preface") 1)) + + (test 0 (textual-suffix-length (as-text "") (as-text "") 0 0)) + + (test 0 (textual-suffix-length (as-text "") (as-text "aabbccddee") 0 0)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "") 0 4)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "aabbccddee") 0 4)) + + (test 0 (textual-suffix-length (as-text "bail") (as-text "aabbccddee") 0 1)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "") 1 4)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "aabbccddee") 1 4)) + + (test 1 (textual-suffix-length (as-text "aisle") (as-text "aabbccddee") 1 5)) + + (test 0 (textual-suffix-length (as-text "bail") (as-text "aabbccddee") 1 4)) + + (test 3 (textual-suffix-length (as-text "place") (as-text "preface") 1 5)) + + (test 0 (textual-suffix-length (as-text "") (as-text "") 0 0 0)) + + (test 0 (textual-suffix-length (as-text "") (as-text "aabbccddee") 0 0 0)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "") 0 4 0)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "aabbccddee") 0 4 2)) + + (test 0 (textual-suffix-length (as-text "bail") (as-text "aabbccddee") 0 1 2)) + + (test 3 (textual-suffix-length (as-text "place") (as-text "preface") 0 5 1)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "") 1 4 0)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "aabbccddee") 1 4 3)) + + (test 0 (textual-suffix-length (as-text "bail") (as-text "aabbccddee") 1 4 3)) + + (test 3 (textual-suffix-length (as-text "place") (as-text "preface") 1 5 1)) + + (test 0 (textual-suffix-length (as-text "") (as-text "") 0 0 0 0)) + + (test 0 (textual-suffix-length (as-text "") (as-text "aabbccddee") 0 0 0 0)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "") 0 4 0 0)) + + (test 1 (textual-suffix-length "aisle" (as-text "aabbccddee") 0 5 2 10)) + + (test 1 (textual-suffix-length (as-text "bail") (as-text "aabbccddee") 0 1 2 4)) + + (test 0 (textual-suffix-length (as-text "place") (as-text "preface") 0 5 1 6)) + + (test 2 (textual-suffix-length (as-text "place") (as-text "preface") 0 4 1 6)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "") 1 4 0 0)) + + (test 0 (textual-suffix-length (as-text "aisle") (as-text "aabbccddee") 1 4 3 3)) + + (test 0 (textual-suffix-length (as-text "bail") (as-text "aabbccddee") 1 4 3 6)) + + (test 3 (textual-suffix-length (as-text "place") (as-text "preface") 1 5 1 7)) + + + (test-assert (eq? #f (textual-prefix? ABC ABCDEF))) + + (test-assert (eq? #f (textual-prefix? ABCDEF ABC))) + + (test-assert (eq? #f (textual-prefix? ABCDEF DEFABC))) + + (test-assert (eq? #t (textual-prefix? DEFABC DEFABC))) + + (test-assert (eq? #t (textual-prefix? (textual->string DEFABC) DEFABC))) + + (test-assert (eq? #t (textual-prefix? DEFABC (textual->string DEFABC)))) + + (test-assert (eq? #t (textual-prefix? (textual->string DEFABC) (textual->string DEFABC)))) + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "")))) + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "abc")))) + + (test-assert (eq? #t (textual-prefix? (as-text "a") (as-text "abc")))) + + (test-assert (eq? #f (textual-prefix? (as-text "c") (as-text "abc")))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc")))) + + (test-assert (eq? #f (textual-prefix? (as-text "ac") (as-text "abc")))) + + (test-assert (eq? #t (textual-prefix? (as-text "abc") (as-text "abc")))) + + (test-assert (eq? #f (textual-suffix? ABC ABCDEF))) + + (test-assert (eq? #f (textual-suffix? ABCDEF ABC))) + + (test-assert (eq? #f (textual-suffix? ABCDEF DEFABC))) + + (test-assert (eq? #t (textual-suffix? DEFABC DEFABC))) + + (test-assert (eq? #t (textual-suffix? (textual->string DEFABC) DEFABC))) + + (test-assert (eq? #t (textual-suffix? DEFABC (textual->string DEFABC)))) + + (test-assert (eq? #t (textual-suffix? (as-text "") (as-text "")))) + + (test-assert (eq? #t (textual-suffix? (as-text "") (as-text "abc")))) + + (test-assert (eq? #f (textual-suffix? (as-text "a") (as-text "abc")))) + + (test-assert (eq? #t (textual-suffix? (as-text "c") (as-text "abc")))) + + (test-assert (eq? #f (textual-suffix? (as-text "ac") (as-text "abc")))) + + (test-assert (eq? #t (textual-suffix? (as-text "bc") (as-text "abc")))) + + (test-assert (eq? #t (textual-suffix? (as-text "abc") (as-text "abc")))) + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "") 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "abc") 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "a") (as-text "abc") 0))) + + (test-assert (eq? #f (textual-prefix? (as-text "c") (as-text "abc") 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 0))) + + (test-assert (eq? #f (textual-prefix? (as-text "ac") (as-text "abc") 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "abc") (as-text "abc") 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "") (as-text "") 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "") (as-text "abc") 0))) + + (test-assert (eq? #f (textual-suffix? (as-text "a") (as-text "abc") 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "c") (as-text "abc") 0))) + + (test-assert (eq? #f (textual-suffix? (as-text "ac") (as-text "abc") 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "bc") (as-text "abc") 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "abc") (as-text "abc") 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 2))) + + (test-assert (eq? #t (textual-prefix? (as-text "ac") (as-text "abc") 2))) + + (test-assert (eq? #f (textual-prefix? (as-text "abc") (as-text "abc") 2))) + + (test-assert (eq? #t (textual-suffix? (as-text "ac") (as-text "abc") 2))) + + (test-assert (eq? #t (textual-suffix? (as-text "bc") (as-text "abc") 2))) + + (test-assert (eq? #t (textual-suffix? (as-text "abc") (as-text "abc") 2))) + + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "") 0 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "abc") 0 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "a") (as-text "abc") 0 0))) + + (test-assert (eq? #f (textual-prefix? (as-text "c") (as-text "abc") 0 1))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 0 1))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 0 2))) + + (test-assert (eq? #f (textual-prefix? (as-text "ac") (as-text "abc") 0 2))) + + (test-assert (eq? #t (textual-prefix? (as-text "abc") (as-text "abc") 0 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "") (as-text "") 0 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "") (as-text "abc") 0 0))) + + (test-assert (eq? #f (textual-suffix? (as-text "a") (as-text "abc") 0 1))) + + (test-assert (eq? #t (textual-suffix? (as-text "c") (as-text "abc") 0 1))) + + (test-assert (eq? #t (textual-suffix? (as-text "ac") (as-text "abc") 1 2))) + + (test-assert (eq? #f (textual-suffix? (as-text "ac") (as-text "abc") 0 2))) + + (test-assert (eq? #t (textual-suffix? (as-text "bc") (as-text "abc") 0 2))) + + (test-assert (eq? #t (textual-suffix? (as-text "abc") (as-text "abc") 0 3))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 2 2))) + + (test-assert (eq? #t (textual-prefix? (as-text "ac") (as-text "abc") 2 2))) + + (test-assert (eq? #f (textual-prefix? (as-text "abc") (as-text "abc") 2 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "ac") (as-text "abc") 2 2))) + + (test-assert (eq? #t (textual-suffix? (as-text "bc") (as-text "abc") 2 2))) + + (test-assert (eq? #t (textual-suffix? (as-text "abc") (as-text "abc") 2 3))) + + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "") 0 0 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "abc") 0 0 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "a") (as-text "abc") 0 0 0))) + + (test-assert (eq? #f (textual-prefix? (as-text "c") (as-text "abc") 0 1 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 0 1 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 0 2 0))) + + (test-assert (eq? #f (textual-prefix? (as-text "ac") (as-text "abc") 0 2 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "abc") (as-text "abc") 0 3 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "") (as-text "") 0 0 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "") (as-text "abc") 0 0 0))) + + (test-assert (eq? #f (textual-suffix? (as-text "a") (as-text "abc") 0 1 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "c") (as-text "abc") 0 1 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "ac") (as-text "abc") 1 2 0))) + + (test-assert (eq? #f (textual-suffix? (as-text "ac") (as-text "abc") 0 2 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "bc") (as-text "abc") 0 2 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "abc") (as-text "abc") 0 3 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 2 2 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "ac") (as-text "abc") 2 2 0))) + + (test-assert (eq? #f (textual-prefix? (as-text "abc") (as-text "abc") 2 3 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "ac") (as-text "abc") 2 2 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "bc") (as-text "abc") 2 2 0))) + + (test-assert (eq? #t (textual-suffix? (as-text "abc") (as-text "abc") 2 3 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "abc") 0 0 1))) + + (test-assert (eq? #t (textual-prefix? (as-text "a") (as-text "abc") 0 0 1))) + + (test-assert (eq? #t (textual-prefix? (as-text "c") (as-text "abc") 0 1 2))) + + (test-assert (eq? #f (textual-prefix? (as-text "ab") (as-text "abc") 0 1 2))) + + (test-assert (eq? #f (textual-prefix? (as-text "ab") (as-text "abc") 0 2 1))) + + (test-assert (eq? #f (textual-prefix? (as-text "ac") (as-text "abc") 0 2 1))) + + (test-assert (eq? #f (textual-prefix? (as-text "abc") (as-text "abc") 0 3 1))) + + (test-assert (eq? #f (textual-suffix? (as-text "a") (as-text "abc") 0 1 2))) + + (test-assert (eq? #t (textual-suffix? (as-text "c") (as-text "abc") 0 1 1))) + + (test-assert (eq? #t (textual-suffix? (as-text "ac") (as-text "abc") 1 2 2))) + + (test-assert (eq? #t (textual-suffix? (as-text "bc") (as-text "abc") 0 2 1))) + + (test-assert (eq? #f (textual-suffix? (as-text "bc") (as-text "abc") 0 2 2))) + + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "") 0 0 0 0))) + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "abc") 0 0 0 3))) + + (test-assert (eq? #t (textual-prefix? (as-text "a") (as-text "abc") 0 0 0 3))) + + (test-assert (eq? #f (textual-prefix? (as-text "c") (as-text "abc") 0 1 0 3))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 0 1 0 3))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 0 2 0 3))) + + (test-assert (eq? #f (textual-prefix? (as-text "ac") (as-text "abc") 0 2 0 3))) + + (test-assert (eq? #t (textual-prefix? (as-text "abc") (as-text "abc") 0 3 0 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "") (as-text "abc") 0 0 0 3))) + + (test-assert (eq? #f (textual-suffix? (as-text "a") (as-text "abc") 0 1 0 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "c") (as-text "abc") 0 1 0 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "ac") (as-text "abc") 1 2 0 3))) + + (test-assert (eq? #f (textual-suffix? (as-text "ac") (as-text "abc") 0 2 0 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "bc") (as-text "abc") 0 2 0 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "abc") (as-text "abc") 0 3 0 3))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 2 2 0 3))) + + (test-assert (eq? #t (textual-prefix? (as-text "ac") (as-text "abc") 2 2 0 3))) + + (test-assert (eq? #f (textual-prefix? (as-text "abc") (as-text "abc") 2 3 0 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "ac") (as-text "abc") 2 2 0 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "bc") (as-text "abc") 2 2 0 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "abc") (as-text "abc") 2 3 0 3))) + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "abc") 0 0 1 3))) + + (test-assert (eq? #t (textual-prefix? (as-text "a") (as-text "abc") 0 0 1 3))) + + (test-assert (eq? #t (textual-prefix? (as-text "c") (as-text "abc") 0 1 2 3))) + + (test-assert (eq? #f (textual-prefix? (as-text "ab") (as-text "abc") 0 1 2 3))) + + (test-assert (eq? #f (textual-prefix? (as-text "ab") (as-text "abc") 0 2 1 3))) + + (test-assert (eq? #f (textual-prefix? (as-text "ac") (as-text "abc") 0 2 1 3))) + + (test-assert (eq? #f (textual-prefix? (as-text "abc") (as-text "abc") 0 3 1 3))) + + (test-assert (eq? #f (textual-suffix? (as-text "a") (as-text "abc") 0 1 2 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "c") (as-text "abc") 0 1 1 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "ac") (as-text "abc") 1 2 2 3))) + + (test-assert (eq? #t (textual-suffix? (as-text "bc") (as-text "abc") 0 2 1 3))) + + (test-assert (eq? #f (textual-suffix? (as-text "bc") (as-text "abc") 0 2 2 3))) + + + (test-assert (eq? #t (textual-prefix? (as-text "") (as-text "abc") 0 0 0 2))) + + (test-assert (eq? #t (textual-prefix? (as-text "a") (as-text "abc") 0 0 0 2))) + + (test-assert (eq? #f (textual-prefix? (as-text "c") (as-text "abc") 0 1 0 2))) + + (test-assert (eq? #t (textual-prefix? (as-text "ab") (as-text "abc") 0 1 0 2))) + + (test-assert (eq? #f (textual-prefix? (as-text "abc") (as-text "abc") 0 3 0 2))) + + (test-assert (eq? #t (textual-suffix? (as-text "") (as-text "abc") 0 0 0 2))) + + (test-assert (eq? #f (textual-suffix? (as-text "c") (as-text "abc") 0 1 0 2))) + + (test-assert (eq? #f (textual-suffix? (as-text "ac") (as-text "abc") 1 2 0 2))) + + ;; Searching + + (test-assert (eqv? #f (textual-index (as-text "") char?))) + + (test-assert (eqv? 0 (textual-index (as-text "abcdef") char?))) + + (test-assert (eqv? 4 (textual-index (as-text "abcdef") (lambda (c) (char>? c #\d))))) + + (test-assert (eqv? #f (textual-index (as-text "abcdef") char-whitespace?))) + + (test-assert (eqv? #f (textual-index-right (as-text "") char?))) + + (test-assert (eqv? 5 (textual-index-right (as-text "abcdef") char?))) + + (test-assert (eqv? 5 (textual-index-right (as-text "abcdef") + (lambda (c) (char>? c #\d))))) + + + (test-assert (eqv? #f (textual-index-right (as-text "abcdef") char-whitespace?))) + + (test-assert (eqv? #f (textual-skip (as-text "") string?))) + + (test-assert (eqv? 0 (textual-skip (as-text "abcdef") string?))) + + (test-assert (eqv? 4 (textual-skip (as-text "abcdef") (lambda (c) (char<=? c #\d))))) + + (test-assert (eqv? #f (textual-skip (as-text "abcdef") char?))) + + (test-assert (eqv? #f (textual-skip-right (as-text "") string?))) + + (test-assert (eqv? 5 (textual-skip-right (as-text "abcdef") string?))) + + (test-assert (eqv? 5 (textual-skip-right (as-text "abcdef") + (lambda (c) (char<=? c #\d))))) + + (test-assert (eqv? #f (textual-skip-right (as-text "abcdef") char?))) + + + (test-assert (eqv? 2 (textual-index "abcdef" char? 2))) + + (test-assert (eqv? 4 (textual-index "abcdef" (lambda (c) (char>? c #\d)) 2))) + + (test-assert (eqv? #f (textual-index "abcdef" char-whitespace? 2))) + + (test-assert (eqv? 5 (textual-index-right "abcdef" char? 2))) + + (test-assert (eqv? 5 (textual-index-right "abcdef" + (lambda (c) + (char>? c #\d)) 2))) + + (test-assert (eqv? #f (textual-index-right "abcdef" char-whitespace? 2))) + + (test-assert (eqv? 2 (textual-skip "abcdef" string? 2))) + + (test-assert (eqv? 4 (textual-skip "abcdef" + (lambda (c) + (char<=? c #\d)) 2))) + + (test-assert (eqv? #f (textual-skip "abcdef" char? 2))) + + (test-assert (eqv? 5 (textual-skip-right "abcdef" string? 2))) + + (test-assert (eqv? 5 (textual-skip-right "abcdef" + (lambda (c) + (char<=? c #\d)) 2))) + + (test-assert (eqv? #f (textual-skip-right "abcdef" char? 2))) + + + (test-assert (eqv? 2 (textual-index (as-text "abcdef") char? 2 5))) + + (test-assert (eqv? 4 (textual-index (as-text "abcdef") + (lambda (c) (char>? c #\d)) 2 5))) + + (test-assert (eqv? #f (textual-index (as-text "abcdef") char-whitespace? 2 5))) + + (test-assert (eqv? 4 (textual-index-right (as-text "abcdef") char? 2 5))) + + (test-assert (eqv? 4 (textual-index-right (as-text "abcdef") + (lambda (c) + (char>? c #\d)) 2 5))) + + (test-assert (eqv? #f (textual-index-right (as-text "abcdef") + char-whitespace? 2 5))) + + + (test-assert (eqv? 2 (textual-skip (as-text "abcdef") string? 2 5))) + + (test-assert (eqv? 4 (textual-skip (as-text "abcdef") + (lambda (c) (char<=? c #\d)) 2 5))) + + (test-assert (eqv? #f (textual-skip (as-text "abcdef") char? 2 5))) + + (test-assert (eqv? 4 (textual-skip-right (as-text "abcdef") string? 2 5))) + + (test-assert (eqv? 4 (textual-skip-right (as-text "abcdef") + (lambda (c) + (char<=? c #\d)) 2 5))) + + (test-assert (eqv? #f (textual-skip-right (as-text "abcdef") char? 2 5))) + + + (test-assert (eqv? 0 (textual-contains (as-text "") (as-text "")))) + + (test-assert (eqv? 0 (textual-contains (as-text "abcdeffffoo") (as-text "")))) + + (test-assert (eqv? 0 (textual-contains (as-text "abcdeffffoo") (as-text "a")))) + + (test-assert (eqv? 5 (textual-contains (as-text "abcdeffffoo") (as-text "ff")))) + + (test-assert (eqv? 4 (textual-contains (as-text "abcdeffffoo") (as-text "eff")))) + + (test-assert (eqv? 8 (textual-contains (as-text "abcdeffffoo") (as-text "foo")))) + + (test-assert (eqv? #f (textual-contains (as-text "abcdeffffoo") (as-text "efffoo")))) + + (test-assert (eqv? 0 (textual-contains-right (as-text "") (as-text "")))) + + (test-assert (eqv? 11 (textual-contains-right (as-text "abcdeffffoo") (as-text "")))) + + (test-assert (eqv? 0 (textual-contains-right (as-text "abcdeffffoo") (as-text "a")))) + + (test-assert (eqv? 7 (textual-contains-right (as-text "abcdeffffoo") (as-text "ff")))) + + (test-assert (eqv? 4 (textual-contains-right (as-text "abcdeffffoo") (as-text "eff")))) + + (test-assert (eqv? 8 (textual-contains-right (as-text "abcdeffffoo") (as-text "foo")))) + + (test-assert (eqv? #f (textual-contains-right (as-text "abcdeffffoo") + (as-text "efffoo")))) + + + (test-assert (eqv? 0 (textual-contains "" "" 0))) + + (test-assert (eqv? 2 (textual-contains "abcdeffffoo" "" 2))) + + (test-assert (eqv? #f (textual-contains "abcdeffffoo" "a" 2))) + + (test-assert (eqv? 5 (textual-contains "abcdeffffoo" "ff" 2))) + + (test-assert (eqv? 4 (textual-contains "abcdeffffoo" "eff" 2))) + + (test-assert (eqv? 8 (textual-contains "abcdeffffoo" "foo" 2))) + + (test-assert (eqv? #f (textual-contains "abcdeffffoo" "efffoo" 2))) + + (test-assert (eqv? 0 (textual-contains-right "" "" 0))) + + (test-assert (eqv? 11 (textual-contains-right "abcdeffffoo" "" 2))) + + (test-assert (eqv? #f (textual-contains-right "abcdeffffoo" "a" 2))) + + (test-assert (eqv? 7 (textual-contains-right "abcdeffffoo" "ff" 2))) + + (test-assert (eqv? 4 (textual-contains-right "abcdeffffoo" "eff" 2))) + + (test-assert (eqv? 8 (textual-contains-right "abcdeffffoo" "foo" 2))) + + (test-assert (eqv? #f (textual-contains-right "abcdeffffoo" "efffoo" 2))) + + + (test-assert (eqv? 0 (textual-contains (as-text "") (as-text "") 0 0))) + + (test-assert (eqv? 2 (textual-contains (as-text "abcdeffffoo") (as-text "") 2 10))) + + (test-assert (eqv? #f (textual-contains (as-text "abcdeffffoo") (as-text "a") 2 10))) + + (test-assert (eqv? 5 (textual-contains (as-text "abcdeffffoo") (as-text "ff") 2 10))) + + (test-assert (eqv? 4 (textual-contains (as-text "abcdeffffoo") (as-text "eff") 2 10))) + + (test-assert (eqv? #f (textual-contains (as-text "abcdeffffoo") (as-text "foo") 2 10))) + + (test-assert (eqv? #f (textual-contains (as-text "abcdeffffoo") (as-text "efffoo") 2 10))) + + (test-assert (eqv? 0 (textual-contains-right (as-text "") (as-text "") 0 0))) + + (test-assert (eqv? 10 (textual-contains-right (as-text "abcdeffffoo") (as-text "") 2 10))) + + (test-assert (eqv? #f (textual-contains-right (as-text "abcdeffffoo") (as-text "a") 2 10))) + + (test-assert (eqv? 7 (textual-contains-right (as-text "abcdeffffoo") (as-text "ff") 2 10))) + + (test-assert (eqv? 4 (textual-contains-right (as-text "abcdeffffoo") (as-text "eff") 2 10))) + + (test-assert (eqv? #f (textual-contains-right (as-text "abcdeffffoo") "foo" 2 10))) + + (test-assert (eqv? #f (textual-contains-right "abcdeffffoo" (as-text "efffoo") 2 10))) + + + (test-assert (eqv? 0 (textual-contains "" "" 0 0 0))) + + (test-assert (eqv? 2 (textual-contains "abcdeffffoo" "" 2 10 0))) + + (test-assert (eqv? 2 (textual-contains "abcdeffffoo" "a" 2 10 1))) + + (test-assert (eqv? 5 (textual-contains "abcdeffffoo" "ff" 2 10 1))) + + (test-assert (eqv? 5 (textual-contains "abcdeffffoo" "eff" 2 10 1))) + + (test-assert (eqv? #f (textual-contains "abcdeffffoo" "foo" 2 10 1))) + + (test-assert (eqv? #f (textual-contains "abcdeffffoo" "efffoo" 2 10 1))) + + (test-assert (eqv? 0 (textual-contains-right "" "" 0 0 0))) + + (test-assert (eqv? 10 (textual-contains-right "abcdeffffoo" "" 2 10 0))) + + (test-assert (eqv? 10 (textual-contains-right "abcdeffffoo" "a" 2 10 1))) + + (test-assert (eqv? 8 (textual-contains-right "abcdeffffoo" "ff" 2 10 1))) + + (test-assert (eqv? 7 (textual-contains-right "abcdeffffoo" "eff" 2 10 1))) + + (test-assert (eqv? #f (textual-contains-right "abcdeffffoo" "foo" 2 10 1))) + + (test-assert (eqv? #f (textual-contains-right "abcdeffffoo" "efffoo" 2 10 1))) + + + (test-assert (eqv? 0 (textual-contains "" "" 0 0 0 0))) + + (test-assert (eqv? 2 (textual-contains "abcdeffffoo" "" 2 10 0 0))) + + (test-assert (eqv? 2 (textual-contains "abcdeffffoo" "a" 2 10 1 1))) + + (test-assert (eqv? 5 (textual-contains "abcdeffffoo" "ff" 2 10 1 2))) + + (test-assert (eqv? 5 (textual-contains "abcdeffffoo" "eff" 2 10 1 2))) + + (test-assert (eqv? 9 (textual-contains "abcdeffffoo" "foo" 2 10 1 2))) + + (test-assert (eqv? 4 (textual-contains "abcdeffffoo" "efffoo" 2 10 0 2))) + + (test-assert (eqv? 0 (textual-contains-right "" "" 0 0 0 0))) + + (test-assert (eqv? 10 (textual-contains-right "abcdeffffoo" "" 2 10 0 0))) + + (test-assert (eqv? 10 (textual-contains-right "abcdeffffoo" "a" 2 10 1 1))) + + (test-assert (eqv? 8 (textual-contains-right "abcdeffffoo" "ff" 2 10 1 2))) + + (test-assert (eqv? 8 (textual-contains-right "abcdeffffoo" "eff" 2 10 1 2))) + + (test-assert (eqv? 9 (textual-contains-right "abcdeffffoo" "foo" 2 10 1 2))) + + (test-assert (eqv? 7 (textual-contains-right "abcdeffffoo" "efffoo" 2 10 1 3))) + + + ;; Case conversion + + ;; FIXME: should test some non-ASCII cases here. + + (test-text "1234STRIKES" (textual-upcase (as-text "1234Strikes"))) + + (test-text "1234STRIKES" (textual-upcase (as-text "1234strikes"))) + + (test-text "1234STRIKES" (textual-upcase (as-text "1234STRIKES"))) + + (test-text "1234strikes" (textual-downcase (as-text "1234Strikes"))) + + (test-text "1234strikes" (textual-downcase (as-text "1234strikes"))) + + (test-text "1234strikes" (textual-downcase (as-text "1234STRIKES"))) + + (test-text "1234strikes" (textual-foldcase (as-text "1234Strikes"))) + + (test-text "1234strikes" (textual-foldcase (as-text "1234strikes"))) + + (test-text "1234strikes" (textual-foldcase (as-text "1234STRIKES"))) + + (test-text "And With Three Strikes You Are Out" + (textual-titlecase + (as-text "and with THREE STRIKES you are oUT"))) + + ;; Concatenation + + (test-text "" (textual-append)) + + (test-text "abcdef" + (textual-append (as-text "") + (as-text "a") + (as-text "bcd") + "" "ef" "" "")) + + (test-text "" (textual-concatenate '())) + + (test-text "abcdef" + (textual-concatenate + (map string->text '("" "a" "bcd" "" "ef" "" "")))) + + ;; textual-concatenate is likely to have special cases for longer texts. + + (let* ((alphabet "abcdefghijklmnopqrstuvwxyz") + (str1 alphabet) + (str10 (apply string-append (vector->list (make-vector 10 str1)))) + (str100 (apply string-append (vector->list (make-vector 10 str10)))) + (str100-500 (substring str100 100 500)) + (str600-999 (substring str100 600 999)) + (alph1 (textual-copy alphabet)) + (alph10 (textual-concatenate (vector->list (make-vector 10 alph1)))) + (alph100 (textual-concatenate (vector->list (make-vector 10 alph10)))) + (t100-500 (subtext alph100 100 500)) + (t600-999 (subtext alph100 600 999))) + + (test-assert (result=? str10 alph10)) + + (test-assert (result=? str100 alph100)) + + (test-assert (result=? str100-500 t100-500)) + + (test-assert (result=? str600-999 t600-999)) + + ;; concatenating a short text with a long text + + (test-text (string-append str1 str600-999) + (textual-concatenate (list alph1 t600-999))) + + (test-text (string-append str1 str600-999) + (textual-concatenate (list alph1 (textual-copy t600-999)))) + + (test-text (string-append str600-999 str1) + (textual-concatenate (list t600-999 alph1))) + + (test-text (string-append str600-999 str1) + (textual-concatenate (list (textual-copy t600-999) alph1)))) + + + (test-text "" (textual-concatenate-reverse '())) + + (test-text "efbcda" + (textual-concatenate-reverse + (map string->text '("" "a" "bcd" "" "ef" "" "")))) + + (test-text "huh?" + (textual-concatenate-reverse '() "huh?")) + + (test-text "efbcdaxy" + (textual-concatenate-reverse '("" "a" "bcd" "" "ef" "" "") "xy")) + + (test-text "huh" + (textual-concatenate-reverse '() "huh?" 3)) + + (test-text "efbcdax" + (textual-concatenate-reverse + '("" "a" "bcd" "" "ef" "" "") "x" 1)) + + + (test-text "" (textual-join '())) + + (test-text " ab cd e f " + (textual-join (map string->text '("" "ab" "cd" "" "e" "f" "")))) + + (test-text "" + (textual-join '() "")) + + (test-text "abcdef" + (textual-join '("" "ab" "cd" "" "e" "f" "") "")) + + (test-text "" + (textual-join '() "xyz")) + + (test-text "xyzabxyzcdxyzxyzexyzfxyz" + (textual-join '("" "ab" "cd" "" "e" "f" "") "xyz")) + + (test-text "" + (textual-join '() "" 'infix)) + + (test-text "abcdef" + (textual-join '("" "ab" "cd" "" "e" "f" "") "" 'infix)) + + (test-text "" + (textual-join '() "xyz" 'infix)) + + (test-text "xyzabxyzcdxyzxyzexyzfxyz" + (textual-join '("" "ab" "cd" "" "e" "f" "") (as-text "xyz") 'infix)) + + (test-assert (equal? 'horror + (guard (exn (#t 'horror)) + (textual-join '() "" 'strict-infix)))) + + (test-text "abcdef" + (textual-join '("" "ab" "cd" "" "e" "f" "") "" 'strict-infix)) + + (test-assert (equal? 'wham + (guard (exn (else 'wham)) + (textual-join '() "xyz" 'strict-infix)))) + + (test-text "xyzabxyzcdxyzxyzexyzfxyz" + (textual-join '("" "ab" "cd" "" "e" "f" "") "xyz" 'strict-infix)) + + (test-text "" + (textual-join '() "" 'suffix)) + + (test-text "abcdef" + (textual-join '("" "ab" "cd" "" "e" "f" "") "" 'suffix)) + + (test-text "" + (textual-join '() "xyz" 'suffix)) + + (test-text "xyzabxyzcdxyzxyzexyzfxyzxyz" + (textual-join '("" "ab" "cd" "" "e" "f" "") "xyz" 'suffix)) + + (test-text "" + (textual-join '() "" 'prefix)) + + (test-text "abcdef" + (textual-join '("" "ab" "cd" "" "e" "f" "") "" 'prefix)) + + (test-text "" + (textual-join '() "xyz" 'prefix)) + + (test-text "xyzxyzabxyzcdxyzxyzexyzfxyz" + (textual-join '("" "ab" "cd" "" "e" "f" "") "xyz" 'prefix)) + + + ;; Fold & map & friends + + (test-assert (= 8 + (textual-fold (lambda (c count) + (if (char-whitespace? c) + (+ count 1) + count)) + 0 + (as-text " ...a couple of spaces in this one... ")))) + + (test-assert (= 7 + (textual-fold (lambda (c count) + (if (char-whitespace? c) + (+ count 1) + count)) + 0 + " ...a couple of spaces in this one... " + 1))) + + (test-assert (= 6 + (textual-fold (lambda (c count) + (if (char-whitespace? c) + (+ count 1) + count)) + 0 + " ...a couple of spaces in this one... " + 1 + 32))) + + (test-assert (equal? (string->list "abcdef") + (textual-fold-right cons '() "abcdef"))) + + (test-assert (equal? (string->list "def") + (textual-fold-right cons '() (as-text "abcdef") 3))) + + (test-assert (equal? (string->list "cde") + (textual-fold-right cons '() (as-text "abcdef") 2 5))) + + (test-assert (string=? "aabraacaadaabraa" + (let* ((s (as-text "abracadabra")) + (ans-len (textual-fold (lambda (c sum) + (+ sum (if (char=? c #\a) 2 1))) + 0 s)) + (ans (make-string ans-len))) + (textual-fold (lambda (c i) + (let ((i (if (char=? c #\a) + (begin (string-set! ans i #\a) + (+ i 1)) + i))) + (string-set! ans i c) + (+ i 1))) + 0 s) + ans))) + + + (test-text "abc" (textual-map string (as-text "abc"))) + + (test-text "ABC" (textual-map char-upcase "abc")) + + (test-text "Hear-here!" + (textual-map (lambda (c0 c1 c2) + (case c0 + ((#\1) c1) + ((#\2) (string c2)) + ((#\-) (text #\- c1)))) + (string->text "1222-1111-2222") + (string->text "Hi There!") + (string->text "Dear John"))) + + (test-assert (string=? "abc" + (let ((q (open-output-string))) + (textual-for-each (lambda (c) (write-char c q)) + (as-text "abc")) + (get-output-string q)))) + + (test-assert (equal? '("cfi" "beh" "adg") + (let ((x '())) + (textual-for-each (lambda (c1 c2 c3) + (set! x (cons (string c1 c2 c3) x))) + "abc" + (as-text "defxyz") + (as-text "ghijklmnopqrstuvwxyz")) + x))) + + (test-text "abc" + (textual-map-index (lambda (i) + (integer->char (+ i (char->integer #\a)))) + "xyz")) + + (test-text "def" + (textual-map-index (lambda (i) + (integer->char (+ i (char->integer #\a)))) + "xyz***" 3)) + + (test-text "cde" + (textual-map-index (lambda (i) + (integer->char (+ i (char->integer #\a)))) + "......" 2 5)) + + (test-assert (equal? '(101 100 99 98 97) + (let ((s (as-text "abcde")) + (v '())) + (textual-for-each-index + (lambda (i) + (set! v (cons (char->integer (textual-ref s i)) v))) + s) + v))) + + (test-assert (equal? '(101 100 99) + (let ((s (as-text "abcde")) + (v '())) + (textual-for-each-index + (lambda (i) + (set! v (cons (char->integer (textual-ref s i)) v))) + s 2) + v))) + + (test-assert (equal? '(99 98) + (let ((s (as-text "abcde")) + (v '())) + (textual-for-each-index + (lambda (i) + (set! v (cons (char->integer (textual-ref s i)) v))) + s 1 3) + v))) + + (test-assert (= 6 (textual-count "abcdef" char?))) + + (test-assert (= 4 (textual-count "counting whitespace, again " char-whitespace? 5))) + + (test-assert (= 3 (textual-count "abcdefwxyz" + (lambda (c) (odd? (char->integer c))) + 2 8))) + + + (test-text "aiueaaaoi" + (textual-filter (lambda (c) (memv c (textual->list "aeiou"))) + (as-text "What is number, that man may know it?"))) + + (test-text "And wmn, tht sh my knw nmbr?" + (textual-remove (lambda (c) (memv c (textual->list "aeiou"))) + "And woman, that she may know number?")) + + (test-text "iueaaaoi" + (textual-filter (lambda (c) (memv c (textual->list "aeiou"))) + (as-text "What is number, that man may know it?") + 4)) + + (test-text "mn, tht sh my knw nmbr?" + (textual-remove (lambda (c) (memv c (textual->list "aeiou"))) + "And woman, that she may know number?" + 6)) + + (test-text "aaao" + (textual-filter (lambda (c) (memv c (textual->list "aeiou"))) + (as-text "What is number, that man may know it?") + 16 32)) + + (test-text "And woman, that sh may know" + (textual-remove (lambda (c) (memv c (textual->list "eiu"))) + "And woman, that she may know number?" + 0 28)) + + ;; Replication and splitting ; ; ; + + (test-text "cdefabcdefabcd" + (textual-replicate "abcdef" -4 10)) + + (test-text "bcdefbcdefbcd" + (textual-replicate "abcdef" 90 103 1)) + + (test-text "ecdecdecde" + (textual-replicate "abcdef" -13 -3 2 5)) + + (test-assert (equal? '() (map textual->string (textual-split "" "")))) + + (test-assert (equal? '("a" "b" "c") (map textual->string (textual-split "abc" "")))) + + (test-assert (equal? '("too" "" "much" "" "data") + (map textual->string + (textual-split "too much data" " ")))) + + (test-assert (equal? '("" "there" "ya" "go" "") + (map textual->string + (textual-split "***there***ya***go***" "***")))) + + (test-assert (equal? '() (map textual->string (textual-split "" "" 'infix)))) + + (test-assert (equal? '("a" "b" "c") + (map textual->string (textual-split "abc" "" 'infix)))) + + (test-assert (equal? '("too" "" "much" "" "data") + (map textual->string + (textual-split "too much data" " " 'infix)))) + + (test-assert (equal? '("" "there" "ya" "go" "") + (map textual->string + (textual-split "***there***ya***go***" "***" 'infix)))) + + (test-assert (equal? 'error + (guard (exn (else 'error)) + (map textual->string + (textual-split "" "" 'strict-infix))))) + + (test-assert (equal? '("a" "b" "c") + (map textual->string + (textual-split "abc" "" 'strict-infix)))) + + (test-assert (equal? '("too" "" "much" "" "data") + (map textual->string + (textual-split "too much data" " " 'strict-infix)))) + + (test-assert (equal? '("" "there" "ya" "go" "") + (map textual->string + (textual-split "***there***ya***go***" "***" 'strict-infix)))) + + (test-assert (equal? '() + (map textual->string + (textual-split "" "" 'prefix)))) + + (test-assert (equal? '("a" "b" "c") + (map textual->string + (textual-split "abc" "" 'prefix)))) + + (test-assert (equal? '("too" "" "much" "" "data") + (map textual->string + (textual-split "too much data" " " 'prefix)))) + + (test-assert (equal? '("there" "ya" "go" "") + (map textual->string + (textual-split "***there***ya***go***" "***" 'prefix)))) + + (test-assert (equal? '() + (map textual->string + (textual-split "" "" 'suffix)))) + + (test-assert (equal? '("a" "b" "c") + (map textual->string + (textual-split "abc" "" 'suffix)))) + + (test-assert (equal? '("too" "" "much" "" "data") + (map textual->string + (textual-split "too much data" " " 'suffix)))) + + (test-assert (equal? '("" "there" "ya" "go") + (map textual->string + (textual-split "***there***ya***go***" "***" 'suffix)))) + + + (test-assert (equal? '() + (map textual->string + (textual-split "" "" 'infix #f)))) + + (test-assert (equal? '("a" "b" "c") + (map textual->string + (textual-split "abc" "" 'infix #f)))) + + (test-assert (equal? '("too" "" "much" "" "data") + (map textual->string + (textual-split "too much data" " " 'infix #f)))) + + (test-assert (equal? '("" "there" "ya" "go" "") + (map textual->string + (textual-split "***there***ya***go***" "***" 'infix #f)))) + + (test-assert (equal? 'error + (guard (exn (else 'error)) + (map textual->string + (textual-split "" "" 'strict-infix #f))))) + + (test-assert (equal? '("a" "b" "c") + (map textual->string + (textual-split "abc" "" 'strict-infix #f)))) + + (test-assert (equal? '("too" "" "much" "" "data") + (map textual->string + (textual-split "too much data" " " 'strict-infix #f)))) + + (test-assert (equal? '("" "there" "ya" "go" "") + (map textual->string + (textual-split "***there***ya***go***" "***" 'strict-infix #f)))) + + (test-assert (equal? '() + (map textual->string + (textual-split "" "" 'prefix #f)))) + + (test-assert (equal? '("a" "b" "c") + (map textual->string + (textual-split "abc" "" 'prefix #f)))) + + (test-assert (equal? '("too" "" "much" "" "data") + (map textual->string + (textual-split "too much data" " " 'prefix #f)))) + + (test-assert (equal? '("there" "ya" "go" "") + (map textual->string + (textual-split "***there***ya***go***" "***" 'prefix #f)))) + + (test-assert (equal? '() + (map textual->string + (textual-split "" "" 'suffix #f)))) + + (test-assert (equal? '("a" "b" "c") + (map textual->string + (textual-split "abc" "" 'suffix #f)))) + + (test-assert (equal? '("too" "" "much" "" "data") + (map textual->string + (textual-split "too much data" " " 'suffix #f)))) + + (test-assert (equal? '("" "there" "ya" "go") + (map textual->string + (textual-split "***there***ya***go***" "***" 'suffix #f)))) + + + (test-assert (equal? 'error + (guard (exn (else 'error)) + (map textual->string + (textual-split "" "" 'strict-infix 3))))) + + (test-assert (equal? '("a" "b" "c") + (map textual->string + (textual-split "abc" "" 'strict-infix 3)))) + + (test-assert (equal? '("too" "" "much" " data") + (map textual->string + (textual-split "too much data" " " 'strict-infix 3)))) + + (test-assert (equal? '("" "there" "ya" "go***") + (map textual->string + (textual-split "***there***ya***go***" "***" 'strict-infix 3)))) + + (test-assert (equal? '() + (map textual->string + (textual-split "" "" 'prefix 3)))) + + (test-assert (equal? '("a" "b" "c") + (map textual->string + (textual-split "abc" "" 'prefix 3)))) + + (test-assert (equal? '("too" "" "much" " data") + (map textual->string + (textual-split "too much data" " " 'prefix 3)))) + + (test-assert (equal? '("there" "ya" "go***") + (map textual->string + (textual-split "***there***ya***go***" "***" 'prefix 3)))) + + (test-assert (equal? '() + (map textual->string + (textual-split "" "" 'suffix 3)))) + + (test-assert (equal? '("a" "b" "c") + (map textual->string + (textual-split "abc" "" 'suffix 3)))) + + (test-assert (equal? '("too" "" "much" " data") + (map textual->string + (textual-split "too much data" " " 'suffix 3)))) + + (test-assert (equal? '("" "there" "ya" "go***") + (map textual->string + (textual-split "***there***ya***go***" "***" 'suffix 3)))) + + + (test-assert (equal? 'error + (guard (exn (else 'error)) + (map textual->string + (textual-split "" "" 'strict-infix 3 0))))) + + (test-assert (equal? '("b" "c") + (map textual->string + (textual-split "abc" "" 'strict-infix 3 1)))) + + (test-assert (equal? '("oo" "" "much" " data") + (map textual->string + (textual-split "too much data" " " 'strict-infix 3 1)))) + + (test-assert (equal? '("**there" "ya" "go" "") + (map textual->string + (textual-split "***there***ya***go***" "***" 'strict-infix 3 1)))) + + (test-assert (equal? '() + (map textual->string + (textual-split "" "" 'prefix 3 0)))) + + (test-assert (equal? '("b" "c") + (map textual->string + (textual-split "abc" "" 'prefix 3 1)))) + + (test-assert (equal? '("oo" "" "much" " data") + (map textual->string + (textual-split "too much data" " " 'prefix 3 1)))) + + (test-assert (equal? '("**there" "ya" "go" "") + (map textual->string + (textual-split "***there***ya***go***" "***" 'prefix 3 1)))) + + (test-assert (equal? '() + (map textual->string + (textual-split "" "" 'suffix 3 0)))) + + (test-assert (equal? '("b" "c") + (map textual->string + (textual-split "abc" "" 'suffix 3 1)))) + + (test-assert (equal? '("oo" "" "much" " data") + (map textual->string + (textual-split "too much data" " " 'suffix 3 1)))) + + (test-assert (equal? '("**there" "ya" "go") + (map textual->string + (textual-split "***there***ya***go***" "***" 'suffix 3 1)))) + + + (test-assert (equal? 'error + (guard (exn (else 'error)) + (map textual->string + (textual-split "" "" 'strict-infix 3 0 0))))) + + (test-assert (equal? '("b") + (map textual->string + (textual-split "abc" "" 'strict-infix 3 1 2)))) + + (test-assert (equal? '("oo" "" "much" " ") + (map textual->string + (textual-split "too much data" " " 'strict-infix 3 1 11)))) + + (test-assert (equal? '() + (map textual->string + (textual-split "" "" 'prefix 3 0 0)))) + + (test-assert (equal? '("b") + (map textual->string + (textual-split "abc" "" 'prefix 3 1 2)))) + + (test-assert (equal? '("oo" "" "much" " ") + (map textual->string + (textual-split "too much data" " " 'prefix 3 1 11)))) + + (test-assert (equal? '() + (map textual->string + (textual-split "" "" 'suffix 3 0 0)))) + + (test-assert (equal? '("b") + (map textual->string + (textual-split "abc" "" 'suffix 3 1 2)))) + + (test-assert (equal? '("oo" "" "much" " ") + (map textual->string + (textual-split "too much data" " " 'suffix 3 1 11)))) + + (test-end)))) + +;; Local variables: +;; eval: (put 'test-text 'scheme-indent-function 1) +;; End: