mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-19 21:59:17 +02:00
adding (srfi 135)
This commit is contained in:
parent
bd9ea1d3ac
commit
f6f470c3e5
7 changed files with 5625 additions and 0 deletions
1
AUTHORS
1
AUTHORS
|
@ -18,6 +18,7 @@ The following distributed SRFIs use the reference implementations:
|
||||||
|
|
||||||
(srfi 101) is adapted from David van Horn's implementation
|
(srfi 101) is adapted from David van Horn's implementation
|
||||||
(srfi 134) is Shiro Kawai's implementation
|
(srfi 134) is Shiro Kawai's implementation
|
||||||
|
(srfi 135) is Will Clinger's implementation
|
||||||
|
|
||||||
The benchmarks are based on the Racket versions of the classic
|
The benchmarks are based on the Racket versions of the classic
|
||||||
Gabriel benchmarks from
|
Gabriel benchmarks from
|
||||||
|
|
|
@ -1211,6 +1211,7 @@ snow-fort):
|
||||||
\item{\hyperlink["http://srfi.schemers.org/srfi-132/srfi-132.html"]{(srfi 132) - sort libraries}}
|
\item{\hyperlink["http://srfi.schemers.org/srfi-132/srfi-132.html"]{(srfi 132) - sort libraries}}
|
||||||
\item{\hyperlink["http://srfi.schemers.org/srfi-133/srfi-133.html"]{(srfi 133) - vector library}}
|
\item{\hyperlink["http://srfi.schemers.org/srfi-133/srfi-133.html"]{(srfi 133) - vector library}}
|
||||||
\item{\hyperlink["http://srfi.schemers.org/srfi-134/srfi-134.html"]{(srfi 134) - immutable deques}}
|
\item{\hyperlink["http://srfi.schemers.org/srfi-134/srfi-134.html"]{(srfi 134) - immutable deques}}
|
||||||
|
\item{\hyperlink["http://srfi.schemers.org/srfi-135/srfi-135.html"]{(srfi 135) - immutable texts}}
|
||||||
\item{\hyperlink["http://srfi.schemers.org/srfi-139/srfi-139.html"]{(srfi 139) - syntax parameters}}
|
\item{\hyperlink["http://srfi.schemers.org/srfi-139/srfi-139.html"]{(srfi 139) - syntax parameters}}
|
||||||
\item{\hyperlink["http://srfi.schemers.org/srfi-141/srfi-141.html"]{(srfi 141) - integer division}}
|
\item{\hyperlink["http://srfi.schemers.org/srfi-141/srfi-141.html"]{(srfi 141) - integer division}}
|
||||||
\item{\hyperlink["http://srfi.schemers.org/srfi-143/srfi-143.html"]{(srfi 143) - fixnums}}
|
\item{\hyperlink["http://srfi.schemers.org/srfi-143/srfi-143.html"]{(srfi 143) - fixnums}}
|
||||||
|
|
1819
lib/srfi/135.scm
Normal file
1819
lib/srfi/135.scm
Normal file
File diff suppressed because it is too large
Load diff
223
lib/srfi/135.sld
Normal file
223
lib/srfi/135.sld
Normal file
|
@ -0,0 +1,223 @@
|
||||||
|
;;; Copyright (C) William D Clinger (2016).
|
||||||
|
;;;
|
||||||
|
;;; Permission is hereby granted, free of charge, to any person
|
||||||
|
;;; obtaining a copy of this software and associated documentation
|
||||||
|
;;; files (the "Software"), to deal in the Software without
|
||||||
|
;;; restriction, including without limitation the rights to use,
|
||||||
|
;;; copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
;;; sell copies of the Software, and to permit persons to whom the
|
||||||
|
;;; Software is furnished to do so, subject to the following
|
||||||
|
;;; conditions:
|
||||||
|
;;;
|
||||||
|
;;; The above copyright notice and this permission notice shall be
|
||||||
|
;;; included in all copies or substantial portions of the Software.
|
||||||
|
;;;
|
||||||
|
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
;;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
;;; OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
(define-library (srfi 135)
|
||||||
|
|
||||||
|
(export
|
||||||
|
|
||||||
|
;; Predicates
|
||||||
|
|
||||||
|
text? textual?
|
||||||
|
textual-null?
|
||||||
|
textual-every textual-any
|
||||||
|
|
||||||
|
;; Constructors
|
||||||
|
|
||||||
|
make-text text
|
||||||
|
text-tabulate
|
||||||
|
text-unfold text-unfold-right
|
||||||
|
|
||||||
|
;; Conversion
|
||||||
|
|
||||||
|
textual->text
|
||||||
|
textual->string textual->vector textual->list
|
||||||
|
string->text vector->text list->text reverse-list->text
|
||||||
|
textual->utf8 textual->utf16be
|
||||||
|
textual->utf16 textual->utf16le
|
||||||
|
utf8->text utf16be->text
|
||||||
|
utf16->text utf16le->text
|
||||||
|
|
||||||
|
;; Selection
|
||||||
|
|
||||||
|
text-length textual-length
|
||||||
|
text-ref textual-ref
|
||||||
|
subtext subtextual
|
||||||
|
textual-copy
|
||||||
|
textual-take textual-take-right
|
||||||
|
textual-drop textual-drop-right
|
||||||
|
textual-pad textual-pad-right
|
||||||
|
textual-trim textual-trim-right textual-trim-both
|
||||||
|
|
||||||
|
;; Replacement
|
||||||
|
|
||||||
|
textual-replace
|
||||||
|
|
||||||
|
;; Comparison
|
||||||
|
|
||||||
|
textual=? textual-ci=?
|
||||||
|
textual<? textual-ci<?
|
||||||
|
textual>? textual-ci>?
|
||||||
|
textual<=? textual-ci<=?
|
||||||
|
textual>=? textual-ci>=?
|
||||||
|
|
||||||
|
;; Prefixes & suffixes
|
||||||
|
|
||||||
|
textual-prefix-length textual-suffix-length
|
||||||
|
textual-prefix? textual-suffix?
|
||||||
|
|
||||||
|
;; Searching
|
||||||
|
|
||||||
|
textual-index textual-index-right
|
||||||
|
textual-skip textual-skip-right
|
||||||
|
textual-contains textual-contains-right
|
||||||
|
|
||||||
|
;; Case conversion
|
||||||
|
|
||||||
|
textual-upcase textual-downcase
|
||||||
|
textual-foldcase textual-titlecase
|
||||||
|
|
||||||
|
;; Concatenation
|
||||||
|
|
||||||
|
textual-append textual-concatenate textual-concatenate-reverse
|
||||||
|
textual-join
|
||||||
|
|
||||||
|
;; Fold & map & friends
|
||||||
|
|
||||||
|
textual-fold textual-fold-right
|
||||||
|
textual-map textual-for-each
|
||||||
|
textual-map-index textual-for-each-index
|
||||||
|
textual-count
|
||||||
|
textual-filter textual-remove
|
||||||
|
; textual-reverse
|
||||||
|
|
||||||
|
;; Replication & splitting
|
||||||
|
|
||||||
|
textual-replicate textual-split
|
||||||
|
)
|
||||||
|
|
||||||
|
(import (scheme base)
|
||||||
|
(scheme case-lambda)
|
||||||
|
(scheme char)
|
||||||
|
(srfi 135 kernel8))
|
||||||
|
|
||||||
|
(cond-expand
|
||||||
|
((library (rnrs unicode))
|
||||||
|
(import (only (rnrs unicode) string-titlecase)))
|
||||||
|
((library (srfi 129))
|
||||||
|
(import (only (srfi 129) string-titlecase)))
|
||||||
|
(else
|
||||||
|
(begin
|
||||||
|
(define (string-titlecase s)
|
||||||
|
(%string-titlecase s)))))
|
||||||
|
|
||||||
|
;; textual-replicate needs a sensible mod procedure
|
||||||
|
|
||||||
|
(cond-expand
|
||||||
|
((library (rnrs base))
|
||||||
|
(import (only (rnrs base) div mod)))
|
||||||
|
(else
|
||||||
|
(begin
|
||||||
|
|
||||||
|
(define (assertion-violation procname msg . irritants)
|
||||||
|
(apply error msg irritants))
|
||||||
|
|
||||||
|
;; Restricted to exact integers, which is all we need here.
|
||||||
|
|
||||||
|
(define (div-and-mod x y)
|
||||||
|
(cond ((and (exact-integer? x) (exact-integer? y))
|
||||||
|
(cond ((= y 0)
|
||||||
|
(error "mod: zero divisor" x y))
|
||||||
|
((>= x 0)
|
||||||
|
(values (quotient x y) (remainder x y)))
|
||||||
|
((< y 0)
|
||||||
|
; x < 0, y < 0
|
||||||
|
(let* ((q (quotient x y))
|
||||||
|
(r (- x (* q y))))
|
||||||
|
(if (= r 0)
|
||||||
|
(values q 0)
|
||||||
|
(values (+ q 1) (- r y)))))
|
||||||
|
(else
|
||||||
|
; x < 0, y > 0
|
||||||
|
(let* ((q (quotient x y))
|
||||||
|
(r (- x (* q y))))
|
||||||
|
(if (= r 0)
|
||||||
|
(values q 0)
|
||||||
|
(values (- q 1) (+ r y)))))))
|
||||||
|
(else
|
||||||
|
(error "div or mod: illegal arguments" x y))))
|
||||||
|
|
||||||
|
(define (div x y)
|
||||||
|
(cond ((and (exact-integer? x)
|
||||||
|
(exact-integer? y)
|
||||||
|
(>= x 0))
|
||||||
|
(quotient x y))
|
||||||
|
(else
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (div-and-mod x y))
|
||||||
|
(lambda (q r) q)))))
|
||||||
|
|
||||||
|
(define (mod x y)
|
||||||
|
(cond ((and (exact-integer? x)
|
||||||
|
(exact-integer? y)
|
||||||
|
(>= x 0))
|
||||||
|
(remainder x y))
|
||||||
|
(else
|
||||||
|
(call-with-values
|
||||||
|
(lambda () (div-and-mod x y))
|
||||||
|
(lambda (q r) r))))))))
|
||||||
|
|
||||||
|
;; To run texts-search-test.sps, change the (or) to (and).
|
||||||
|
|
||||||
|
(cond-expand ((or)
|
||||||
|
(export
|
||||||
|
%textual-contains:naive
|
||||||
|
%textual-contains:rabin-karp
|
||||||
|
%textual-contains:boyer-moore
|
||||||
|
|
||||||
|
%textual-contains-right:naive
|
||||||
|
%textual-contains-right:boyer-moore
|
||||||
|
))
|
||||||
|
(else))
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;;; The recommended external syntax cannot be implemented portably.
|
||||||
|
;;; Here is Larceny-specific code that generates the recommended
|
||||||
|
;;; syntax for output, but does not make the changes necessary to
|
||||||
|
;;; accept that recommended syntax for input or as a literal in
|
||||||
|
;;; programs.
|
||||||
|
|
||||||
|
(cond-expand (larceny
|
||||||
|
(import (scheme write)
|
||||||
|
(primitives rtd-printer-set!)))
|
||||||
|
(else))
|
||||||
|
|
||||||
|
(cond-expand (larceny
|
||||||
|
(begin
|
||||||
|
(define (text-write txt p)
|
||||||
|
(let* ((q (open-output-string))
|
||||||
|
(s (begin (write (textual->string txt) q)
|
||||||
|
(get-output-string q))))
|
||||||
|
(write-char (integer->char #x00ab) p)
|
||||||
|
(write-string (substring s 1 (- (string-length s) 1)) p)
|
||||||
|
(write-char (integer->char #x00bb) p)))
|
||||||
|
|
||||||
|
(define ignored-result-from-rtd-printer-set! ; for R6RS
|
||||||
|
(rtd-printer-set! text-rtd text-write))))
|
||||||
|
(else))
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(include "135.scm"))
|
||||||
|
|
||||||
|
;;; eof
|
518
lib/srfi/135/kernel8.body.scm
Normal file
518
lib/srfi/135/kernel8.body.scm
Normal file
|
@ -0,0 +1,518 @@
|
||||||
|
;;; Copyright (C) William D Clinger (2016).
|
||||||
|
;;;
|
||||||
|
;;; Permission is hereby granted, free of charge, to any person
|
||||||
|
;;; obtaining a copy of this software and associated documentation
|
||||||
|
;;; files (the "Software"), to deal in the Software without
|
||||||
|
;;; restriction, including without limitation the rights to use,
|
||||||
|
;;; copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
;;; sell copies of the Software, and to permit persons to whom the
|
||||||
|
;;; Software is furnished to do so, subject to the following
|
||||||
|
;;; conditions:
|
||||||
|
;;;
|
||||||
|
;;; The above copyright notice and this permission notice shall be
|
||||||
|
;;; included in all copies or substantial portions of the Software.
|
||||||
|
;;;
|
||||||
|
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
;;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
;;; OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
;;; FIXME: these utilities should be in a separate file
|
||||||
|
|
||||||
|
(define (complain name . args)
|
||||||
|
(apply error
|
||||||
|
(string-append (symbol->string name) ": illegal arguments")
|
||||||
|
args))
|
||||||
|
|
||||||
|
(define (list->bytevector bytes)
|
||||||
|
(let* ((n (length bytes))
|
||||||
|
(bv (make-bytevector n)))
|
||||||
|
(do ((i 0 (+ i 1))
|
||||||
|
(bytes bytes (cdr bytes)))
|
||||||
|
((= i n))
|
||||||
|
(bytevector-u8-set! bv i (car bytes)))
|
||||||
|
bv))
|
||||||
|
|
||||||
|
;;; 1-argument version for internal use
|
||||||
|
|
||||||
|
(define (%string->text s)
|
||||||
|
(if (string? s)
|
||||||
|
(text-tabulate (lambda (i) (string-ref s i))
|
||||||
|
(string-length s))
|
||||||
|
(complain 'string->text s)))
|
||||||
|
|
||||||
|
;;; A portable implementation can't rely on inlining,
|
||||||
|
;;; but it can rely on macros.
|
||||||
|
|
||||||
|
(define N 128)
|
||||||
|
|
||||||
|
(define (length&i0 len i0)
|
||||||
|
(+ (* N len) i0))
|
||||||
|
|
||||||
|
#;
|
||||||
|
(define (length&i0.length k)
|
||||||
|
(quotient k N))
|
||||||
|
|
||||||
|
#;
|
||||||
|
(define (length&i0.i0 k)
|
||||||
|
(remainder k N))
|
||||||
|
|
||||||
|
(define-syntax length&i0.length
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ k)
|
||||||
|
(quotient k N))))
|
||||||
|
|
||||||
|
(define-syntax length&i0.i0
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ k)
|
||||||
|
(remainder k N))))
|
||||||
|
|
||||||
|
(define-record-type text-rtd
|
||||||
|
(new-text0 k chunks)
|
||||||
|
text?
|
||||||
|
(k text.k)
|
||||||
|
(chunks text.chunks))
|
||||||
|
|
||||||
|
(define (%new-text len i0 chunks)
|
||||||
|
(new-text0 (length&i0 len i0) chunks))
|
||||||
|
|
||||||
|
(define the-empty-text
|
||||||
|
(%new-text 0 0 (vector (make-bytevector 0))))
|
||||||
|
|
||||||
|
;;; text? is defined by the record definition above.
|
||||||
|
|
||||||
|
(define (text-length txt)
|
||||||
|
(if (text? txt)
|
||||||
|
(length&i0.length (text.k txt))
|
||||||
|
(error "text-length: not a text" txt)))
|
||||||
|
|
||||||
|
(define (text-ref txt i)
|
||||||
|
(if (and (text? txt)
|
||||||
|
(exact-integer? i)
|
||||||
|
(<= 0 i))
|
||||||
|
(let* ((k (text.k txt))
|
||||||
|
(chunks (text.chunks txt))
|
||||||
|
(len (length&i0.length k))
|
||||||
|
(i0 (length&i0.i0 k))
|
||||||
|
(i+i0 (+ i i0))
|
||||||
|
(j (quotient i+i0 N))
|
||||||
|
(ii (remainder i+i0 N)))
|
||||||
|
(if (< i len)
|
||||||
|
(let* ((sj (vector-ref chunks j))
|
||||||
|
(sjn (bytevector-length sj)))
|
||||||
|
(if (if (< j (- (vector-length chunks) 1))
|
||||||
|
(= sjn N)
|
||||||
|
(= sjn (remainder (+ i0 len) N)))
|
||||||
|
(integer->char (bytevector-u8-ref sj ii))
|
||||||
|
(%utf8-ref sj ii)))
|
||||||
|
(error "text-ref: index out of range" txt i)))
|
||||||
|
(error "text-ref: illegal arguments" txt i)))
|
||||||
|
|
||||||
|
;;; Non-checking versions for internal use.
|
||||||
|
|
||||||
|
(define (%text-length txt)
|
||||||
|
(length&i0.length (text.k txt)))
|
||||||
|
|
||||||
|
(define (%text-ref txt i)
|
||||||
|
(let* ((k (text.k txt))
|
||||||
|
(chunks (text.chunks txt))
|
||||||
|
(len (length&i0.length k))
|
||||||
|
(i0 (length&i0.i0 k))
|
||||||
|
(i+i0 (+ i i0))
|
||||||
|
(j (quotient i+i0 N))
|
||||||
|
(ii (remainder i+i0 N))
|
||||||
|
(sj (vector-ref chunks j))
|
||||||
|
(sjn (bytevector-length sj)))
|
||||||
|
(if (if (< j (- (vector-length chunks) 1))
|
||||||
|
(= sjn N)
|
||||||
|
(= sjn (remainder (+ i0 len) N)))
|
||||||
|
(integer->char (bytevector-u8-ref sj ii))
|
||||||
|
(%utf8-ref sj ii))))
|
||||||
|
|
||||||
|
;;; Returns character i of the UTF-8.
|
||||||
|
|
||||||
|
(define (%utf8-ref bv i)
|
||||||
|
(let loop ((j 0) ; character index of (bytevector-u8-ref bv k)
|
||||||
|
(k 0)) ; byte index into bv
|
||||||
|
(if (= i j)
|
||||||
|
(%utf8-char-at bv k)
|
||||||
|
(let ((byte (bytevector-u8-ref bv k)))
|
||||||
|
(cond ((< byte 128)
|
||||||
|
(loop (+ j 1) (+ k 1)))
|
||||||
|
((< byte #b11100000)
|
||||||
|
(loop (+ j 1) (+ k 2)))
|
||||||
|
((< byte #b11110000)
|
||||||
|
(loop (+ j 1) (+ k 3)))
|
||||||
|
(else
|
||||||
|
(loop (+ j 1) (+ k 4))))))))
|
||||||
|
|
||||||
|
(define (%utf8-char-at bv k)
|
||||||
|
(let ((byte (bytevector-u8-ref bv k)))
|
||||||
|
(cond ((< byte 128)
|
||||||
|
(integer->char byte))
|
||||||
|
((< byte #b11100000)
|
||||||
|
(let* ((byte2 (bytevector-u8-ref bv (+ k 1)))
|
||||||
|
(bits1 (- byte #b11000000))
|
||||||
|
(bits2 (- byte2 #b10000000))
|
||||||
|
(cp (+ (* 64 bits1) bits2)))
|
||||||
|
(integer->char cp)))
|
||||||
|
((< byte #b11110000)
|
||||||
|
(let* ((byte2 (bytevector-u8-ref bv (+ k 1)))
|
||||||
|
(byte3 (bytevector-u8-ref bv (+ k 2)))
|
||||||
|
(bits1 (- byte #b11100000))
|
||||||
|
(bits2 (- byte2 #b10000000))
|
||||||
|
(bits3 (- byte3 #b10000000))
|
||||||
|
(cp (+ (* 64 64 bits1) (* 64 bits2) bits3)))
|
||||||
|
(integer->char cp)))
|
||||||
|
(else
|
||||||
|
(let* ((byte2 (bytevector-u8-ref bv (+ k 1)))
|
||||||
|
(byte3 (bytevector-u8-ref bv (+ k 2)))
|
||||||
|
(byte4 (bytevector-u8-ref bv (+ k 3)))
|
||||||
|
(bits1 (- byte #b11110000))
|
||||||
|
(bits2 (- byte2 #b10000000))
|
||||||
|
(bits3 (- byte3 #b10000000))
|
||||||
|
(bits4 (- byte4 #b10000000))
|
||||||
|
(cp (+ (* 64 64 64 bits1)
|
||||||
|
(* 64 64 bits2)
|
||||||
|
(* 64 bits3)
|
||||||
|
bits4)))
|
||||||
|
(integer->char cp))))))
|
||||||
|
|
||||||
|
;;; text-tabulate avoids side effects (in case proc returns more than once)
|
||||||
|
|
||||||
|
(define (text-tabulate proc len)
|
||||||
|
(if (= 0 len)
|
||||||
|
the-empty-text
|
||||||
|
(let loop ((i len) ; highest index that's been tabulated
|
||||||
|
(chunks '())
|
||||||
|
(bytes '()))
|
||||||
|
(cond ((= 0 i)
|
||||||
|
(%new-text len
|
||||||
|
0
|
||||||
|
(list->vector
|
||||||
|
(cons (list->bytevector bytes)
|
||||||
|
chunks))))
|
||||||
|
((and (= 0 (remainder i N))
|
||||||
|
(not (null? bytes)))
|
||||||
|
(loop i
|
||||||
|
(cons (list->bytevector bytes) chunks)
|
||||||
|
'()))
|
||||||
|
(else
|
||||||
|
(let* ((i-1 (- i 1))
|
||||||
|
(c (proc i-1)))
|
||||||
|
(if (char? c)
|
||||||
|
(let ((cp (char->integer c)))
|
||||||
|
(loop i-1
|
||||||
|
chunks
|
||||||
|
(cond ((< cp #x0080)
|
||||||
|
(cons cp bytes))
|
||||||
|
((< cp #x0800)
|
||||||
|
(let* ((bits1 (quotient cp 64))
|
||||||
|
(bits2 (remainder cp 64))
|
||||||
|
(byte1 (+ bits1 #b11000000))
|
||||||
|
(byte2 (+ bits2 #b10000000)))
|
||||||
|
(cons byte1 (cons byte2 bytes))))
|
||||||
|
((< cp #x10000)
|
||||||
|
(let* ((bits1 (quotient cp (* 64 64)))
|
||||||
|
(bits2 (quotient
|
||||||
|
(remainder cp (* 64 64))
|
||||||
|
64))
|
||||||
|
(bits3 (remainder cp 64))
|
||||||
|
(byte1 (+ bits1 #b11100000))
|
||||||
|
(byte2 (+ bits2 #b10000000))
|
||||||
|
(byte3 (+ bits3 #b10000000)))
|
||||||
|
(cons byte1
|
||||||
|
(cons byte2
|
||||||
|
(cons byte3 bytes)))))
|
||||||
|
(else
|
||||||
|
(let* ((bits1 (quotient cp (* 64 64 64)))
|
||||||
|
(bits2 (quotient
|
||||||
|
(remainder cp (* 64 64 64))
|
||||||
|
(* 64 64)))
|
||||||
|
(bits3 (quotient
|
||||||
|
(remainder cp (* 64 64))
|
||||||
|
64))
|
||||||
|
(bits4 (remainder cp 64))
|
||||||
|
(byte1 (+ bits1 #b11110000))
|
||||||
|
(byte2 (+ bits2 #b10000000))
|
||||||
|
(byte3 (+ bits3 #b10000000))
|
||||||
|
(byte4 (+ bits4 #b10000000)))
|
||||||
|
(cons byte1
|
||||||
|
(cons byte2
|
||||||
|
(cons byte3
|
||||||
|
(cons byte4
|
||||||
|
bytes)))))))))
|
||||||
|
(error "text-tabulate: proc returned a non-character"
|
||||||
|
proc len c))))))))
|
||||||
|
|
||||||
|
;;; FIXME: should the fast case do something different
|
||||||
|
;;; if the length of the result is sufficiently small?
|
||||||
|
;;; Probably not: splitting a 100-character text into
|
||||||
|
;;; 100 1-character texts should be fast, and programmers
|
||||||
|
;;; can now use text-copy instead if they're worried about it.
|
||||||
|
;;;
|
||||||
|
;;; subtext is now defined only for texts; use subtextual
|
||||||
|
;;; if the first argument might be a string.
|
||||||
|
|
||||||
|
(define (subtext txt start end)
|
||||||
|
(cond ((and (text? txt)
|
||||||
|
(exact-integer? start)
|
||||||
|
(exact-integer? end)
|
||||||
|
(<= 0 start end))
|
||||||
|
(%subtext txt start end))
|
||||||
|
#; ((string? txt)
|
||||||
|
(%string->text (substring txt start end)))
|
||||||
|
(else
|
||||||
|
(complain 'subtext txt start end))))
|
||||||
|
|
||||||
|
(define (%subtext txt start end)
|
||||||
|
(let* ((k (text.k txt))
|
||||||
|
(chunks (text.chunks txt))
|
||||||
|
(len (length&i0.length k))
|
||||||
|
(i0 (length&i0.i0 k))
|
||||||
|
(i+i0 (+ start i0))
|
||||||
|
(end+i0 (+ end i0))
|
||||||
|
(len+i0 (+ len i0))
|
||||||
|
(jstart (quotient i+i0 N))
|
||||||
|
(jend (quotient end+i0 N))
|
||||||
|
(jlen (quotient len N)))
|
||||||
|
(if (<= end len)
|
||||||
|
(cond ((= start end)
|
||||||
|
the-empty-text)
|
||||||
|
((and (= 0 jstart)
|
||||||
|
(= jlen jend))
|
||||||
|
;; the fast case
|
||||||
|
(%new-text (- end start) i+i0 chunks))
|
||||||
|
(else
|
||||||
|
(let* ((v (make-vector (+ 1 (- jend jstart)))))
|
||||||
|
(do ((j jstart (+ j 1)))
|
||||||
|
((> j jend))
|
||||||
|
(vector-set! v (- j jstart) (vector-ref chunks j)))
|
||||||
|
(%new-text (- end start)
|
||||||
|
(remainder i+i0 N)
|
||||||
|
v))))
|
||||||
|
(error "subtext: end out of range" txt start end))))
|
||||||
|
|
||||||
|
;;; There are a lot of special cases that could be exploited here:
|
||||||
|
;;; share the characters of the longest text
|
||||||
|
;;; share the characters of the longest run of texts
|
||||||
|
;;; whose characters don't have to be copied
|
||||||
|
;;; if (text-length txt1) is a multiple of N,
|
||||||
|
;;; and txt2 starts at offset 0,
|
||||||
|
;;; then txt1 and txt2 can be concatenated
|
||||||
|
;;; without copying any of their characters
|
||||||
|
;;;
|
||||||
|
;;; That's a partial list.
|
||||||
|
;;; It would be easy to spend more time detecting special cases
|
||||||
|
;;; than would be saved on average.
|
||||||
|
;;; In the interest of simplicity and reliability, this code
|
||||||
|
;;; currently implements only two special cases:
|
||||||
|
;;; share the full chunks of the longest text
|
||||||
|
;;; provided
|
||||||
|
;;; it contains at least N characters
|
||||||
|
;;; it contains at least half the characters of the result
|
||||||
|
;;; its characters start at offset zero
|
||||||
|
;;; share the full chunks of the first text
|
||||||
|
;;; provided
|
||||||
|
;;; it contains at least N characters
|
||||||
|
;;; its characters start at offset zero
|
||||||
|
|
||||||
|
(define (textual-concatenate texts)
|
||||||
|
(cond ((not (list? texts))
|
||||||
|
(complain 'textual-concatenate texts))
|
||||||
|
((null? texts) the-empty-text)
|
||||||
|
((null? (cdr texts))
|
||||||
|
(let ((txt (car texts)))
|
||||||
|
(cond ((text? txt) txt)
|
||||||
|
((string? txt)
|
||||||
|
(%string->text txt))
|
||||||
|
(else (complain 'textual-concatenate texts)))))
|
||||||
|
(else
|
||||||
|
(let loop ((items (reverse texts))
|
||||||
|
(real-texts '())
|
||||||
|
(n 0)
|
||||||
|
(longest #f)
|
||||||
|
(longest-length 0))
|
||||||
|
(cond ((null? items)
|
||||||
|
(%text-concatenate-n real-texts n longest longest-length))
|
||||||
|
((text? (car items))
|
||||||
|
(let* ((txt (car items))
|
||||||
|
(k (%text-length txt)))
|
||||||
|
(loop (cdr items)
|
||||||
|
(cons txt real-texts)
|
||||||
|
(+ n k)
|
||||||
|
(if (> k longest-length) txt longest)
|
||||||
|
(max k longest-length))))
|
||||||
|
((string? (car items))
|
||||||
|
(loop (cons (%string->text (car items)) (cdr items))
|
||||||
|
real-texts n longest longest-length))
|
||||||
|
(else
|
||||||
|
(complain 'textual-concatenate texts)))))))
|
||||||
|
|
||||||
|
;;; All of the texts are really texts. No strings.
|
||||||
|
;;; n is the length of the result.
|
||||||
|
;;; longest is #f or the longest of the texts, and
|
||||||
|
;;; longest-length is its length (or zero).
|
||||||
|
|
||||||
|
(define (%text-concatenate-n texts n longest longest-length)
|
||||||
|
(if (and longest
|
||||||
|
(> longest-length N)
|
||||||
|
(< n (+ longest-length longest-length))
|
||||||
|
(= 0 (length&i0.i0 (text.k longest))))
|
||||||
|
(if (eq? longest (car texts))
|
||||||
|
(%%text-concatenate-n texts n)
|
||||||
|
(let loop ((texts texts)
|
||||||
|
(front '())
|
||||||
|
(front-length 0))
|
||||||
|
(cond ((eq? longest (car texts))
|
||||||
|
(%%text-concatenate-front
|
||||||
|
(reverse front)
|
||||||
|
(%%text-concatenate-n texts (- n front-length))
|
||||||
|
front-length
|
||||||
|
n))
|
||||||
|
(else
|
||||||
|
(let ((txt (car texts)))
|
||||||
|
(loop (cdr texts)
|
||||||
|
(cons txt front)
|
||||||
|
(+ front-length (%text-length txt))))))))
|
||||||
|
(%%text-concatenate-n texts n)))
|
||||||
|
|
||||||
|
;;; texts is a non-empty list of texts, with no strings.
|
||||||
|
;;; n is the length of the result.
|
||||||
|
;;;
|
||||||
|
;;; The text returned has a start index of zero.
|
||||||
|
;;;
|
||||||
|
;;; Special case:
|
||||||
|
;;; If the first text has a start index of zero,
|
||||||
|
;;; then its full chunks don't have to be copied.
|
||||||
|
|
||||||
|
(define (%%text-concatenate-n texts n)
|
||||||
|
(if (= 0 n)
|
||||||
|
the-empty-text
|
||||||
|
(let* ((n/N (quotient n N))
|
||||||
|
(m (remainder n N))
|
||||||
|
(nchunks (+ n/N (if (= 0 m) 0 1)))
|
||||||
|
(chunks (make-vector nchunks 'bug-in-text-concatenate))
|
||||||
|
(txt (car texts))
|
||||||
|
(k (text.k txt))
|
||||||
|
(len (length&i0.length k))
|
||||||
|
(i0 (length&i0.i0 k)))
|
||||||
|
(if (and (> len N)
|
||||||
|
(= 0 i0))
|
||||||
|
(let* ((j (quotient len N))
|
||||||
|
(ti (* j N))
|
||||||
|
(chunks0 (text.chunks txt)))
|
||||||
|
(do ((i 0 (+ i 1)))
|
||||||
|
((= i j))
|
||||||
|
(vector-set! chunks i (vector-ref chunks0 i)))
|
||||||
|
(%%text-concatenate-finish n 0 chunks j texts ti))
|
||||||
|
(%%text-concatenate-finish n 0 chunks 0 texts 0)))))
|
||||||
|
|
||||||
|
;;; All of the texts are really texts. No strings.
|
||||||
|
;;; The second argument is a text that starts at offset zero.
|
||||||
|
;;; k is the total length of the texts passed as first argument.
|
||||||
|
;;; n is the length of the result.
|
||||||
|
;;;
|
||||||
|
;;; Returns the texts concatenated with the second argument,
|
||||||
|
;;; without copying any chunks of the second argument.
|
||||||
|
|
||||||
|
(define (%%text-concatenate-front texts txt k n)
|
||||||
|
(let* ((k/N (quotient k N))
|
||||||
|
(mk (remainder k N))
|
||||||
|
(i0 (if (= 0 mk) 0 (- N mk))) ; start offset for result
|
||||||
|
(kchunks (+ k/N (if (= 0 mk) 0 1))) ; number of new chunks
|
||||||
|
(n-k (- n k))
|
||||||
|
(n-k/N (quotient n-k N))
|
||||||
|
(m (remainder n-k N))
|
||||||
|
(nchunks (+ kchunks
|
||||||
|
n-k/N
|
||||||
|
(if (= 0 m) 0 1)))
|
||||||
|
(chunks (make-vector nchunks 'bug-in-text-concatenate))
|
||||||
|
(chunks2 (text.chunks txt)))
|
||||||
|
|
||||||
|
;; copy extant chunks
|
||||||
|
|
||||||
|
(do ((i kchunks (+ i 1)))
|
||||||
|
((= i nchunks))
|
||||||
|
(vector-set! chunks i (vector-ref chunks2 (- i kchunks))))
|
||||||
|
|
||||||
|
(%%text-concatenate-finish n i0 chunks 0 texts 0)))
|
||||||
|
|
||||||
|
;;; Given:
|
||||||
|
;;;
|
||||||
|
;;; n : the length of a text to be created
|
||||||
|
;;; i0 : start offset for the text to be created
|
||||||
|
;;; chunks : the vector of chunks for that new text
|
||||||
|
;;; j : vector index of first uninitialized chunk
|
||||||
|
;;; texts : a non-empty list of texts to be copied into the chunks
|
||||||
|
;;; ti : index of first uncopied character in the first text
|
||||||
|
;;;
|
||||||
|
;;; Creates new chunks as necessary, copies the texts into those chunks
|
||||||
|
;;; and returns a new text.
|
||||||
|
;;; The given texts may not fill the chunks because the chunks
|
||||||
|
;;; of some shared text may already have been copied into some
|
||||||
|
;;; tail of the chunks vector.
|
||||||
|
|
||||||
|
(define (%%text-concatenate-finish n i0 chunks j texts ti)
|
||||||
|
(let loop ((texts (cdr texts))
|
||||||
|
(txt (car texts))
|
||||||
|
(j j) ; index into chunks
|
||||||
|
(k i0) ; index into (vector-ref chunks j)
|
||||||
|
(ti ti) ; index into txt
|
||||||
|
(bytes (make-list i0 0))) ; bytes being collected for next chunk
|
||||||
|
(cond ((= k N)
|
||||||
|
(let ((bv (list->bytevector (reverse bytes))))
|
||||||
|
(vector-set! chunks j bv))
|
||||||
|
(loop texts txt (+ j 1) 0 ti '()))
|
||||||
|
((= ti (%text-length txt))
|
||||||
|
(if (null? texts)
|
||||||
|
(begin (if (not (null? bytes))
|
||||||
|
(let ((bv (list->bytevector (reverse bytes))))
|
||||||
|
(vector-set! chunks j bv)))
|
||||||
|
(%new-text n i0 chunks))
|
||||||
|
(loop (cdr texts) (car texts) j k 0 bytes)))
|
||||||
|
(else
|
||||||
|
(let* ((cp (char->integer (%text-ref txt ti)))
|
||||||
|
(bytes (cond ((< cp #x0080)
|
||||||
|
(cons cp bytes))
|
||||||
|
((< cp #x0800)
|
||||||
|
(let* ((bits1 (quotient cp 64))
|
||||||
|
(bits2 (remainder cp 64))
|
||||||
|
(byte1 (+ bits1 #b11000000))
|
||||||
|
(byte2 (+ bits2 #b10000000)))
|
||||||
|
(cons byte2 (cons byte1 bytes))))
|
||||||
|
((< cp #x10000)
|
||||||
|
(let* ((bits1 (quotient cp (* 64 64)))
|
||||||
|
(bits2 (quotient
|
||||||
|
(remainder cp (* 64 64))
|
||||||
|
64))
|
||||||
|
(bits3 (remainder cp 64))
|
||||||
|
(byte1 (+ bits1 #b11100000))
|
||||||
|
(byte2 (+ bits2 #b10000000))
|
||||||
|
(byte3 (+ bits3 #b10000000)))
|
||||||
|
(cons byte3
|
||||||
|
(cons byte2
|
||||||
|
(cons byte1 bytes)))))
|
||||||
|
(else
|
||||||
|
(let* ((bits1 (quotient cp (* 64 64 64)))
|
||||||
|
(bits2 (quotient
|
||||||
|
(remainder cp (* 64 64 64))
|
||||||
|
(* 64 64)))
|
||||||
|
(bits3 (quotient
|
||||||
|
(remainder cp (* 64 64))
|
||||||
|
64))
|
||||||
|
(bits4 (remainder cp 64))
|
||||||
|
(byte1 (+ bits1 #b11110000))
|
||||||
|
(byte2 (+ bits2 #b10000000))
|
||||||
|
(byte3 (+ bits3 #b10000000))
|
||||||
|
(byte4 (+ bits4 #b10000000)))
|
||||||
|
(cons byte4
|
||||||
|
(cons byte3
|
||||||
|
(cons byte2
|
||||||
|
(cons byte1
|
||||||
|
bytes)))))))))
|
||||||
|
(loop texts txt j (+ k 1) (+ ti 1) bytes))))))
|
56
lib/srfi/135/kernel8.sld
Normal file
56
lib/srfi/135/kernel8.sld
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
;;; Copyright (C) William D Clinger (2016).
|
||||||
|
;;;
|
||||||
|
;;; Permission is hereby granted, free of charge, to any person
|
||||||
|
;;; obtaining a copy of this software and associated documentation
|
||||||
|
;;; files (the "Software"), to deal in the Software without
|
||||||
|
;;; restriction, including without limitation the rights to use,
|
||||||
|
;;; copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
;;; sell copies of the Software, and to permit persons to whom the
|
||||||
|
;;; Software is furnished to do so, subject to the following
|
||||||
|
;;; conditions:
|
||||||
|
;;;
|
||||||
|
;;; The above copyright notice and this permission notice shall be
|
||||||
|
;;; included in all copies or substantial portions of the Software.
|
||||||
|
;;;
|
||||||
|
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
;;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
;;; OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
(define-library (srfi 135 kernel8)
|
||||||
|
|
||||||
|
(export
|
||||||
|
|
||||||
|
;; for internal use only
|
||||||
|
|
||||||
|
complain ; for reporting illegal arguments
|
||||||
|
|
||||||
|
text-rtd ; FIXME: for debugging only
|
||||||
|
%new-text ; FIXME: for debugging only
|
||||||
|
text.k text.chunks ; FIXME: for debugging only
|
||||||
|
|
||||||
|
%text-length ; non-checking version
|
||||||
|
%text-ref ; non-checking version
|
||||||
|
; %subtext ; non-checking version
|
||||||
|
%string->text ; 1-argument version
|
||||||
|
|
||||||
|
N ; preferred text size for pieces of long texts
|
||||||
|
the-empty-text ; there should be only one empty text
|
||||||
|
|
||||||
|
;; will be exported by (srfi 135)
|
||||||
|
|
||||||
|
text?
|
||||||
|
text-tabulate
|
||||||
|
text-length
|
||||||
|
text-ref
|
||||||
|
subtext
|
||||||
|
textual-concatenate
|
||||||
|
)
|
||||||
|
|
||||||
|
(import (scheme base))
|
||||||
|
|
||||||
|
(include "kernel8.body.scm"))
|
3007
lib/srfi/135/test.sld
Normal file
3007
lib/srfi/135/test.sld
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue