chibi-scheme/lib/srfi/135/kernel8.body.scm
2018-01-16 01:14:40 +09:00

518 lines
20 KiB
Scheme

;;; Copyright (C) William D Clinger (2016).
;;;
;;; Permission is hereby granted, free of charge, to any person
;;; obtaining a copy of this software and associated documentation
;;; files (the "Software"), to deal in the Software without
;;; restriction, including without limitation the rights to use,
;;; copy, modify, merge, publish, distribute, sublicense, and/or
;;; sell copies of the Software, and to permit persons to whom the
;;; Software is furnished to do so, subject to the following
;;; conditions:
;;;
;;; The above copyright notice and this permission notice shall be
;;; included in all copies or substantial portions of the Software.
;;;
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
;;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
;;; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
;;; OTHER DEALINGS IN THE SOFTWARE.
;;; FIXME: these utilities should be in a separate file
(define (complain name . args)
(apply error
(string-append (symbol->string name) ": illegal arguments")
args))
(define (list->bytevector bytes)
(let* ((n (length bytes))
(bv (make-bytevector n)))
(do ((i 0 (+ i 1))
(bytes bytes (cdr bytes)))
((= i n))
(bytevector-u8-set! bv i (car bytes)))
bv))
;;; 1-argument version for internal use
(define (%string->text s)
(if (string? s)
(text-tabulate (lambda (i) (string-ref s i))
(string-length s))
(complain 'string->text s)))
;;; A portable implementation can't rely on inlining,
;;; but it can rely on macros.
(define N 128)
(define (length&i0 len i0)
(+ (* N len) i0))
#;
(define (length&i0.length k)
(quotient k N))
#;
(define (length&i0.i0 k)
(remainder k N))
(define-syntax length&i0.length
(syntax-rules ()
((_ k)
(quotient k N))))
(define-syntax length&i0.i0
(syntax-rules ()
((_ k)
(remainder k N))))
(define-record-type text-rtd
(new-text0 k chunks)
text?
(k text.k)
(chunks text.chunks))
(define (%new-text len i0 chunks)
(new-text0 (length&i0 len i0) chunks))
(define the-empty-text
(%new-text 0 0 (vector (make-bytevector 0))))
;;; text? is defined by the record definition above.
(define (text-length txt)
(if (text? txt)
(length&i0.length (text.k txt))
(error "text-length: not a text" txt)))
(define (text-ref txt i)
(if (and (text? txt)
(exact-integer? i)
(<= 0 i))
(let* ((k (text.k txt))
(chunks (text.chunks txt))
(len (length&i0.length k))
(i0 (length&i0.i0 k))
(i+i0 (+ i i0))
(j (quotient i+i0 N))
(ii (remainder i+i0 N)))
(if (< i len)
(let* ((sj (vector-ref chunks j))
(sjn (bytevector-length sj)))
(if (if (< j (- (vector-length chunks) 1))
(= sjn N)
(= sjn (remainder (+ i0 len) N)))
(integer->char (bytevector-u8-ref sj ii))
(%utf8-ref sj ii)))
(error "text-ref: index out of range" txt i)))
(error "text-ref: illegal arguments" txt i)))
;;; Non-checking versions for internal use.
(define (%text-length txt)
(length&i0.length (text.k txt)))
(define (%text-ref txt i)
(let* ((k (text.k txt))
(chunks (text.chunks txt))
(len (length&i0.length k))
(i0 (length&i0.i0 k))
(i+i0 (+ i i0))
(j (quotient i+i0 N))
(ii (remainder i+i0 N))
(sj (vector-ref chunks j))
(sjn (bytevector-length sj)))
(if (if (< j (- (vector-length chunks) 1))
(= sjn N)
(= sjn (remainder (+ i0 len) N)))
(integer->char (bytevector-u8-ref sj ii))
(%utf8-ref sj ii))))
;;; Returns character i of the UTF-8.
(define (%utf8-ref bv i)
(let loop ((j 0) ; character index of (bytevector-u8-ref bv k)
(k 0)) ; byte index into bv
(if (= i j)
(%utf8-char-at bv k)
(let ((byte (bytevector-u8-ref bv k)))
(cond ((< byte 128)
(loop (+ j 1) (+ k 1)))
((< byte #b11100000)
(loop (+ j 1) (+ k 2)))
((< byte #b11110000)
(loop (+ j 1) (+ k 3)))
(else
(loop (+ j 1) (+ k 4))))))))
(define (%utf8-char-at bv k)
(let ((byte (bytevector-u8-ref bv k)))
(cond ((< byte 128)
(integer->char byte))
((< byte #b11100000)
(let* ((byte2 (bytevector-u8-ref bv (+ k 1)))
(bits1 (- byte #b11000000))
(bits2 (- byte2 #b10000000))
(cp (+ (* 64 bits1) bits2)))
(integer->char cp)))
((< byte #b11110000)
(let* ((byte2 (bytevector-u8-ref bv (+ k 1)))
(byte3 (bytevector-u8-ref bv (+ k 2)))
(bits1 (- byte #b11100000))
(bits2 (- byte2 #b10000000))
(bits3 (- byte3 #b10000000))
(cp (+ (* 64 64 bits1) (* 64 bits2) bits3)))
(integer->char cp)))
(else
(let* ((byte2 (bytevector-u8-ref bv (+ k 1)))
(byte3 (bytevector-u8-ref bv (+ k 2)))
(byte4 (bytevector-u8-ref bv (+ k 3)))
(bits1 (- byte #b11110000))
(bits2 (- byte2 #b10000000))
(bits3 (- byte3 #b10000000))
(bits4 (- byte4 #b10000000))
(cp (+ (* 64 64 64 bits1)
(* 64 64 bits2)
(* 64 bits3)
bits4)))
(integer->char cp))))))
;;; text-tabulate avoids side effects (in case proc returns more than once)
(define (text-tabulate proc len)
(if (= 0 len)
the-empty-text
(let loop ((i len) ; highest index that's been tabulated
(chunks '())
(bytes '()))
(cond ((= 0 i)
(%new-text len
0
(list->vector
(cons (list->bytevector bytes)
chunks))))
((and (= 0 (remainder i N))
(not (null? bytes)))
(loop i
(cons (list->bytevector bytes) chunks)
'()))
(else
(let* ((i-1 (- i 1))
(c (proc i-1)))
(if (char? c)
(let ((cp (char->integer c)))
(loop i-1
chunks
(cond ((< cp #x0080)
(cons cp bytes))
((< cp #x0800)
(let* ((bits1 (quotient cp 64))
(bits2 (remainder cp 64))
(byte1 (+ bits1 #b11000000))
(byte2 (+ bits2 #b10000000)))
(cons byte1 (cons byte2 bytes))))
((< cp #x10000)
(let* ((bits1 (quotient cp (* 64 64)))
(bits2 (quotient
(remainder cp (* 64 64))
64))
(bits3 (remainder cp 64))
(byte1 (+ bits1 #b11100000))
(byte2 (+ bits2 #b10000000))
(byte3 (+ bits3 #b10000000)))
(cons byte1
(cons byte2
(cons byte3 bytes)))))
(else
(let* ((bits1 (quotient cp (* 64 64 64)))
(bits2 (quotient
(remainder cp (* 64 64 64))
(* 64 64)))
(bits3 (quotient
(remainder cp (* 64 64))
64))
(bits4 (remainder cp 64))
(byte1 (+ bits1 #b11110000))
(byte2 (+ bits2 #b10000000))
(byte3 (+ bits3 #b10000000))
(byte4 (+ bits4 #b10000000)))
(cons byte1
(cons byte2
(cons byte3
(cons byte4
bytes)))))))))
(error "text-tabulate: proc returned a non-character"
proc len c))))))))
;;; FIXME: should the fast case do something different
;;; if the length of the result is sufficiently small?
;;; Probably not: splitting a 100-character text into
;;; 100 1-character texts should be fast, and programmers
;;; can now use text-copy instead if they're worried about it.
;;;
;;; subtext is now defined only for texts; use subtextual
;;; if the first argument might be a string.
(define (subtext txt start end)
(cond ((and (text? txt)
(exact-integer? start)
(exact-integer? end)
(<= 0 start end))
(%subtext txt start end))
#; ((string? txt)
(%string->text (substring txt start end)))
(else
(complain 'subtext txt start end))))
(define (%subtext txt start end)
(let* ((k (text.k txt))
(chunks (text.chunks txt))
(len (length&i0.length k))
(i0 (length&i0.i0 k))
(i+i0 (+ start i0))
(end+i0 (+ end i0))
(len+i0 (+ len i0))
(jstart (quotient i+i0 N))
(jend (quotient end+i0 N))
(jlen (quotient len N)))
(if (<= end len)
(cond ((= start end)
the-empty-text)
((and (= 0 jstart)
(= jlen jend))
;; the fast case
(%new-text (- end start) i+i0 chunks))
(else
(let* ((v (make-vector (+ 1 (- jend jstart)))))
(do ((j jstart (+ j 1)))
((> j jend))
(vector-set! v (- j jstart) (vector-ref chunks j)))
(%new-text (- end start)
(remainder i+i0 N)
v))))
(error "subtext: end out of range" txt start end))))
;;; There are a lot of special cases that could be exploited here:
;;; share the characters of the longest text
;;; share the characters of the longest run of texts
;;; whose characters don't have to be copied
;;; if (text-length txt1) is a multiple of N,
;;; and txt2 starts at offset 0,
;;; then txt1 and txt2 can be concatenated
;;; without copying any of their characters
;;;
;;; That's a partial list.
;;; It would be easy to spend more time detecting special cases
;;; than would be saved on average.
;;; In the interest of simplicity and reliability, this code
;;; currently implements only two special cases:
;;; share the full chunks of the longest text
;;; provided
;;; it contains at least N characters
;;; it contains at least half the characters of the result
;;; its characters start at offset zero
;;; share the full chunks of the first text
;;; provided
;;; it contains at least N characters
;;; its characters start at offset zero
(define (textual-concatenate texts)
(cond ((not (list? texts))
(complain 'textual-concatenate texts))
((null? texts) the-empty-text)
((null? (cdr texts))
(let ((txt (car texts)))
(cond ((text? txt) txt)
((string? txt)
(%string->text txt))
(else (complain 'textual-concatenate texts)))))
(else
(let loop ((items (reverse texts))
(real-texts '())
(n 0)
(longest #f)
(longest-length 0))
(cond ((null? items)
(%text-concatenate-n real-texts n longest longest-length))
((text? (car items))
(let* ((txt (car items))
(k (%text-length txt)))
(loop (cdr items)
(cons txt real-texts)
(+ n k)
(if (> k longest-length) txt longest)
(max k longest-length))))
((string? (car items))
(loop (cons (%string->text (car items)) (cdr items))
real-texts n longest longest-length))
(else
(complain 'textual-concatenate texts)))))))
;;; All of the texts are really texts. No strings.
;;; n is the length of the result.
;;; longest is #f or the longest of the texts, and
;;; longest-length is its length (or zero).
(define (%text-concatenate-n texts n longest longest-length)
(if (and longest
(> longest-length N)
(< n (+ longest-length longest-length))
(= 0 (length&i0.i0 (text.k longest))))
(if (eq? longest (car texts))
(%%text-concatenate-n texts n)
(let loop ((texts texts)
(front '())
(front-length 0))
(cond ((eq? longest (car texts))
(%%text-concatenate-front
(reverse front)
(%%text-concatenate-n texts (- n front-length))
front-length
n))
(else
(let ((txt (car texts)))
(loop (cdr texts)
(cons txt front)
(+ front-length (%text-length txt))))))))
(%%text-concatenate-n texts n)))
;;; texts is a non-empty list of texts, with no strings.
;;; n is the length of the result.
;;;
;;; The text returned has a start index of zero.
;;;
;;; Special case:
;;; If the first text has a start index of zero,
;;; then its full chunks don't have to be copied.
(define (%%text-concatenate-n texts n)
(if (= 0 n)
the-empty-text
(let* ((n/N (quotient n N))
(m (remainder n N))
(nchunks (+ n/N (if (= 0 m) 0 1)))
(chunks (make-vector nchunks 'bug-in-text-concatenate))
(txt (car texts))
(k (text.k txt))
(len (length&i0.length k))
(i0 (length&i0.i0 k)))
(if (and (> len N)
(= 0 i0))
(let* ((j (quotient len N))
(ti (* j N))
(chunks0 (text.chunks txt)))
(do ((i 0 (+ i 1)))
((= i j))
(vector-set! chunks i (vector-ref chunks0 i)))
(%%text-concatenate-finish n 0 chunks j texts ti))
(%%text-concatenate-finish n 0 chunks 0 texts 0)))))
;;; All of the texts are really texts. No strings.
;;; The second argument is a text that starts at offset zero.
;;; k is the total length of the texts passed as first argument.
;;; n is the length of the result.
;;;
;;; Returns the texts concatenated with the second argument,
;;; without copying any chunks of the second argument.
(define (%%text-concatenate-front texts txt k n)
(let* ((k/N (quotient k N))
(mk (remainder k N))
(i0 (if (= 0 mk) 0 (- N mk))) ; start offset for result
(kchunks (+ k/N (if (= 0 mk) 0 1))) ; number of new chunks
(n-k (- n k))
(n-k/N (quotient n-k N))
(m (remainder n-k N))
(nchunks (+ kchunks
n-k/N
(if (= 0 m) 0 1)))
(chunks (make-vector nchunks 'bug-in-text-concatenate))
(chunks2 (text.chunks txt)))
;; copy extant chunks
(do ((i kchunks (+ i 1)))
((= i nchunks))
(vector-set! chunks i (vector-ref chunks2 (- i kchunks))))
(%%text-concatenate-finish n i0 chunks 0 texts 0)))
;;; Given:
;;;
;;; n : the length of a text to be created
;;; i0 : start offset for the text to be created
;;; chunks : the vector of chunks for that new text
;;; j : vector index of first uninitialized chunk
;;; texts : a non-empty list of texts to be copied into the chunks
;;; ti : index of first uncopied character in the first text
;;;
;;; Creates new chunks as necessary, copies the texts into those chunks
;;; and returns a new text.
;;; The given texts may not fill the chunks because the chunks
;;; of some shared text may already have been copied into some
;;; tail of the chunks vector.
(define (%%text-concatenate-finish n i0 chunks j texts ti)
(let loop ((texts (cdr texts))
(txt (car texts))
(j j) ; index into chunks
(k i0) ; index into (vector-ref chunks j)
(ti ti) ; index into txt
(bytes (make-list i0 0))) ; bytes being collected for next chunk
(cond ((= k N)
(let ((bv (list->bytevector (reverse bytes))))
(vector-set! chunks j bv))
(loop texts txt (+ j 1) 0 ti '()))
((= ti (%text-length txt))
(if (null? texts)
(begin (if (not (null? bytes))
(let ((bv (list->bytevector (reverse bytes))))
(vector-set! chunks j bv)))
(%new-text n i0 chunks))
(loop (cdr texts) (car texts) j k 0 bytes)))
(else
(let* ((cp (char->integer (%text-ref txt ti)))
(bytes (cond ((< cp #x0080)
(cons cp bytes))
((< cp #x0800)
(let* ((bits1 (quotient cp 64))
(bits2 (remainder cp 64))
(byte1 (+ bits1 #b11000000))
(byte2 (+ bits2 #b10000000)))
(cons byte2 (cons byte1 bytes))))
((< cp #x10000)
(let* ((bits1 (quotient cp (* 64 64)))
(bits2 (quotient
(remainder cp (* 64 64))
64))
(bits3 (remainder cp 64))
(byte1 (+ bits1 #b11100000))
(byte2 (+ bits2 #b10000000))
(byte3 (+ bits3 #b10000000)))
(cons byte3
(cons byte2
(cons byte1 bytes)))))
(else
(let* ((bits1 (quotient cp (* 64 64 64)))
(bits2 (quotient
(remainder cp (* 64 64 64))
(* 64 64)))
(bits3 (quotient
(remainder cp (* 64 64))
64))
(bits4 (remainder cp 64))
(byte1 (+ bits1 #b11110000))
(byte2 (+ bits2 #b10000000))
(byte3 (+ bits3 #b10000000))
(byte4 (+ bits4 #b10000000)))
(cons byte4
(cons byte3
(cons byte2
(cons byte1
bytes)))))))))
(loop texts txt j (+ k 1) (+ ti 1) bytes))))))