mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-19 05:39:18 +02:00
284 lines
11 KiB
Scheme
284 lines
11 KiB
Scheme
|
|
;;> Returns the length in codepoints of the text object.
|
|
(define (text-char-length text)
|
|
(let lp ((text (text-first text)) (sum 0))
|
|
(if text
|
|
(lp (text-next text)
|
|
(+ sum
|
|
(string-length (utf8->string! (text-bytes text)
|
|
(text-start text)
|
|
(text-end text)))))
|
|
sum)))
|
|
|
|
;;> Returns the length the text object would require encoded as UTF-8.
|
|
(define (text-utf8-length text)
|
|
(let lp ((text (text-first text)) (sum 0))
|
|
(if text
|
|
(lp (text-next text) (+ sum (- (text-end text) (text-start text))))
|
|
sum)))
|
|
|
|
(define (text-piece-length text)
|
|
(let lp ((text (text-first text)) (count 0))
|
|
(if text
|
|
(lp (text-next text) (+ count 1))
|
|
count)))
|
|
|
|
(define (text-empty? text)
|
|
(zero? (text-char-length text)))
|
|
|
|
;;> Returns a new text object representing the same codepoints as the string \var{str}.
|
|
(define (string->text str . o)
|
|
(let* ((bv (string->utf8 str))
|
|
(start (if (pair? o) (car o) 0))
|
|
(end (if (and (pair? o) (pair? (cdr o))) (cadr o) (bytevector-length bv))))
|
|
(make-text bv start end #f #f '() #f)))
|
|
|
|
(define (text-string text)
|
|
(utf8->string (text-bytes text) (text-start text) (text-end text)))
|
|
|
|
;;> Returns the utf8 representation of the codepoints in \var{text}.
|
|
(define (text->utf8 text)
|
|
(if (and (not (text-prev text)) (not (text-next text)))
|
|
(bytevector-copy (text-bytes text) (text-start text) (text-end text))
|
|
(let ((out (open-output-bytevector)))
|
|
(let lp ((piece (text-first text)))
|
|
(cond
|
|
((not piece)
|
|
(get-output-bytevector out))
|
|
(else
|
|
(write-bytevector (text-bytes piece) out (text-start piece) (text-end piece))
|
|
(lp (text-next piece))))))))
|
|
|
|
;;> Returns a string representing the same codepoints as \var{text}.
|
|
(define (text->string text)
|
|
(utf8->string (text->utf8 text)))
|
|
|
|
;;> Returns the character \var{mark} points to, or \scheme{#f}
|
|
;;> if mark points to the end of the text.
|
|
(define (text-ref mark)
|
|
(cond
|
|
((< (mark-offset mark) (text-start (mark-text mark)))
|
|
(let lp ((text (text-prev (mark-text mark))))
|
|
(and text
|
|
(if (< (text-start text) (text-end text))
|
|
(utf8-ref (text-bytes text) (- (text-end text) 1))
|
|
(lp (text-prev text))))))
|
|
((>= (mark-offset mark) (text-end (mark-text mark)))
|
|
(let lp ((text (text-next (mark-text mark))))
|
|
(and text
|
|
(if (< (text-start text) (text-end text))
|
|
(utf8-ref (text-bytes text) (text-start text))
|
|
(lp (text-next text))))))
|
|
(else
|
|
(utf8-ref (text-bytes (mark-text mark)) (mark-offset mark)))))
|
|
|
|
(define (text-piece-copy text)
|
|
(let ((res (make-text (bytevector-copy (text-bytes text))
|
|
(text-start text)
|
|
(text-end text)
|
|
(text-prev text)
|
|
(text-next text)
|
|
'()
|
|
(text-source text))))
|
|
(text-marks-set! res
|
|
(map (lambda (mk)
|
|
(make-mark res (mark-offset mk) (mark-data mk)))
|
|
(text-marks res)))
|
|
res))
|
|
|
|
(define (text-copy text)
|
|
(let ((text0 (text-piece-copy (text-first text))))
|
|
(let lp ((text (text-next text0))
|
|
(prev text0))
|
|
(if text
|
|
(let ((text1 (text-piece-copy text)))
|
|
(text-next-set! text0 text1)
|
|
(text-prev-set! text1 text0)
|
|
(lp text1 (text-next text1)))
|
|
text0))))
|
|
|
|
(define (->text obj)
|
|
(cond
|
|
((text? obj) obj)
|
|
((string? obj) (string->text obj))
|
|
((char? obj) (string->text (string obj)))
|
|
(else (error "not a textlike object" obj))))
|
|
|
|
(define (text-append! . ls)
|
|
(text-concatenate! ls))
|
|
|
|
(define (text-append . ls)
|
|
(text-concatenate ls))
|
|
|
|
(define (text-concatenate ls)
|
|
(text-concatenate! (map (lambda (x) (if (text? x) (text-copy x) x)) ls)))
|
|
|
|
(define (text-concatenate! ls)
|
|
(if (null? ls)
|
|
(string->text "")
|
|
(let ((res (->text (car ls))))
|
|
(let lp ((tx (text-last res))
|
|
(ls (cdr ls)))
|
|
(if (null? ls)
|
|
res
|
|
(let* ((tx2 (->text (car ls)))
|
|
(tx2-first (text-first tx2)))
|
|
(text-next-set! tx tx2-first)
|
|
(text-prev-set! tx2-first tx)
|
|
(lp (text-last tx2) (cdr ls))))))))
|
|
|
|
;; inserts a new right piece and returns it
|
|
(define (text-new-right! text . o)
|
|
(let* ((size (if (pair? o) (car o) 256))
|
|
(right (make-text (make-bytevector size) 0 0 text (text-next text) '() #f)))
|
|
(cond ((text-next text) => (lambda (orig-right) (text-prev-set! orig-right right))))
|
|
(text-next-set! text right)
|
|
right))
|
|
|
|
;; splits the text at the given point into two pieces, returning the original
|
|
;; text which becomes the left piece
|
|
(define (text-split! text at . o)
|
|
(receive (text at) (text&mark-at text at)
|
|
(let* ((at-mark (if (mark? at) at (text-mark text at)))
|
|
(at-offset (mark-offset at-mark))
|
|
(size (max (- at-offset (text-start text))
|
|
(if (pair? o) (car o) 64)))
|
|
(right (text-new-right! text size)))
|
|
(bytevector-copy! (text-bytes right)
|
|
0
|
|
(text-bytes text)
|
|
at-offset
|
|
(text-end text))
|
|
(text-end-set! right (- (text-end text) at-offset))
|
|
(text-end-set! text at-offset)
|
|
(receive (left-marks right-marks)
|
|
(partition (lambda (mk) (<= (mark-offset mk) at-offset)) (text-marks text))
|
|
(text-marks-set! text left-marks)
|
|
(text-marks-set! right (map (lambda (mk)
|
|
(mark-text-set! mk right)
|
|
(mark-offset-set! mk (- (mark-offset mk) at-offset))
|
|
mk)
|
|
right-marks))
|
|
text))))
|
|
|
|
;;> Inserts \var{textlike} into the text immediately before
|
|
;;> the point indicated by \var{mark1}, leaving \var{mark1}
|
|
;;> (and all same position marks) after the inserted text.
|
|
;;> Returns \var{mark1}.
|
|
(define (text-insert! text str . o)
|
|
(receive (text at-mark) (text&mark-at text (if (pair? o) (car o) 0))
|
|
(let* ((at-offset (mark-offset at-mark))
|
|
(src (string->utf8 str))
|
|
(size (bytevector-length src))
|
|
(dst (text-bytes text))
|
|
(dst-size (bytevector-length dst)))
|
|
(cond
|
|
((= at-offset (text-end text))
|
|
(let* ((avail-size (- dst-size at-offset))
|
|
(end (min (+ at-offset size) dst-size))
|
|
(copy-size (- end at-offset)))
|
|
;; TODO: Don't insert a partial utf8 char.
|
|
(bytevector-copy! dst at-offset src 0 copy-size)
|
|
(text-end-set! text end)
|
|
;; Advance marks that were pointing to the end of this piece.
|
|
(for-each (lambda (mk)
|
|
(if (>= (mark-offset mk) at-offset)
|
|
(mark-offset-set! mk (+ (mark-offset mk) copy-size))))
|
|
(text-marks text))
|
|
(if (< copy-size size)
|
|
;; TODO: better sizing?
|
|
(let ((right (text-new-right! text (* 2 size)))
|
|
(right-size (- size copy-size)))
|
|
(bytevector-copy! (text-bytes right) 0 src copy-size size)
|
|
(text-end-set! right right-size)))
|
|
text))
|
|
;; ((= at-offset (text-start text))
|
|
;; TODO: insert before start
|
|
;; )
|
|
;; TODO: optimization: use prev buffer if at start of text?
|
|
;; TODO: optimization: shift bytes in place if near end?
|
|
((negative? at-offset)
|
|
(error "bad offset" at-offset))
|
|
(else
|
|
(text-split! text at-mark)
|
|
(text-insert! text str at-mark))))))
|
|
|
|
;; Set the start of text and adjust marks before that to the new start
|
|
;; accordingly.
|
|
(define (text-truncate-left! text new-start)
|
|
(text-start-set! text new-start)
|
|
(text-marks-set! text
|
|
(map (lambda (mk)
|
|
(mark-offset-set! mk (max (mark-offset mk) new-start))
|
|
mk)
|
|
(text-marks text))))
|
|
|
|
;; Set the end of text and adjust marks after that to the new end accordingly.
|
|
(define (text-truncate-right! text new-end)
|
|
(text-end-set! text new-end)
|
|
(text-marks-set! text
|
|
(map (lambda (mk)
|
|
(mark-offset-set! mk (min (mark-offset mk) new-end))
|
|
mk)
|
|
(text-marks text))))
|
|
|
|
;;> Deletes the codepoints between \var{from} (inclusive)
|
|
;;> and \var{to} (exclusive, defaulting to the end of the
|
|
;;> text), leaving the two marks pointing to the same location.
|
|
;;> Returns \var{from}.
|
|
(define (text-delete! text from to)
|
|
(let-values (((from-text from-mark) (text&mark-at text from))
|
|
((to-text to-mark) (text&mark-at text to)))
|
|
(let ((from-at (mark-offset from-mark))
|
|
(to-at (mark-offset to-mark)))
|
|
(cond
|
|
((eq? from-text to-text)
|
|
(let* ((from-text2 (text-split! from-text from-mark))
|
|
(to-text2 (text-next from-text2))
|
|
(to-at2 (mark-offset to-mark)))
|
|
(text-truncate-left! to-text2 to-at2)))
|
|
(else
|
|
(text-truncate-right! from-text from-at)
|
|
(text-truncate-left! to-text to-at)
|
|
;; Erase any pieces in between and point them to the start of to-text.
|
|
(let lp ((text (text-next from-text)) (marks '()))
|
|
(cond
|
|
((and text (not (eq? text to-text)))
|
|
(lp (text-next text) (cons (text-splice! text) marks)))
|
|
(else
|
|
(text-marks-set! to-text
|
|
(append
|
|
(map (lambda (mk)
|
|
(mark-text-set! mk to-text)
|
|
(mark-offset-set! mk to-at)
|
|
mk)
|
|
(concatenate (reverse marks)))
|
|
(text-marks to-text))))))))
|
|
)))
|
|
|
|
(define (text-piece-empty? text)
|
|
(= (text-start text) (text-end text)))
|
|
|
|
;; returns a reference to a new start of text with empty pieces removed
|
|
(define (text-clean! text)
|
|
(define (text-first-non-empty! text)
|
|
(let lp ((tx (text-first text)))
|
|
(cond
|
|
((and (text-piece-empty? tx) (text-next tx))
|
|
(let ((next (text-next tx)))
|
|
(text-next-set! tx #f)
|
|
(text-prev-set! next #f)
|
|
(lp next)))
|
|
(else
|
|
tx))))
|
|
(let ((tx-first (text-first-non-empty! text)))
|
|
(let lp ((prev tx-first) (tx (text-next tx-first)))
|
|
(cond
|
|
((not tx)
|
|
tx-first)
|
|
((text-piece-empty? tx)
|
|
(text-next-set! prev (text-next tx))
|
|
(text-prev-set! (text-next tx) prev)
|
|
(lp prev (text-next tx)))
|
|
(else
|
|
(lp tx (text-next tx)))))))
|