mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-19 13:49:17 +02:00
Adding initial text type.
This commit is contained in:
parent
c4611cc33f
commit
f25329b5aa
12 changed files with 819 additions and 0 deletions
129
lib/chibi/text-test.sld
Normal file
129
lib/chibi/text-test.sld
Normal file
|
@ -0,0 +1,129 @@
|
||||||
|
|
||||||
|
(define-library (chibi text-test)
|
||||||
|
(import (scheme base) (scheme write) (chibi test) (chibi text))
|
||||||
|
(export run-tests)
|
||||||
|
(begin
|
||||||
|
(define (run-tests)
|
||||||
|
(test-begin "(chibi text)")
|
||||||
|
(test-assert (text? (string->text "")))
|
||||||
|
(test 0 (text-char-length (string->text "")))
|
||||||
|
(test 3 (text-char-length (string->text "日本語")))
|
||||||
|
(test 0 (text-utf8-length (string->text "")))
|
||||||
|
(test 9 (text-utf8-length (string->text "日本語")))
|
||||||
|
(test "" (text->string (string->text "")))
|
||||||
|
(test "日本語" (text->string (string->text "日本語")))
|
||||||
|
(let ((tx (string->text "日本語")))
|
||||||
|
(text-insert! tx "!" 0)
|
||||||
|
(test "!日本語" (text->string tx))
|
||||||
|
(text-insert! tx "!" 2)
|
||||||
|
(test "!日!本語" (text->string tx))
|
||||||
|
(text-insert! tx "!" 4)
|
||||||
|
(test "!日!本!語" (text->string tx))
|
||||||
|
(text-insert! tx "!" 6)
|
||||||
|
(test "!日!本!語!" (text->string tx)))
|
||||||
|
(let ((tx (string->text "abc")))
|
||||||
|
(text-insert! tx "あ" 0)
|
||||||
|
(test "あabc" (text->string tx))
|
||||||
|
(text-insert! tx "い" 2)
|
||||||
|
(test "あaいbc" (text->string tx))
|
||||||
|
(text-insert! tx "う" 4)
|
||||||
|
(test "あaいbうc" (text->string tx))
|
||||||
|
(text-insert! tx "え" 6)
|
||||||
|
(test "あaいbうcえ" (text->string tx)))
|
||||||
|
(let* ((tx (string->text "0123456789"))
|
||||||
|
(mk (text-mark! tx 5)))
|
||||||
|
(test #\5 (text-ref mk))
|
||||||
|
(text-insert! tx "abc" 1)
|
||||||
|
(text-insert! tx "xyz" mk)
|
||||||
|
(test #\5 (text-ref mk))
|
||||||
|
(test "0abc1234xyz56789" (text->string tx)))
|
||||||
|
(let* ((tx (string->text "零一二三四五六七八九"))
|
||||||
|
(mk (text-mark! tx 5)))
|
||||||
|
(text-insert! tx "あいう" 1)
|
||||||
|
(text-insert! tx "xyz" mk)
|
||||||
|
(text-insert! tx "かきく" mk)
|
||||||
|
(test "零あいう一二三四xyzかきく五六七八九" (text->string tx)))
|
||||||
|
(let* ((tx (string->text "0123456789"))
|
||||||
|
(mk (text-mark! tx 5)))
|
||||||
|
(text-insert! tx "abc" mk)
|
||||||
|
(text-insert! tx "xyz" mk)
|
||||||
|
(test "01234abcxyz56789" (text->string tx)))
|
||||||
|
(let* ((tx (string->text "0123456789"))
|
||||||
|
(mk1 (text-mark! tx 5))
|
||||||
|
(mk2 (text-mark! tx 8))
|
||||||
|
(mk3 (text-mark! tx 6)))
|
||||||
|
(text-delete! tx mk1 mk2)
|
||||||
|
(test "0123489" (text->string tx))
|
||||||
|
;; The current order of mk2/mk3 is unspecified so we insert the same
|
||||||
|
;; value here/
|
||||||
|
(text-insert! tx "def" mk2)
|
||||||
|
(text-insert! tx "abc" mk1)
|
||||||
|
(text-insert! tx "def" mk3)
|
||||||
|
(test "01234abcdefdef89" (text->string tx)))
|
||||||
|
(let* ((tx (string->text "0123456789"))
|
||||||
|
(mk (text-mark! tx 5)))
|
||||||
|
(text-insert! tx (make-string 512 #\x) mk)
|
||||||
|
(test (string-append "01234" (make-string 512 #\x) "56789")
|
||||||
|
(text->string tx))
|
||||||
|
(let ((mk2 (text-mark! tx 512)))
|
||||||
|
(text-delete! tx 5 517)
|
||||||
|
(test "0123456789" (text->string tx))))
|
||||||
|
(let* ((tx (string->text "a一二三bc"))
|
||||||
|
(mk (text-mark! tx 0)))
|
||||||
|
(test #\a (text-forward-char! mk))
|
||||||
|
(test #\一 (text-forward-char! mk))
|
||||||
|
(test #\二 (text-forward-char! mk))
|
||||||
|
(test #\三 (text-forward-char! mk))
|
||||||
|
(test #\b (text-forward-char! mk))
|
||||||
|
(test #\c (text-forward-char! mk))
|
||||||
|
(test #f (text-forward-char! mk))
|
||||||
|
(test #\c (text-backward-char! mk))
|
||||||
|
(test #\b (text-backward-char! mk))
|
||||||
|
(test #\三 (text-backward-char! mk))
|
||||||
|
(test #\二 (text-backward-char! mk))
|
||||||
|
(test #\一 (text-backward-char! mk))
|
||||||
|
(test #\a (text-backward-char! mk))
|
||||||
|
(test #f (text-backward-char! mk)))
|
||||||
|
(let* ((tx (string->text "abc, (一二三): def"))
|
||||||
|
(mk (text-mark! tx 0)))
|
||||||
|
(test 3 (mark-offset (text-forward-word! mk)))
|
||||||
|
(test 15 (mark-offset (text-forward-word! mk)))
|
||||||
|
(test 21 (mark-offset (text-forward-word! mk)))
|
||||||
|
(test #f (text-forward-word! mk))
|
||||||
|
(test 21 (mark-offset mk))
|
||||||
|
(test 18 (mark-offset (text-backward-word! mk)))
|
||||||
|
(test 6 (mark-offset (text-backward-word! mk)))
|
||||||
|
(test 0 (mark-offset (text-backward-word! mk)))
|
||||||
|
(test #f (text-backward-word! mk))
|
||||||
|
)
|
||||||
|
(let* ((tx (string->text "0123456789\nabcdef\n零一二三四五六七八九"))
|
||||||
|
(mk (text-mark! tx 0)))
|
||||||
|
(test #t (text-beginning-of-line? mk))
|
||||||
|
(test #f (text-end-of-line? mk))
|
||||||
|
(test 10 (mark-offset (text-end-of-line! mk)))
|
||||||
|
(test #\newline (text-ref mk))
|
||||||
|
(test #f (text-beginning-of-line? mk))
|
||||||
|
(test #t (text-end-of-line? mk))
|
||||||
|
(text-forward-char! mk)
|
||||||
|
(text-forward-char! mk)
|
||||||
|
(text-forward-char! mk)
|
||||||
|
(text-forward-line! mk)
|
||||||
|
(test #\二 (text-ref mk))
|
||||||
|
(text-forward-char! mk)
|
||||||
|
(text-forward-char! mk)
|
||||||
|
(text-forward-char! mk)
|
||||||
|
(text-forward-char! mk)
|
||||||
|
(test #\六 (text-ref mk))
|
||||||
|
(text-backward-line! mk)
|
||||||
|
(test #\newline (text-ref mk))
|
||||||
|
(text-backward-line! mk)
|
||||||
|
(test #\6 (text-ref mk))
|
||||||
|
)
|
||||||
|
(let* ((tx (string->text "0123456789\nabcdef\n零一二三四五六七八九"))
|
||||||
|
(mk (text-mark! tx 0)))
|
||||||
|
(text-search! mk "一二三")
|
||||||
|
(test 30 (mark-offset mk))
|
||||||
|
(text-insert! tx "..." mk)
|
||||||
|
(test "0123456789\nabcdef\n零一二三...四五六七八九"
|
||||||
|
(text->string (mark-text mk))))
|
||||||
|
(test-end))))
|
26
lib/chibi/text.sld
Normal file
26
lib/chibi/text.sld
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
|
||||||
|
(define-library (chibi text)
|
||||||
|
(import (scheme base)
|
||||||
|
(chibi text base)
|
||||||
|
(chibi text search)
|
||||||
|
(chibi text types)
|
||||||
|
(chibi text utf8))
|
||||||
|
(export
|
||||||
|
make-text text? text-empty?
|
||||||
|
text-beginning-of-line? text-end-of-line?
|
||||||
|
text-char-length text-utf8-length text-piece-length
|
||||||
|
string->text text->string text->utf8
|
||||||
|
text-append text-append!
|
||||||
|
text-concatenate text-concatenate!
|
||||||
|
text-ref text-copy text-current-column
|
||||||
|
text-insert! text-delete!
|
||||||
|
text-mark text-mark!
|
||||||
|
text-forward-char! text-backward-char!
|
||||||
|
text-forward-word! text-backward-word!
|
||||||
|
text-beginning-of-line? text-end-of-line?
|
||||||
|
text-beginning-of-line! text-end-of-line!
|
||||||
|
text-forward-line! text-backward-line!
|
||||||
|
text-search!
|
||||||
|
mark-text mark-offset mark-copy
|
||||||
|
mark-anchor! mark-release!
|
||||||
|
))
|
288
lib/chibi/text/base.scm
Normal file
288
lib/chibi/text/base.scm
Normal file
|
@ -0,0 +1,288 @@
|
||||||
|
|
||||||
|
;;> Returns the length in codepoints of the text object.
|
||||||
|
(define (text-char-length text)
|
||||||
|
(let lp ((text (text-first text)) (sum 0))
|
||||||
|
(if text
|
||||||
|
(lp (text-next text)
|
||||||
|
(+ sum
|
||||||
|
(string-length (utf8->string! (text-source text)
|
||||||
|
(text-start text)
|
||||||
|
(text-end text)))))
|
||||||
|
sum)))
|
||||||
|
|
||||||
|
;;> Returns the length the text object would require encoded as UTF-8.
|
||||||
|
(define (text-utf8-length text)
|
||||||
|
(let lp ((text (text-first text)) (sum 0))
|
||||||
|
(if text
|
||||||
|
(lp (text-next text) (+ sum (- (text-end text) (text-start text))))
|
||||||
|
sum)))
|
||||||
|
|
||||||
|
(define (text-piece-length text)
|
||||||
|
(let lp ((text (text-first text)) (count 0))
|
||||||
|
(if text
|
||||||
|
(lp (text-next text) (+ count 1))
|
||||||
|
count)))
|
||||||
|
|
||||||
|
(define (text-empty? text)
|
||||||
|
(zero? (text-char-length text)))
|
||||||
|
|
||||||
|
;;> Returns a new text object representing the same codepoints as the string \var{str}.
|
||||||
|
(define (string->text str . o)
|
||||||
|
(let* ((bv (string->utf8 str))
|
||||||
|
(start (if (pair? o) (car o) 0))
|
||||||
|
(end (if (and (pair? o) (pair? (cdr o))) (cadr o) (bytevector-length bv))))
|
||||||
|
(make-text bv start end #f #f '() #f)))
|
||||||
|
|
||||||
|
(define (text-string text)
|
||||||
|
(utf8->string (text-source text) (text-start text) (text-end text)))
|
||||||
|
|
||||||
|
;;> Returns the utf8 representation of the codepoints in \var{text}.
|
||||||
|
(define (text->utf8 text)
|
||||||
|
(if (and (not (text-prev text)) (not (text-next text)))
|
||||||
|
(bytevector-copy (text-source text) (text-start text) (text-end text))
|
||||||
|
(let ((out (open-output-bytevector)))
|
||||||
|
(let lp ((piece (text-first text)))
|
||||||
|
(cond
|
||||||
|
((not piece)
|
||||||
|
(get-output-bytevector out))
|
||||||
|
(else
|
||||||
|
(write-bytevector (text-source piece) out (text-start piece) (text-end piece))
|
||||||
|
(lp (text-next piece))))))))
|
||||||
|
|
||||||
|
;;> Returns a string representing the same codepoints as \var{text}.
|
||||||
|
(define (text->string text)
|
||||||
|
(utf8->string (text->utf8 text)))
|
||||||
|
|
||||||
|
;;> Returns the character \var{mark} points to, or \scheme{#f}
|
||||||
|
;;> if mark points to the end of the text.
|
||||||
|
(define (text-ref mark)
|
||||||
|
(cond
|
||||||
|
((< (mark-offset mark) (text-start (mark-text mark)))
|
||||||
|
(let lp ((text (text-prev (mark-text mark))))
|
||||||
|
(and text
|
||||||
|
(if (< (text-start text) (text-end text))
|
||||||
|
(utf8-ref (text-source text) (- (text-end text) 1))
|
||||||
|
(lp (text-prev text))))))
|
||||||
|
((>= (mark-offset mark) (text-end (mark-text mark)))
|
||||||
|
(let lp ((text (text-next (mark-text mark))))
|
||||||
|
(and text
|
||||||
|
(if (< (text-start text) (text-end text))
|
||||||
|
(utf8-ref (text-source text) (text-start text))
|
||||||
|
(lp (text-next text))))))
|
||||||
|
(else
|
||||||
|
(utf8-ref (text-source (mark-text mark)) (mark-offset mark)))))
|
||||||
|
|
||||||
|
(define (text-piece-copy text)
|
||||||
|
(let ((res (make-text (bytevector-copy (text-source text))
|
||||||
|
(text-start text)
|
||||||
|
(text-end text)
|
||||||
|
(text-prev text)
|
||||||
|
(text-next text)
|
||||||
|
'()
|
||||||
|
(text-data text))))
|
||||||
|
(text-marks-set! res
|
||||||
|
(map (lambda (mk)
|
||||||
|
(make-mark res (mark-offset mk) (mark-data mk)))
|
||||||
|
(text-marks res)))
|
||||||
|
res))
|
||||||
|
|
||||||
|
(define (text-copy text)
|
||||||
|
(let ((text0 (text-piece-copy (text-first text))))
|
||||||
|
(let lp ((text (text-next text0))
|
||||||
|
(prev text0))
|
||||||
|
(if text
|
||||||
|
(let ((text1 (text-piece-copy text)))
|
||||||
|
(text-next-set! text0 text1)
|
||||||
|
(text-prev-set! text1 text0)
|
||||||
|
(lp text1 (text-next text1)))
|
||||||
|
text0))))
|
||||||
|
|
||||||
|
(define (->text obj)
|
||||||
|
(cond
|
||||||
|
((text? obj) obj)
|
||||||
|
((string? obj) (string->text obj))
|
||||||
|
((char? obj) (string->text (string obj)))
|
||||||
|
(else (error "not a textlike object" obj))))
|
||||||
|
|
||||||
|
(define (text-append! . ls)
|
||||||
|
(text-concatenate! ls))
|
||||||
|
|
||||||
|
(define (text-append . ls)
|
||||||
|
(text-concatenate ls))
|
||||||
|
|
||||||
|
(define (text-concatenate ls)
|
||||||
|
(text-concatenate! (map (lambda (x) (if (text? x) (text-copy x) x)) ls)))
|
||||||
|
|
||||||
|
(define (text-concatenate! ls)
|
||||||
|
(if (null? ls)
|
||||||
|
(string->text "")
|
||||||
|
(let ((res (->text (car ls))))
|
||||||
|
(let lp ((tx (text-last res))
|
||||||
|
(ls (cdr ls)))
|
||||||
|
(if (null? ls)
|
||||||
|
res
|
||||||
|
(let* ((tx2 (->text (car ls)))
|
||||||
|
(tx2-first (text-first tx2)))
|
||||||
|
(text-next-set! tx tx2-first)
|
||||||
|
(text-prev-set! tx2-first tx)
|
||||||
|
(lp (text-last tx2) (cdr ls))))))))
|
||||||
|
|
||||||
|
;; inserts a new right piece and returns it
|
||||||
|
(define (text-new-right! text . o)
|
||||||
|
(let* ((size (if (pair? o) (car o) 256))
|
||||||
|
(right (make-text (make-bytevector size) 0 0 text (text-next text) '() #f)))
|
||||||
|
(cond ((text-next text) => (lambda (orig-right) (text-prev-set! orig-right right))))
|
||||||
|
(text-next-set! text right)
|
||||||
|
right))
|
||||||
|
|
||||||
|
;; splits the text at the given point into two pieces, returning the original
|
||||||
|
;; text which becomes the left piece
|
||||||
|
(define (text-split! text at . o)
|
||||||
|
(receive (text at) (text&mark-at text at)
|
||||||
|
(let* ((at-mark (if (mark? at) at (text-mark text at)))
|
||||||
|
(at-offset (mark-offset at-mark))
|
||||||
|
(size (max (- at-offset (text-start text))
|
||||||
|
(if (pair? o) (car o) 64)))
|
||||||
|
(right (text-new-right! text size)))
|
||||||
|
(bytevector-copy! (text-source right)
|
||||||
|
0
|
||||||
|
(text-source text)
|
||||||
|
at-offset
|
||||||
|
(text-end text))
|
||||||
|
(text-end-set! right (- (text-end text) at-offset))
|
||||||
|
(text-end-set! text at-offset)
|
||||||
|
(receive (left-marks right-marks)
|
||||||
|
(partition (lambda (mk) (<= (mark-offset mk) at-offset)) (text-marks text))
|
||||||
|
(text-marks-set! text left-marks)
|
||||||
|
(text-marks-set! right (map (lambda (mk)
|
||||||
|
(mark-text-set! mk right)
|
||||||
|
(mark-offset-set! mk (- (mark-offset mk) at-offset))
|
||||||
|
mk)
|
||||||
|
right-marks))
|
||||||
|
text))))
|
||||||
|
|
||||||
|
;;> Inserts \var{textlike} into the text immediately before
|
||||||
|
;;> the point indicated by \var{mark1}, leaving \var{mark1}
|
||||||
|
;;> (and all same position marks) after the inserted text.
|
||||||
|
;;> Returns \var{mark1}.
|
||||||
|
(define (text-insert! text str . o)
|
||||||
|
(receive (text at-mark) (text&mark-at text (if (pair? o) (car o) 0))
|
||||||
|
(let* ((at-offset (mark-offset at-mark))
|
||||||
|
(src (string->utf8 str))
|
||||||
|
(size (bytevector-length src))
|
||||||
|
(dst (text-source text))
|
||||||
|
(dst-size (bytevector-length dst)))
|
||||||
|
(cond
|
||||||
|
((= at-offset (text-end text))
|
||||||
|
(let* ((avail-size (- dst-size at-offset))
|
||||||
|
(end (min (+ at-offset size) dst-size))
|
||||||
|
(copy-size (- end at-offset)))
|
||||||
|
;; TODO: Don't insert a partial utf8 char.
|
||||||
|
(bytevector-copy! dst at-offset src 0 copy-size)
|
||||||
|
(text-end-set! text end)
|
||||||
|
;; Advance marks that were pointing to the end of this piece.
|
||||||
|
(for-each (lambda (mk)
|
||||||
|
(if (>= (mark-offset mk) at-offset)
|
||||||
|
(mark-offset-set! mk (+ (mark-offset mk) copy-size))))
|
||||||
|
(text-marks text))
|
||||||
|
(if (< copy-size size)
|
||||||
|
;; TODO: better sizing?
|
||||||
|
(let ((right (text-new-right! text (* 2 size)))
|
||||||
|
(right-size (- size copy-size)))
|
||||||
|
(bytevector-copy! (text-source right) 0 src copy-size size)
|
||||||
|
(text-end-set! right right-size)))
|
||||||
|
text))
|
||||||
|
;; ((= at-offset (text-start text))
|
||||||
|
;; TODO: insert before start
|
||||||
|
;; )
|
||||||
|
;; TODO: optimization: use prev buffer if at start of text?
|
||||||
|
;; TODO: optimization: shift bytes in place if near end?
|
||||||
|
((negative? at-offset)
|
||||||
|
(error "bad offset" at-offset))
|
||||||
|
(else
|
||||||
|
(text-split! text at-mark)
|
||||||
|
(text-insert! text str at-mark))))))
|
||||||
|
|
||||||
|
;; Set the start of text and adjust marks before that to the new start
|
||||||
|
;; accordingly.
|
||||||
|
(define (text-truncate-left! text new-start)
|
||||||
|
(text-start-set! text new-start)
|
||||||
|
(text-marks-set! text
|
||||||
|
(map (lambda (mk)
|
||||||
|
(mark-offset-set! mk (max (mark-offset mk) new-start))
|
||||||
|
mk)
|
||||||
|
(text-marks text))))
|
||||||
|
|
||||||
|
;; Set the end of text and adjust marks after that to the new end accordingly.
|
||||||
|
(define (text-truncate-right! text new-end)
|
||||||
|
(text-end-set! text new-end)
|
||||||
|
(text-marks-set! text
|
||||||
|
(map (lambda (mk)
|
||||||
|
(mark-offset-set! mk (min (mark-offset mk) new-end))
|
||||||
|
mk)
|
||||||
|
(text-marks text))))
|
||||||
|
|
||||||
|
;;> Deletes the codepoints between \var{from} (inclusive)
|
||||||
|
;;> and \var{to} (exclusive, defaulting to the end of the
|
||||||
|
;;> text), leaving the two marks pointing to the same location.
|
||||||
|
;;> Returns \var{from}.
|
||||||
|
(define (text-delete! text from to)
|
||||||
|
(let-values (((from-text from-mark) (text&mark-at text from))
|
||||||
|
((to-text to-mark) (text&mark-at text to)))
|
||||||
|
(let ((from-at (mark-offset from-mark))
|
||||||
|
(to-at (mark-offset to-mark)))
|
||||||
|
(cond
|
||||||
|
((eq? from-text to-text)
|
||||||
|
(let* ((from-text2 (text-split! from-text from-mark))
|
||||||
|
(to-text2 (text-next from-text2))
|
||||||
|
(to-at2 (mark-offset to-mark)))
|
||||||
|
(text-truncate-left! to-text2 to-at2)))
|
||||||
|
(else
|
||||||
|
(text-truncate-right! from-text from-at)
|
||||||
|
(text-truncate-left! to-text to-at)
|
||||||
|
;; Erase any pieces in between and point them to the start of to-text.
|
||||||
|
(let lp ((text (text-next from-text)) (marks '()))
|
||||||
|
(cond
|
||||||
|
((and text (not (eq? text to-text)))
|
||||||
|
;; TODO: splice out the nodes themselves
|
||||||
|
(let ((new-marks (text-marks text)))
|
||||||
|
(text-start-set! text (text-end text))
|
||||||
|
(text-marks-set! text '())
|
||||||
|
(lp (text-next text) (cons new-marks marks))))
|
||||||
|
(else
|
||||||
|
(text-marks-set! to-text
|
||||||
|
(append
|
||||||
|
(map (lambda (mk)
|
||||||
|
(mark-text-set! mk to-text)
|
||||||
|
(mark-offset-set! mk to-at)
|
||||||
|
mk)
|
||||||
|
(concatenate (reverse marks)))
|
||||||
|
(text-marks to-text))))))))
|
||||||
|
)))
|
||||||
|
|
||||||
|
(define (text-piece-empty? text)
|
||||||
|
(= (text-start text) (text-end text)))
|
||||||
|
|
||||||
|
;; returns a reference to a new start of text with empty pieces removed
|
||||||
|
(define (text-clean! text)
|
||||||
|
(define (text-first-non-empty! text)
|
||||||
|
(let lp ((tx (text-first text)))
|
||||||
|
(cond
|
||||||
|
((and (text-piece-empty? tx) (text-next tx))
|
||||||
|
(let ((next (text-next tx)))
|
||||||
|
(text-next-set! tx #f)
|
||||||
|
(text-prev-set! next #f)
|
||||||
|
(lp next)))
|
||||||
|
(else
|
||||||
|
tx))))
|
||||||
|
(let ((tx-first (text-first-non-empty! text)))
|
||||||
|
(let lp ((prev tx-first) (tx (text-next tx-first)))
|
||||||
|
(cond
|
||||||
|
((not tx)
|
||||||
|
tx-first)
|
||||||
|
((text-piece-empty? tx)
|
||||||
|
(text-next-set! prev (text-next tx))
|
||||||
|
(text-prev-set! (text-next tx) prev)
|
||||||
|
(lp prev (text-next tx)))
|
||||||
|
(else
|
||||||
|
(lp tx (text-next tx)))))))
|
29
lib/chibi/text/base.sld
Normal file
29
lib/chibi/text/base.sld
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
|
||||||
|
(define-library (chibi text base)
|
||||||
|
(import (scheme base)
|
||||||
|
(scheme char)
|
||||||
|
(scheme write)
|
||||||
|
(srfi 1)
|
||||||
|
(srfi 8)
|
||||||
|
(chibi text types)
|
||||||
|
(chibi text utf8))
|
||||||
|
(export
|
||||||
|
make-text text? text-empty?
|
||||||
|
text-beginning-of-line? text-end-of-line?
|
||||||
|
text-char-length text-utf8-length text-piece-length
|
||||||
|
string->text text->string text->utf8
|
||||||
|
text-append text-append!
|
||||||
|
text-concatenate text-concatenate!
|
||||||
|
text-ref text-copy text-current-column
|
||||||
|
text-insert! text-delete!
|
||||||
|
text-mark text-mark!
|
||||||
|
text-forward-char! text-backward-char!
|
||||||
|
text-forward-word! text-backward-word!
|
||||||
|
text-beginning-of-line? text-end-of-line?
|
||||||
|
text-beginning-of-line! text-end-of-line!
|
||||||
|
text-forward-line! text-backward-line!
|
||||||
|
mark-text mark-offset mark-copy
|
||||||
|
mark-anchor! mark-release!)
|
||||||
|
(include "marks.scm")
|
||||||
|
(include "base.scm")
|
||||||
|
(include "movement.scm"))
|
56
lib/chibi/text/marks.scm
Normal file
56
lib/chibi/text/marks.scm
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
|
||||||
|
(define (mark-anchor! mark)
|
||||||
|
(text-marks-set! (mark-text mark) (cons mark (text-marks (mark-text mark))))
|
||||||
|
mark)
|
||||||
|
|
||||||
|
(define (mark-release! mark)
|
||||||
|
(text-marks-set! (mark-text mark) (remove! mark (text-marks (mark-text mark))))
|
||||||
|
mark)
|
||||||
|
|
||||||
|
(define (mark-copy mark)
|
||||||
|
(make-mark (mark-text mark) (mark-offset mark) (mark-data mark)))
|
||||||
|
|
||||||
|
(define (mark-copy! mark)
|
||||||
|
(let ((res (mark-copy mark)))
|
||||||
|
(mark-anchor! res)
|
||||||
|
res))
|
||||||
|
|
||||||
|
;;> Returns a new mark into \var{text} pointing at the current
|
||||||
|
;;> codepoint offset indicated by index (default the end of the
|
||||||
|
;;> text). Subsequent mutations to \var{text} may change the
|
||||||
|
;;> offset of the mark, but not it's relation to the surrounding
|
||||||
|
;;> text.
|
||||||
|
(define (text-mark! text index . o)
|
||||||
|
(mark-anchor! (apply text-mark text index o)))
|
||||||
|
|
||||||
|
;;> Similar to \scheme{text-mark!}, but doesn't anchor the new
|
||||||
|
;;> mark, such that mutations to \var{text} may break it.
|
||||||
|
(define (text-mark text index . o)
|
||||||
|
(receive (text mark) (apply text&mark-at text index o)
|
||||||
|
mark))
|
||||||
|
|
||||||
|
;;> (text&mark-at text mark-or-index [data])
|
||||||
|
(define (text&mark-at text index . o)
|
||||||
|
(if (mark? index)
|
||||||
|
(values (mark-text index) index)
|
||||||
|
(let ((at-offset (if (pair? o)
|
||||||
|
(if (mark? (car o)) (mark-offset (car o)) (car o))
|
||||||
|
(text-start text)))
|
||||||
|
(data (and (pair? o) (pair? (cdr o)) (cadr o))))
|
||||||
|
(let lp ((n index)
|
||||||
|
(text text)
|
||||||
|
(bv (text-source text))
|
||||||
|
(sc (text-start text)))
|
||||||
|
(cond
|
||||||
|
((positive? n)
|
||||||
|
(if (>= sc (text-end text))
|
||||||
|
(let ((text2 (text-next text)))
|
||||||
|
(lp n text2 (text-source text2) (text-start text2)))
|
||||||
|
(lp (- n 1) text bv (utf8-next bv sc (text-end text)))))
|
||||||
|
((zero? n)
|
||||||
|
(values text (make-mark text sc data)))
|
||||||
|
(else
|
||||||
|
(if (<= sc (text-start text))
|
||||||
|
(let ((text2 (text-prev text)))
|
||||||
|
(lp n text2 (text-source text2) (text-end text2)))
|
||||||
|
(lp (+ n 1) text bv (utf8-prev bv sc (text-start text))))))))))
|
162
lib/chibi/text/movement.scm
Normal file
162
lib/chibi/text/movement.scm
Normal file
|
@ -0,0 +1,162 @@
|
||||||
|
|
||||||
|
;;> Advances \var{mark} \var{count} codepoints forward (default 1),
|
||||||
|
;;> and returns \var{mark}. If \var{count} is negative, moves
|
||||||
|
;;> backwards. If this would advance beyond the end (or before the
|
||||||
|
;;> beginning) of the text, \var{mark} is bound to the end (start).
|
||||||
|
(define (text-forward-char! mark)
|
||||||
|
(cond
|
||||||
|
((< (mark-offset mark) (text-end (mark-text mark)))
|
||||||
|
;; there was space in the current piece
|
||||||
|
(let ((ch (utf8-ref (text-source (mark-text mark)) (mark-offset mark)))
|
||||||
|
(offset2 (utf8-next (text-source (mark-text mark))
|
||||||
|
(mark-offset mark)
|
||||||
|
(text-end (mark-text mark)))))
|
||||||
|
(mark-offset-set! mark offset2)
|
||||||
|
ch))
|
||||||
|
(else
|
||||||
|
(let lp ((text (text-next (mark-text mark))))
|
||||||
|
(and text
|
||||||
|
(if (< (text-start text) (text-end text))
|
||||||
|
;; advanced to a new piece, need to also move the mark
|
||||||
|
(let ((ch (utf8-ref (text-source text) (text-start text)))
|
||||||
|
(offset2 (utf8-next (text-source text)
|
||||||
|
(text-start text)
|
||||||
|
(text-end text))))
|
||||||
|
(mark-offset-set! mark offset2)
|
||||||
|
(text-marks-set! (mark-text mark)
|
||||||
|
(delete (text-marks (mark-text mark)) mark))
|
||||||
|
(text-marks-set! text (cons mark (text-marks text)))
|
||||||
|
ch)
|
||||||
|
(lp (text-next text))))))))
|
||||||
|
|
||||||
|
;;> Moves \var{mark} \var{count} codepoints backward (default 1),
|
||||||
|
;;> and returns the new char pointed to, or \scheme{#f} at the start
|
||||||
|
;;> of text. If \var{count} is negative, moves forward. If this
|
||||||
|
;;> would advance before the beginning (or beyond the end) of the
|
||||||
|
;;> text, \var{mark} is bound to the start (end).
|
||||||
|
(define (text-backward-char! mark)
|
||||||
|
(cond
|
||||||
|
((> (mark-offset mark) (text-start (mark-text mark)))
|
||||||
|
(cond
|
||||||
|
((utf8-prev (text-source (mark-text mark))
|
||||||
|
(mark-offset mark)
|
||||||
|
(text-start (mark-text mark)))
|
||||||
|
=> (lambda (offset) (mark-offset-set! mark offset) (text-ref mark)))
|
||||||
|
(else #f)))
|
||||||
|
(else
|
||||||
|
(let lp ((text (text-prev (mark-text mark))))
|
||||||
|
(and text
|
||||||
|
(if (< (text-start text) (text-end text))
|
||||||
|
;; advanced to a new piece, need to also move the mark
|
||||||
|
(cond
|
||||||
|
((utf8-prev (text-source text) (text-end text) (text-start text))
|
||||||
|
=> (lambda (offset)
|
||||||
|
(mark-offset-set! mark offset)
|
||||||
|
(text-marks-set! (mark-text mark)
|
||||||
|
(delete (text-marks (mark-text mark)) mark))
|
||||||
|
(text-marks-set! text (cons mark (text-marks text)))
|
||||||
|
(text-ref mark)))
|
||||||
|
(else #f))
|
||||||
|
(lp (text-prev text))))))))
|
||||||
|
|
||||||
|
;;> Similar to \scheme{text-forward-char!} but advances to the end of the next
|
||||||
|
;;> word (consecutive sequence of alphabetic characters).
|
||||||
|
(define (text-forward-word! mark)
|
||||||
|
(let lp ((in-word? #f))
|
||||||
|
(let ((ch (text-ref mark)))
|
||||||
|
(cond
|
||||||
|
((not ch) (and in-word? mark))
|
||||||
|
((char-alphabetic? ch) (text-forward-char! mark) (lp #t))
|
||||||
|
(in-word? mark)
|
||||||
|
(else (text-forward-char! mark) (lp #f))))))
|
||||||
|
|
||||||
|
;;> Similar to \scheme{text-backward-char!} but advances to the beginning
|
||||||
|
;;> of the prev word (consecutive sequence of alphabetic characters).
|
||||||
|
(define (text-backward-word! mark)
|
||||||
|
(let lp ((in-word? #f))
|
||||||
|
(let ((ch (text-backward-char! mark)))
|
||||||
|
(cond
|
||||||
|
((not ch) (and in-word? mark))
|
||||||
|
((char-alphabetic? ch) (lp #t))
|
||||||
|
(in-word? (text-forward-char! mark) mark)
|
||||||
|
(else (lp #f))))))
|
||||||
|
|
||||||
|
;;> Returns true iff \var{mark} is currently at the beginning of a line.
|
||||||
|
(define (text-beginning-of-line? mark)
|
||||||
|
(let ((ch (text-backward-char! (mark-copy mark))))
|
||||||
|
(or (not ch) (eqv? ch #\newline))))
|
||||||
|
|
||||||
|
;;> Returns true iff \var{mark} is currently at the end of a line.
|
||||||
|
(define (text-end-of-line? mark)
|
||||||
|
(let ((ch (text-ref mark)))
|
||||||
|
(or (not ch) (eqv? ch #\newline))))
|
||||||
|
|
||||||
|
;;> Advances \var{mark} to the beginning of the current line.
|
||||||
|
(define (text-beginning-of-line! mark)
|
||||||
|
(let lp ((ch (text-ref mark)) (count 0))
|
||||||
|
;; TODO: crlf
|
||||||
|
(cond
|
||||||
|
((not ch))
|
||||||
|
((eqv? ch #\newline)
|
||||||
|
(if (zero? count)
|
||||||
|
(lp (text-backward-char! mark) (+ count 1))
|
||||||
|
(text-forward-char! mark)))
|
||||||
|
(else (lp (text-backward-char! mark) (+ count 1)))))
|
||||||
|
mark)
|
||||||
|
|
||||||
|
;;> Advances \var{mark} to the end of the current line.
|
||||||
|
(define (text-end-of-line! mark)
|
||||||
|
(let lp ()
|
||||||
|
(let ((ch (text-ref mark)))
|
||||||
|
(cond
|
||||||
|
((not ch))
|
||||||
|
((eqv? ch #\newline))
|
||||||
|
(else (text-forward-char! mark) (lp)))))
|
||||||
|
mark)
|
||||||
|
|
||||||
|
(define (text-count-chars-since mark sentinel)
|
||||||
|
(let ((mark (mark-copy mark)))
|
||||||
|
(let lp ((count 1))
|
||||||
|
(let ((ch (text-backward-char! mark)))
|
||||||
|
(if (or (not ch) (eqv? ch sentinel))
|
||||||
|
count
|
||||||
|
(lp (+ count 1)))))))
|
||||||
|
|
||||||
|
;; Note in the full editor we should track horizontal position given dynamic
|
||||||
|
;; width fonts, composing codepoints, ligatures, half/full-width forms in fixed
|
||||||
|
;; width fonts, etc.
|
||||||
|
|
||||||
|
(define (text-current-column mark)
|
||||||
|
(text-count-chars-since mark #\newline))
|
||||||
|
|
||||||
|
;; Note in the full editor, when scrolling up multiple lines we should record
|
||||||
|
;; the original start column, even if it some lines are shorter.
|
||||||
|
|
||||||
|
;;> Advances \var{mark} to the next line. If the next line has at least as many
|
||||||
|
;;> characters as the current, advances to the same column, otherwise to the end
|
||||||
|
;;> of the line.
|
||||||
|
(define (text-forward-line! mark)
|
||||||
|
(let ((col (text-current-column mark)))
|
||||||
|
(text-end-of-line! mark)
|
||||||
|
(let lp ((i 1))
|
||||||
|
(text-forward-char! mark)
|
||||||
|
(or (>= i col)
|
||||||
|
(text-end-of-line? mark)
|
||||||
|
(lp (+ i 1))))
|
||||||
|
mark))
|
||||||
|
|
||||||
|
;;> Advances \var{mark} to the previous line. If the previous line has at least
|
||||||
|
;;> as many characters as the current, advances to the same column, otherwise to
|
||||||
|
;;> the end of the line.
|
||||||
|
(define (text-backward-line! mark)
|
||||||
|
(let ((col (text-current-column mark)))
|
||||||
|
(text-beginning-of-line! mark)
|
||||||
|
(text-backward-char! mark)
|
||||||
|
(text-beginning-of-line! mark)
|
||||||
|
(let lp ((i 1))
|
||||||
|
(or (>= i col)
|
||||||
|
(text-end-of-line? mark)
|
||||||
|
(begin
|
||||||
|
(text-forward-char! mark)
|
||||||
|
(lp (+ i 1)))))
|
||||||
|
mark))
|
26
lib/chibi/text/search.scm
Normal file
26
lib/chibi/text/search.scm
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
|
||||||
|
;;> Searches for the leftmost longest match for \var{rx} starting from the mark
|
||||||
|
;;> \var{mk}. If found, advances the mark. Returns the mark.
|
||||||
|
(define (text-search! mk rx)
|
||||||
|
(let ((rx (regexp rx))
|
||||||
|
(state (make-regexp-state)))
|
||||||
|
(let lp ((tx (mark-text mk)))
|
||||||
|
(and tx
|
||||||
|
;; Note string size is mis-named, it's actually the end offset.
|
||||||
|
(let* ((bv (text-source tx))
|
||||||
|
(start (if (eqv? tx (mark-text mk)) (mark-offset mk) (text-start tx)))
|
||||||
|
(end (text-end tx))
|
||||||
|
(str (utf8->string! bv start end))
|
||||||
|
(sc1 (string-cursor-start str))
|
||||||
|
(sc2 (string-cursor-end str)))
|
||||||
|
(regexp-advance! #t (eq? tx (mark-text mk)) rx str sc1 sc2 state)
|
||||||
|
(cond
|
||||||
|
((regexp-state-matches state)
|
||||||
|
=> (lambda (match)
|
||||||
|
(let ((offset (string-cursor-offset (regexp-match-ref match 1))))
|
||||||
|
;; TODO: the match could have been a previous text
|
||||||
|
(mark-text-set! mk tx)
|
||||||
|
(mark-offset-set! mk (+ start offset))
|
||||||
|
mk)))
|
||||||
|
(else
|
||||||
|
(lp (text-next tx)))))))))
|
17
lib/chibi/text/search.sld
Normal file
17
lib/chibi/text/search.sld
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
|
||||||
|
(define-library (chibi text search)
|
||||||
|
(import (scheme base)
|
||||||
|
(chibi regexp)
|
||||||
|
(chibi text base)
|
||||||
|
(chibi text types)
|
||||||
|
(chibi text utf8)
|
||||||
|
(srfi 130))
|
||||||
|
(cond-expand
|
||||||
|
(chibi
|
||||||
|
(import (only (chibi) string-cursor-offset)))
|
||||||
|
(else
|
||||||
|
;; assume cursors are indexes
|
||||||
|
(begin
|
||||||
|
(define (string-cursor-offset sc) sc))))
|
||||||
|
(export text-search!)
|
||||||
|
(include "search.scm"))
|
33
lib/chibi/text/types.scm
Normal file
33
lib/chibi/text/types.scm
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
|
||||||
|
;; A lightweight mutable string-like object supporting:
|
||||||
|
;; - text insertion at arbitrary points
|
||||||
|
;; - marks which preserve their position after insertions
|
||||||
|
;; - lazy loading of text data
|
||||||
|
;;
|
||||||
|
;; Basically implemented as a piece table with mark management.
|
||||||
|
|
||||||
|
(define-record-type Text
|
||||||
|
(make-text source start end prev next marks data)
|
||||||
|
text?
|
||||||
|
(source text-source text-source-set!)
|
||||||
|
(start text-start text-start-set!)
|
||||||
|
(end text-end text-end-set!)
|
||||||
|
(prev text-prev text-prev-set!)
|
||||||
|
(next text-next text-next-set!)
|
||||||
|
(marks text-marks text-marks-set!)
|
||||||
|
(data text-data text-data-set!))
|
||||||
|
|
||||||
|
(define (text-first text)
|
||||||
|
(cond ((text-prev text) => text-first)
|
||||||
|
(else text)))
|
||||||
|
|
||||||
|
(define (text-last text)
|
||||||
|
(cond ((text-next text) => text-last)
|
||||||
|
(else text)))
|
||||||
|
|
||||||
|
(define-record-type Mark
|
||||||
|
(make-mark text offset data)
|
||||||
|
mark?
|
||||||
|
(text mark-text mark-text-set!)
|
||||||
|
(offset mark-offset mark-offset-set!)
|
||||||
|
(data mark-data mark-data-set!))
|
18
lib/chibi/text/types.sld
Normal file
18
lib/chibi/text/types.sld
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
|
||||||
|
(define-library (chibi text types)
|
||||||
|
(import (scheme base))
|
||||||
|
(export
|
||||||
|
make-text text?
|
||||||
|
text-source text-source-set!
|
||||||
|
text-start text-start-set!
|
||||||
|
text-end text-end-set!
|
||||||
|
text-prev text-prev-set!
|
||||||
|
text-next text-next-set!
|
||||||
|
text-marks text-marks-set!
|
||||||
|
text-data text-data-set!
|
||||||
|
text-first text-last
|
||||||
|
make-mark mark?
|
||||||
|
mark-text mark-text-set!
|
||||||
|
mark-offset mark-offset-set!
|
||||||
|
mark-data mark-data-set!)
|
||||||
|
(include "types.scm"))
|
22
lib/chibi/text/utf8.scm
Normal file
22
lib/chibi/text/utf8.scm
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
|
||||||
|
(define (utf8-initial-byte-length bv offset)
|
||||||
|
(let ((ch (bytevector-u8-ref bv offset)))
|
||||||
|
(cond
|
||||||
|
((< ch #xC0) 1)
|
||||||
|
((< ch #xE0) 2)
|
||||||
|
(else (+ 3 (bitwise-and 1 (arithmetic-shift ch -4)))))))
|
||||||
|
|
||||||
|
(define (utf8-ref bv offset)
|
||||||
|
(let ((end (min (+ 4 offset) (bytevector-length bv))))
|
||||||
|
;; TODO: this is unsafe, read directly
|
||||||
|
(string-ref (utf8->string! bv offset end) 0)))
|
||||||
|
|
||||||
|
(define (utf8-next bv offset end)
|
||||||
|
(min end (+ offset (utf8-initial-byte-length bv offset))))
|
||||||
|
|
||||||
|
(define (utf8-prev bv offset start)
|
||||||
|
(let lp ((i (- offset 1)))
|
||||||
|
(and (>= i start)
|
||||||
|
(if (= #b10 (arithmetic-shift (bytevector-u8-ref bv i) -6))
|
||||||
|
(lp (- i 1))
|
||||||
|
i))))
|
13
lib/chibi/text/utf8.sld
Normal file
13
lib/chibi/text/utf8.sld
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
|
||||||
|
(define-library (chibi text utf8)
|
||||||
|
(export string->utf8! utf8->string! utf8-ref utf8-next utf8-prev)
|
||||||
|
(cond-expand
|
||||||
|
((and chibi (not portable))
|
||||||
|
(import (only (chibi io)
|
||||||
|
string->utf8! utf8->string! utf8-ref utf8-next utf8-prev)))
|
||||||
|
(else
|
||||||
|
(import (scheme base) (scheme bitwise))
|
||||||
|
(include "utf8.scm")
|
||||||
|
(begin
|
||||||
|
(define utf8->string! utf8->string)
|
||||||
|
(define string->utf8! string->utf8)))))
|
Loading…
Add table
Reference in a new issue