Adding initial text type.

This commit is contained in:
Alex Shinn 2024-03-19 23:03:21 +09:00
parent c4611cc33f
commit f25329b5aa
12 changed files with 819 additions and 0 deletions

129
lib/chibi/text-test.sld Normal file
View file

@ -0,0 +1,129 @@
(define-library (chibi text-test)
(import (scheme base) (scheme write) (chibi test) (chibi text))
(export run-tests)
(begin
(define (run-tests)
(test-begin "(chibi text)")
(test-assert (text? (string->text "")))
(test 0 (text-char-length (string->text "")))
(test 3 (text-char-length (string->text "日本語")))
(test 0 (text-utf8-length (string->text "")))
(test 9 (text-utf8-length (string->text "日本語")))
(test "" (text->string (string->text "")))
(test "日本語" (text->string (string->text "日本語")))
(let ((tx (string->text "日本語")))
(text-insert! tx "!" 0)
(test "!日本語" (text->string tx))
(text-insert! tx "!" 2)
(test "!日!本語" (text->string tx))
(text-insert! tx "!" 4)
(test "!日!本!語" (text->string tx))
(text-insert! tx "!" 6)
(test "!日!本!語!" (text->string tx)))
(let ((tx (string->text "abc")))
(text-insert! tx "あ" 0)
(test "あabc" (text->string tx))
(text-insert! tx "い" 2)
(test "あaいbc" (text->string tx))
(text-insert! tx "う" 4)
(test "あaいbうc" (text->string tx))
(text-insert! tx "え" 6)
(test "あaいbうcえ" (text->string tx)))
(let* ((tx (string->text "0123456789"))
(mk (text-mark! tx 5)))
(test #\5 (text-ref mk))
(text-insert! tx "abc" 1)
(text-insert! tx "xyz" mk)
(test #\5 (text-ref mk))
(test "0abc1234xyz56789" (text->string tx)))
(let* ((tx (string->text "零一二三四五六七八九"))
(mk (text-mark! tx 5)))
(text-insert! tx "あいう" 1)
(text-insert! tx "xyz" mk)
(text-insert! tx "かきく" mk)
(test "零あいう一二三四xyzかきく五六七八九" (text->string tx)))
(let* ((tx (string->text "0123456789"))
(mk (text-mark! tx 5)))
(text-insert! tx "abc" mk)
(text-insert! tx "xyz" mk)
(test "01234abcxyz56789" (text->string tx)))
(let* ((tx (string->text "0123456789"))
(mk1 (text-mark! tx 5))
(mk2 (text-mark! tx 8))
(mk3 (text-mark! tx 6)))
(text-delete! tx mk1 mk2)
(test "0123489" (text->string tx))
;; The current order of mk2/mk3 is unspecified so we insert the same
;; value here/
(text-insert! tx "def" mk2)
(text-insert! tx "abc" mk1)
(text-insert! tx "def" mk3)
(test "01234abcdefdef89" (text->string tx)))
(let* ((tx (string->text "0123456789"))
(mk (text-mark! tx 5)))
(text-insert! tx (make-string 512 #\x) mk)
(test (string-append "01234" (make-string 512 #\x) "56789")
(text->string tx))
(let ((mk2 (text-mark! tx 512)))
(text-delete! tx 5 517)
(test "0123456789" (text->string tx))))
(let* ((tx (string->text "a一二三bc"))
(mk (text-mark! tx 0)))
(test #\a (text-forward-char! mk))
(test #\一 (text-forward-char! mk))
(test #\二 (text-forward-char! mk))
(test #\三 (text-forward-char! mk))
(test #\b (text-forward-char! mk))
(test #\c (text-forward-char! mk))
(test #f (text-forward-char! mk))
(test #\c (text-backward-char! mk))
(test #\b (text-backward-char! mk))
(test #\三 (text-backward-char! mk))
(test #\二 (text-backward-char! mk))
(test #\一 (text-backward-char! mk))
(test #\a (text-backward-char! mk))
(test #f (text-backward-char! mk)))
(let* ((tx (string->text "abc, (一二三): def"))
(mk (text-mark! tx 0)))
(test 3 (mark-offset (text-forward-word! mk)))
(test 15 (mark-offset (text-forward-word! mk)))
(test 21 (mark-offset (text-forward-word! mk)))
(test #f (text-forward-word! mk))
(test 21 (mark-offset mk))
(test 18 (mark-offset (text-backward-word! mk)))
(test 6 (mark-offset (text-backward-word! mk)))
(test 0 (mark-offset (text-backward-word! mk)))
(test #f (text-backward-word! mk))
)
(let* ((tx (string->text "0123456789\nabcdef\n零一二三四五六七八九"))
(mk (text-mark! tx 0)))
(test #t (text-beginning-of-line? mk))
(test #f (text-end-of-line? mk))
(test 10 (mark-offset (text-end-of-line! mk)))
(test #\newline (text-ref mk))
(test #f (text-beginning-of-line? mk))
(test #t (text-end-of-line? mk))
(text-forward-char! mk)
(text-forward-char! mk)
(text-forward-char! mk)
(text-forward-line! mk)
(test #\二 (text-ref mk))
(text-forward-char! mk)
(text-forward-char! mk)
(text-forward-char! mk)
(text-forward-char! mk)
(test #\六 (text-ref mk))
(text-backward-line! mk)
(test #\newline (text-ref mk))
(text-backward-line! mk)
(test #\6 (text-ref mk))
)
(let* ((tx (string->text "0123456789\nabcdef\n零一二三四五六七八九"))
(mk (text-mark! tx 0)))
(text-search! mk "一二三")
(test 30 (mark-offset mk))
(text-insert! tx "..." mk)
(test "0123456789\nabcdef\n零一二三...四五六七八九"
(text->string (mark-text mk))))
(test-end))))

26
lib/chibi/text.sld Normal file
View file

@ -0,0 +1,26 @@
(define-library (chibi text)
(import (scheme base)
(chibi text base)
(chibi text search)
(chibi text types)
(chibi text utf8))
(export
make-text text? text-empty?
text-beginning-of-line? text-end-of-line?
text-char-length text-utf8-length text-piece-length
string->text text->string text->utf8
text-append text-append!
text-concatenate text-concatenate!
text-ref text-copy text-current-column
text-insert! text-delete!
text-mark text-mark!
text-forward-char! text-backward-char!
text-forward-word! text-backward-word!
text-beginning-of-line? text-end-of-line?
text-beginning-of-line! text-end-of-line!
text-forward-line! text-backward-line!
text-search!
mark-text mark-offset mark-copy
mark-anchor! mark-release!
))

288
lib/chibi/text/base.scm Normal file
View file

@ -0,0 +1,288 @@
;;> Returns the length in codepoints of the text object.
(define (text-char-length text)
(let lp ((text (text-first text)) (sum 0))
(if text
(lp (text-next text)
(+ sum
(string-length (utf8->string! (text-source text)
(text-start text)
(text-end text)))))
sum)))
;;> Returns the length the text object would require encoded as UTF-8.
(define (text-utf8-length text)
(let lp ((text (text-first text)) (sum 0))
(if text
(lp (text-next text) (+ sum (- (text-end text) (text-start text))))
sum)))
(define (text-piece-length text)
(let lp ((text (text-first text)) (count 0))
(if text
(lp (text-next text) (+ count 1))
count)))
(define (text-empty? text)
(zero? (text-char-length text)))
;;> Returns a new text object representing the same codepoints as the string \var{str}.
(define (string->text str . o)
(let* ((bv (string->utf8 str))
(start (if (pair? o) (car o) 0))
(end (if (and (pair? o) (pair? (cdr o))) (cadr o) (bytevector-length bv))))
(make-text bv start end #f #f '() #f)))
(define (text-string text)
(utf8->string (text-source text) (text-start text) (text-end text)))
;;> Returns the utf8 representation of the codepoints in \var{text}.
(define (text->utf8 text)
(if (and (not (text-prev text)) (not (text-next text)))
(bytevector-copy (text-source text) (text-start text) (text-end text))
(let ((out (open-output-bytevector)))
(let lp ((piece (text-first text)))
(cond
((not piece)
(get-output-bytevector out))
(else
(write-bytevector (text-source piece) out (text-start piece) (text-end piece))
(lp (text-next piece))))))))
;;> Returns a string representing the same codepoints as \var{text}.
(define (text->string text)
(utf8->string (text->utf8 text)))
;;> Returns the character \var{mark} points to, or \scheme{#f}
;;> if mark points to the end of the text.
(define (text-ref mark)
(cond
((< (mark-offset mark) (text-start (mark-text mark)))
(let lp ((text (text-prev (mark-text mark))))
(and text
(if (< (text-start text) (text-end text))
(utf8-ref (text-source text) (- (text-end text) 1))
(lp (text-prev text))))))
((>= (mark-offset mark) (text-end (mark-text mark)))
(let lp ((text (text-next (mark-text mark))))
(and text
(if (< (text-start text) (text-end text))
(utf8-ref (text-source text) (text-start text))
(lp (text-next text))))))
(else
(utf8-ref (text-source (mark-text mark)) (mark-offset mark)))))
(define (text-piece-copy text)
(let ((res (make-text (bytevector-copy (text-source text))
(text-start text)
(text-end text)
(text-prev text)
(text-next text)
'()
(text-data text))))
(text-marks-set! res
(map (lambda (mk)
(make-mark res (mark-offset mk) (mark-data mk)))
(text-marks res)))
res))
(define (text-copy text)
(let ((text0 (text-piece-copy (text-first text))))
(let lp ((text (text-next text0))
(prev text0))
(if text
(let ((text1 (text-piece-copy text)))
(text-next-set! text0 text1)
(text-prev-set! text1 text0)
(lp text1 (text-next text1)))
text0))))
(define (->text obj)
(cond
((text? obj) obj)
((string? obj) (string->text obj))
((char? obj) (string->text (string obj)))
(else (error "not a textlike object" obj))))
(define (text-append! . ls)
(text-concatenate! ls))
(define (text-append . ls)
(text-concatenate ls))
(define (text-concatenate ls)
(text-concatenate! (map (lambda (x) (if (text? x) (text-copy x) x)) ls)))
(define (text-concatenate! ls)
(if (null? ls)
(string->text "")
(let ((res (->text (car ls))))
(let lp ((tx (text-last res))
(ls (cdr ls)))
(if (null? ls)
res
(let* ((tx2 (->text (car ls)))
(tx2-first (text-first tx2)))
(text-next-set! tx tx2-first)
(text-prev-set! tx2-first tx)
(lp (text-last tx2) (cdr ls))))))))
;; inserts a new right piece and returns it
(define (text-new-right! text . o)
(let* ((size (if (pair? o) (car o) 256))
(right (make-text (make-bytevector size) 0 0 text (text-next text) '() #f)))
(cond ((text-next text) => (lambda (orig-right) (text-prev-set! orig-right right))))
(text-next-set! text right)
right))
;; splits the text at the given point into two pieces, returning the original
;; text which becomes the left piece
(define (text-split! text at . o)
(receive (text at) (text&mark-at text at)
(let* ((at-mark (if (mark? at) at (text-mark text at)))
(at-offset (mark-offset at-mark))
(size (max (- at-offset (text-start text))
(if (pair? o) (car o) 64)))
(right (text-new-right! text size)))
(bytevector-copy! (text-source right)
0
(text-source text)
at-offset
(text-end text))
(text-end-set! right (- (text-end text) at-offset))
(text-end-set! text at-offset)
(receive (left-marks right-marks)
(partition (lambda (mk) (<= (mark-offset mk) at-offset)) (text-marks text))
(text-marks-set! text left-marks)
(text-marks-set! right (map (lambda (mk)
(mark-text-set! mk right)
(mark-offset-set! mk (- (mark-offset mk) at-offset))
mk)
right-marks))
text))))
;;> Inserts \var{textlike} into the text immediately before
;;> the point indicated by \var{mark1}, leaving \var{mark1}
;;> (and all same position marks) after the inserted text.
;;> Returns \var{mark1}.
(define (text-insert! text str . o)
(receive (text at-mark) (text&mark-at text (if (pair? o) (car o) 0))
(let* ((at-offset (mark-offset at-mark))
(src (string->utf8 str))
(size (bytevector-length src))
(dst (text-source text))
(dst-size (bytevector-length dst)))
(cond
((= at-offset (text-end text))
(let* ((avail-size (- dst-size at-offset))
(end (min (+ at-offset size) dst-size))
(copy-size (- end at-offset)))
;; TODO: Don't insert a partial utf8 char.
(bytevector-copy! dst at-offset src 0 copy-size)
(text-end-set! text end)
;; Advance marks that were pointing to the end of this piece.
(for-each (lambda (mk)
(if (>= (mark-offset mk) at-offset)
(mark-offset-set! mk (+ (mark-offset mk) copy-size))))
(text-marks text))
(if (< copy-size size)
;; TODO: better sizing?
(let ((right (text-new-right! text (* 2 size)))
(right-size (- size copy-size)))
(bytevector-copy! (text-source right) 0 src copy-size size)
(text-end-set! right right-size)))
text))
;; ((= at-offset (text-start text))
;; TODO: insert before start
;; )
;; TODO: optimization: use prev buffer if at start of text?
;; TODO: optimization: shift bytes in place if near end?
((negative? at-offset)
(error "bad offset" at-offset))
(else
(text-split! text at-mark)
(text-insert! text str at-mark))))))
;; Set the start of text and adjust marks before that to the new start
;; accordingly.
(define (text-truncate-left! text new-start)
(text-start-set! text new-start)
(text-marks-set! text
(map (lambda (mk)
(mark-offset-set! mk (max (mark-offset mk) new-start))
mk)
(text-marks text))))
;; Set the end of text and adjust marks after that to the new end accordingly.
(define (text-truncate-right! text new-end)
(text-end-set! text new-end)
(text-marks-set! text
(map (lambda (mk)
(mark-offset-set! mk (min (mark-offset mk) new-end))
mk)
(text-marks text))))
;;> Deletes the codepoints between \var{from} (inclusive)
;;> and \var{to} (exclusive, defaulting to the end of the
;;> text), leaving the two marks pointing to the same location.
;;> Returns \var{from}.
(define (text-delete! text from to)
(let-values (((from-text from-mark) (text&mark-at text from))
((to-text to-mark) (text&mark-at text to)))
(let ((from-at (mark-offset from-mark))
(to-at (mark-offset to-mark)))
(cond
((eq? from-text to-text)
(let* ((from-text2 (text-split! from-text from-mark))
(to-text2 (text-next from-text2))
(to-at2 (mark-offset to-mark)))
(text-truncate-left! to-text2 to-at2)))
(else
(text-truncate-right! from-text from-at)
(text-truncate-left! to-text to-at)
;; Erase any pieces in between and point them to the start of to-text.
(let lp ((text (text-next from-text)) (marks '()))
(cond
((and text (not (eq? text to-text)))
;; TODO: splice out the nodes themselves
(let ((new-marks (text-marks text)))
(text-start-set! text (text-end text))
(text-marks-set! text '())
(lp (text-next text) (cons new-marks marks))))
(else
(text-marks-set! to-text
(append
(map (lambda (mk)
(mark-text-set! mk to-text)
(mark-offset-set! mk to-at)
mk)
(concatenate (reverse marks)))
(text-marks to-text))))))))
)))
(define (text-piece-empty? text)
(= (text-start text) (text-end text)))
;; returns a reference to a new start of text with empty pieces removed
(define (text-clean! text)
(define (text-first-non-empty! text)
(let lp ((tx (text-first text)))
(cond
((and (text-piece-empty? tx) (text-next tx))
(let ((next (text-next tx)))
(text-next-set! tx #f)
(text-prev-set! next #f)
(lp next)))
(else
tx))))
(let ((tx-first (text-first-non-empty! text)))
(let lp ((prev tx-first) (tx (text-next tx-first)))
(cond
((not tx)
tx-first)
((text-piece-empty? tx)
(text-next-set! prev (text-next tx))
(text-prev-set! (text-next tx) prev)
(lp prev (text-next tx)))
(else
(lp tx (text-next tx)))))))

29
lib/chibi/text/base.sld Normal file
View file

@ -0,0 +1,29 @@
(define-library (chibi text base)
(import (scheme base)
(scheme char)
(scheme write)
(srfi 1)
(srfi 8)
(chibi text types)
(chibi text utf8))
(export
make-text text? text-empty?
text-beginning-of-line? text-end-of-line?
text-char-length text-utf8-length text-piece-length
string->text text->string text->utf8
text-append text-append!
text-concatenate text-concatenate!
text-ref text-copy text-current-column
text-insert! text-delete!
text-mark text-mark!
text-forward-char! text-backward-char!
text-forward-word! text-backward-word!
text-beginning-of-line? text-end-of-line?
text-beginning-of-line! text-end-of-line!
text-forward-line! text-backward-line!
mark-text mark-offset mark-copy
mark-anchor! mark-release!)
(include "marks.scm")
(include "base.scm")
(include "movement.scm"))

56
lib/chibi/text/marks.scm Normal file
View file

@ -0,0 +1,56 @@
(define (mark-anchor! mark)
(text-marks-set! (mark-text mark) (cons mark (text-marks (mark-text mark))))
mark)
(define (mark-release! mark)
(text-marks-set! (mark-text mark) (remove! mark (text-marks (mark-text mark))))
mark)
(define (mark-copy mark)
(make-mark (mark-text mark) (mark-offset mark) (mark-data mark)))
(define (mark-copy! mark)
(let ((res (mark-copy mark)))
(mark-anchor! res)
res))
;;> Returns a new mark into \var{text} pointing at the current
;;> codepoint offset indicated by index (default the end of the
;;> text). Subsequent mutations to \var{text} may change the
;;> offset of the mark, but not it's relation to the surrounding
;;> text.
(define (text-mark! text index . o)
(mark-anchor! (apply text-mark text index o)))
;;> Similar to \scheme{text-mark!}, but doesn't anchor the new
;;> mark, such that mutations to \var{text} may break it.
(define (text-mark text index . o)
(receive (text mark) (apply text&mark-at text index o)
mark))
;;> (text&mark-at text mark-or-index [data])
(define (text&mark-at text index . o)
(if (mark? index)
(values (mark-text index) index)
(let ((at-offset (if (pair? o)
(if (mark? (car o)) (mark-offset (car o)) (car o))
(text-start text)))
(data (and (pair? o) (pair? (cdr o)) (cadr o))))
(let lp ((n index)
(text text)
(bv (text-source text))
(sc (text-start text)))
(cond
((positive? n)
(if (>= sc (text-end text))
(let ((text2 (text-next text)))
(lp n text2 (text-source text2) (text-start text2)))
(lp (- n 1) text bv (utf8-next bv sc (text-end text)))))
((zero? n)
(values text (make-mark text sc data)))
(else
(if (<= sc (text-start text))
(let ((text2 (text-prev text)))
(lp n text2 (text-source text2) (text-end text2)))
(lp (+ n 1) text bv (utf8-prev bv sc (text-start text))))))))))

162
lib/chibi/text/movement.scm Normal file
View file

@ -0,0 +1,162 @@
;;> Advances \var{mark} \var{count} codepoints forward (default 1),
;;> and returns \var{mark}. If \var{count} is negative, moves
;;> backwards. If this would advance beyond the end (or before the
;;> beginning) of the text, \var{mark} is bound to the end (start).
(define (text-forward-char! mark)
(cond
((< (mark-offset mark) (text-end (mark-text mark)))
;; there was space in the current piece
(let ((ch (utf8-ref (text-source (mark-text mark)) (mark-offset mark)))
(offset2 (utf8-next (text-source (mark-text mark))
(mark-offset mark)
(text-end (mark-text mark)))))
(mark-offset-set! mark offset2)
ch))
(else
(let lp ((text (text-next (mark-text mark))))
(and text
(if (< (text-start text) (text-end text))
;; advanced to a new piece, need to also move the mark
(let ((ch (utf8-ref (text-source text) (text-start text)))
(offset2 (utf8-next (text-source text)
(text-start text)
(text-end text))))
(mark-offset-set! mark offset2)
(text-marks-set! (mark-text mark)
(delete (text-marks (mark-text mark)) mark))
(text-marks-set! text (cons mark (text-marks text)))
ch)
(lp (text-next text))))))))
;;> Moves \var{mark} \var{count} codepoints backward (default 1),
;;> and returns the new char pointed to, or \scheme{#f} at the start
;;> of text. If \var{count} is negative, moves forward. If this
;;> would advance before the beginning (or beyond the end) of the
;;> text, \var{mark} is bound to the start (end).
(define (text-backward-char! mark)
(cond
((> (mark-offset mark) (text-start (mark-text mark)))
(cond
((utf8-prev (text-source (mark-text mark))
(mark-offset mark)
(text-start (mark-text mark)))
=> (lambda (offset) (mark-offset-set! mark offset) (text-ref mark)))
(else #f)))
(else
(let lp ((text (text-prev (mark-text mark))))
(and text
(if (< (text-start text) (text-end text))
;; advanced to a new piece, need to also move the mark
(cond
((utf8-prev (text-source text) (text-end text) (text-start text))
=> (lambda (offset)
(mark-offset-set! mark offset)
(text-marks-set! (mark-text mark)
(delete (text-marks (mark-text mark)) mark))
(text-marks-set! text (cons mark (text-marks text)))
(text-ref mark)))
(else #f))
(lp (text-prev text))))))))
;;> Similar to \scheme{text-forward-char!} but advances to the end of the next
;;> word (consecutive sequence of alphabetic characters).
(define (text-forward-word! mark)
(let lp ((in-word? #f))
(let ((ch (text-ref mark)))
(cond
((not ch) (and in-word? mark))
((char-alphabetic? ch) (text-forward-char! mark) (lp #t))
(in-word? mark)
(else (text-forward-char! mark) (lp #f))))))
;;> Similar to \scheme{text-backward-char!} but advances to the beginning
;;> of the prev word (consecutive sequence of alphabetic characters).
(define (text-backward-word! mark)
(let lp ((in-word? #f))
(let ((ch (text-backward-char! mark)))
(cond
((not ch) (and in-word? mark))
((char-alphabetic? ch) (lp #t))
(in-word? (text-forward-char! mark) mark)
(else (lp #f))))))
;;> Returns true iff \var{mark} is currently at the beginning of a line.
(define (text-beginning-of-line? mark)
(let ((ch (text-backward-char! (mark-copy mark))))
(or (not ch) (eqv? ch #\newline))))
;;> Returns true iff \var{mark} is currently at the end of a line.
(define (text-end-of-line? mark)
(let ((ch (text-ref mark)))
(or (not ch) (eqv? ch #\newline))))
;;> Advances \var{mark} to the beginning of the current line.
(define (text-beginning-of-line! mark)
(let lp ((ch (text-ref mark)) (count 0))
;; TODO: crlf
(cond
((not ch))
((eqv? ch #\newline)
(if (zero? count)
(lp (text-backward-char! mark) (+ count 1))
(text-forward-char! mark)))
(else (lp (text-backward-char! mark) (+ count 1)))))
mark)
;;> Advances \var{mark} to the end of the current line.
(define (text-end-of-line! mark)
(let lp ()
(let ((ch (text-ref mark)))
(cond
((not ch))
((eqv? ch #\newline))
(else (text-forward-char! mark) (lp)))))
mark)
(define (text-count-chars-since mark sentinel)
(let ((mark (mark-copy mark)))
(let lp ((count 1))
(let ((ch (text-backward-char! mark)))
(if (or (not ch) (eqv? ch sentinel))
count
(lp (+ count 1)))))))
;; Note in the full editor we should track horizontal position given dynamic
;; width fonts, composing codepoints, ligatures, half/full-width forms in fixed
;; width fonts, etc.
(define (text-current-column mark)
(text-count-chars-since mark #\newline))
;; Note in the full editor, when scrolling up multiple lines we should record
;; the original start column, even if it some lines are shorter.
;;> Advances \var{mark} to the next line. If the next line has at least as many
;;> characters as the current, advances to the same column, otherwise to the end
;;> of the line.
(define (text-forward-line! mark)
(let ((col (text-current-column mark)))
(text-end-of-line! mark)
(let lp ((i 1))
(text-forward-char! mark)
(or (>= i col)
(text-end-of-line? mark)
(lp (+ i 1))))
mark))
;;> Advances \var{mark} to the previous line. If the previous line has at least
;;> as many characters as the current, advances to the same column, otherwise to
;;> the end of the line.
(define (text-backward-line! mark)
(let ((col (text-current-column mark)))
(text-beginning-of-line! mark)
(text-backward-char! mark)
(text-beginning-of-line! mark)
(let lp ((i 1))
(or (>= i col)
(text-end-of-line? mark)
(begin
(text-forward-char! mark)
(lp (+ i 1)))))
mark))

26
lib/chibi/text/search.scm Normal file
View file

@ -0,0 +1,26 @@
;;> Searches for the leftmost longest match for \var{rx} starting from the mark
;;> \var{mk}. If found, advances the mark. Returns the mark.
(define (text-search! mk rx)
(let ((rx (regexp rx))
(state (make-regexp-state)))
(let lp ((tx (mark-text mk)))
(and tx
;; Note string size is mis-named, it's actually the end offset.
(let* ((bv (text-source tx))
(start (if (eqv? tx (mark-text mk)) (mark-offset mk) (text-start tx)))
(end (text-end tx))
(str (utf8->string! bv start end))
(sc1 (string-cursor-start str))
(sc2 (string-cursor-end str)))
(regexp-advance! #t (eq? tx (mark-text mk)) rx str sc1 sc2 state)
(cond
((regexp-state-matches state)
=> (lambda (match)
(let ((offset (string-cursor-offset (regexp-match-ref match 1))))
;; TODO: the match could have been a previous text
(mark-text-set! mk tx)
(mark-offset-set! mk (+ start offset))
mk)))
(else
(lp (text-next tx)))))))))

17
lib/chibi/text/search.sld Normal file
View file

@ -0,0 +1,17 @@
(define-library (chibi text search)
(import (scheme base)
(chibi regexp)
(chibi text base)
(chibi text types)
(chibi text utf8)
(srfi 130))
(cond-expand
(chibi
(import (only (chibi) string-cursor-offset)))
(else
;; assume cursors are indexes
(begin
(define (string-cursor-offset sc) sc))))
(export text-search!)
(include "search.scm"))

33
lib/chibi/text/types.scm Normal file
View file

@ -0,0 +1,33 @@
;; A lightweight mutable string-like object supporting:
;; - text insertion at arbitrary points
;; - marks which preserve their position after insertions
;; - lazy loading of text data
;;
;; Basically implemented as a piece table with mark management.
(define-record-type Text
(make-text source start end prev next marks data)
text?
(source text-source text-source-set!)
(start text-start text-start-set!)
(end text-end text-end-set!)
(prev text-prev text-prev-set!)
(next text-next text-next-set!)
(marks text-marks text-marks-set!)
(data text-data text-data-set!))
(define (text-first text)
(cond ((text-prev text) => text-first)
(else text)))
(define (text-last text)
(cond ((text-next text) => text-last)
(else text)))
(define-record-type Mark
(make-mark text offset data)
mark?
(text mark-text mark-text-set!)
(offset mark-offset mark-offset-set!)
(data mark-data mark-data-set!))

18
lib/chibi/text/types.sld Normal file
View file

@ -0,0 +1,18 @@
(define-library (chibi text types)
(import (scheme base))
(export
make-text text?
text-source text-source-set!
text-start text-start-set!
text-end text-end-set!
text-prev text-prev-set!
text-next text-next-set!
text-marks text-marks-set!
text-data text-data-set!
text-first text-last
make-mark mark?
mark-text mark-text-set!
mark-offset mark-offset-set!
mark-data mark-data-set!)
(include "types.scm"))

22
lib/chibi/text/utf8.scm Normal file
View file

@ -0,0 +1,22 @@
(define (utf8-initial-byte-length bv offset)
(let ((ch (bytevector-u8-ref bv offset)))
(cond
((< ch #xC0) 1)
((< ch #xE0) 2)
(else (+ 3 (bitwise-and 1 (arithmetic-shift ch -4)))))))
(define (utf8-ref bv offset)
(let ((end (min (+ 4 offset) (bytevector-length bv))))
;; TODO: this is unsafe, read directly
(string-ref (utf8->string! bv offset end) 0)))
(define (utf8-next bv offset end)
(min end (+ offset (utf8-initial-byte-length bv offset))))
(define (utf8-prev bv offset start)
(let lp ((i (- offset 1)))
(and (>= i start)
(if (= #b10 (arithmetic-shift (bytevector-u8-ref bv i) -6))
(lp (- i 1))
i))))

13
lib/chibi/text/utf8.sld Normal file
View file

@ -0,0 +1,13 @@
(define-library (chibi text utf8)
(export string->utf8! utf8->string! utf8-ref utf8-next utf8-prev)
(cond-expand
((and chibi (not portable))
(import (only (chibi io)
string->utf8! utf8->string! utf8-ref utf8-next utf8-prev)))
(else
(import (scheme base) (scheme bitwise))
(include "utf8.scm")
(begin
(define utf8->string! utf8->string)
(define string->utf8! string->utf8)))))