chibi-scheme/lib/chibi/text/utf8.scm
2024-03-19 23:03:21 +09:00

22 lines
683 B
Scheme

(define (utf8-initial-byte-length bv offset)
(let ((ch (bytevector-u8-ref bv offset)))
(cond
((< ch #xC0) 1)
((< ch #xE0) 2)
(else (+ 3 (bitwise-and 1 (arithmetic-shift ch -4)))))))
(define (utf8-ref bv offset)
(let ((end (min (+ 4 offset) (bytevector-length bv))))
;; TODO: this is unsafe, read directly
(string-ref (utf8->string! bv offset end) 0)))
(define (utf8-next bv offset end)
(min end (+ offset (utf8-initial-byte-length bv offset))))
(define (utf8-prev bv offset start)
(let lp ((i (- offset 1)))
(and (>= i start)
(if (= #b10 (arithmetic-shift (bytevector-u8-ref bv i) -6))
(lp (- i 1))
i))))