From a481f31571a9a3cad644f2a627f4de8c632c4b8a Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Wed, 26 Dec 2012 23:10:46 +0900 Subject: [PATCH] Supporting double-width characters in line editor. --- lib/chibi/term/edit-line.scm | 154 +++++++++++++++++++++++++++++++++-- 1 file changed, 145 insertions(+), 9 deletions(-) diff --git a/lib/chibi/term/edit-line.scm b/lib/chibi/term/edit-line.scm index 89fb5337..f356b9d5 100644 --- a/lib/chibi/term/edit-line.scm +++ b/lib/chibi/term/edit-line.scm @@ -178,13 +178,134 @@ (buffer-string-set! buf new) (buffer-gap-set! buf new-gap))))) +;; Adapted from fmt-unicode. TODO: convert to char-sets. +(define low-non-spacing-chars '#u8( +#xff #xff #xff #xff #xff #xff #xff #xff #xff #xff #xff #xff #xff #xff 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#x78 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 #xfe #xff #xff #xff #xff #xff #x1f 0 0 0 0 0 0 0 + 0 0 #x3f 0 0 0 0 0 0 #xf8 #xff #x01 0 0 #x01 0 + 0 0 0 0 0 0 0 0 0 0 #xc0 #xff #xff #x3f 0 0 + 0 0 #x02 0 0 0 #xff #xff #xff #x07 0 0 0 0 0 0 + 0 0 0 0 #xc0 #xff #x01 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +#x06 0 0 0 0 0 0 #x10 #xfe #x21 #x1e 0 #x0c 0 0 0 +#x02 0 0 0 0 0 0 #x10 #x1e #x20 0 0 #x0c 0 0 0 +#x06 0 0 0 0 0 0 #x10 #xfe #x3f 0 0 0 0 #x03 0 +#x06 0 0 0 0 0 0 #x30 #xfe #x21 0 0 #x0c 0 0 0 +#x02 0 0 0 0 0 0 #x90 #x0e #x20 #x40 0 0 0 0 0 +#x04 0 0 0 0 0 0 0 0 #x20 0 0 0 0 0 0 + 0 0 0 0 0 0 0 #xc0 #xc1 #xff #x7f 0 0 0 0 0 + 0 0 0 0 0 0 0 #x10 #x40 #x30 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 #x0e #x20 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 #x04 #x7c 0 0 0 0 0 + 0 0 0 0 0 0 #xf2 #x07 #x80 #x7f 0 0 0 0 0 0 + 0 0 0 0 0 0 #xf2 #x1f 0 #x3f 0 0 0 0 0 0 + 0 0 0 #x03 0 0 #xa0 #x02 0 0 0 0 0 0 #xfe #x7f +#xdf 0 #xff #xff #xff #xff #xff #x1f #x40 0 0 0 0 0 0 0 + 0 0 0 0 0 #xe0 #xfd #x02 0 0 0 #x03 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 #x1c 0 0 0 #x1c 0 0 0 #x0c 0 0 0 #x0c 0 + 0 0 0 0 0 0 #x80 #x3f #x40 #xfe #x0f #x20 0 0 0 0 + 0 #x38 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 #x02 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 #x87 #x01 #x04 #x0e 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 #xff #x1f #xe2 #x07)) + +(define (unicode-char-width c) + (let ((ci (char->integer c))) + (cond + ;; hand-checked ranges from EastAsianWidth.txt + ((<= #x1100 ci #x115F) 2) ; Hangul + ((<= #x2E80 ci #x4DB5) 2) ; CJK + ((<= #x4E00 ci #xA4C6) 2) + ((<= #xAC00 ci #xD7A3) 2) ; Hangul + ((<= #xF900 ci #xFAD9) 2) ; CJK compat + ((<= #xFE10 ci #xFE6B) 2) + ((<= #xFF01 ci #xFF60) 2) + ((<= #xFFE0 ci #xFFE6) 2) + ((<= #x20000 ci #x30000) 2) + ;; non-spacing mark (Mn) ranges from UnicodeData.txt + ((<= #x0300 ci #x3029) + ;; inlined bit-vector-ref for portability + (let* ((i (- ci #x0300)) + (byte (quotient i 8)) + (off (remainder i 8))) + (if (zero? (bitwise-and (bytevector-u8-ref low-non-spacing-chars byte) + (arithmetic-shift 1 off))) + 1 + 0))) + ((<= #x302A ci #x302F) 0) + ((<= #x3099 ci #x309A) 0) + ((= #xFB1E ci) 0) + ((<= #xFE00 ci #xFE23) 0) + ((<= #x1D167 ci #x1D169) 0) + ((<= #x1D17B ci #x1D182) 0) + ((<= #x1D185 ci #x1D18B) 0) + ((<= #x1D1AA ci #x1D1AD) 0) + ((<= #xE0100 ci #xE01EF) 0) + (else 1)))) + +(define (unicode-string-width str . o) + (let ((start (if (pair? o) (car o) 0)) + (end (if (and (pair? o) (pair? (cdr o))) + (cadr o) + (string-length str)))) + (let lp1 ((i start) (width 0)) + (if (>= i end) + width + (let ((c (string-ref str i))) + (cond + ;; ANSI escapes + ((and (= 27 (char->integer c)) ; esc + (< (+ i 1) end) + (eqv? #\[ (string-ref str (+ i 1)))) + (let lp2 ((i (+ i 2))) + (cond ((>= i end) width) + ((memv (string-ref str i) '(#\m #\newline)) + (lp1 (+ i 1) width)) + (else (lp2 (+ i 1)))))) + ;; unicode characters + ((>= (char->integer c) #x80) + (lp1 (+ i 1) (+ width (unicode-char-width c)))) + ;; normal ASCII + (else (lp1 (+ i 1) (+ width 1))))))))) + (define (buffer-update-position! buf) (let ((pos (buffer-pos buf)) (gap (buffer-gap buf)) (str (buffer-string buf)) (end (string-length (buffer-string buf))) (width (buffer-width buf))) - ;; TODO: Support double and zero-width chars and ANSI escapes. (cond ((buffer-single-line? buf) ;; The "start" is the last left-most column of the buffer when @@ -199,17 +320,31 @@ ;; at a time. A beginning-of-line command will restore the ;; "start" to 0 immediately. ;; We assume no embedded newlines in this case. - (let ((start (buffer-start buf))) + (let ((start (buffer-start buf)) + (min-width (unicode-string-width str 0 (buffer-min buf)))) (cond ((> start pos) + ;; We've moved back before the current start - reset to pos. (buffer-start-set! buf pos)) - ((> (+ 1 (buffer-min buf) (- pos start)) (buffer-width buf)) - (buffer-start-set! buf (max 0 (- (+ 1 (buffer-min buf) pos) - (buffer-width buf)))))) - (buffer-col-set! buf (+ (buffer-min buf) (- pos (buffer-start buf)))))) + ((> (+ 1 min-width (unicode-string-width str start pos)) + (buffer-width buf)) + ;; The edited text is wider than the screen - scroll the + ;; start so that pos fits on the end. + ;; TODO: This isn't correct for wide characters, need to + ;; loop checking individual widths. + (buffer-start-set! + buf + (max 0 (- (+ 1 min-width (unicode-string-width str 0 pos)) + (buffer-width buf)))))) + ;; Compute the current column as the width of the prompt plus + ;; the width of the visible buffer from start to pos. + (let* ((vis-width (unicode-string-width str (buffer-start buf) pos)) + (col (+ min-width vis-width))) + (buffer-col-set! buf col)))) (else ;; Otherwise, in a multi-line editor we need to scan for ;; newlines to determine the current (relative) row and column. + ;; TODO: Handle ANSI escapes. (let lp ((i 0) (row 0) (col 0)) ;; update row/col (cond ((= i pos) (buffer-row-set! buf row) @@ -220,10 +355,11 @@ buf (if (and (zero? col) (> row 0)) (- row 1) row))) ((eqv? #\newline (string-ref str i)) (lp (+ i 1) (+ row 1) 0)) - ((= (+ col 1) width) - (lp (+ i 1) (+ row 1) 0)) (else - (lp (+ i 1) row (+ col 1))))))))) + (let ((off (unicode-char-width (string-ref str i)))) + (if (>= (+ col off) width) + (lp (+ i 1) (+ row 1) 0) + (lp (+ i 1) row (+ col off))))))))))) (define (buffer-clear buf out) ;; goto start of input