adding substring/width

This commit is contained in:
Alex Shinn 2020-07-03 14:45:36 +09:00
parent 39344bcaa0
commit 36c3471fa7
7 changed files with 141 additions and 35 deletions

View file

@ -17,7 +17,8 @@
;; computations ;; computations
fn with with! forked call-with-output fn with with! forked call-with-output
;; state variables ;; state variables
port row col width output writer string-width pad-char ellipsis port row col width output writer pad-char ellipsis
string-width substring/width
radix precision decimal-sep decimal-align sign-rule radix precision decimal-sep decimal-align sign-rule
comma-sep comma-rule word-separator? ambiguous-is-wide? comma-sep comma-rule word-separator? ambiguous-is-wide?
;; pretty ;; pretty
@ -26,7 +27,10 @@
columnar tabular wrapped wrapped/list wrapped/char columnar tabular wrapped wrapped/list wrapped/char
justified from-file line-numbers justified from-file line-numbers
;; unicode ;; unicode
as-unicode unicode-terminal-width unicode-terminal-width/wide terminal-aware
string-terminal-width string-terminal-width/wide
substring-terminal-width substring-terminal-width/wide
substring-terminal-width substring-terminal-width/wide
upcased downcased upcased downcased
;; color ;; color
as-red as-blue as-green as-cyan as-yellow as-red as-blue as-green as-cyan as-yellow

View file

@ -44,6 +44,8 @@
(pad-char #\space) (pad-char #\space)
(output output-default) (output output-default)
(string-width substring-length) (string-width substring-length)
(string-take-width string-take)
(substring/width substring)
(word-separator? char-whitespace?) (word-separator? char-whitespace?)
(ambiguous-is-wide? #f) (ambiguous-is-wide? #f)
(ellipsis "") (ellipsis "")

View file

@ -44,7 +44,8 @@
;; computations ;; computations
fn with with! forked call-with-output fn with with! forked call-with-output
;; state variables ;; state variables
port row col width output writer string-width pad-char ellipsis port row col width output writer pad-char ellipsis
string-width substring/width
radix precision decimal-sep decimal-align sign-rule radix precision decimal-sep decimal-align sign-rule
comma-sep comma-rule word-separator? ambiguous-is-wide? comma-sep comma-rule word-separator? ambiguous-is-wide?
) )

View file

@ -146,29 +146,29 @@
width width
(each-in-list ls) (each-in-list ls)
(lambda (str str-width diff) (lambda (str str-width diff)
(fn (ellipsis string-width col) (fn (ellipsis string-width substring/width)
(let* ((ell (if (char? ellipsis) (string ellipsis) (or ellipsis ""))) (let* ((ell (if (char? ellipsis) (string ellipsis) (or ellipsis "")))
(ell-len (string-width ell)) (ell-len (string-width ell))
(diff (- (+ str-width ell-len) width))) (diff (- (+ str-width ell-len) width)))
(each (if (negative? diff) (each (if (negative? diff)
nothing nothing
(substring str 0 (- width ell-len))) (substring/width str 0 (- width ell-len)))
ell)))))) ell))))))
;;> As \scheme{trimmed} but removes from the left. ;;> As \scheme{trimmed/right} but removes from the left.
(define (trimmed/left width . ls) (define (trimmed/left width . ls)
(trimmed/buffered (trimmed/buffered
width width
(each-in-list ls) (each-in-list ls)
(lambda (str str-width diff) (lambda (str str-width diff)
(fn (ellipsis string-width) (fn (ellipsis string-width substring/width)
(let* ((ell (if (char? ellipsis) (string ellipsis) (or ellipsis ""))) (let* ((ell (if (char? ellipsis) (string ellipsis) (or ellipsis "")))
(ell-len (string-width ell)) (ell-len (string-width ell))
(diff (- (+ str-width ell-len) width))) (diff (- (+ str-width ell-len) width)))
(each ell (each ell
(if (negative? diff) (if (negative? diff)
nothing nothing
(substring str diff)))))))) (substring/width str diff str-width))))))))
;;> An alias for \scheme{trimmed/left}. ;;> An alias for \scheme{trimmed/left}.
(define trimmed trimmed/left) (define trimmed trimmed/left)
@ -186,7 +186,7 @@
(ell-len (string-width ell)) (ell-len (string-width ell))
(diff (- (+ str-width ell-len ell-len) width)) (diff (- (+ str-width ell-len ell-len) width))
(left (quotient diff 2)) (left (quotient diff 2))
(right (- (string-width str) (quotient (+ diff 1) 2)))) (right (- str-width (quotient (+ diff 1) 2))))
(if (negative? diff) (if (negative? diff)
ell ell
(each ell (substring str left right) ell))))))) (each ell (substring str left right) ell)))))))
@ -197,7 +197,7 @@
;;> (e.g. \scheme{write-simple} on an infinite list). The nature of ;;> (e.g. \scheme{write-simple} on an infinite list). The nature of
;;> this procedure means only truncating on the right is meaningful. ;;> this procedure means only truncating on the right is meaningful.
(define (trimmed/lazy width . ls) (define (trimmed/lazy width . ls)
(fn ((orig-output output) string-width) (fn ((orig-output output) string-width substring/width)
(call-with-current-continuation (call-with-current-continuation
(lambda (return) (lambda (return)
(let ((chars-written 0) (let ((chars-written 0)
@ -207,7 +207,7 @@
(set! chars-written (+ chars-written len)) (set! chars-written (+ chars-written len))
(if (> chars-written width) (if (> chars-written width)
(let* ((end (max 0 (- len (- chars-written width)))) (let* ((end (max 0 (- len (- chars-written width))))
(s (substring str 0 end))) (s (substring/width str 0 end)))
(each (orig-output s) (each (orig-output s)
(with! (output orig-output)) (with! (output orig-output))
(fn () (return nothing)))) (fn () (return nothing))))

View file

@ -737,20 +737,35 @@ def | 6
(test "\x1B;[31mred\x1B;[34mblue\x1B;[31mred\x1B;[0m" (test "\x1B;[31mred\x1B;[34mblue\x1B;[31mred\x1B;[0m"
(show #f (as-red "red" (as-blue "blue") "red"))) (show #f (as-red "red" (as-blue "blue") "red")))
(test "\x1b;[31m1234567\x1b;[0m col: 7" (test "\x1b;[31m1234567\x1b;[0m col: 7"
(show #f (as-unicode (as-red "1234567") (fn (col) (each " col: " col))))) (show #f (terminal-aware (as-red "1234567") (fn (col) (each " col: " col)))))
;; unicode ;; unicode
(test "〜日本語〜" (test "〜日本語〜"
(show #f (with ((pad-char #\〜)) (padded/both 5 "日本語")))) (show #f (with ((pad-char #\〜)) (padded/both 5 "日本語"))))
(test "日本語" (test "日本語"
(show #f (as-unicode (with ((pad-char #\〜)) (padded/both 5 "日本語"))))) (show #f (terminal-aware (with ((pad-char #\〜)) (padded/both 5 "日本語")))))
(test "本語"
(show #f (trimmed 2 "日本語")))
(test "語"
(show #f (terminal-aware (trimmed 2 "日本語"))))
(test "日本"
(show #f (trimmed/right 2 "日本語")))
(test "日"
(show #f (terminal-aware (trimmed/right 2 "日本語"))))
(test "\x1B;[31m日\x1B;[46m\x1B;[31m\x1B;[0m"
(show #f (terminal-aware
(trimmed/right 2 (as-red "日本語" (on-cyan "!!!!"))))))
(test "日本語"
(show #f (trimmed/right 3 "日本語")))
(test "日"
(show #f (terminal-aware (trimmed/right 3 "日本語"))))
(test "日本語 col: 6" (test "日本語 col: 6"
(show #f (as-unicode "日本語" (fn (col) (each " col: " col))))) (show #f (terminal-aware "日本語" (fn (col) (each " col: " col)))))
(test "日本語ΠΜΕ col: 9" (test "日本語ΠΜΕ col: 9"
(show #f (as-unicode "日本語ΠΜΕ" (fn (col) (each " col: " col))))) (show #f (terminal-aware "日本語ΠΜΕ" (fn (col) (each " col: " col)))))
(test "日本語ΠΜΕ col: 12" (test "日本語ΠΜΕ col: 12"
(show #f (with ((ambiguous-is-wide? #t)) (show #f (with ((ambiguous-is-wide? #t))
(as-unicode "日本語ΠΜΕ" (terminal-aware "日本語ΠΜΕ"
(fn (col) (each " col: " col)))))) (fn (col) (each " col: " col))))))
;; from-file ;; from-file

View file

@ -14,18 +14,20 @@
(else (else
1)))) 1))))
(define (unicode-terminal-width/aux str start end ambiguous-is-wide?) (define (string-terminal-width/aux str start end ambiguous-is-wide?)
(let lp1 ((sc start) (width 0)) (let lp1 ((sc start) (width 0))
(if (string-cursor>=? sc end) (if (string-cursor>=? sc end)
width width
(let ((c (string-ref/cursor str sc))) (let ((c (string-ref/cursor str sc))
(sc2 (string-cursor-next str sc)))
(cond (cond
;; ANSI escapes ;; ANSI escapes
;; TODO: maintain a state machine so the escape can be ;; TODO: consider maintaining a state machine so the escape
;; spread across multiple strings ;; can be spread across multiple strings (not needed if
;; assuming all escapes come from (srfi 166 color)).
((and (= 27 (char->integer c)) ; esc ((and (= 27 (char->integer c)) ; esc
(string-cursor<? (string-cursor-next str sc) end) (string-cursor<? sc2 end)
(eqv? #\[ (string-ref/cursor str (string-cursor-next str sc)))) (eqv? #\[ (string-ref/cursor str sc2)))
(let lp2 ((sc (string-cursor-forward str sc 2))) (let lp2 ((sc (string-cursor-forward str sc 2)))
(cond ((string-cursor>=? sc end) width) (cond ((string-cursor>=? sc end) width)
((memv (string-ref/cursor str sc) '(#\m #\newline)) ((memv (string-ref/cursor str sc) '(#\m #\newline))
@ -33,40 +35,121 @@
(else (lp2 (string-cursor-next str sc)))))) (else (lp2 (string-cursor-next str sc))))))
;; fast-path ASCII ;; fast-path ASCII
((char<=? c #\~) ((char<=? c #\~)
(lp1 (string-cursor-next str sc) (+ width 1))) (lp1 sc2 (+ width 1)))
;; unicode ;; unicode
(else (else
(lp1 (string-cursor-next str sc) (lp1 sc2 (+ width (unicode-char-width c ambiguous-is-wide?)))
(+ width (unicode-char-width c ambiguous-is-wide?))))))))) ))))))
(define (cursor-arg str x) (define (cursor-arg str x)
(if (string-cursor? x) x (string-index->cursor str x))) (if (string-cursor? x) x (string-index->cursor str x)))
;; convert args to cursors internally for efficiency ;; convert args to cursors internally for efficiency
(define (unicode-terminal-width str . o) (define (string-terminal-width str . o)
(let ((start (cursor-arg str (if (pair? o) (let ((start (cursor-arg str (if (pair? o)
(car o) (car o)
(string-cursor-start str)))) (string-cursor-start str))))
(end (cursor-arg str (if (and (pair? o) (pair? (cdr o))) (end (cursor-arg str (if (and (pair? o) (pair? (cdr o)))
(cadr o) (cadr o)
(string-cursor-end str))))) (string-cursor-end str)))))
(unicode-terminal-width/aux str start end #f))) (string-terminal-width/aux str start end #f)))
(define (unicode-terminal-width/wide str . o) (define (string-terminal-width/wide str . o)
(let ((start (cursor-arg str (if (pair? o) (let ((start (cursor-arg str (if (pair? o)
(car o) (car o)
(string-cursor-start str)))) (string-cursor-start str))))
(end (cursor-arg str (if (and (pair? o) (pair? (cdr o))) (end (cursor-arg str (if (and (pair? o) (pair? (cdr o)))
(cadr o) (cadr o)
(string-cursor-end str))))) (string-cursor-end str)))))
(unicode-terminal-width/aux str start end #t))) (string-terminal-width/aux str start end #t)))
(define (as-unicode . args) (define (substring-terminal-width/aux str lo hi ambiguous-is-wide?)
(let ((start (string-cursor-start str))
(end (string-cursor-end str)))
(let lp1 ((sc start)
(from #f)
(width 0)
(escapes '()))
;; need to pick up trailing ansi escapes
(define (finish res sc)
(let ((res (if (pair? escapes)
(string-concatenate-reverse (cons res escapes))
res))
(end-1 (string-cursor-prev str end)))
(let lp ((sc sc) (right-escapes '()))
(define (finish2 right-escapes)
(if (pair? right-escapes)
(string-append res
(string-concatenate-reverse
right-escapes))
res))
(if (string-cursor>=? sc end-1)
(finish2 right-escapes)
(let ((c (string-ref/cursor str sc))
(sc2 (string-cursor-next str sc)))
(if (and (= 27 (char->integer c))
(eqv? #\[ (string-ref/cursor str sc2)))
(let lp2 ((sc2 (string-cursor-next str sc2)))
(if (string-cursor>=? sc2 end)
(finish2 right-escapes)
(let ((c2 (string-ref/cursor str sc2))
(sc3 (string-cursor-next str sc2)))
(if (eqv? #\m c2)
(lp sc3
(cons (substring/cursors str sc sc3)
right-escapes))
(lp2 sc3)))))
(lp sc2 right-escapes)))))))
(if (string-cursor>=? sc end)
(if from (substring/cursors str from end) str)
(let ((c (string-ref/cursor str sc)))
(cond
((and (= 27 (char->integer c)) ; esc
(string-cursor<? (string-cursor-next str sc) end)
(eqv? #\[ (string-ref/cursor
str
(string-cursor-next str sc))))
(let lp2 ((sc2 (string-cursor-forward str sc 2)))
(cond ((string-cursor>=? sc2 end)
(lp1 sc2 from width escapes))
((memv (string-ref/cursor str sc2) '(#\m #\newline))
(let* ((sc3 (string-cursor-next str sc2))
(escapes
(if (not from)
(cons (substring/cursors str sc sc3)
escapes)
escapes)))
(lp1 sc3 from width escapes)))
(else (lp2 (string-cursor-next str sc2))))))
(else
(let ((width2 (+ width
(unicode-char-width c ambiguous-is-wide?))))
(cond
((> width2 hi)
(finish (substring/cursors str (or from start) sc) sc))
((and (not from) (> width2 lo))
(lp1 (string-cursor-next str sc) sc width2 escapes))
(else
(lp1 (string-cursor-next str sc) from width2 escapes)
))))))))))
(define (substring-terminal-width str lo hi)
(substring-terminal-width/aux str lo hi #f))
(define (substring-terminal-width/wide str lo hi)
(substring-terminal-width/aux str lo hi #t))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (terminal-aware . args)
(fn (ambiguous-is-wide?) (fn (ambiguous-is-wide?)
(with ((string-width (if ambiguous-is-wide? (with ((string-width (if ambiguous-is-wide?
unicode-terminal-width/wide string-terminal-width/wide
unicode-terminal-width))) string-terminal-width))
(substring/width (if ambiguous-is-wide?
substring-terminal-width/wide
substring-terminal-width)))
(each-in-list args)))) (each-in-list args))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View file

@ -1,12 +1,13 @@
(define-library (srfi 166 unicode) (define-library (srfi 166 unicode)
(import (scheme base) (import (scheme base) (scheme write)
(scheme char) (scheme char)
(srfi 130) (srfi 130)
(srfi 151) (srfi 151)
(srfi 166 base)) (srfi 166 base))
(export as-unicode (export terminal-aware
unicode-terminal-width unicode-terminal-width/wide string-terminal-width string-terminal-width/wide
substring-terminal-width substring-terminal-width/wide
upcased downcased) upcased downcased)
(include "width.scm" (include "width.scm"
"unicode.scm")) "unicode.scm"))