adding substring/width

This commit is contained in:
Alex Shinn 2020-07-03 14:45:36 +09:00
parent 39344bcaa0
commit 36c3471fa7
7 changed files with 141 additions and 35 deletions

View file

@ -17,7 +17,8 @@
;; computations
fn with with! forked call-with-output
;; state variables
port row col width output writer string-width pad-char ellipsis
port row col width output writer pad-char ellipsis
string-width substring/width
radix precision decimal-sep decimal-align sign-rule
comma-sep comma-rule word-separator? ambiguous-is-wide?
;; pretty
@ -26,7 +27,10 @@
columnar tabular wrapped wrapped/list wrapped/char
justified from-file line-numbers
;; unicode
as-unicode unicode-terminal-width unicode-terminal-width/wide
terminal-aware
string-terminal-width string-terminal-width/wide
substring-terminal-width substring-terminal-width/wide
substring-terminal-width substring-terminal-width/wide
upcased downcased
;; color
as-red as-blue as-green as-cyan as-yellow

View file

@ -44,6 +44,8 @@
(pad-char #\space)
(output output-default)
(string-width substring-length)
(string-take-width string-take)
(substring/width substring)
(word-separator? char-whitespace?)
(ambiguous-is-wide? #f)
(ellipsis "")

View file

@ -44,7 +44,8 @@
;; computations
fn with with! forked call-with-output
;; state variables
port row col width output writer string-width pad-char ellipsis
port row col width output writer pad-char ellipsis
string-width substring/width
radix precision decimal-sep decimal-align sign-rule
comma-sep comma-rule word-separator? ambiguous-is-wide?
)

View file

@ -146,29 +146,29 @@
width
(each-in-list ls)
(lambda (str str-width diff)
(fn (ellipsis string-width col)
(fn (ellipsis string-width substring/width)
(let* ((ell (if (char? ellipsis) (string ellipsis) (or ellipsis "")))
(ell-len (string-width ell))
(diff (- (+ str-width ell-len) width)))
(each (if (negative? diff)
nothing
(substring str 0 (- width ell-len)))
(substring/width str 0 (- width ell-len)))
ell))))))
;;> As \scheme{trimmed} but removes from the left.
;;> As \scheme{trimmed/right} but removes from the left.
(define (trimmed/left width . ls)
(trimmed/buffered
width
(each-in-list ls)
(lambda (str str-width diff)
(fn (ellipsis string-width)
(fn (ellipsis string-width substring/width)
(let* ((ell (if (char? ellipsis) (string ellipsis) (or ellipsis "")))
(ell-len (string-width ell))
(diff (- (+ str-width ell-len) width)))
(each ell
(if (negative? diff)
nothing
(substring str diff))))))))
(substring/width str diff str-width))))))))
;;> An alias for \scheme{trimmed/left}.
(define trimmed trimmed/left)
@ -186,7 +186,7 @@
(ell-len (string-width ell))
(diff (- (+ str-width ell-len ell-len) width))
(left (quotient diff 2))
(right (- (string-width str) (quotient (+ diff 1) 2))))
(right (- str-width (quotient (+ diff 1) 2))))
(if (negative? diff)
ell
(each ell (substring str left right) ell)))))))
@ -197,7 +197,7 @@
;;> (e.g. \scheme{write-simple} on an infinite list). The nature of
;;> this procedure means only truncating on the right is meaningful.
(define (trimmed/lazy width . ls)
(fn ((orig-output output) string-width)
(fn ((orig-output output) string-width substring/width)
(call-with-current-continuation
(lambda (return)
(let ((chars-written 0)
@ -207,7 +207,7 @@
(set! chars-written (+ chars-written len))
(if (> chars-written width)
(let* ((end (max 0 (- len (- chars-written width))))
(s (substring str 0 end)))
(s (substring/width str 0 end)))
(each (orig-output s)
(with! (output orig-output))
(fn () (return nothing))))

View file

@ -737,20 +737,35 @@ def | 6
(test "\x1B;[31mred\x1B;[34mblue\x1B;[31mred\x1B;[0m"
(show #f (as-red "red" (as-blue "blue") "red")))
(test "\x1b;[31m1234567\x1b;[0m col: 7"
(show #f (as-unicode (as-red "1234567") (fn (col) (each " col: " col)))))
(show #f (terminal-aware (as-red "1234567") (fn (col) (each " col: " col)))))
;; unicode
(test "〜日本語〜"
(show #f (with ((pad-char #\〜)) (padded/both 5 "日本語"))))
(test "日本語"
(show #f (as-unicode (with ((pad-char #\〜)) (padded/both 5 "日本語")))))
(show #f (terminal-aware (with ((pad-char #\〜)) (padded/both 5 "日本語")))))
(test "本語"
(show #f (trimmed 2 "日本語")))
(test "語"
(show #f (terminal-aware (trimmed 2 "日本語"))))
(test "日本"
(show #f (trimmed/right 2 "日本語")))
(test "日"
(show #f (terminal-aware (trimmed/right 2 "日本語"))))
(test "\x1B;[31m日\x1B;[46m\x1B;[31m\x1B;[0m"
(show #f (terminal-aware
(trimmed/right 2 (as-red "日本語" (on-cyan "!!!!"))))))
(test "日本語"
(show #f (trimmed/right 3 "日本語")))
(test "日"
(show #f (terminal-aware (trimmed/right 3 "日本語"))))
(test "日本語 col: 6"
(show #f (as-unicode "日本語" (fn (col) (each " col: " col)))))
(show #f (terminal-aware "日本語" (fn (col) (each " col: " col)))))
(test "日本語ΠΜΕ col: 9"
(show #f (as-unicode "日本語ΠΜΕ" (fn (col) (each " col: " col)))))
(show #f (terminal-aware "日本語ΠΜΕ" (fn (col) (each " col: " col)))))
(test "日本語ΠΜΕ col: 12"
(show #f (with ((ambiguous-is-wide? #t))
(as-unicode "日本語ΠΜΕ"
(terminal-aware "日本語ΠΜΕ"
(fn (col) (each " col: " col))))))
;; from-file

View file

@ -14,18 +14,20 @@
(else
1))))
(define (unicode-terminal-width/aux str start end ambiguous-is-wide?)
(define (string-terminal-width/aux str start end ambiguous-is-wide?)
(let lp1 ((sc start) (width 0))
(if (string-cursor>=? sc end)
width
(let ((c (string-ref/cursor str sc)))
(let ((c (string-ref/cursor str sc))
(sc2 (string-cursor-next str sc)))
(cond
;; ANSI escapes
;; TODO: maintain a state machine so the escape can be
;; spread across multiple strings
;; TODO: consider maintaining a state machine so the escape
;; can be spread across multiple strings (not needed if
;; assuming all escapes come from (srfi 166 color)).
((and (= 27 (char->integer c)) ; esc
(string-cursor<? (string-cursor-next str sc) end)
(eqv? #\[ (string-ref/cursor str (string-cursor-next str sc))))
(string-cursor<? sc2 end)
(eqv? #\[ (string-ref/cursor str sc2)))
(let lp2 ((sc (string-cursor-forward str sc 2)))
(cond ((string-cursor>=? sc end) width)
((memv (string-ref/cursor str sc) '(#\m #\newline))
@ -33,40 +35,121 @@
(else (lp2 (string-cursor-next str sc))))))
;; fast-path ASCII
((char<=? c #\~)
(lp1 (string-cursor-next str sc) (+ width 1)))
(lp1 sc2 (+ width 1)))
;; unicode
(else
(lp1 (string-cursor-next str sc)
(+ width (unicode-char-width c ambiguous-is-wide?)))))))))
(lp1 sc2 (+ width (unicode-char-width c ambiguous-is-wide?)))
))))))
(define (cursor-arg str x)
(if (string-cursor? x) x (string-index->cursor str x)))
;; convert args to cursors internally for efficiency
(define (unicode-terminal-width str . o)
(define (string-terminal-width str . o)
(let ((start (cursor-arg str (if (pair? o)
(car o)
(string-cursor-start str))))
(end (cursor-arg str (if (and (pair? o) (pair? (cdr o)))
(cadr o)
(string-cursor-end str)))))
(unicode-terminal-width/aux str start end #f)))
(string-terminal-width/aux str start end #f)))
(define (unicode-terminal-width/wide str . o)
(define (string-terminal-width/wide str . o)
(let ((start (cursor-arg str (if (pair? o)
(car o)
(string-cursor-start str))))
(end (cursor-arg str (if (and (pair? o) (pair? (cdr o)))
(cadr o)
(string-cursor-end str)))))
(unicode-terminal-width/aux str start end #t)))
(string-terminal-width/aux str start end #t)))
(define (as-unicode . args)
(define (substring-terminal-width/aux str lo hi ambiguous-is-wide?)
(let ((start (string-cursor-start str))
(end (string-cursor-end str)))
(let lp1 ((sc start)
(from #f)
(width 0)
(escapes '()))
;; need to pick up trailing ansi escapes
(define (finish res sc)
(let ((res (if (pair? escapes)
(string-concatenate-reverse (cons res escapes))
res))
(end-1 (string-cursor-prev str end)))
(let lp ((sc sc) (right-escapes '()))
(define (finish2 right-escapes)
(if (pair? right-escapes)
(string-append res
(string-concatenate-reverse
right-escapes))
res))
(if (string-cursor>=? sc end-1)
(finish2 right-escapes)
(let ((c (string-ref/cursor str sc))
(sc2 (string-cursor-next str sc)))
(if (and (= 27 (char->integer c))
(eqv? #\[ (string-ref/cursor str sc2)))
(let lp2 ((sc2 (string-cursor-next str sc2)))
(if (string-cursor>=? sc2 end)
(finish2 right-escapes)
(let ((c2 (string-ref/cursor str sc2))
(sc3 (string-cursor-next str sc2)))
(if (eqv? #\m c2)
(lp sc3
(cons (substring/cursors str sc sc3)
right-escapes))
(lp2 sc3)))))
(lp sc2 right-escapes)))))))
(if (string-cursor>=? sc end)
(if from (substring/cursors str from end) str)
(let ((c (string-ref/cursor str sc)))
(cond
((and (= 27 (char->integer c)) ; esc
(string-cursor<? (string-cursor-next str sc) end)
(eqv? #\[ (string-ref/cursor
str
(string-cursor-next str sc))))
(let lp2 ((sc2 (string-cursor-forward str sc 2)))
(cond ((string-cursor>=? sc2 end)
(lp1 sc2 from width escapes))
((memv (string-ref/cursor str sc2) '(#\m #\newline))
(let* ((sc3 (string-cursor-next str sc2))
(escapes
(if (not from)
(cons (substring/cursors str sc sc3)
escapes)
escapes)))
(lp1 sc3 from width escapes)))
(else (lp2 (string-cursor-next str sc2))))))
(else
(let ((width2 (+ width
(unicode-char-width c ambiguous-is-wide?))))
(cond
((> width2 hi)
(finish (substring/cursors str (or from start) sc) sc))
((and (not from) (> width2 lo))
(lp1 (string-cursor-next str sc) sc width2 escapes))
(else
(lp1 (string-cursor-next str sc) from width2 escapes)
))))))))))
(define (substring-terminal-width str lo hi)
(substring-terminal-width/aux str lo hi #f))
(define (substring-terminal-width/wide str lo hi)
(substring-terminal-width/aux str lo hi #t))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (terminal-aware . args)
(fn (ambiguous-is-wide?)
(with ((string-width (if ambiguous-is-wide?
unicode-terminal-width/wide
unicode-terminal-width)))
string-terminal-width/wide
string-terminal-width))
(substring/width (if ambiguous-is-wide?
substring-terminal-width/wide
substring-terminal-width)))
(each-in-list args))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View file

@ -1,12 +1,13 @@
(define-library (srfi 166 unicode)
(import (scheme base)
(import (scheme base) (scheme write)
(scheme char)
(srfi 130)
(srfi 151)
(srfi 166 base))
(export as-unicode
unicode-terminal-width unicode-terminal-width/wide
(export terminal-aware
string-terminal-width string-terminal-width/wide
substring-terminal-width substring-terminal-width/wide
upcased downcased)
(include "width.scm"
"unicode.scm"))