From 36c3471fa7f0f0b25761bb377328ae4f544c05e0 Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Fri, 3 Jul 2020 14:45:36 +0900 Subject: [PATCH] adding substring/width --- lib/srfi/166.sld | 8 ++- lib/srfi/166/base.scm | 2 + lib/srfi/166/base.sld | 3 +- lib/srfi/166/show.scm | 16 +++--- lib/srfi/166/test.sld | 25 +++++++-- lib/srfi/166/unicode.scm | 115 +++++++++++++++++++++++++++++++++------ lib/srfi/166/unicode.sld | 7 ++- 7 files changed, 141 insertions(+), 35 deletions(-) diff --git a/lib/srfi/166.sld b/lib/srfi/166.sld index c29497a6..43930621 100644 --- a/lib/srfi/166.sld +++ b/lib/srfi/166.sld @@ -17,7 +17,8 @@ ;; computations fn with with! forked call-with-output ;; state variables - port row col width output writer string-width pad-char ellipsis + port row col width output writer pad-char ellipsis + string-width substring/width radix precision decimal-sep decimal-align sign-rule comma-sep comma-rule word-separator? ambiguous-is-wide? ;; pretty @@ -26,7 +27,10 @@ columnar tabular wrapped wrapped/list wrapped/char justified from-file line-numbers ;; unicode - as-unicode unicode-terminal-width unicode-terminal-width/wide + terminal-aware + string-terminal-width string-terminal-width/wide + substring-terminal-width substring-terminal-width/wide + substring-terminal-width substring-terminal-width/wide upcased downcased ;; color as-red as-blue as-green as-cyan as-yellow diff --git a/lib/srfi/166/base.scm b/lib/srfi/166/base.scm index 0010d012..3db2307a 100644 --- a/lib/srfi/166/base.scm +++ b/lib/srfi/166/base.scm @@ -44,6 +44,8 @@ (pad-char #\space) (output output-default) (string-width substring-length) + (string-take-width string-take) + (substring/width substring) (word-separator? char-whitespace?) (ambiguous-is-wide? #f) (ellipsis "") diff --git a/lib/srfi/166/base.sld b/lib/srfi/166/base.sld index 85d3c910..b3293e8b 100644 --- a/lib/srfi/166/base.sld +++ b/lib/srfi/166/base.sld @@ -44,7 +44,8 @@ ;; computations fn with with! forked call-with-output ;; state variables - port row col width output writer string-width pad-char ellipsis + port row col width output writer pad-char ellipsis + string-width substring/width radix precision decimal-sep decimal-align sign-rule comma-sep comma-rule word-separator? ambiguous-is-wide? ) diff --git a/lib/srfi/166/show.scm b/lib/srfi/166/show.scm index c28e442a..479ad35b 100644 --- a/lib/srfi/166/show.scm +++ b/lib/srfi/166/show.scm @@ -146,29 +146,29 @@ width (each-in-list ls) (lambda (str str-width diff) - (fn (ellipsis string-width col) + (fn (ellipsis string-width substring/width) (let* ((ell (if (char? ellipsis) (string ellipsis) (or ellipsis ""))) (ell-len (string-width ell)) (diff (- (+ str-width ell-len) width))) (each (if (negative? diff) nothing - (substring str 0 (- width ell-len))) + (substring/width str 0 (- width ell-len))) ell)))))) -;;> As \scheme{trimmed} but removes from the left. +;;> As \scheme{trimmed/right} but removes from the left. (define (trimmed/left width . ls) (trimmed/buffered width (each-in-list ls) (lambda (str str-width diff) - (fn (ellipsis string-width) + (fn (ellipsis string-width substring/width) (let* ((ell (if (char? ellipsis) (string ellipsis) (or ellipsis ""))) (ell-len (string-width ell)) (diff (- (+ str-width ell-len) width))) (each ell (if (negative? diff) nothing - (substring str diff)))))))) + (substring/width str diff str-width)))))))) ;;> An alias for \scheme{trimmed/left}. (define trimmed trimmed/left) @@ -186,7 +186,7 @@ (ell-len (string-width ell)) (diff (- (+ str-width ell-len ell-len) width)) (left (quotient diff 2)) - (right (- (string-width str) (quotient (+ diff 1) 2)))) + (right (- str-width (quotient (+ diff 1) 2)))) (if (negative? diff) ell (each ell (substring str left right) ell))))))) @@ -197,7 +197,7 @@ ;;> (e.g. \scheme{write-simple} on an infinite list). The nature of ;;> this procedure means only truncating on the right is meaningful. (define (trimmed/lazy width . ls) - (fn ((orig-output output) string-width) + (fn ((orig-output output) string-width substring/width) (call-with-current-continuation (lambda (return) (let ((chars-written 0) @@ -207,7 +207,7 @@ (set! chars-written (+ chars-written len)) (if (> chars-written width) (let* ((end (max 0 (- len (- chars-written width)))) - (s (substring str 0 end))) + (s (substring/width str 0 end))) (each (orig-output s) (with! (output orig-output)) (fn () (return nothing)))) diff --git a/lib/srfi/166/test.sld b/lib/srfi/166/test.sld index fc09794e..f45eacac 100644 --- a/lib/srfi/166/test.sld +++ b/lib/srfi/166/test.sld @@ -737,20 +737,35 @@ def | 6 (test "\x1B;[31mred\x1B;[34mblue\x1B;[31mred\x1B;[0m" (show #f (as-red "red" (as-blue "blue") "red"))) (test "\x1b;[31m1234567\x1b;[0m col: 7" - (show #f (as-unicode (as-red "1234567") (fn (col) (each " col: " col))))) + (show #f (terminal-aware (as-red "1234567") (fn (col) (each " col: " col))))) ;; unicode (test "〜日本語〜" (show #f (with ((pad-char #\〜)) (padded/both 5 "日本語")))) (test "日本語" - (show #f (as-unicode (with ((pad-char #\〜)) (padded/both 5 "日本語"))))) + (show #f (terminal-aware (with ((pad-char #\〜)) (padded/both 5 "日本語"))))) + (test "本語" + (show #f (trimmed 2 "日本語"))) + (test "語" + (show #f (terminal-aware (trimmed 2 "日本語")))) + (test "日本" + (show #f (trimmed/right 2 "日本語"))) + (test "日" + (show #f (terminal-aware (trimmed/right 2 "日本語")))) + (test "\x1B;[31m日\x1B;[46m\x1B;[31m\x1B;[0m" + (show #f (terminal-aware + (trimmed/right 2 (as-red "日本語" (on-cyan "!!!!")))))) + (test "日本語" + (show #f (trimmed/right 3 "日本語"))) + (test "日" + (show #f (terminal-aware (trimmed/right 3 "日本語")))) (test "日本語 col: 6" - (show #f (as-unicode "日本語" (fn (col) (each " col: " col))))) + (show #f (terminal-aware "日本語" (fn (col) (each " col: " col))))) (test "日本語ΠΜΕ col: 9" - (show #f (as-unicode "日本語ΠΜΕ" (fn (col) (each " col: " col))))) + (show #f (terminal-aware "日本語ΠΜΕ" (fn (col) (each " col: " col))))) (test "日本語ΠΜΕ col: 12" (show #f (with ((ambiguous-is-wide? #t)) - (as-unicode "日本語ΠΜΕ" + (terminal-aware "日本語ΠΜΕ" (fn (col) (each " col: " col)))))) ;; from-file diff --git a/lib/srfi/166/unicode.scm b/lib/srfi/166/unicode.scm index 1ef761f1..43db2335 100644 --- a/lib/srfi/166/unicode.scm +++ b/lib/srfi/166/unicode.scm @@ -14,18 +14,20 @@ (else 1)))) -(define (unicode-terminal-width/aux str start end ambiguous-is-wide?) +(define (string-terminal-width/aux str start end ambiguous-is-wide?) (let lp1 ((sc start) (width 0)) (if (string-cursor>=? sc end) width - (let ((c (string-ref/cursor str sc))) + (let ((c (string-ref/cursor str sc)) + (sc2 (string-cursor-next str sc))) (cond ;; ANSI escapes - ;; TODO: maintain a state machine so the escape can be - ;; spread across multiple strings + ;; TODO: consider maintaining a state machine so the escape + ;; can be spread across multiple strings (not needed if + ;; assuming all escapes come from (srfi 166 color)). ((and (= 27 (char->integer c)) ; esc - (string-cursor=? sc end) width) ((memv (string-ref/cursor str sc) '(#\m #\newline)) @@ -33,40 +35,121 @@ (else (lp2 (string-cursor-next str sc)))))) ;; fast-path ASCII ((char<=? c #\~) - (lp1 (string-cursor-next str sc) (+ width 1))) + (lp1 sc2 (+ width 1))) ;; unicode (else - (lp1 (string-cursor-next str sc) - (+ width (unicode-char-width c ambiguous-is-wide?))))))))) + (lp1 sc2 (+ width (unicode-char-width c ambiguous-is-wide?))) + )))))) (define (cursor-arg str x) (if (string-cursor? x) x (string-index->cursor str x))) ;; convert args to cursors internally for efficiency -(define (unicode-terminal-width str . o) +(define (string-terminal-width str . o) (let ((start (cursor-arg str (if (pair? o) (car o) (string-cursor-start str)))) (end (cursor-arg str (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-cursor-end str))))) - (unicode-terminal-width/aux str start end #f))) + (string-terminal-width/aux str start end #f))) -(define (unicode-terminal-width/wide str . o) +(define (string-terminal-width/wide str . o) (let ((start (cursor-arg str (if (pair? o) (car o) (string-cursor-start str)))) (end (cursor-arg str (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-cursor-end str))))) - (unicode-terminal-width/aux str start end #t))) + (string-terminal-width/aux str start end #t))) -(define (as-unicode . args) +(define (substring-terminal-width/aux str lo hi ambiguous-is-wide?) + (let ((start (string-cursor-start str)) + (end (string-cursor-end str))) + (let lp1 ((sc start) + (from #f) + (width 0) + (escapes '())) + ;; need to pick up trailing ansi escapes + (define (finish res sc) + (let ((res (if (pair? escapes) + (string-concatenate-reverse (cons res escapes)) + res)) + (end-1 (string-cursor-prev str end))) + (let lp ((sc sc) (right-escapes '())) + (define (finish2 right-escapes) + (if (pair? right-escapes) + (string-append res + (string-concatenate-reverse + right-escapes)) + res)) + (if (string-cursor>=? sc end-1) + (finish2 right-escapes) + (let ((c (string-ref/cursor str sc)) + (sc2 (string-cursor-next str sc))) + (if (and (= 27 (char->integer c)) + (eqv? #\[ (string-ref/cursor str sc2))) + (let lp2 ((sc2 (string-cursor-next str sc2))) + (if (string-cursor>=? sc2 end) + (finish2 right-escapes) + (let ((c2 (string-ref/cursor str sc2)) + (sc3 (string-cursor-next str sc2))) + (if (eqv? #\m c2) + (lp sc3 + (cons (substring/cursors str sc sc3) + right-escapes)) + (lp2 sc3))))) + (lp sc2 right-escapes))))))) + (if (string-cursor>=? sc end) + (if from (substring/cursors str from end) str) + (let ((c (string-ref/cursor str sc))) + (cond + ((and (= 27 (char->integer c)) ; esc + (string-cursor=? sc2 end) + (lp1 sc2 from width escapes)) + ((memv (string-ref/cursor str sc2) '(#\m #\newline)) + (let* ((sc3 (string-cursor-next str sc2)) + (escapes + (if (not from) + (cons (substring/cursors str sc sc3) + escapes) + escapes))) + (lp1 sc3 from width escapes))) + (else (lp2 (string-cursor-next str sc2)))))) + (else + (let ((width2 (+ width + (unicode-char-width c ambiguous-is-wide?)))) + (cond + ((> width2 hi) + (finish (substring/cursors str (or from start) sc) sc)) + ((and (not from) (> width2 lo)) + (lp1 (string-cursor-next str sc) sc width2 escapes)) + (else + (lp1 (string-cursor-next str sc) from width2 escapes) + )))))))))) + +(define (substring-terminal-width str lo hi) + (substring-terminal-width/aux str lo hi #f)) + +(define (substring-terminal-width/wide str lo hi) + (substring-terminal-width/aux str lo hi #t)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define (terminal-aware . args) (fn (ambiguous-is-wide?) (with ((string-width (if ambiguous-is-wide? - unicode-terminal-width/wide - unicode-terminal-width))) + string-terminal-width/wide + string-terminal-width)) + (substring/width (if ambiguous-is-wide? + substring-terminal-width/wide + substring-terminal-width))) (each-in-list args)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/lib/srfi/166/unicode.sld b/lib/srfi/166/unicode.sld index b1aea83d..17284c80 100644 --- a/lib/srfi/166/unicode.sld +++ b/lib/srfi/166/unicode.sld @@ -1,12 +1,13 @@ (define-library (srfi 166 unicode) - (import (scheme base) + (import (scheme base) (scheme write) (scheme char) (srfi 130) (srfi 151) (srfi 166 base)) - (export as-unicode - unicode-terminal-width unicode-terminal-width/wide + (export terminal-aware + string-terminal-width string-terminal-width/wide + substring-terminal-width substring-terminal-width/wide upcased downcased) (include "width.scm" "unicode.scm"))