mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-18 21:29:19 +02:00
adding unicode-string-width/wide
This commit is contained in:
parent
6f1cf6588f
commit
1164ecf9b7
13 changed files with 166 additions and 152 deletions
7
Makefile
7
Makefile
|
@ -184,12 +184,19 @@ data/%.txt:
|
|||
build-lib/chibi/char-set/derived.scm: data/UnicodeData.txt data/DerivedCoreProperties.txt chibi-scheme$(EXE)
|
||||
$(CHIBI) tools/extract-unicode-props.scm --default > $@
|
||||
|
||||
build-lib/chibi/char-set/width.scm: data/UnicodeData.txt data/EastAsianWidth.txt chibi-scheme$(EXE)
|
||||
$(CHIBI) tools/extract-unicode-props.scm Zero-Width=Mn > $@
|
||||
$(CHIBI) tools/extract-unicode-props.scm -d data/EastAsianWidth.txt Full-Width=F@1,W@1 Ambiguous-Width=A@1 >> $@
|
||||
|
||||
lib/chibi/char-set/ascii.scm: build-lib/chibi/char-set/derived.scm chibi-scheme$(EXE)
|
||||
$(CHIBI) -Abuild-lib tools/optimize-char-sets.scm --ascii chibi.char-set.compute > $@
|
||||
|
||||
lib/chibi/char-set/full.scm: build-lib/chibi/char-set/derived.scm chibi-scheme$(EXE)
|
||||
$(CHIBI) -Abuild-lib tools/optimize-char-sets.scm chibi.char-set.compute > $@
|
||||
|
||||
lib/chibi/show/width.scm: build-lib/chibi/char-set/width.scm chibi-scheme$(EXE)
|
||||
$(CHIBI) -Abuild-lib tools/optimize-char-sets.scm --predicate chibi.char-set.width > $@
|
||||
|
||||
lib/scheme/char/case-offsets.scm: data/UnicodeData.txt chibi-scheme$(EXE) all-libs
|
||||
$(CHIBI) tools/extract-case-offsets.scm $< > $@
|
||||
|
||||
|
|
|
@ -168,3 +168,23 @@
|
|||
,(iset-bits iset)
|
||||
,(iset->code (iset-left iset))
|
||||
,(iset->code (iset-right iset)))))
|
||||
|
||||
;; uses only if, <, <=, >, and SRFI 151 bit-set?
|
||||
(define (iset->code/lambda iset)
|
||||
(define (code iset)
|
||||
(and iset
|
||||
(if (and (not (iset-left iset))
|
||||
(not (iset-right iset))
|
||||
(not (iset-bits iset)))
|
||||
`(<= ,(iset-start iset) n ,(iset-end iset))
|
||||
`(if (< n ,(iset-start iset))
|
||||
,(code (iset-left iset))
|
||||
,(if (and (not (iset-right iset)) (not (iset-bits iset)))
|
||||
`(<= n ,(iset-end iset))
|
||||
`(if (> n ,(iset-end iset))
|
||||
,(code (iset-right iset))
|
||||
,(if (iset-bits iset)
|
||||
`(bit-set? (- n ,(iset-start iset))
|
||||
,(iset-bits iset))
|
||||
#t)))))))
|
||||
`(lambda (n) ,(code iset)))
|
||||
|
|
|
@ -17,4 +17,5 @@
|
|||
(bitwise-and (%mask size) (arithmetic-shift n (- position)))))))
|
||||
(include "optimize.scm")
|
||||
(export
|
||||
iset-balance iset-balance! iset-optimize iset-optimize! iset->code))
|
||||
iset-balance iset-balance! iset-optimize iset-optimize!
|
||||
iset->code iset->code/lambda))
|
||||
|
|
|
@ -742,7 +742,13 @@ def | 6
|
|||
(test "日本語"
|
||||
(show #f (as-unicode (with ((pad-char #\〜)) (padded/both 5 "日本語")))))
|
||||
(test "日本語 col: 6"
|
||||
(show #f (as-unicode "日本語" (fn ((col)) (each " col: " col)))))
|
||||
(show #f (as-unicode "日本語" (fn (col) (each " col: " col)))))
|
||||
(test "日本語ΠΜΕ col: 9"
|
||||
(show #f (as-unicode "日本語ΠΜΕ" (fn (col) (each " col: " col)))))
|
||||
(test "日本語ΠΜΕ col: 12"
|
||||
(show #f (with ((ambiguous-is-wide? #t))
|
||||
(as-unicode "日本語ΠΜΕ"
|
||||
(fn (col) (each " col: " col))))))
|
||||
|
||||
;; from-file
|
||||
;; for reference, filesystem-test relies on creating files under /tmp
|
||||
|
|
|
@ -1,134 +1,70 @@
|
|||
;; unicode.scm -- Unicode character width and ANSI escape support
|
||||
;; Copyright (c) 2006-2017 Alex Shinn. All rights reserved.
|
||||
;; Copyright (c) 2006-2020 Alex Shinn. All rights reserved.
|
||||
;; BSD-style license: http://synthcode.com/license.txt
|
||||
|
||||
;; a condensed non-spacing mark range from UnicodeData.txt (chars with
|
||||
;; the Mn property) - generated partially by hand, should automate
|
||||
;; this better
|
||||
|
||||
(define low-non-spacing-chars
|
||||
(bytevector
|
||||
#xff #xff #xff #xff #xff #xff #xff #xff #xff #xff #xff #xff #xff #xff 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
#x78 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 #xfe #xff #xff #xff #xff #xff #x1f 0 0 0 0 0 0 0
|
||||
0 0 #x3f 0 0 0 0 0 0 #xf8 #xff #x01 0 0 #x01 0
|
||||
0 0 0 0 0 0 0 0 0 0 #xc0 #xff #xff #x3f 0 0
|
||||
0 0 #x02 0 0 0 #xff #xff #xff #x07 0 0 0 0 0 0
|
||||
0 0 0 0 #xc0 #xff #x01 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
#x06 0 0 0 0 0 0 #x10 #xfe #x21 #x1e 0 #x0c 0 0 0
|
||||
#x02 0 0 0 0 0 0 #x10 #x1e #x20 0 0 #x0c 0 0 0
|
||||
#x06 0 0 0 0 0 0 #x10 #xfe #x3f 0 0 0 0 #x03 0
|
||||
#x06 0 0 0 0 0 0 #x30 #xfe #x21 0 0 #x0c 0 0 0
|
||||
#x02 0 0 0 0 0 0 #x90 #x0e #x20 #x40 0 0 0 0 0
|
||||
#x04 0 0 0 0 0 0 0 0 #x20 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 #xc0 #xc1 #xff #x7f 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 #x10 #x40 #x30 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 #x0e #x20 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 #x04 #x7c 0 0 0 0 0
|
||||
0 0 0 0 0 0 #xf2 #x07 #x80 #x7f 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 #xf2 #x1f 0 #x3f 0 0 0 0 0 0
|
||||
0 0 0 #x03 0 0 #xa0 #x02 0 0 0 0 0 0 #xfe #x7f
|
||||
#xdf 0 #xff #xff #xff #xff #xff #x1f #x40 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 #xe0 #xfd #x02 0 0 0 #x03 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 #x1c 0 0 0 #x1c 0 0 0 #x0c 0 0 0 #x0c 0
|
||||
0 0 0 0 0 0 #x80 #x3f #x40 #xfe #x0f #x20 0 0 0 0
|
||||
0 #x38 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 #x02 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 #x87 #x01 #x04 #x0e 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 #xff #x1f #xe2 #x07
|
||||
))
|
||||
|
||||
(define (unicode-char-width c)
|
||||
(let ((ci (char->integer c)))
|
||||
(define (unicode-char-width ch ambiguous-is-wide?)
|
||||
(let ((ci (char->integer ch)))
|
||||
(cond
|
||||
;; hand-checked ranges from EastAsianWidth.txt
|
||||
((<= #x1100 ci #x115F) 2) ; Hangul
|
||||
((<= #x2E80 ci #x4DB5) 2) ; CJK
|
||||
((<= #x4E00 ci #xA4C6) 2)
|
||||
((<= #xAC00 ci #xD7A3) 2) ; Hangul
|
||||
((<= #xF900 ci #xFAD9) 2) ; CJK compat
|
||||
((<= #xFE10 ci #xFE6B) 2)
|
||||
((<= #xFF01 ci #xFF60) 2)
|
||||
((<= #xFFE0 ci #xFFE6) 2)
|
||||
((<= #x20000 ci #x30000) 2)
|
||||
;; non-spacing mark (Mn) ranges from UnicodeData.txt
|
||||
((<= #x0300 ci #x3029)
|
||||
;; inlined bit-vector-ref for portability
|
||||
(let* ((i (- ci #x0300))
|
||||
(byte (quotient i 8))
|
||||
(off (remainder i 8)))
|
||||
(if (zero? (bitwise-and (bytevector-u8-ref low-non-spacing-chars byte)
|
||||
(arithmetic-shift 1 off)))
|
||||
1
|
||||
0)))
|
||||
((<= #x302A ci #x302F) 0)
|
||||
((<= #x3099 ci #x309A) 0)
|
||||
((= #xFB1E ci) 0)
|
||||
((<= #xFE00 ci #xFE23) 0)
|
||||
((<= #x1D167 ci #x1D169) 0)
|
||||
((<= #x1D17B ci #x1D182) 0)
|
||||
((<= #x1D185 ci #x1D18B) 0)
|
||||
((<= #x1D1AA ci #x1D1AD) 0)
|
||||
((<= #xE0100 ci #xE01EF) 0)
|
||||
(else 1))))
|
||||
((char-set:zero-width? ci)
|
||||
0)
|
||||
((char-set:full-width? ci)
|
||||
2)
|
||||
((and ambiguous-is-wide? (char-set:ambiguous-width? ci))
|
||||
2)
|
||||
(else
|
||||
1))))
|
||||
|
||||
(define (unicode-terminal-width/aux str start end ambiguous-is-wide?)
|
||||
(let lp1 ((sc start) (width 0))
|
||||
(if (string-cursor>=? sc end)
|
||||
width
|
||||
(let ((c (string-ref/cursor str sc)))
|
||||
(cond
|
||||
;; ANSI escapes
|
||||
;; TODO: maintain a state machine so the escape can be
|
||||
;; spread across multiple strings
|
||||
((and (= 27 (char->integer c)) ; esc
|
||||
(string-cursor<? (string-cursor-next str sc) end)
|
||||
(eqv? #\[ (string-ref/cursor str (string-cursor-next str sc))))
|
||||
(let lp2 ((sc (string-cursor-forward str sc 2)))
|
||||
(cond ((string-cursor>=? sc end) width)
|
||||
((memv (string-ref/cursor str sc) '(#\m #\newline))
|
||||
(lp1 (string-cursor-next str sc) width))
|
||||
(else (lp2 (string-cursor-next str sc))))))
|
||||
;; fast-path ASCII
|
||||
((char<=? c #\~)
|
||||
(lp1 (string-cursor-next str sc) (+ width 1)))
|
||||
;; unicode
|
||||
(else
|
||||
(lp1 (string-cursor-next str sc)
|
||||
(+ width (unicode-char-width c ambiguous-is-wide?)))))))))
|
||||
|
||||
(define (cursor-arg str x)
|
||||
(if (string-cursor? x) x (string-index->cursor str x)))
|
||||
|
||||
;; convert args to cursors internally for efficiency
|
||||
|
||||
(define (unicode-terminal-width str . o)
|
||||
(let ((start (if (pair? o) (car o) 0))
|
||||
(end (if (and (pair? o) (pair? (cdr o)))
|
||||
(cadr o)
|
||||
(string-length str))))
|
||||
(let lp1 ((i start) (width 0))
|
||||
(if (>= i end)
|
||||
width
|
||||
(let ((c (string-ref str i)))
|
||||
(cond
|
||||
;; ANSI escapes
|
||||
((and (= 27 (char->integer c)) ; esc
|
||||
(< (+ i 1) end)
|
||||
(eqv? #\[ (string-ref str (+ i 1))))
|
||||
(let lp2 ((i (+ i 2)))
|
||||
(cond ((>= i end) width)
|
||||
((memv (string-ref str i) '(#\m #\newline))
|
||||
(lp1 (+ i 1) width))
|
||||
(else (lp2 (+ i 1))))))
|
||||
;; unicode characters
|
||||
((>= (char->integer c) #x80)
|
||||
(lp1 (+ i 1) (+ width (unicode-char-width c))))
|
||||
;; normal ASCII
|
||||
(else (lp1 (+ i 1) (+ width 1)))))))))
|
||||
(let ((start (cursor-arg str (if (pair? o)
|
||||
(car o)
|
||||
(string-cursor-start str))))
|
||||
(end (cursor-arg str (if (and (pair? o) (pair? (cdr o)))
|
||||
(cadr o)
|
||||
(string-cursor-end str)))))
|
||||
(unicode-terminal-width/aux str start end #f)))
|
||||
|
||||
(define (unicode-terminal-width/wide str . o)
|
||||
(let ((start (cursor-arg str (if (pair? o)
|
||||
(car o)
|
||||
(string-cursor-start str))))
|
||||
(end (cursor-arg str (if (and (pair? o) (pair? (cdr o)))
|
||||
(cadr o)
|
||||
(string-cursor-end str)))))
|
||||
(unicode-terminal-width/aux str start end #t)))
|
||||
|
||||
(define (as-unicode . args)
|
||||
(with ((string-width unicode-terminal-width))
|
||||
(each-in-list args)))
|
||||
(fn (ambiguous-is-wide?)
|
||||
(with ((string-width (if ambiguous-is-wide?
|
||||
unicode-terminal-width/wide
|
||||
unicode-terminal-width)))
|
||||
(each-in-list args))))
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
(define-library (chibi show unicode)
|
||||
(import (scheme base) (chibi show base) (srfi 151))
|
||||
(export as-unicode unicode-terminal-width)
|
||||
(include "unicode.scm"))
|
||||
(import (scheme base) (chibi show base) (srfi 130) (srfi 151))
|
||||
(export as-unicode unicode-terminal-width unicode-terminal-width/wide)
|
||||
(include "width.scm" "unicode.scm"))
|
||||
|
|
|
@ -19,14 +19,14 @@
|
|||
;; state variables
|
||||
port row col width output writer string-width pad-char ellipsis
|
||||
radix precision decimal-sep decimal-align sign-rule
|
||||
comma-sep comma-rule word-separator?
|
||||
comma-sep comma-rule word-separator? ambiguous-is-wide?
|
||||
;; pretty
|
||||
pretty pretty-shared pretty-simply pretty-color
|
||||
;; columnar
|
||||
columnar tabular wrapped wrapped/list wrapped/char
|
||||
justified from-file line-numbers show-columns
|
||||
;; unicode
|
||||
as-unicode unicode-terminal-width
|
||||
as-unicode unicode-terminal-width unicode-terminal-width/wide
|
||||
upcased downcased
|
||||
;; color
|
||||
as-red as-blue as-green as-cyan as-yellow
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
(output output-default)
|
||||
(string-width substring-length)
|
||||
(word-separator? char-whitespace?)
|
||||
(ambiguous-is-wide? #f)
|
||||
(ellipsis "")
|
||||
(decimal-align #f)
|
||||
(decimal-sep #f)
|
||||
|
|
|
@ -48,7 +48,7 @@
|
|||
;; state variables
|
||||
port row col width output writer string-width pad-char ellipsis
|
||||
radix precision decimal-sep decimal-align sign-rule
|
||||
comma-sep comma-rule word-separator?
|
||||
comma-sep comma-rule word-separator? ambiguous-is-wide?
|
||||
)
|
||||
(include "base.scm")
|
||||
(include "../../chibi/show/write.scm")
|
||||
|
|
|
@ -741,6 +741,12 @@ def | 6
|
|||
(show #f (as-unicode (with ((pad-char #\〜)) (padded/both 5 "日本語")))))
|
||||
(test "日本語 col: 6"
|
||||
(show #f (as-unicode "日本語" (fn (col) (each " col: " col)))))
|
||||
(test "日本語ΠΜΕ col: 9"
|
||||
(show #f (as-unicode "日本語ΠΜΕ" (fn (col) (each " col: " col)))))
|
||||
(test "日本語ΠΜΕ col: 12"
|
||||
(show #f (with ((ambiguous-is-wide? #t))
|
||||
(as-unicode "日本語ΠΜΕ"
|
||||
(fn (col) (each " col: " col))))))
|
||||
|
||||
;; from-file
|
||||
;; for reference, filesystem-test relies on creating files under /tmp
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
|
||||
(define-library (srfi 166 unicode)
|
||||
(import (scheme base) (srfi 151) (srfi 166 base))
|
||||
(export-all)
|
||||
(include "../../chibi/show/unicode.scm"))
|
||||
(import (scheme base) (srfi 130) (srfi 151) (srfi 166 base))
|
||||
(export as-unicode unicode-terminal-width unicode-terminal-width/wide)
|
||||
(include "../../chibi/show/width.scm"
|
||||
"../../chibi/show/unicode.scm"))
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
;; A value can be any of:
|
||||
;;
|
||||
;; Property_Name: all unicode characters with the given derived property
|
||||
;; Prop@N: a property matched at a given field (instead of default category)
|
||||
;; Xx: all unicode characters with the given general category
|
||||
;; X: all unicode characters with any general category X*
|
||||
;; NNNN: a single unicode value in hex format
|
||||
|
@ -25,7 +26,7 @@
|
|||
;; Unicode property name.
|
||||
;;
|
||||
;; Assumes the files UnicodeData.txt and DerivedCoreProperties.txt are
|
||||
;; in the data/ current directory, unless overridden with the --data or
|
||||
;; in the ./data/ directory, unless overridden with the --data or
|
||||
;; --derived options.
|
||||
|
||||
(import (chibi) (chibi io) (chibi string))
|
||||
|
@ -35,8 +36,9 @@
|
|||
(for-each (lambda (x) (display x err)) args)
|
||||
(newline err)))
|
||||
|
||||
;; Parse UnicodeData.txt for characters matching a given class.
|
||||
(define (extract-char-set-category cat data)
|
||||
;; Parse UnicodeData.txt or other semi-colon-delimited TSV file for
|
||||
;; characters matching a given class in a given field.
|
||||
(define (extract-char-set-category cat field data)
|
||||
(define (join-to-range n ls)
|
||||
(cond
|
||||
((null? ls)
|
||||
|
@ -62,22 +64,43 @@
|
|||
((or (equal? line "") (eqv? #\# (string-ref line 0)))
|
||||
(lp ranges))
|
||||
(else
|
||||
(let ((ls (string-split line #\; 4)))
|
||||
(let* ((line (substring-cursor line
|
||||
(string-cursor-start line)
|
||||
(string-find line #\#)))
|
||||
(ls (map string-trim (string-split line #\;))))
|
||||
(cond
|
||||
((< (length ls) 3)
|
||||
((<= (length ls) field)
|
||||
(warn "invalid UnicodeData line: " line)
|
||||
(lp ranges))
|
||||
(else
|
||||
(let ((ch (string->number (car ls) 16))
|
||||
(let ((ch (if (string-contains (car ls) "..")
|
||||
(let* ((sc (string-contains (car ls) "..")))
|
||||
(cons (string->number
|
||||
(substring-cursor
|
||||
(car ls)
|
||||
(string-cursor-start (car ls))
|
||||
sc)
|
||||
16)
|
||||
(string->number
|
||||
(substring-cursor
|
||||
(car ls)
|
||||
(string-cursor-forward (car ls) sc 2))
|
||||
16)))
|
||||
(string->number (car ls) 16)))
|
||||
(name (cadr ls))
|
||||
(ch-cat (car (cddr ls))))
|
||||
(ch-cat (list-ref ls field)))
|
||||
(cond
|
||||
((or (not ch) (not (= 2 (string-length ch-cat))))
|
||||
((not (or (integer? ch)
|
||||
(and (pair? ch)
|
||||
(integer? (car ch))
|
||||
(integer? (cdr ch)))))
|
||||
(warn "invalid UnicodeData line: " line))
|
||||
((if (char? cat)
|
||||
(eqv? cat (string-ref ch-cat 0))
|
||||
(equal? cat ch-cat))
|
||||
(lp (join-to-range ch ranges)))
|
||||
(lp (if (pair? ch)
|
||||
(cons ch ranges)
|
||||
(join-to-range ch ranges))))
|
||||
(else
|
||||
(lp ranges))))))))))))))
|
||||
|
||||
|
@ -131,13 +154,19 @@
|
|||
(error "invalid character range, expected NNNN-MMMM, got: " def))))
|
||||
((string->number def 16)
|
||||
=> (lambda (start) `(char-set ,(integer->char start))))
|
||||
((string-find? def #\@)
|
||||
(let* ((sc (string-find def #\@))
|
||||
(cat (substring-cursor def (string-cursor-start def) sc))
|
||||
(field (string->number
|
||||
(substring-cursor def (string-cursor-next def sc)))))
|
||||
(extract-char-set-category cat field data)))
|
||||
((and (= 1 (string-length def))
|
||||
(char-upper-case? (string-ref def 0)))
|
||||
(extract-char-set-category (string-ref def 0) data))
|
||||
(extract-char-set-category (string-ref def 0) 2 data))
|
||||
((and (= 2 (string-length def))
|
||||
(char-upper-case? (string-ref def 0))
|
||||
(char-lower-case? (string-ref def 1)))
|
||||
(extract-char-set-category def data))
|
||||
(extract-char-set-category def 2 data))
|
||||
;; derived properties
|
||||
((and (> (string-length def) 1)
|
||||
(eqv? #\: (string-ref def 0)))
|
||||
|
|
|
@ -21,13 +21,15 @@
|
|||
(remove (lambda (i) (hash-table-exists? ls2-tab i)) ls1)))
|
||||
|
||||
(let ((args (command-line)))
|
||||
(let lp ((ls (cdr args)) (ascii? #f))
|
||||
(let lp ((ls (cdr args)) (ascii? #f) (predicate? #f))
|
||||
(cond
|
||||
((and (pair? ls) (not (equal? "" (car ls)))
|
||||
(eqv? #\- (string-ref (car ls) 0)))
|
||||
(cond
|
||||
((member (car ls) '("-a" "--ascii"))
|
||||
(lp (cdr ls) #t))
|
||||
(lp (cdr ls) #t predicate?))
|
||||
((member (car ls) '("-p" "--predicate"))
|
||||
(lp (cdr ls) ascii? #t))
|
||||
(else (error "unknown option" (car ls)))))
|
||||
((or (null? ls) (pair? (cdr ls)))
|
||||
(error "usage: optimize-char-sets.scm [--ascii] module.name"))
|
||||
|
@ -78,8 +80,13 @@
|
|||
(newline (current-error-port))
|
||||
(error "optimized iset is different"))))
|
||||
(display " writing\n" (current-error-port))
|
||||
(write `(define ,exp
|
||||
(immutable-char-set ,(iset->code iset2))))
|
||||
(write
|
||||
(if predicate?
|
||||
`(define ,(string->symbol
|
||||
(string-append (symbol->string exp) "?"))
|
||||
,(iset->code/lambda iset2))
|
||||
`(define ,exp
|
||||
(immutable-char-set ,(iset->code iset2)))))
|
||||
(newline)
|
||||
(newline)
|
||||
(display " done\n" (current-error-port)))))))
|
||||
|
|
Loading…
Add table
Reference in a new issue