mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-07-16 09:27:33 +02:00
Adding special case (non-1-to-1) char case mappings.
This commit is contained in:
parent
6094e5fb12
commit
3683e8cbdc
4 changed files with 154 additions and 6 deletions
|
@ -3,10 +3,12 @@
|
|||
(import (scheme base))
|
||||
(cond-expand
|
||||
(full-unicode
|
||||
(import (chibi char-set full)
|
||||
(import (scheme write)
|
||||
(chibi char-set full)
|
||||
(chibi char-set base)
|
||||
(chibi iset base))
|
||||
(include "char/full.scm")
|
||||
(include "char/special-casing.scm")
|
||||
(include "char/case-offsets.scm"))
|
||||
(else
|
||||
(include "char/ascii.scm")
|
||||
|
|
|
@ -62,14 +62,40 @@
|
|||
(define (char-ci<=? a . ls) (char-cmp-ci <= a ls))
|
||||
(define (char-ci>=? a . ls) (char-cmp-ci >= a ls))
|
||||
|
||||
(define (char-get-special-case ch off)
|
||||
(let ((i (char->integer ch)))
|
||||
(let loop ((a 0) (b (vector-length special-cases)))
|
||||
(if (= a b)
|
||||
#f
|
||||
(let* ((mid (+ a (quotient (- b a) 2)))
|
||||
(vec (vector-ref special-cases mid))
|
||||
(val (vector-ref vec 0)))
|
||||
(cond ((< i val) (if (= mid b) #f (loop a mid)))
|
||||
((> i val) (if (= mid a) #f (loop mid b)))
|
||||
(else (vector-ref vec off))))))))
|
||||
|
||||
(define (call-with-output-string proc)
|
||||
(let ((out (open-output-string)))
|
||||
(proc out)
|
||||
(get-output-string out)))
|
||||
|
||||
(define (string-downcase str)
|
||||
(string-map char-downcase str))
|
||||
(call-with-output-string
|
||||
(lambda (out)
|
||||
(string-for-each
|
||||
(lambda (ch)
|
||||
(display (or (char-get-special-case ch 1) (char-downcase ch)) out))
|
||||
str))))
|
||||
|
||||
(define string-foldcase string-downcase)
|
||||
|
||||
(define (string-upcase str)
|
||||
(string-map char-upcase str))
|
||||
|
||||
(define (string-foldcase str)
|
||||
(string-map char-foldcase str))
|
||||
(call-with-output-string
|
||||
(lambda (out)
|
||||
(string-for-each
|
||||
(lambda (ch)
|
||||
(display (or (char-get-special-case ch 3) (char-upcase ch)) out))
|
||||
str))))
|
||||
|
||||
(define (string-cmp-ci op a ls)
|
||||
(let lp ((op op) (a (string-foldcase a)) (ls ls))
|
||||
|
|
109
lib/scheme/char/special-casing.scm
Normal file
109
lib/scheme/char/special-casing.scm
Normal file
|
@ -0,0 +1,109 @@
|
|||
|
||||
;; Unconditoinal non-1-to-1 case mappings derived from Unicode data
|
||||
;; file SpecialCasing.txt.
|
||||
|
||||
(define special-cases
|
||||
;; <code> <lower> <title> <upper>
|
||||
#(#(223 "ß" "Ss" "SS")
|
||||
#(304 "i̇" "İ" "İ")
|
||||
#(329 "ʼn" "ʼN" "ʼN")
|
||||
#(496 "ǰ" "J̌" "J̌")
|
||||
#(912 "ΐ" "Ϊ́" "Ϊ́")
|
||||
#(944 "ΰ" "Ϋ́" "Ϋ́")
|
||||
#(1415 "և" "Եւ" "ԵՒ")
|
||||
#(7830 "ẖ" "H̱" "H̱")
|
||||
#(7831 "ẗ" "T̈" "T̈")
|
||||
#(7832 "ẘ" "W̊" "W̊")
|
||||
#(7833 "ẙ" "Y̊" "Y̊")
|
||||
#(7834 "ẚ" "Aʾ" "Aʾ")
|
||||
#(8016 "ὐ" "Υ̓" "Υ̓")
|
||||
#(8018 "ὒ" "Υ̓̀" "Υ̓̀")
|
||||
#(8020 "ὔ" "Υ̓́" "Υ̓́")
|
||||
#(8022 "ὖ" "Υ̓͂" "Υ̓͂")
|
||||
#(8064 "ᾀ" "ᾈ" "ἈΙ")
|
||||
#(8065 "ᾁ" "ᾉ" "ἉΙ")
|
||||
#(8066 "ᾂ" "ᾊ" "ἊΙ")
|
||||
#(8067 "ᾃ" "ᾋ" "ἋΙ")
|
||||
#(8068 "ᾄ" "ᾌ" "ἌΙ")
|
||||
#(8069 "ᾅ" "ᾍ" "ἍΙ")
|
||||
#(8070 "ᾆ" "ᾎ" "ἎΙ")
|
||||
#(8071 "ᾇ" "ᾏ" "ἏΙ")
|
||||
#(8072 "ᾀ" "ᾈ" "ἈΙ")
|
||||
#(8073 "ᾁ" "ᾉ" "ἉΙ")
|
||||
#(8074 "ᾂ" "ᾊ" "ἊΙ")
|
||||
#(8075 "ᾃ" "ᾋ" "ἋΙ")
|
||||
#(8076 "ᾄ" "ᾌ" "ἌΙ")
|
||||
#(8077 "ᾅ" "ᾍ" "ἍΙ")
|
||||
#(8078 "ᾆ" "ᾎ" "ἎΙ")
|
||||
#(8079 "ᾇ" "ᾏ" "ἏΙ")
|
||||
#(8080 "ᾐ" "ᾘ" "ἨΙ")
|
||||
#(8081 "ᾑ" "ᾙ" "ἩΙ")
|
||||
#(8082 "ᾒ" "ᾚ" "ἪΙ")
|
||||
#(8083 "ᾓ" "ᾛ" "ἫΙ")
|
||||
#(8084 "ᾔ" "ᾜ" "ἬΙ")
|
||||
#(8085 "ᾕ" "ᾝ" "ἭΙ")
|
||||
#(8086 "ᾖ" "ᾞ" "ἮΙ")
|
||||
#(8087 "ᾗ" "ᾟ" "ἯΙ")
|
||||
#(8088 "ᾐ" "ᾘ" "ἨΙ")
|
||||
#(8089 "ᾑ" "ᾙ" "ἩΙ")
|
||||
#(8090 "ᾒ" "ᾚ" "ἪΙ")
|
||||
#(8091 "ᾓ" "ᾛ" "ἫΙ")
|
||||
#(8092 "ᾔ" "ᾜ" "ἬΙ")
|
||||
#(8093 "ᾕ" "ᾝ" "ἭΙ")
|
||||
#(8094 "ᾖ" "ᾞ" "ἮΙ")
|
||||
#(8095 "ᾗ" "ᾟ" "ἯΙ")
|
||||
#(8096 "ᾠ" "ᾨ" "ὨΙ")
|
||||
#(8097 "ᾡ" "ᾩ" "ὩΙ")
|
||||
#(8098 "ᾢ" "ᾪ" "ὪΙ")
|
||||
#(8099 "ᾣ" "ᾫ" "ὫΙ")
|
||||
#(8100 "ᾤ" "ᾬ" "ὬΙ")
|
||||
#(8101 "ᾥ" "ᾭ" "ὭΙ")
|
||||
#(8102 "ᾦ" "ᾮ" "ὮΙ")
|
||||
#(8103 "ᾧ" "ᾯ" "ὯΙ")
|
||||
#(8104 "ᾠ" "ᾨ" "ὨΙ")
|
||||
#(8105 "ᾡ" "ᾩ" "ὩΙ")
|
||||
#(8106 "ᾢ" "ᾪ" "ὪΙ")
|
||||
#(8107 "ᾣ" "ᾫ" "ὫΙ")
|
||||
#(8108 "ᾤ" "ᾬ" "ὬΙ")
|
||||
#(8109 "ᾥ" "ᾭ" "ὭΙ")
|
||||
#(8110 "ᾦ" "ᾮ" "ὮΙ")
|
||||
#(8111 "ᾧ" "ᾯ" "ὯΙ")
|
||||
#(8114 "ᾲ" "Ὰͅ" "ᾺΙ")
|
||||
#(8115 "ᾳ" "ᾼ" "ΑΙ")
|
||||
#(8116 "ᾴ" "Άͅ" "ΆΙ")
|
||||
#(8118 "ᾶ" "Α͂" "Α͂")
|
||||
#(8119 "ᾷ" "ᾼ͂" "Α͂Ι")
|
||||
#(8124 "ᾳ" "ᾼ" "ΑΙ")
|
||||
#(8130 "ῂ" "Ὴͅ" "ῊΙ")
|
||||
#(8131 "ῃ" "ῌ" "ΗΙ")
|
||||
#(8132 "ῄ" "Ήͅ" "ΉΙ")
|
||||
#(8134 "ῆ" "Η͂" "Η͂")
|
||||
#(8135 "ῇ" "ῌ͂" "Η͂Ι")
|
||||
#(8140 "ῃ" "ῌ" "ΗΙ")
|
||||
#(8146 "ῒ" "Ϊ̀" "Ϊ̀")
|
||||
#(8147 "ΐ" "Ϊ́" "Ϊ́")
|
||||
#(8150 "ῖ" "Ι͂" "Ι͂")
|
||||
#(8151 "ῗ" "Ϊ͂" "Ϊ͂")
|
||||
#(8162 "ῢ" "Ϋ̀" "Ϋ̀")
|
||||
#(8163 "ΰ" "Ϋ́" "Ϋ́")
|
||||
#(8164 "ῤ" "Ρ̓" "Ρ̓")
|
||||
#(8166 "ῦ" "Υ͂" "Υ͂")
|
||||
#(8167 "ῧ" "Ϋ͂" "Ϋ͂")
|
||||
#(8178 "ῲ" "Ὼͅ" "ῺΙ")
|
||||
#(8179 "ῳ" "ῼ" "ΩΙ")
|
||||
#(8180 "ῴ" "Ώͅ" "ΏΙ")
|
||||
#(8182 "ῶ" "Ω͂" "Ω͂")
|
||||
#(8183 "ῷ" "ῼ͂" "Ω͂Ι")
|
||||
#(8188 "ῳ" "ῼ" "ΩΙ")
|
||||
#(64256 "ff" "Ff" "FF")
|
||||
#(64257 "fi" "Fi" "FI")
|
||||
#(64258 "fl" "Fl" "FL")
|
||||
#(64259 "ffi" "Ffi" "FFI")
|
||||
#(64260 "ffl" "Ffl" "FFL")
|
||||
#(64261 "ſt" "St" "ST")
|
||||
#(64262 "st" "St" "ST")
|
||||
#(64275 "ﬓ" "Մն" "ՄՆ")
|
||||
#(64276 "ﬔ" "Մե" "ՄԵ")
|
||||
#(64277 "ﬕ" "Մի" "ՄԻ")
|
||||
#(64278 "ﬖ" "Վն" "ՎՆ")
|
||||
#(64279 "ﬗ" "Մխ" "ՄԽ")))
|
|
@ -1103,6 +1103,7 @@
|
|||
(test #t (string-ci<=? "ΑΒΓ" "αβγ"))
|
||||
(test #t (string-ci>=? "ΑΒΓ" "αβγ"))
|
||||
|
||||
;; latin
|
||||
(test "ABC" (string-upcase "abc"))
|
||||
(test "ABC" (string-upcase "ABC"))
|
||||
(test "abc" (string-downcase "abc"))
|
||||
|
@ -1110,6 +1111,7 @@
|
|||
(test "abc" (string-foldcase "abc"))
|
||||
(test "abc" (string-foldcase "ABC"))
|
||||
|
||||
;; cyrillic
|
||||
(test "ΑΒΓ" (string-upcase "αβγ"))
|
||||
(test "ΑΒΓ" (string-upcase "ΑΒΓ"))
|
||||
(test "αβγ" (string-downcase "αβγ"))
|
||||
|
@ -1117,6 +1119,15 @@
|
|||
(test "αβγ" (string-foldcase "αβγ"))
|
||||
(test "αβγ" (string-foldcase "ΑΒΓ"))
|
||||
|
||||
;; special cases
|
||||
(test "SSA" (string-upcase "ßa"))
|
||||
(test "ßa" (string-downcase "ßa"))
|
||||
(test "ssa" (string-downcase "SSA"))
|
||||
(test "İ" (string-upcase "İ"))
|
||||
(test "i̇" (string-downcase "İ"))
|
||||
(test "i̇" (string-foldcase "İ"))
|
||||
(test "J̌" (string-upcase "ǰ"))
|
||||
|
||||
(test "" (substring "" 0 0))
|
||||
(test "" (substring "a" 0 0))
|
||||
(test "" (substring "abc" 1 1))
|
||||
|
|
Loading…
Add table
Reference in a new issue