diff --git a/Makefile b/Makefile
index 5fdb6ea7..39c2f74f 100644
--- a/Makefile
+++ b/Makefile
@@ -120,15 +120,27 @@ doc: doc/chibi.html doc-libs
 %.html: %.scrbl $(CHIBI_DOC_DEPENDENCIES)
 	$(CHIBI_DOC) $< > $@
 
-clean: clean-libs
-	-$(RM) *.o *.i *.s *.8 tests/basic/*.out tests/basic/*.err
+########################################################################
+# Dist builds - rules to build generated files included in distribution
+# (currently just char-sets since it takes a long time and we don't want
+# to bundle the raw Unicode files or require a net connection to build).
 
-cleaner: clean
-	-$(RM) chibi-scheme$(EXE) chibi-scheme-static$(EXE) chibi-scheme-ulimit$(EXE) \
-		libchibi-scheme$(SO) *.a include/chibi/install.h \
-		$(shell $(FIND) lib -name \*.o)
+# Each recipe below writes to $@.tmp and renames it into place only on
+# success, so a failed download or generator run never clobbers the target.
+data/%.txt:
+	curl --silent --show-error --fail http://www.unicode.org/Public/UNIDATA/$*.txt > $@.tmp && mv -f $@.tmp $@
 
-dist-clean: dist-clean-libs cleaner
+build-lib/chibi/char-set/derived.scm: data/UnicodeData.txt data/DerivedCoreProperties.txt chibi-scheme$(EXE)
+	$(CHIBI) tools/extract-unicode-props.scm --default > $@.tmp && mv -f $@.tmp $@
+
+lib/chibi/char-set/ascii.scm: build-lib/chibi/char-set/derived.scm chibi-scheme$(EXE)
+	$(CHIBI) -Abuild-lib tools/optimize-char-sets.scm --ascii chibi.char-set.compute > $@.tmp && mv -f $@.tmp $@
+
+lib/chibi/char-set/full.scm: build-lib/chibi/char-set/derived.scm chibi-scheme$(EXE)
+	$(CHIBI) -Abuild-lib tools/optimize-char-sets.scm chibi.char-set.compute > $@.tmp && mv -f $@.tmp $@
+
+########################################################################
+# Tests
 
 checkdefs:
 	@for d in $(D); do \
@@ -201,6 +213,19 @@ test-libs: chibi-scheme$(EXE)
 test: chibi-scheme$(EXE)
 	$(CHIBI) -xscheme tests/r5rs-tests.scm
 
+########################################################################
+# Packaging
+
+clean: clean-libs
+	-$(RM) *.o *.i *.s *.8 tests/basic/*.out tests/basic/*.err
+
+cleaner: clean
+	-$(RM) chibi-scheme$(EXE) chibi-scheme-static$(EXE) chibi-scheme-ulimit$(EXE) \
+		libchibi-scheme$(SO) *.a include/chibi/install.h \
+		$(shell $(FIND) lib -name \*.o)
+
+dist-clean: dist-clean-libs cleaner
+
 install: all
 	$(MKDIR) $(DESTDIR)$(BINDIR)
 	$(INSTALL) chibi-scheme$(EXE) $(DESTDIR)$(BINDIR)/
diff --git a/build-lib/chibi/char-set/compute.scm b/build-lib/chibi/char-set/compute.scm
new file mode 100644
index 00000000..73b7a816
--- /dev/null
+++ b/build-lib/chibi/char-set/compute.scm
@@ -0,0 +1,20 @@
+;; Composite char-sets included by the (chibi char-set compute) library.
+(define char-set:letter+digit
+  (immutable-char-set (char-set-union char-set:letter char-set:digit)))
+
+(define char-set:hex-digit
+  (immutable-char-set
+   (string->char-set "0123456789abcdefABCDEF")))
+
+(define char-set:iso-control
+  (immutable-char-set
+   (char-set-union (ucs-range->char-set 0 #x20)        ; 0..#x1F (end is exclusive)
+                   (ucs-range->char-set #x7F #xA0))))  ; #x7F..#x9F
+
+(define char-set:graphic
+  (immutable-char-set
+   (char-set-union
+    char-set:letter char-set:digit char-set:punctuation char-set:symbol)))
+
+(define char-set:printing
+  (immutable-char-set (char-set-union char-set:whitespace char-set:graphic)))
diff --git a/build-lib/chibi/char-set/compute.sld b/build-lib/chibi/char-set/compute.sld
new file mode 100644
index 00000000..f2db421b
--- /dev/null
+++ b/build-lib/chibi/char-set/compute.sld
@@ -0,0 +1,22 @@
+
+;; Don't import this - it's temporarily used to compute optimized
+;; char-set representations.
+
+(define-library (chibi char-set compute)
+  (import (scheme) (chibi iset) (chibi char-set))
+  (include "derived.scm" "compute.scm")
+  (export
+   char-set:lower-case
+   char-set:upper-case
+   char-set:title-case
+   char-set:letter
+   char-set:punctuation
+   char-set:symbol
+   char-set:blank
+   char-set:whitespace
+   char-set:digit
+   char-set:letter+digit
+   char-set:hex-digit
+   char-set:iso-control
+   char-set:graphic
+   char-set:printing))
diff --git a/data/.hgignore b/data/.hgignore
new file mode 100644
index 00000000..9acc7fcd
--- /dev/null
+++ b/data/.hgignore
@@ -0,0 +1,2 @@
+syntax: glob
+*.txt
diff --git a/lib/chibi/char-set.sld b/lib/chibi/char-set.sld
new file mode 100644
index 00000000..e6a49539
--- /dev/null
+++ b/lib/chibi/char-set.sld
@@ -0,0 +1,12 @@
+;; Public char-set API: re-exports (chibi char-set base) and (chibi char-set extras).
+(define-library (chibi char-set)
+  (import (scheme) (chibi char-set base) (chibi char-set extras))
+  (export
+   Char-Set char-set? char-set-contains?
+   char-set ucs-range->char-set char-set-copy char-set-size
+   list->char-set char-set->list string->char-set char-set->string
+   char-set-adjoin! char-set-adjoin char-set-union char-set-union!
+   char-set-intersection char-set-intersection!
+   char-set-difference char-set-difference!
+   immutable-char-set char-set-complement
+   char-set:empty char-set:ascii char-set:full))
diff --git a/lib/chibi/char-set/ascii.scm b/lib/chibi/char-set/ascii.scm
new file mode 100644
index 00000000..0cb198fc
--- /dev/null
+++ b/lib/chibi/char-set/ascii.scm
@@ -0,0 +1,42 @@
+;; char-set:lower-case
+(define char-set:lower-case (immutable-char-set (%make-iset 97 122 #f #f #f)))
+
+;; char-set:upper-case
+(define char-set:upper-case (immutable-char-set (%make-iset 65 90 #f #f #f)))
+
+;; char-set:title-case (empty in ASCII; same empty iset as char-set:empty, not #f)
+(define char-set:title-case (immutable-char-set (%make-iset 0 0 0 #f #f)))
+
+;; char-set:letter
+(define char-set:letter (immutable-char-set (%make-iset 97 122 #f (%make-iset 65 90 #f #f #f) #f)))
+
+;; char-set:punctuation
+(define char-set:punctuation (immutable-char-set (%make-iset 63 64 #f (%make-iset 44 47 #f (%make-iset 37 42 #f (%make-iset 33 35 #f #f #f) #f) (%make-iset 58 59 #f #f #f)) (%make-iset 123 123 #f (%make-iset 95 95 #f (%make-iset 91 93 #f #f #f) #f) (%make-iset 125 125 #f #f #f)))))
+
+;; char-set:symbol
+(define char-set:symbol (immutable-char-set (%make-iset 94 94 #f (%make-iset 43 43 #f (%make-iset 36 36 #f #f #f) (%make-iset 60 62 #f #f #f)) (%make-iset 124 124 #f (%make-iset 96 96 #f #f #f) (%make-iset 126 126 #f #f #f)))))
+
+;; char-set:blank
+(define char-set:blank (immutable-char-set (%make-iset 32 32 #f (%make-iset 9 9 #f #f #f) #f)))
+
+;; char-set:whitespace
+(define char-set:whitespace (immutable-char-set (%make-iset 32 32 #f (%make-iset 9 13 #f #f #f) #f)))
+
+;; char-set:digit
+(define char-set:digit (immutable-char-set (%make-iset 48 57 #f #f #f)))
+
+;; char-set:letter+digit
+(define char-set:letter+digit (immutable-char-set (%make-iset 65 90 #f (%make-iset 48 57 #f #f #f) (%make-iset 97 122 #f #f #f))))
+
+;; char-set:hex-digit
+(define char-set:hex-digit (immutable-char-set (%make-iset 65 70 #f (%make-iset 48 57 #f #f #f) (%make-iset 97 102 #f #f #f))))
+
+;; char-set:iso-control
+(define char-set:iso-control (immutable-char-set (%make-iset 127 127 #f (%make-iset 0 31 #f #f #f) #f)))
+
+;; char-set:graphic
+(define char-set:graphic (immutable-char-set (%make-iset 33 126 #f #f #f)))
+
+;; char-set:printing
+(define char-set:printing (immutable-char-set (%make-iset 32 126 #f (%make-iset 9 13 #f #f #f) #f)))
+
diff --git a/lib/chibi/char-set/ascii.sld b/lib/chibi/char-set/ascii.sld
new file mode 100644
index 00000000..87724471
--- /dev/null
+++ b/lib/chibi/char-set/ascii.sld
@@ -0,0 +1,11 @@
+;; base supplies immutable-char-set/char-set-contains?; extras supplies char-set:ascii/empty/full.
+(define-library (chibi char-set ascii)
+  (import (scheme) (chibi iset base) (chibi char-set base) (chibi char-set extras))
+  (export char-set-contains?
+          char-set:lower-case char-set:upper-case char-set:title-case
+          char-set:letter char-set:digit char-set:letter+digit
+          char-set:graphic char-set:printing char-set:whitespace
+          char-set:iso-control char-set:punctuation char-set:symbol
+          char-set:hex-digit char-set:blank char-set:ascii
+          char-set:empty char-set:full)
+  (include "ascii.scm"))
diff --git a/lib/chibi/char-set/base.sld b/lib/chibi/char-set/base.sld
new file mode 100644
index 00000000..b236ce32
--- /dev/null
+++ b/lib/chibi/char-set/base.sld
@@ -0,0 +1,14 @@
+;; Core char-set bindings: a char-set is a (chibi iset base) integer set of char->integer values.
+(define-library (chibi char-set base)
+  (import (scheme) (chibi iset base))
+  (export (rename Integer-Set Char-Set)
+          (rename iset? char-set?)
+          immutable-char-set
+          char-set-contains?)
+  (begin
+    (define-syntax immutable-char-set  ; expands to the value of its argument, eval'd at expansion time
+      (sc-macro-transformer
+       (lambda (expr use-env)
+         (eval (cadr expr) use-env))))
+    (define (char-set-contains? cset ch)
+      (iset-contains? cset (char->integer ch)))))
diff --git a/lib/chibi/char-set/extras.scm b/lib/chibi/char-set/extras.scm
new file mode 100644
index 00000000..41d81121
--- /dev/null
+++ b/lib/chibi/char-set/extras.scm
@@ -0,0 +1,47 @@
+
+(define (char-set . args)
+  (list->char-set args))
+
+;; This is a mistake in the SRFI-14 design - end should be inclusive.
+(define (ucs-range->char-set start end) + (make-iset start (- end 1))) + +(define char-set-copy iset-copy) + +(define char-set-size iset-size) + +(define (list->char-set ls) + (list->iset (map char->integer ls))) +(define (char-set->list cset) + (map integer->char (iset->list cset))) + +(define (string->char-set str) + (list->char-set (string->list str))) +(define (char-set->string cset) + (list->string (char-set->list cset))) + +(define (char-set-adjoin! cset ch) + (iset-adjoin! cset (char->integer ch))) +(define (char-set-adjoin cset ch) + (iset-adjoin cset (char->integer ch))) + +(define char-set-union iset-union) +(define char-set-union! iset-union!) +(define char-set-intersection iset-intersection) +(define char-set-intersection! iset-intersection!) +(define char-set-difference iset-difference) +(define char-set-difference! iset-difference!) + +(define char-set:empty (immutable-char-set (%make-iset 0 0 0 #f #f))) +(define char-set:ascii (immutable-char-set (%make-iset 0 #x7F #f #f #f))) + +(cond-expand + (full-unicode + (define char-set:full + (immutable-char-set + (%make-iset 0 #xD7FF #f #f (%make-iset #xE000 #x10FFFD #f #f #f))))) + (else + (define char-set:full (immutable-char-set (%make-iset 0 #xFF #f #f #f))))) + +(define (char-set-complement cset) + (char-set-difference char-set:full cset)) diff --git a/lib/chibi/char-set/extras.sld b/lib/chibi/char-set/extras.sld new file mode 100644 index 00000000..d7269d2a --- /dev/null +++ b/lib/chibi/char-set/extras.sld @@ -0,0 +1,11 @@ + +(define-library (chibi char-set extras) + (import (scheme) (chibi iset) (chibi char-set base)) + (include "extras.scm") + (export + char-set ucs-range->char-set char-set-copy char-set-size + list->char-set char-set->list string->char-set char-set->string + char-set-adjoin! char-set-adjoin char-set-union char-set-union! + char-set-intersection char-set-intersection! + char-set-difference char-set-difference! 
+ char-set-complement char-set:empty char-set:ascii char-set:full)) diff --git a/lib/chibi/char-set/full.scm b/lib/chibi/char-set/full.scm new file mode 100644 index 00000000..0240cf56 --- /dev/null +++ b/lib/chibi/char-set/full.scm @@ -0,0 +1,42 @@ +;; char-set:lower-case +(define char-set:lower-case (immutable-char-set (%make-iset 11312 11557 113078212145405220956299481848306876004857152314519694622414813644330106879 (%make-iset 7424 7615 6277101735386680763834460195211881500229451637403754168319 (%make-iset 891 1231 3359617113266706315836163420852667279900761594669382740612734659380575149118011965863067042895076786183 (%make-iset 591 740 1382646202340133545564837515061266821645598719 (%make-iset 97 576 2365509536875796115271152425453441351280931286999514252096600828646551318540912334052744481790519950100366591329056407886414129454316222836375551 #f #f) #f) (%make-iset 1377 1415 #f #f #f)) (%make-iset 8336 8575 1766820104831717270911194217800685387968293958678012717949960101612756991 (%make-iset 7829 8183 63072327451413996121415570261819623312617164997438933550707549860292318813601613991411837156872431794651647 #f #f) (%make-iset 9424 9449 #f #f #f))) (%make-iset 65345 65370 #f (%make-iset 43000 43001 #f (%make-iset 42799 42872 18815678955183742648327 #f #f) (%make-iset 64256 64279 16253055 #f #f)) (%make-iset 119834 120327 51146727486231585908820696036815651831314948480676177501992199236163085036512359821035265912047941451080432971005274950492488384578638513597382131711 (%make-iset 66600 66639 #f #f #f) (%make-iset 120354 120777 42984503304530211461104808282321868720026385944674536297193504244060802094551012793517930744128497289034962077133152853331804159 #f #f)))))) + +;; char-set:upper-case +(define char-set:upper-case (immutable-char-set (%make-iset 9398 9423 #f (%make-iset 1216 1366 2854495385401535168399502283641929280107053059 (%make-iset 376 582 193510492298500594587530311832788051917914503280678853822825987 (%make-iset 65 222 
363948161469878586209393690477933956061783392255 #f #f) (%make-iset 904 1071 374144419156711012060424995402076372759432889630407 #f #f)) (%make-iset 7944 8187 1736907842101479124536426275495535083658609664624882011531376654293716462141695 (%make-iset 4256 4293 #f #f #f) (%make-iset 8459 8559 2535262514830237892353053933799 #f #f))) (%make-iset 66560 66599 #f (%make-iset 42877 42878 #f (%make-iset 11264 11392 595503879886640614262110651042930622463 #f #f) (%make-iset 65313 65338 #f #f #f)) (%make-iset 120328 120744 338460645933693697560665215760249528245999403264098033404085806319476851798827321172475719366475085497933670543086984716353535 (%make-iset 119808 120301 51146727486231585908820696036815651831314948480394089408712024707913450654934185184668161964607423236491353583631673266179433585099917637254106841087 #f #f) #f))))) + +;; char-set:title-case +(define char-set:title-case (immutable-char-set (%make-iset 8072 8188 83076749736557537208897815481090303 (%make-iset 453 498 35184372088905 #f #f) #f))) + +;; char-set:letter +(define char-set:letter (immutable-char-set (%make-iset 44032 55203 #f (%make-iset 6912 7414 22886562555891042693710382455505074355841760073166317418421820388451117221729500954464088123708152923721508489525154758089698131078348046953168412606447 (%make-iset 3913 4346 44362715105927999221712262984342111652617098472873236058512408135598431308732685725527979136265102154321118707789398691204023975935 (%make-iset 1869 2380 13407424329480734270447878660048428920496373804417168083627602288254107168259292949124979141882146787383731347613928737074953757721179033264662499184082943 (%make-iset 880 1366 399583814438996167382020011346842847816169115710121343500162631807336209279770003226660543306154439487482399564906606488253101298916621286954252495 (%make-iset 452 740 963565667247331348724662781356953235953225866953231882562121816871620562685859362504703 (%make-iset 65 451 
314600393224337169960540460799584166474933543237622550739399649019542269878494744694979332000092312996040451282698239 #f #f) #f) (%make-iset 1377 1855 1560874275157973934342501410334434064559112153595576892194119830209925249809262307688780388207250242577643819833026382406056630315346898446188543 #f #f)) (%make-iset 2908 3404 381991200485141231019756222367537226053710584749387702525493540482928266985181611648107653982490012918979831010671369935541658005339438249753284968699 (%make-iset 2382 2892 5366736463464336406706714106425487149575078127243771493347988908203826370682080174911419639786360015702762728603625228669492029470702181043482460663316355 #f #f) (%make-iset 3424 3911 796045880330578019002307293597294347346987043146984104137316595631780707567417231169995506839382805294177527833722740156565765733950441790015275023 #f #f))) (%make-iset 5121 5740 #f (%make-iset 4682 4822 2787528182814337039150675927861957382578127 (%make-iset 4349 4680 #f #f #f) (%make-iset 4824 5108 62165404551223330269422779455378792455782943721961461362086716517957521327837304324095 #f #f)) (%make-iset 6212 6683 12194330229244424385358936041275179229938106075909171049697737889247539090636773541118657721546749280855637804188578406358228684801308538961919 (%make-iset 5743 6210 762145642144808933311681242303344929266684400897434715282060471834151313118761625963263953770881497968694418657905208578955948716669007691775 #f #f) (%make-iset 6688 6772 38685487814037185985773567 #f #f)))) (%make-iset 19968 40908 #f (%make-iset 9398 9449 #f (%make-iset 7680 8188 1669429210027032353070387959902572074068251569912541634239543664469254840701605815586298160569854604812755249439701457169906905863833481177213265564925951 (%make-iset 7424 7615 6277101735386680763834460195211881500229451637403754168319 #f #f) (%make-iset 8336 8584 904625697166121400702423284964750605108746128179460890055734500600385118207 #f #f)) (%make-iset 12321 12799 
1560850458106678397246853318509506420346053090363696722427379182288726125506853871807416126725592575693831523564009034418563951096328109074350591 (%make-iset 11264 11775 1716199415031085457304815023196129625190876140284840059163076939411632061379695825768761320243048559913035261692194332965927453391792248614786550156348096511 #f #f) (%make-iset 13312 19893 #f #f #f))) (%make-iset 42656 43137 9366007796590143684266426014173832021882860391206691228341429856634888413055477646498624449482500722743196759142345085250473767233032902320062463 (%make-iset 40982 42124 #f (%make-iset 40960 40980 #f #f #f) (%make-iset 42192 42647 186070703019498633361408888448075376716851403313694244648522839560594426772554545643464113891391655073968019319443321956603475346266783743 #f #f)) (%make-iset 43648 43822 47701954761756259097387504798417463778755529815359487 (%make-iset 43138 43638 6495633707107511592130816216430842683164567658071841769919061558009539875610149675919448576995054309973657727503245554791947025664158092800029345447935 #f #f) (%make-iset 43968 44010 7559142440959 #f #f))))) (%make-iset 71296 71349 17952825858326527 (%make-iset 65536 65908 354901720847464262643270080013918741494457029505007323037992842993478140487429303646626346434630199938330987804055257434518162763775 (%make-iset 64256 64433 383123885216472214589586756787576615644966064332734591 (%make-iset 63744 64109 #f (%make-iset 55216 55291 75557863725914197590015 #f #f) (%make-iset 64112 64217 #f #f #f)) (%make-iset 64848 64967 341611594916723379280938182270919901183 (%make-iset 64467 64829 #f #f #f) (%make-iset 65008 65500 23166459242184795109519745412560813317707843523172326996034783221158488630072272676325665038195628414381257440134258275967737233656228847161199038463 #f #f))) (%make-iset 67840 68220 4925250765375315437575115447282545225619899143486530057386523691675827454339457238250572167562730571287653355880447 (%make-iset 66640 66717 #f (%make-iset 66176 66639 
878680692688788100968217104054132723536510135400456683066998889497980605885632024127229627367624962058724496859707859793938161524087860595782141042536975695871 #f #f) (%make-iset 67584 67669 77371243358065019892792383 #f #f)) (%make-iset 68608 68680 #f (%make-iset 68352 68466 1427247352465131255837309846865400673685471231 #f #f) (%make-iset 69635 70084 204586912993508730593563550625432944389072422914203271589172564161864958211730995356562955949992950450193086469961732181890394864571677076213876129791 #f #f)))) (%make-iset 120146 120485 #f (%make-iset 92160 92728 #f (%make-iset 74752 74850 #f (%make-iset 73728 74606 #f #f #f) (%make-iset 77824 78894 #f #f #f)) (%make-iset 110592 110593 #f (%make-iset 93952 94111 1461323231539455856679019361440445717580125044735 #f #f) (%make-iset 119808 120144 1221462192088229133428742507380976417683660481320862010013722843488511299179635651402650638514783282651558855669972991 #f #f))) (%make-iset 131072 173782 #f (%make-iset 120656 120779 21226109557071405325645863666178850815 (%make-iset 120488 120654 187072206790762423064706648907459429699768891211775 #f #f) (%make-iset 126464 126651 6421867392636242726356536395708148792154285362342779204665327 #f #f)) (%make-iset 177984 178205 #f (%make-iset 173824 177972 #f #f #f) (%make-iset 194560 195101 #f #f #f)))))))) + +;; char-set:punctuation +(define char-set:punctuation (immutable-char-set (%make-iset 10627 10749 7975367975289779630837864365545226239 (%make-iset 5120 5120 #f (%make-iset 2800 2800 #f (%make-iset 1370 1805 177440029682739492195629626423414285791075425806584367987188710887245779556727828964224741026149370230681856791701409903272452423743 (%make-iset 894 903 513 (%make-iset 33 191 6277101735777033378358367027585284040564796057489604901879 #f #f) #f) (%make-iset 2404 2416 4099 (%make-iset 2039 2142 10141204806548057579655195000839 #f #f) #f)) (%make-iset 4347 4347 #f (%make-iset 3844 4175 
8612299728833109452216727519275634166367255279803428156814546022481552546661143084834195942383124479 (%make-iset 3572 3675 15214283082817323578510236712961 #f #f) #f) (%make-iset 4960 4968 #f #f #f))) (%make-iset 7002 7008 #f (%make-iset 6468 6469 #f (%make-iset 6100 6154 36011204832919671 (%make-iset 5741 5942 4820814132776970826626481771165599449877669887122490332807171 #f #f) #f) (%make-iset 6816 6829 16255 (%make-iset 6686 6687 #f #f #f) #f)) (%make-iset 9001 9002 #f (%make-iset 8208 8334 127607834706674478748244504138880122879 (%make-iset 7164 7379 52681756409358971006475467946070306489686807156907791211684691983 #f #f) #f) (%make-iset 10088 10223 87027215340059722591700904977272299077631 #f #f)))) (%make-iset 65281 65381 2521683860030948918624327334903 (%make-iset 43124 43615 11987514433213410168505333362405758614102799522152940711615041872404225459959956214682136066587259816913167135759024198010248565942507088814987542543 (%make-iset 12289 12539 1809251394333065553493296641491499378872794969502476229475819322195851214727 (%make-iset 11776 11835 1152780767118491647 (%make-iset 11513 11632 664613997892457936451903530140172399 #f #f) #f) (%make-iset 42509 42743 54351252480975689826048702415431575448442147475312362296571857733681159 (%make-iset 42238 42239 #f #f #f) #f)) (%make-iset 64830 64831 #f (%make-iset 44011 44011 #f (%make-iset 43742 43761 786435 #f #f) #f) (%make-iset 65040 65131 4037812089938908849547248639 #f #f))) (%make-iset 68176 68223 140737488355839 (%make-iset 67671 67671 #f (%make-iset 66463 66512 562949953421313 (%make-iset 65792 65794 #f #f #f) #f) (%make-iset 67871 67903 4294967297 #f #f)) (%make-iset 70085 70088 #f (%make-iset 69703 69955 13569385457497991651199724805705614201565294768222350139146883642703225028735 (%make-iset 68409 68415 #f #f #f) #f) (%make-iset 74864 74867 #f #f #f))))))) + +;; char-set:symbol +(define char-set:symbol (immutable-char-set (%make-iset 10750 11084 #f (%make-iset 4254 4255 #f (%make-iset 2801 2928 
170141183460469231731687303715884105729 (%make-iset 1423 1551 536343496299213554716686148823119036417 (%make-iset 706 1014 521481209941628438084722096232800884555162981189081790621504207040293760564100563907364454415 (%make-iset 36 247 3291009115408659855127328282852688335858940725263447471358476417 #f #f) (%make-iset 1154 1154 #f #f #f)) (%make-iset 2038 2038 #f (%make-iset 1758 1790 6442452993 #f #f) (%make-iset 2546 2555 771 #f #f))) (%make-iset 3647 3647 #f (%make-iset 3199 3199 #f (%make-iset 3059 3066 #f #f #f) (%make-iset 3449 3449 #f #f #f)) (%make-iset 4030 4056 126058239 (%make-iset 3841 3896 47287798208921607 #f #f) #f))) (%make-iset 8592 9000 #f (%make-iset 6622 6655 #f (%make-iset 6107 6107 #f (%make-iset 5008 5017 #f #f #f) (%make-iset 6464 6464 #f #f #f)) (%make-iset 8125 8527 15180504508302547082559792549452134039979727921597050291198905168453617261872958515620728168035045459212134851764818477085 (%make-iset 7009 7036 267912191 #f #f) #f)) (%make-iset 10224 10626 #f (%make-iset 9472 9983 #f (%make-iset 9003 9449 363419362147803445274660701490199886974208000930090554402992578672453439413725018135787913021374683400778402767997805117095404640403455 #f #f) (%make-iset 9985 10213 862718293047519590130783627714548701007576706064464912086123449155583 #f #f)) (%make-iset 10716 10749 4294967295 (%make-iset 10649 10711 #f #f #f) #f)))) (%make-iset 65020 65129 796084576943328064139929612976131 (%make-iset 19904 19967 #f (%make-iset 12443 12444 #f (%make-iset 11493 11498 #f (%make-iset 11088 11097 #f #f #f) (%make-iset 11904 12351 547258453716185545689710299454841531346882567525409776024515836385994490529377606833512566938465691840048809561654146263061905633443839 #f #f)) (%make-iset 13056 13311 #f (%make-iset 12688 13054 300613450595050653137261935414156179889591998741512647513967970216935158771592840931503642564904187380807827395 #f #f) #f)) (%make-iset 43639 43641 #f (%make-iset 42752 42890 522673715590561479879743397015208866086911 (%make-iset 42128 42182 
#f #f #f) (%make-iset 43048 43065 245775 #f #f)) (%make-iset 64434 64449 #f (%make-iset 64297 64297 #f #f #f) #f))) (%make-iset 120513 120771 463168363851011130091785795673740600836767278013411671357985905734243019915265 (%make-iset 118784 119261 780437137578995458467560908739391556888996158179266522929460546508236933653296437157060350808521225678016835714111230592247297551268494937423871 (%make-iset 65504 65533 805339007 (%make-iset 65284 65374 1547425050547877224499904641 #f #f) (%make-iset 65847 66044 401734511064736150903948875408776876244564738538201663144447 #f #f)) (%make-iset 119552 119638 #f (%make-iset 119296 119365 664082786653543858175 #f #f) #f)) (%make-iset 127744 128252 1623601741516486367526542089626489179499454454837360135582644525057721907975405279813447049733043276512275822980244041212191896116171460908759068350349311 (%make-iset 126976 127386 5288447750321425141395456744287458546996189758388629186172721346534510152156602991240248150625623449590204448175840041107455 (%make-iset 126704 126705 #f #f #f) (%make-iset 127462 127569 244021541289521102071576548868095 #f #f)) (%make-iset 128507 128883 307828173409331868845930000782371979146838607456393061394578876014578854359990151589006733868359031372480153387007 (%make-iset 128256 128359 5104235524096485346957782767918670413823 #f #f) #f))))))) + +;; char-set:blank +(define char-set:blank (immutable-char-set (%make-iset 6158 6158 #f (%make-iset 160 160 #f (%make-iset 9 32 8388609 #f #f) (%make-iset 5760 5760 #f #f #f)) (%make-iset 12288 12288 #f (%make-iset 8192 8287 39614081257132309534260332543 #f #f) #f)))) + +;; char-set:whitespace +(define char-set:whitespace (immutable-char-set (%make-iset 6158 6158 #f (%make-iset 160 160 #f (%make-iset 9 32 8388639 #f #f) (%make-iset 5760 5760 #f #f #f)) (%make-iset 12288 12288 #f (%make-iset 8192 8287 39614081257132312832795215871 #f #f) #f)))) + +;; char-set:digit +(define char-set:digit (immutable-char-set (%make-iset 6992 7097 
81050410252092417358195461194751 (%make-iset 3430 3439 #f (%make-iset 1984 1993 #f (%make-iset 1632 1641 #f (%make-iset 48 57 #f #f #f) (%make-iset 1776 1785 #f #f #f)) (%make-iset 2918 3311 743556014954405849072266507808340190880121684743320383604969609548181369962480481034346862340233244904693236621235324859507752652901450751 (%make-iset 2406 2799 743556014954405849072266507808340190880121684743320383604969609548181369962480481034346862340233244904693236621235324859507752652901450751 #f #f) #f)) (%make-iset 6470 6479 #f (%make-iset 4160 4249 1236731113465765645724419071 (%make-iset 3664 3881 420844212009973745855555120560483693212126493377631139457593771007 #f #f) (%make-iset 6112 6169 287948901175002111 #f #f)) (%make-iset 6784 6809 67044351 (%make-iset 6608 6617 #f #f #f) #f))) (%make-iset 66720 66729 #f (%make-iset 43472 43609 6421475075300574421752121441301562791795741843089406187013119 (%make-iset 42528 42537 #f (%make-iset 7232 7257 67044351 #f #f) (%make-iset 43216 43273 287948901175002111 #f #f)) (%make-iset 65296 65305 #f (%make-iset 44016 44025 #f #f #f) #f)) (%make-iset 70096 70105 #f (%make-iset 69872 69951 1207745227993911763403775 (%make-iset 69734 69743 #f #f #f) #f) (%make-iset 120782 120831 #f (%make-iset 71360 71369 #f #f #f) #f)))))) + +;; char-set:letter+digit +(define char-set:letter+digit (immutable-char-set (%make-iset 44032 55203 #f (%make-iset 6912 7414 22886562555891042693710382458146756289377006471936556892838923982807328844367070294241793956553485968351753386046618718102707539530518625959993520685039 (%make-iset 3913 4346 44362715105927999221712262984621806339048726122702370012163096018033725502663463203116646689073826286574787756247207860776650932223 (%make-iset 1869 2380 13407424329480734270447878660048428920496373804417168083627602288254107168259292949124979141882146787383731347613928779568711247970208345158244457521348607 (%make-iset 880 1366 
399583814438996167382020011346842847816169115710121343500162631807336209279770003226660543306154439487482399564906606488253101298916621286954252495 (%make-iset 452 740 963565667247331348724662781356953235953225866953231882562121816871620562685859362504703 (%make-iset 48 451 41235302740700321541067959277923095868202489379241662970514590796289444397514063176660331003916099649017014030525823583231 #f #f) #f) (%make-iset 1377 1855 1560874275157973934343822231147075518302721156252056432767839474782185051280962423908807488297855721715644480587704321413018649007858524599877631 #f #f)) (%make-iset 2908 3404 381991200485141231019756222408812876448756014237186533546277447507183436269917901262081454044646810274583365631168335787645676728618713947476107197691 (%make-iset 2382 2892 5366736463561891048904088864332422319078538891026511232750792789353571719231492410784884055633496549297705163924592108787908229198392583314950938978942851 #f #f) (%make-iset 3424 3911 796045881074145379721101485003658612742377112762910063541249941107066206101891013710754591976857025636790244447628442609572802972081506968078974927 #f #f))) (%make-iset 5121 5740 #f (%make-iset 4682 4822 2787528182814337039150675927861957382578127 (%make-iset 4349 4680 #f #f #f) (%make-iset 4824 5108 62165404551223330269422779455378792455782943721961461362086716517957521327837304324095 #f #f)) (%make-iset 6212 6683 12194330229244424385524038642855360947889231407969113621412693927197786728086255496979946034430370000318048005765115268416095504239842568437759 (%make-iset 5743 6210 762145642145155178562790324783619807494011846352632466837216562044788852063159421730556579887606746611010843054583947091606179550442951671807 #f #f) (%make-iset 6688 6809 5311800736730066244218093126239125503 #f #f)))) (%make-iset 19968 40908 #f (%make-iset 9398 9449 #f (%make-iset 7680 8188 1669429210027032353070387959902572074068251569912541634239543664469254840701605815586298160569854604812755249439701457169906905863833481177213265564925951 
(%make-iset 7424 7615 6277101735386680763834460195211881500229451637403754168319 #f #f) (%make-iset 8336 8584 904625697166121400702423284964750605108746128179460890055734500600385118207 #f #f)) (%make-iset 12321 12799 1560850458106678397246853318509506420346053090363696722427379182288726125506853871807416126725592575693831523564009034418563951096328109074350591 (%make-iset 11264 11775 1716199415031085457304815023196129625190876140284840059163076939411632061379695825768761320243048559913035261692194332965927453391792248614786550156348096511 #f #f) (%make-iset 13312 19893 #f #f #f))) (%make-iset 42656 43137 9366007796590143684266426014173832021882860391206691228341429856634888413055477646498624449482500722743196759142345085250473767233032902320062463 (%make-iset 40982 42124 #f (%make-iset 40960 40980 #f #f #f) (%make-iset 42192 42647 186070703019498633361408888448075519920530856307050957164988653405500459175263525083868751559444797946410782900983767246458201399646748671 #f #f)) (%make-iset 43648 43822 47701954761756259097387504798417463778755529815359487 (%make-iset 43138 43638 224726016208531273348937437899872290007502680726266932355584409924411794087837267452460956059789185107995855861761463319664142696520594339463520992995707994505215 #f #f) (%make-iset 43968 44025 287956460317442047 #f #f))))) (%make-iset 71296 71369 348108861360120048141903260542929609949183 (%make-iset 65536 65908 354901720847464262643270080013918741494457029505007323037992842993478140487429303646626346434630199938330987804055257434518162763775 (%make-iset 64256 64433 383123885216472214589586756787576615644966064332734591 (%make-iset 63744 64109 #f (%make-iset 55216 55291 75557863725914197590015 #f #f) (%make-iset 64112 64217 #f #f #f)) (%make-iset 64848 64967 341611594916723379280938182270919901183 (%make-iset 64467 64829 #f #f #f) (%make-iset 65008 65500 
23166459242184795109519745412560813317707843523172326996035291982829335841807197632365519235915051181023523126849199870616098935821672668804369354751 #f #f))) (%make-iset 67840 68220 4925250765375315437575115447282545225619899143486530057386523691675827454339457238250572167562730571287653355880447 (%make-iset 66640 66729 1237033344920669303018094591 (%make-iset 66176 66639 878680692688788100968217104054132723536510135400456683066998889497980605885632024127229627367624962058724496859707859793938161524087860595782141042536975695871 #f #f) (%make-iset 67584 67669 77371243358065019892792383 #f #f)) (%make-iset 68608 68680 #f (%make-iset 68352 68466 1427247352465131255837309846865400673685471231 #f #f) (%make-iset 69635 70105 204586912993508730593563550625432944389072422914203271855910203047007904292292283148049105766861891958292943578471947333866751571791177989246645436415 #f #f)))) (%make-iset 120146 120485 #f (%make-iset 92160 92728 #f (%make-iset 74752 74850 #f (%make-iset 73728 74606 #f #f #f) (%make-iset 77824 78894 #f #f #f)) (%make-iset 110592 110593 #f (%make-iset 93952 94111 1461323231539455856679019361440445717580125044735 #f #f) (%make-iset 119808 120144 1221462192088229133428742507380976417683660481320862010013722843488511299179635651402650638514783282651558855669972991 #f #f))) (%make-iset 131072 173782 #f (%make-iset 120656 120831 95780971304117989802914516033683783778383003373273087 (%make-iset 120488 120654 187072206790762423064706648907459429699768891211775 #f #f) (%make-iset 126464 126651 6421867392636242726356536395708148792154285362342779204665327 #f #f)) (%make-iset 177984 178205 #f (%make-iset 173824 177972 #f #f #f) (%make-iset 194560 195101 #f #f #f)))))))) + +;; char-set:hex-digit +(define char-set:hex-digit (immutable-char-set (%make-iset 48 102 35465847073801215 #f #f))) + +;; char-set:iso-control +(define char-set:iso-control (immutable-char-set (%make-iset 0 159 1461501637160761734743215600984595715944343404543 #f #f))) + +;; 
char-set:graphic +(define char-set:graphic (immutable-char-set (%make-iset 4960 4968 #f (%make-iset 64848 64967 341611594916723379280938182270919901183 (%make-iset 8336 8584 904625697166121400702423284964750605108746128179460890055734500600385118207 (%make-iset 4349 4680 #f (%make-iset 1869 2380 13407424329480734270447878660048428920496373804417168083627602288254107168259292949124979141882146787383731347613928779568711247970208345158244457521348607 (%make-iset 880 1366 399583814438996167382020011346842847816169115710121343500162631837690410720797019959777135600271922403770006425096286507812670201087000743294040271 (%make-iset 452 1014 15095849699286165408966218323953077744206039126039399880352876178469244380253667520073008553405383346474973778635124736918461513833545106799961749425123263291782871384063 (%make-iset 33 451 1351198400207268136257714889618984005409259171978990812217822137540885437287020053544046511735433817788139780300462983613513727 #f #f) #f) (%make-iset 1377 1855 1560874275157973934343822231147075518302721156252056432767839474782185051280962423908807488297855721715644480587704321413018649007858524599877631 #f #f)) (%make-iset 3424 3911 796045881074145379721101485003658612742377112762910063541249941107066206101891013710754591976857025636790244447628442609572802972081506968078974927 (%make-iset 2908 3404 381991200485141231019756222408812876448756014237186533546277447507183436269917901262081454044646810274583365631168335787645676728618713947476107197691 (%make-iset 2382 2892 5366736463561891048904088864332422319078538891026511232750792789353571719231492410784884055633496549297705163924592108787908229198392583314950938978942851 #f #f) #f) (%make-iset 3913 4346 44362715105927999221712262984621806339048726122702370012163096018033725502663463203116646689073826286574787756247207860776650932223 #f #f))) (%make-iset 6212 6683 
12194330229244424385524038642855360947889231407969113621412693927197786728086255496979946034430370000318048005765115268416095504239842568437759 (%make-iset 5121 5740 #f (%make-iset 4824 5108 62165404551223330269422779455378792455782943721961461362086716517957521327837304324095 (%make-iset 4682 4822 2787528182814337039150675927861957382578127 #f #f) #f) (%make-iset 5743 6210 762145642145155178562790324783619807494011846352632466837216562044788852063159421730556579887606746611010843054583947091606179550442951671807 #f #f)) (%make-iset 7424 7615 6277101735386680763834460195211881500229451637403754168319 (%make-iset 6912 7414 22886562555891042693710382458146756289377006471936556892838923982807328844367070294241793956553485968351753386046618718102707539530518625959993520685039 (%make-iset 6688 6809 5311800736730066244218093126239125503 #f #f) #f) (%make-iset 7680 8188 1669429210027032353070387959902572074068251569912541634239543664469254840701605815586298160569854604812755249439701457169906905863833481177213265564925951 #f #f)))) (%make-iset 43138 43638 224726016208531273348937437899872290007502680726266932355584409924411794087837267452460956059789185107995855861761463319664142696520594339463520992995707994505215 (%make-iset 19968 40908 #f (%make-iset 12321 12799 1560850458106678397246853318509506420346053090363696722427379182288726125506853871807416126725592575693831523564009034418563951096328109074350591 (%make-iset 11264 11775 1716199415031085457304815023196129625190876140284840059163076939411632061379695825768761320243048559913035261692194332965927453391792248614786550156348096511 (%make-iset 9398 9449 #f #f #f) #f) (%make-iset 13312 19893 #f #f #f)) (%make-iset 42192 42647 186070703019498633361408888448075519920530856307050957164988653405500459175263525083868751559444797946410782900983767246458201399646748671 (%make-iset 40982 42124 #f (%make-iset 40960 40980 #f #f #f) #f) (%make-iset 42656 43137 
9366007796590143684266426014173832021882860391206691228341429856634888413055477646498624449482500722743196759142345085250473767233032902320062463 #f #f))) (%make-iset 63744 64109 #f (%make-iset 44032 55203 #f (%make-iset 43968 44025 287956460317442047 (%make-iset 43648 43822 47701954761756259097387504798417463778755529815359487 #f #f) #f) (%make-iset 55216 55291 75557863725914197590015 #f #f)) (%make-iset 64256 64433 383123885216472214589586756787576615644966064332734591 (%make-iset 64112 64217 #f #f #f) (%make-iset 64467 64829 #f #f #f))))) (%make-iset 120488 120654 187072206790762423064706648907459429699768891211775 (%make-iset 71296 71369 348108861360120048141903260542929609949183 (%make-iset 67584 67669 77371243358065019892792383 (%make-iset 66176 66639 878680692688788100968217104054132723536510135400456683066998889497980605885632024127229627367624962058724496859707859793938161524087860595782141042536975695871 (%make-iset 65536 65908 354901720847464262643270080013918741494457029505007323037992842993478140487429303646626346434630199938330987804055257434518162763775 (%make-iset 65008 65500 23166459242184795109519745412560813317707843523172326996035291982829335841807197632365519235915051181023523126849199870616098935821672668804369354751 #f #f) #f) (%make-iset 66640 66729 1237033344920669303018094591 #f #f)) (%make-iset 68608 68680 #f (%make-iset 68352 68466 1427247352465131255837309846865400673685471231 (%make-iset 67840 68220 4925250765375315437575115447282545225619899143486530057386523691675827454339457238250572167562730571287653355880447 #f #f) #f) (%make-iset 69635 70105 204586912993508730593563550625432944389072422914203271855910203047007904292292283148049105766861891958292943578471947333866751571791177989246645436415 #f #f))) (%make-iset 93952 94111 1461323231539455856679019361440445717580125044735 (%make-iset 77824 78894 #f (%make-iset 74752 74850 #f (%make-iset 73728 74606 #f #f #f) #f) (%make-iset 92160 92728 #f #f #f)) (%make-iset 119808 120144 
1221462192088229133428742507380976417683660481320862010013722843488511299179635651402650638514783282651558855669972991 (%make-iset 110592 110593 #f #f #f) (%make-iset 120146 120485 #f #f #f)))) (%make-iset 2404 2416 4099 (%make-iset 177984 178205 #f (%make-iset 131072 173782 #f (%make-iset 126464 126651 6421867392636242726356536395708148792154285362342779204665327 (%make-iset 120656 120831 95780971304117989802914516033683783778383003373273087 #f #f) #f) (%make-iset 173824 177972 #f #f #f)) (%make-iset 2038 2038 #f (%make-iset 1370 1805 177444091211903494854164183597562195186987239759942234703242934973005962356878807556430406995673757716803475823941853280674043134015 (%make-iset 194560 195101 #f #f #f) #f) (%make-iset 2039 2142 10141204806548057579655195000839 #f #f))) (%make-iset 3449 3449 #f (%make-iset 3059 3066 #f (%make-iset 2800 2928 340282366920938463463374607431768211459 (%make-iset 2546 2555 771 #f #f) #f) (%make-iset 3199 3199 #f #f #f)) (%make-iset 3844 4175 8612299728833109452216727519275634166367255279803428156814546022481552546661143084834195942383124479 (%make-iset 3572 3675 15214283120596255441467398422529 #f #f) (%make-iset 4347 4347 #f #f #f)))))) (%make-iset 6622 6655 #f (%make-iset 44011 44011 #f (%make-iset 9001 9002 #f (%make-iset 6686 6687 #f (%make-iset 6100 6154 36011204832919671 (%make-iset 5741 5942 4820814132776970826626481771165599449877669887122490332807171 (%make-iset 5120 5120 #f #f #f) #f) (%make-iset 6468 6469 #f #f #f)) (%make-iset 7164 7379 52681756409358971006475467946070306489686807156907791211684691983 (%make-iset 7002 7008 #f (%make-iset 6816 6829 16255 #f #f) #f) (%make-iset 8208 8334 127607834706674478748244504138880122879 #f #f))) (%make-iset 12289 12539 1809251394333065553493296641491499378872794969502476229475819322195851214727 (%make-iset 11513 11632 664613997892457936451903530140172399 (%make-iset 10627 10749 7975367975289779630837864365545226239 (%make-iset 10088 10223 87027215340059722591700904977272299077631 #f #f) 
#f) (%make-iset 11776 11835 1152780767118491647 #f #f)) (%make-iset 43124 43615 11987514433213410168505333362405758614102799522152940711615041872404225459959956214682136066587259816913167135759024198010248565942507088814987542543 (%make-iset 42509 42743 54351252480975689826048702415431575448442147475312362296571857733681159 (%make-iset 42238 42239 #f #f #f) #f) (%make-iset 43742 43761 786435 #f #f)))) (%make-iset 69703 69955 13569385457497991651199724805705614201565294768222350139146883642703225028735 (%make-iset 66463 66512 562949953421313 (%make-iset 65281 65381 2521683860030948918624327334903 (%make-iset 65040 65131 4037812089938908849547248639 (%make-iset 64830 64831 #f #f #f) #f) (%make-iset 65792 65794 #f #f #f)) (%make-iset 68176 68223 140737488355839 (%make-iset 67871 67903 4294967297 (%make-iset 67671 67671 #f #f #f) #f) (%make-iset 68409 68415 #f #f #f))) (%make-iset 4254 4255 #f (%make-iset 3841 3896 47287798208921607 (%make-iset 74864 74867 #f (%make-iset 70085 70088 #f #f #f) #f) (%make-iset 4030 4056 126058239 #f #f)) (%make-iset 6107 6107 #f (%make-iset 5008 5017 #f #f #f) (%make-iset 6464 6464 #f #f #f))))) (%make-iset 42752 42890 522673715590561479879743397015208866086911 (%make-iset 10750 11084 #f (%make-iset 9472 9983 #f (%make-iset 8592 9000 #f (%make-iset 8125 8527 15180504508302547082559792549452134039979727921597050291198905168453617261872958515620728168035045459212134851764818477085 (%make-iset 7009 7036 267912191 #f #f) #f) (%make-iset 9003 9449 363419362147803445274660701490199886974208000930090554402992578672453439413725018135787913021374683400778402767997805117095404640403455 #f #f)) (%make-iset 10649 10711 #f (%make-iset 10224 10626 #f (%make-iset 9985 10213 862718293047519590130783627714548701007576706064464912086123449155583 #f #f) #f) (%make-iset 10716 10749 4294967295 #f #f))) (%make-iset 12688 13054 300613450595050653137261935414156179889591998741512647513967970216935158771592840931503642564904187380807827395 (%make-iset 11904 
12351 547258453716185545689710299454841531346882567525409776024515836385994490529377606833512566938465691840048809561654146263061905633443839 (%make-iset 11493 11498 #f (%make-iset 11088 11097 #f #f #f) #f) (%make-iset 12443 12444 #f #f #f)) (%make-iset 19904 19967 #f (%make-iset 13056 13311 #f #f #f) (%make-iset 42128 42182 #f #f #f)))) (%make-iset 119296 119365 664082786653543858175 (%make-iset 65020 65129 796084576943328064139929612976131 (%make-iset 64297 64297 #f (%make-iset 43639 43641 #f (%make-iset 43048 43065 245775 #f #f) #f) (%make-iset 64434 64449 #f #f #f)) (%make-iset 65847 66044 401734511064736150903948875408776876244564738538201663144447 (%make-iset 65504 65533 805339007 (%make-iset 65284 65374 1547425050547877224499904641 #f #f) #f) (%make-iset 118784 119261 780437137578995458467560908739391556888996158179266522929460546508236933653296437157060350808521225678016835714111230592247297551268494937423871 #f #f))) (%make-iset 127462 127569 244021541289521102071576548868095 (%make-iset 126704 126705 #f (%make-iset 120513 120771 463168363851011130091785795673740600836767278013411671357985905734243019915265 (%make-iset 119552 119638 #f #f #f) #f) (%make-iset 126976 127386 5288447750321425141395456744287458546996189758388629186172721346534510152156602991240248150625623449590204448175840041107455 #f #f)) (%make-iset 128256 128359 5104235524096485346957782767918670413823 (%make-iset 127744 128252 1623601741516486367526542089626489179499454454837360135582644525057721907975405279813447049733043276512275822980244041212191896116171460908759068350349311 #f #f) (%make-iset 128507 128883 307828173409331868845930000782371979146838607456393061394578876014578854359990151589006733868359031372480153387007 #f #f))))))))) + +;; char-set:printing +(define char-set:printing (immutable-char-set (%make-iset 4347 4347 #f (%make-iset 63744 64109 #f (%make-iset 6688 6809 5311800736730066244218093126239125503 (%make-iset 4349 4680 #f (%make-iset 1869 2380 
13407424329480734270447878660048428920496373804417168083627602288254107168259292949124979141882146787383731347613928779568711247970208345158244457521348607 (%make-iset 452 1366 276978483139049986878383159513059937866295390192563056831002732490712977617729827250930061495547874753339425272845057761345036295330752004396560217460615349388256052740008771052267223259379816261947173385923045218076729795326919802994234467957919553429350144124989345512816639 (%make-iset 160 160 #f (%make-iset 9 451 22669347419131782291913114369553852359296309528272676318593841051105143812618789434640033841431908014736263332293412376088380325232671 #f #f) #f) (%make-iset 1377 1855 1560874275157973934343822231147075518302721156252056432767839474782185051280962423908807488297855721715644480587704321413018649007858524599877631 #f #f)) (%make-iset 3424 3911 796045881074145379721101485003658612742377112762910063541249941107066206101891013710754591976857025636790244447628442609572802972081506968078974927 (%make-iset 2908 3404 381991200485141231019756222408812876448756014237186533546277447507183436269917901262081454044646810274583365631168335787645676728618713947476107197691 (%make-iset 2382 2892 5366736463561891048904088864332422319078538891026511232750792789353571719231492410784884055633496549297705163924592108787908229198392583314950938978942851 #f #f) #f) (%make-iset 3913 4346 44362715105927999221712262984621806339048726122702370012163096018033725502663463203116646689073826286574787756247207860776650932223 #f #f))) (%make-iset 6158 6158 #f (%make-iset 5121 5740 #f (%make-iset 4824 5108 62165404551223330269422779455378792455782943721961461362086716517957521327837304324095 (%make-iset 4682 4822 2787528182814337039150675927861957382578127 #f #f) #f) (%make-iset 5760 5760 #f #f #f)) (%make-iset 5743 6210 762145642145155178562790324783619807494011846352632466837216562044788852063159421730556579887606746611010843054583947091606179550442951671807 (%make-iset 12288 12288 #f (%make-iset 8192 8287 
39614081257132312832795215871 #f #f) #f) (%make-iset 6212 6683 12194330229244424385524038642855360947889231407969113621412693927197786728086255496979946034430370000318048005765115268416095504239842568437759 #f #f)))) (%make-iset 40960 40980 #f (%make-iset 9398 9449 #f (%make-iset 7680 8188 1669429210027032353070387959902572074068251569912541634239543664469254840701605815586298160569854604812755249439701457169906905863833481177213265564925951 (%make-iset 7424 7615 6277101735386680763834460195211881500229451637403754168319 (%make-iset 6912 7414 22886562555891042693710382458146756289377006471936556892838923982807328844367070294241793956553485968351753386046618718102707539530518625959993520685039 #f #f) #f) (%make-iset 8336 8584 904625697166121400702423284964750605108746128179460890055734500600385118207 #f #f)) (%make-iset 13312 19893 #f (%make-iset 12321 12799 1560850458106678397246853318509506420346053090363696722427379182288726125506853871807416126725592575693831523564009034418563951096328109074350591 (%make-iset 11264 11775 1716199415031085457304815023196129625190876140284840059163076939411632061379695825768761320243048559913035261692194332965927453391792248614786550156348096511 #f #f) #f) (%make-iset 19968 40908 #f #f #f))) (%make-iset 43648 43822 47701954761756259097387504798417463778755529815359487 (%make-iset 42656 43137 9366007796590143684266426014173832021882860391206691228341429856634888413055477646498624449482500722743196759142345085250473767233032902320062463 (%make-iset 42192 42647 186070703019498633361408888448075519920530856307050957164988653405500459175263525083868751559444797946410782900983767246458201399646748671 (%make-iset 40982 42124 #f #f #f) #f) (%make-iset 43138 43638 224726016208531273348937437899872290007502680726266932355584409924411794087837267452460956059789185107995855861761463319664142696520594339463520992995707994505215 #f #f)) (%make-iset 44032 55203 #f (%make-iset 43968 44025 287956460317442047 #f #f) (%make-iset 55216 55291 
75557863725914197590015 #f #f))))) (%make-iset 110592 110593 #f (%make-iset 67840 68220 4925250765375315437575115447282545225619899143486530057386523691675827454339457238250572167562730571287653355880447 (%make-iset 65008 65500 23166459242184795109519745412560813317707843523172326996035291982829335841807197632365519235915051181023523126849199870616098935821672668804369354751 (%make-iset 64467 64829 #f (%make-iset 64256 64433 383123885216472214589586756787576615644966064332734591 (%make-iset 64112 64217 #f #f #f) #f) (%make-iset 64848 64967 341611594916723379280938182270919901183 #f #f)) (%make-iset 66640 66729 1237033344920669303018094591 (%make-iset 66176 66639 878680692688788100968217104054132723536510135400456683066998889497980605885632024127229627367624962058724496859707859793938161524087860595782141042536975695871 (%make-iset 65536 65908 354901720847464262643270080013918741494457029505007323037992842993478140487429303646626346434630199938330987804055257434518162763775 #f #f) #f) (%make-iset 67584 67669 77371243358065019892792383 #f #f))) (%make-iset 73728 74606 #f (%make-iset 69635 70105 204586912993508730593563550625432944389072422914203271855910203047007904292292283148049105766861891958292943578471947333866751571791177989246645436415 (%make-iset 68608 68680 #f (%make-iset 68352 68466 1427247352465131255837309846865400673685471231 #f #f) #f) (%make-iset 71296 71369 348108861360120048141903260542929609949183 #f #f)) (%make-iset 92160 92728 #f (%make-iset 77824 78894 #f (%make-iset 74752 74850 #f #f #f) #f) (%make-iset 93952 94111 1461323231539455856679019361440445717580125044735 #f #f)))) (%make-iset 1370 1805 177444091211903494854164183597562195186987239759942234703242934973005962356878807556430406995673757716803475823941853280674043134015 (%make-iset 126464 126651 6421867392636242726356536395708148792154285362342779204665327 (%make-iset 120488 120654 187072206790762423064706648907459429699768891211775 (%make-iset 120146 120485 #f (%make-iset 119808 120144 
1221462192088229133428742507380976417683660481320862010013722843488511299179635651402650638514783282651558855669972991 #f #f) #f) (%make-iset 120656 120831 95780971304117989802914516033683783778383003373273087 #f #f)) (%make-iset 177984 178205 #f (%make-iset 173824 177972 #f (%make-iset 131072 173782 #f #f #f) #f) (%make-iset 194560 195101 #f #f #f))) (%make-iset 3059 3066 #f (%make-iset 2546 2555 771 (%make-iset 2404 2416 4099 (%make-iset 2038 2142 20282409613096115159310390001679 #f #f) #f) (%make-iset 2800 2928 340282366920938463463374607431768211459 #f #f)) (%make-iset 3449 3675 161786008477555489129424138914539858559159520266627551066615690821633 (%make-iset 3199 3199 #f #f #f) (%make-iset 3844 4175 8612299728833109452216727519275634166367255279803428156814546022481552546661143084834195942383124479 #f #f)))))) (%make-iset 6464 6464 #f (%make-iset 43742 43761 786435 (%make-iset 8208 8334 127607834706674478748244504138880122879 (%make-iset 6468 6469 #f (%make-iset 5741 5942 4820814132776970826626481771165599449877669887122490332807171 (%make-iset 5120 5120 #f (%make-iset 4960 4968 #f #f #f) #f) (%make-iset 6100 6154 36011204832919671 #f #f)) (%make-iset 7002 7008 #f (%make-iset 6816 6829 16255 (%make-iset 6686 6687 #f #f #f) #f) (%make-iset 7164 7379 52681756409358971006475467946070306489686807156907791211684691983 #f #f))) (%make-iset 11776 11835 1152780767118491647 (%make-iset 10627 10749 7975367975289779630837864365545226239 (%make-iset 10088 10223 87027215340059722591700904977272299077631 (%make-iset 9001 9002 #f #f #f) #f) (%make-iset 11513 11632 664613997892457936451903530140172399 #f #f)) (%make-iset 42509 42743 54351252480975689826048702415431575448442147475312362296571857733681159 (%make-iset 42238 42239 #f (%make-iset 12289 12539 1809251394333065553493296641491499378872794969502476229475819322195851214727 #f #f) #f) (%make-iset 43124 43615 
11987514433213410168505333362405758614102799522152940711615041872404225459959956214682136066587259816913167135759024198010248565942507088814987542543 #f #f)))) (%make-iset 68409 68415 #f (%make-iset 65792 65794 #f (%make-iset 65040 65131 4037812089938908849547248639 (%make-iset 64830 64831 #f (%make-iset 44011 44011 #f #f #f) #f) (%make-iset 65281 65381 2521683860030948918624327334903 #f #f)) (%make-iset 67871 67903 4294967297 (%make-iset 67671 67671 #f (%make-iset 66463 66512 562949953421313 #f #f) #f) (%make-iset 68176 68223 140737488355839 #f #f))) (%make-iset 4030 4056 126058239 (%make-iset 74864 74867 #f (%make-iset 70085 70088 #f (%make-iset 69703 69955 13569385457497991651199724805705614201565294768222350139146883642703225028735 #f #f) #f) (%make-iset 3841 3896 47287798208921607 #f #f)) (%make-iset 5008 5017 #f (%make-iset 4254 4255 #f #f #f) (%make-iset 6107 6107 #f #f #f))))) (%make-iset 42752 42890 522673715590561479879743397015208866086911 (%make-iset 10716 10749 4294967295 (%make-iset 9003 9449 363419362147803445274660701490199886974208000930090554402992578672453439413725018135787913021374683400778402767997805117095404640403455 (%make-iset 8125 8527 15180504508302547082559792549452134039979727921597050291198905168453617261872958515620728168035045459212134851764818477085 (%make-iset 7009 7036 267912191 (%make-iset 6622 6655 #f #f #f) #f) (%make-iset 8592 9000 #f #f #f)) (%make-iset 10224 10626 #f (%make-iset 9985 10213 862718293047519590130783627714548701007576706064464912086123449155583 (%make-iset 9472 9983 #f #f #f) #f) (%make-iset 10649 10711 #f #f #f))) (%make-iset 12443 12444 #f (%make-iset 11493 11498 #f (%make-iset 11088 11097 #f (%make-iset 10750 11084 #f #f #f) #f) (%make-iset 11904 12351 547258453716185545689710299454841531346882567525409776024515836385994490529377606833512566938465691840048809561654146263061905633443839 #f #f)) (%make-iset 19904 19967 #f (%make-iset 13056 13311 #f (%make-iset 12688 13054 
300613450595050653137261935414156179889591998741512647513967970216935158771592840931503642564904187380807827395 #f #f) #f) (%make-iset 42128 42182 #f #f #f)))) (%make-iset 119296 119365 664082786653543858175 (%make-iset 65020 65129 796084576943328064139929612976131 (%make-iset 64297 64297 #f (%make-iset 43639 43641 #f (%make-iset 43048 43065 245775 #f #f) #f) (%make-iset 64434 64449 #f #f #f)) (%make-iset 65847 66044 401734511064736150903948875408776876244564738538201663144447 (%make-iset 65504 65533 805339007 (%make-iset 65284 65374 1547425050547877224499904641 #f #f) #f) (%make-iset 118784 119261 780437137578995458467560908739391556888996158179266522929460546508236933653296437157060350808521225678016835714111230592247297551268494937423871 #f #f))) (%make-iset 127462 127569 244021541289521102071576548868095 (%make-iset 126704 126705 #f (%make-iset 120513 120771 463168363851011130091785795673740600836767278013411671357985905734243019915265 (%make-iset 119552 119638 #f #f #f) #f) (%make-iset 126976 127386 5288447750321425141395456744287458546996189758388629186172721346534510152156602991240248150625623449590204448175840041107455 #f #f)) (%make-iset 128256 128359 5104235524096485346957782767918670413823 (%make-iset 127744 128252 1623601741516486367526542089626489179499454454837360135582644525057721907975405279813447049733043276512275822980244041212191896116171460908759068350349311 #f #f) (%make-iset 128507 128883 307828173409331868845930000782371979146838607456393061394578876014578854359990151589006733868359031372480153387007 #f #f))))))))) + diff --git a/lib/chibi/char-set/full.sld b/lib/chibi/char-set/full.sld new file mode 100644 index 00000000..00076053 --- /dev/null +++ b/lib/chibi/char-set/full.sld @@ -0,0 +1,9 @@ + +(define-library (chibi char-set full) + (import (scheme) (chibi iset base) (chibi char-set base)) + (export char-set:lower-case char-set:upper-case char-set:title-case + char-set:letter char-set:digit char-set:letter+digit + char-set:graphic 
char-set:printing char-set:whitespace + char-set:iso-control char-set:punctuation char-set:symbol + char-set:hex-digit char-set:blank) + (include "full.scm")) diff --git a/lib/chibi/iset/optimize.scm b/lib/chibi/iset/optimize.scm index 5ca105f7..b6879ad0 100644 --- a/lib/chibi/iset/optimize.scm +++ b/lib/chibi/iset/optimize.scm @@ -3,24 +3,25 @@ ;; Optimizing Iset Representation (define (iset-balance iset) - (let ((nodes '())) - (iset-for-each-node - (lambda (is) (set! nodes (cons (iset-copy-node is) nodes))) - iset) - (let reduce ((nodes (reverse nodes))) - (let ((len (length nodes))) - (case len - ((0) #f) - ((1) (car nodes)) - (else - (let ((mid (quotient len 2))) - (let lp ((i 0) (ls nodes) (left '())) - (if (= i mid) - (let ((res (car ls))) - (iset-left-set! res (reduce (reverse left))) - (iset-right-set! res (reduce (cdr ls))) - res) - (lp (+ i 1) (cdr ls) (cons (car ls) left))))))))))) + (and iset + (let ((nodes '())) + (iset-for-each-node + (lambda (is) (set! nodes (cons (iset-copy-node is) nodes))) + iset) + (let reduce ((nodes (reverse nodes))) + (let ((len (length nodes))) + (case len + ((0) #f) + ((1) (car nodes)) + (else + (let ((mid (quotient len 2))) + (let lp ((i 0) (ls nodes) (left '())) + (if (= i mid) + (let ((res (car ls))) + (iset-left-set! res (reduce (reverse left))) + (iset-right-set! res (reduce (cdr ls))) + res) + (lp (+ i 1) (cdr ls) (cons (car ls) left)))))))))))) (define (iset-balance! iset) (iset-balance iset)) diff --git a/tools/extract-unicode-props.scm b/tools/extract-unicode-props.scm new file mode 100755 index 00000000..90bfcf45 --- /dev/null +++ b/tools/extract-unicode-props.scm @@ -0,0 +1,209 @@ +#!/usr/bin/env chibi-scheme + +;; Simple tool to extract Unicode properties as character-sets. +;; +;; Usage: +;; extract-unicode-props.scm Lowercase Punctuation=P Blank=Zs,0009 >out +;; +;; Accepts a list of char-set names with optional definitions as +;; arguments, and writes their Scheme definitions to stdout. 
A
+;; char-set can be of the form:
+;;
+;;   Name: equivalent to Name=Name
+;;   Name=value,...: the union of the listed values
+;;
+;; A value can be any of:
+;;
+;;   Property_Name: all unicode characters with the given derived property
+;;   Xx: all unicode characters with the given general category
+;;   X: all unicode characters with any general category X*
+;;   NNNN: a single unicode value in hex format
+;;   NNNN-NNNN: an inclusive range of unicode values in hex format
+;;
+;; The char-set names generated are always lowercased, replacing _
+;; with -, for convenience when the char-set name coincides with a
+;; Unicode property name.
+;;
+;; Assumes the files UnicodeData.txt and DerivedCoreProperties.txt are
+;; in the data/ subdirectory of the current directory, unless
+;; overridden with the --data or --derived options.
+;;
+;; Options:
+;;
+;;   -d, --data FILE     path to UnicodeData.txt
+;;   -e, --derived FILE  path to DerivedCoreProperties.txt
+;;   -o, --output FILE   write the definitions to FILE instead of stdout
+;;   -f, --default       insert the built-in default-char-sets list
+
+(import (scheme) (chibi io) (chibi strings))
+
+;; Display each of ARGS on the current error port, followed by a
+;; newline.  Used for non-fatal diagnostics so that stdout only ever
+;; carries generated definitions.
+(define (warn . args)
+  (let ((err (current-error-port)))
+    (for-each (lambda (x) (display x err)) args)
+    (newline err)))
+
+;; Parse UnicodeData.txt for characters matching a given class.
+;;
+;; CAT is either a character, matched against the first letter of each
+;; line's general category, or a string, matched against the whole
+;; two-letter category.  DATA is the path of the file to read.
+;;
+;; Returns a `(char-set-union ...)' expression (as data, not an
+;; evaluated char-set) whose operands are `(char-set c)' for isolated
+;; code points and `(ucs-range->char-set start end+1)' for runs of
+;; consecutive code points.  Malformed lines are reported with `warn'
+;; and skipped.
+;;
+;; NOTE(review): UnicodeData.txt describes some large blocks with a
+;; pair of "<..., First>" / "<..., Last>" lines.  Nothing here looks at
+;; the name field, so such a pair appears to contribute only its two
+;; end points -- confirm before relying on categories that use them.
+(define (extract-char-set-category cat data)
+  ;; Add code point N to LS, a list (most recent first) of integers
+  ;; and inclusive (start . end) pairs, extending the head entry when
+  ;; N directly follows it.
+  (define (join-to-range n ls)
+    (cond
+     ((null? ls)
+      (list n))
+     ((eqv? (car ls) (- n 1))
+      (cons (cons (car ls) n) (cdr ls)))
+     ((and (pair? (car ls)) (eqv? (- n 1) (cdar ls)))
+      (cons (cons (caar ls) n) (cdr ls)))
+     (else
+      (cons n ls))))
+  (call-with-input-file data
+    (lambda (in)
+      (let lp ((ranges '()))
+        (let ((line (read-line in)))
+          (cond
+           ((eof-object? line)
+            `(char-set-union
+              ,@(map (lambda (x)
+                       (if (pair? x)
+                           `(ucs-range->char-set ,(car x) ,(+ 1 (cdr x)))
+                           `(char-set ,(integer->char x))))
+                     (reverse ranges))))
+           ((or (equal? line "") (eqv? #\# (string-ref line 0)))
+            (lp ranges))
+           (else
+            ;; Fields are code;name;category;rest -- only the first
+            ;; and third are needed.
+            (let ((ls (string-split line #\; 4)))
+              (cond
+               ((< (length ls) 3)
+                (warn "invalid UnicodeData line: " line)
+                (lp ranges))
+               (else
+                (let ((ch (string->number (car ls) 16))
+                      (ch-cat (car (cddr ls))))
+                  (cond
+                   ((or (not ch) (not (= 2 (string-length ch-cat))))
+                    (warn "invalid UnicodeData line: " line)
+                    ;; Keep scanning: without this the loop would end
+                    ;; here and return the value of `warn' instead of
+                    ;; a char-set expression.
+                    (lp ranges))
+                   ((if (char? cat)
+                        (eqv? cat (string-ref ch-cat 0))
+                        (equal? cat ch-cat))
+                    (lp (join-to-range ch ranges)))
+                   (else
+                    (lp ranges))))))))))))))
+
+;; Parse DerivedCoreProperties.txt for characters matching a given
+;; property.
+;;
+;; PROP is the property name, compared case-insensitively against the
+;; second `;'-separated field of each line.  DERIVED is the path of
+;; the file to read.
+;;
+;; Returns a `(char-set-union ...)' expression (as data) with one
+;; operand per matching line: `(ucs-range->char-set start end+1)' for
+;; "XXXX..YYYY" lines and `(char-set c)' for single code points.
+;; Signals an error on an unparsable or out-of-order range.
+(define (extract-char-set-property prop derived)
+  ;; Drop everything from the first COMMENT-CH onwards.
+  (define (string-trim-comment str comment-ch)
+    (car (string-split str comment-ch 2)))
+  (call-with-input-file derived
+    (lambda (in)
+      (let lp ((ranges '()))
+        (let ((line (read-line in)))
+          (cond
+           ((eof-object? line)
+            `(char-set-union ,@(reverse ranges)))
+           ((or (equal? line "") (eqv? #\# (string-ref line 0)))
+            (lp ranges))
+           (else
+            (let ((ls (string-split (string-trim-comment line #\#) #\;)))
+              (cond
+               ((< (length ls) 2)
+                (warn "invalid DerivedCoreProperties line: " line)
+                (lp ranges))
+               ((string-ci=? prop (string-trim (cadr ls)))
+                ;; The code point field is padded with spaces; trim it
+                ;; once so the index of ".." and the hex parsing both
+                ;; refer to the same string.
+                (let ((str (string-trim (car ls))))
+                  (cond
+                   ((string-contains str "..")
+                    => (lambda (i)
+                         (let ((start (string->number (substring str 0 i) 16))
+                               (end (string->number (substring str (+ i 2)) 16)))
+                           ;; end is inclusive, so it may not exceed
+                           ;; the last Unicode code point.
+                           (if (and start end (<= 0 start end #x10FFFF))
+                               (lp (cons `(ucs-range->char-set ,start ,(+ end 1))
+                                         ranges))
+                               (error "invalid char range: " line)))))
+                   ;; Single code point: parse the code point field,
+                   ;; not the property name in (cadr ls).
+                   ((string->number str 16)
+                    => (lambda (n)
+                         (lp (cons `(char-set ,(integer->char n)) ranges))))
+                   (else
+                    (warn "invalid DerivedCoreProperties line: " line)
+                    (lp ranges)))))
+               (else
+                (lp ranges)))))))))))
+
+;; Translate one value of a char-set definition into a char-set
+;; expression.  DEF is tried, in order, as a hex range NNNN-NNNN, a
+;; single hex code point, a one-letter general category, a two-letter
+;; general category, and finally a derived property name.  DATA and
+;; DERIVED are the paths passed on to the category and property
+;; parsers.  Signals an error for a malformed or reversed range.
+;;
+;; NOTE(review): because the hex test comes first, a value made up
+;; only of hex digits (e.g. "C" or "Cc") is read as a code point and
+;; never reaches the category branches -- confirm this is intended.
+(define (extract-char-set-simple def data derived)
+  (let ((ls (string-split def #\- 2)))
+    (cond
+     ((= 2 (length ls))
+      (let ((start (string->number (car ls) 16))
+            (end (string->number (cadr ls) 16)))
+        (if (and start end (<= start end))
+            `(ucs-range->char-set ,start ,(+ end 1))
+            (error "invalid character range, expected NNNN-MMMM, got: " def))))
+     ((string->number def 16)
+      => (lambda (start) `(char-set ,(integer->char start))))
+     ((and (= 1 (string-length def))
+           (char-upper-case? (string-ref def 0)))
+      (extract-char-set-category (string-ref def 0) data))
+     ((and (= 2 (string-length def))
+           (char-upper-case? (string-ref def 0))
+           (char-lower-case? (string-ref def 1)))
+      (extract-char-set-category def data))
+     (else ;; derived property
+      (extract-char-set-property def derived)))))
+
+;; Translate a comma-separated list of values into a char-set
+;; expression: the single value's expression when there is only one,
+;; otherwise a `char-set-union' of all of them.
+(define (extract-char-set def data derived)
+  (let ((defs (string-split def #\,)))
+    (cond
+     ((= 1 (length defs))
+      (extract-char-set-simple (car defs) data derived))
+     (else
+      `(char-set-union
+        ,@(map (lambda (def) (extract-char-set-simple def data derived))
+               defs))))))
+
+;; Write to OUT a comment line holding DEF, then a
+;; `(define char-set:<name> (immutable-char-set ...))' form for it,
+;; then a blank line.  NAME is lowercased with _ replaced by - to form
+;; the variable name.
+(define (process-char-set name def data derived out)
+  (define (normalize-char-set-name str)
+    (string-append
+     "char-set:"
+     (string-map (lambda (ch) (if (eqv? ch #\_) #\- (char-downcase ch))) str)))
+  (display ";; " out)
+  (display def out)
+  (newline out)
+  (write
+   `(define ,(string->symbol (normalize-char-set-name name))
+      (immutable-char-set
+       ,(extract-char-set def data derived)))
+   out)
+  (newline out)
+  (newline out))
+
+;; Char-set definitions substituted for the -f/--default option.
+(define default-char-sets
+  '("Lower-Case=Lowercase"
+    "Upper-Case=Uppercase"
+    "Title-Case=Lt"
+    "Letter=Alphabetic"
+    "Punctuation=P"
+    "Symbol=S"
+    "Blank=Zs,0009"
+    "Whitespace=Zs,Zl,Zp,0009,000A,000B,000C,000D"
+    "Digit=Nd"))
+
+;; Entry point.  ARGS is the command line including the program name.
+;; Arguments are handled left to right: options update the data file,
+;; derived-properties file or output port used by every char-set
+;; definition that follows them; any other argument is a Name or
+;; Name=value,... definition that is written out immediately.  The
+;; output port is closed once the arguments are exhausted.
+(define (main args)
+  ;; The argument following the option at the head of LS; signals a
+  ;; readable error when the command line ends at the option.
+  (define (option-arg ls)
+    (if (pair? (cdr ls))
+        (cadr ls)
+        (error "missing argument for option: " (car ls))))
+  (let lp ((ls (cdr args))
+           (data "data/UnicodeData.txt")
+           (derived "data/DerivedCoreProperties.txt")
+           (out (current-output-port)))
+    (cond
+     ((and (pair? ls) (not (equal? "" (car ls)))
+           (eqv? #\- (string-ref (car ls) 0)))
+      (cond
+       ((member (car ls) '("-d" "--data"))
+        (let ((arg (option-arg ls)))
+          (lp (cddr ls) arg derived out)))
+       ((member (car ls) '("-e" "--derived"))
+        (let ((arg (option-arg ls)))
+          (lp (cddr ls) data arg out)))
+       ((member (car ls) '("-o" "--output"))
+        (let ((arg (option-arg ls)))
+          (lp (cddr ls) data derived (open-output-file arg))))
+       ;; The short form must carry its dash: this branch is only
+       ;; reached for arguments that start with "-".
+       ((member (car ls) '("-f" "--default"))
+        (lp (append default-char-sets (cdr ls)) data derived out))
+       (else
+        (error "unknown option: " (car ls)))))
+     ((pair? ls)
+      (let ((ls (string-split (car ls) #\= 2)))
+        (cond
+         ((= 1 (length ls))
+          (process-char-set (car ls) (car ls) data derived out))
+         (else
+          (process-char-set (car ls) (cadr ls) data derived out))))
+      (lp (cdr ls) data derived out))
+     (else
+      (close-output-port out)))))
diff --git a/tools/optimize-char-sets.scm b/tools/optimize-char-sets.scm
new file mode 100644
index 00000000..47d0bdca
--- /dev/null
+++ b/tools/optimize-char-sets.scm
@@ -0,0 +1,82 @@
+#!/usr/bin/env chibi-scheme
+
+;; Simple tool to generate libraries of optimized char-sets.
+;;
+;; Usage:
+;;   optimize-char-sets.scm [--ascii] module.name > out
+;;
+;; Imports (module name) and writes optimized versions of all exported
+;; char-sets to stdout.
+
+(import (scheme) (srfi 1) (srfi 69)
+        (chibi io) (chibi strings) (chibi modules)
+        (chibi char-set) (chibi iset) (chibi iset optimize)
+        (only (meta) load-module))
+
+;; Elements of LS1 that do not occur in LS2, in LS1 order.  Membership
+;; is looked up in a hash table built from LS2, which avoids the
+;; quadratic cost of the srfi-1 list-set operations on huge sets.
+(define (lset-diff ls1 ls2)
+  (let ((seen (make-hash-table eq?)))
+    (for-each (lambda (elt) (hash-table-set! seen elt #t)) ls2)
+    (filter (lambda (elt) (not (hash-table-exists? seen elt))) ls1)))
+
+;; Script entry point.  ARGS is the command line; its first element is
+;; skipped.  Accepts an optional -a/--ascii flag followed by exactly
+;; one dotted module name.  The module is loaded and, for each of its
+;; exports, a ";; name" comment is written to stdout; exports whose
+;; value is a char-set additionally get an optimized, balanced
+;; definition written after the comment.  Progress goes to stderr.
+(define (main args)
+  ;; Writes the progress message MSG to the error port.
+  (define (progress msg)
+    (display msg (current-error-port)))
+
+  ;; True when STR is non-empty and starts with a dash.
+  (define (option? str)
+    (and (not (equal? "" str))
+         (eqv? #\- (string-ref str 0))))
+
+  ;; Splits STR on dots into a module name list; parts that read as
+  ;; numbers become numbers, all others become symbols.
+  (define (parse-module-name str)
+    (map (lambda (part) (or (string->number part) (string->symbol part)))
+         (string-split str #\.)))
+
+  ;; Collects the elements of IS by stepping a cursor from start to end.
+  (define (iset-cursor->list is)
+    (let walk ((cur (iset-cursor is)) (acc '()))
+      (if (end-of-iset? cur)
+          (reverse acc)
+          (walk (iset-cursor-next is cur)
+                (cons (iset-ref is cur) acc)))))
+
+  ;; Reports on stderr the sizes of ISET1 and ISET2 and the elements
+  ;; each has that the other lacks, then signals an error.
+  (define (fail-different iset1 iset2)
+    (progress " different!\n")
+    (let* ((ls1 (iset->list iset1))
+           (ls2 (iset->list iset2))
+           (diff1 (lset-diff ls1 ls2))
+           (diff2 (lset-diff ls2 ls1)))
+      (progress " original: ")
+      (write (length ls1) (current-error-port))
+      (progress " elements, missing: ")
+      (write diff1 (current-error-port))
+      (newline (current-error-port))
+      (progress " optimized: ")
+      (write (length ls2) (current-error-port))
+      (progress " elements, missing: ")
+      (write diff2 (current-error-port))
+      (newline (current-error-port))
+      (error "optimized iset is different")))
+
+  ;; Handles the export NAME of module MOD as described for main.
+  (define (emit-export mod name ascii?)
+    (display ";; ") (write name) (newline)
+    (let ((value (module-ref mod name)))
+      (if (char-set? value)
+          (begin
+            (write `(optimize ,name) (current-error-port))
+            (newline (current-error-port))
+            ;; sanity check: cursor traversal must agree with iset->list
+            (if (not (equal? (iset->list value) (iset-cursor->list value)))
+                (error "error in iset cursors"))
+            (progress " computing intersection\n")
+            (let ((iset1 (if ascii?
+                             (iset-intersection char-set:ascii value)
+                             value)))
+              (progress " optimizing\n")
+              (let ((optimized (iset-optimize iset1)))
+                (progress " balancing\n")
+                (let ((iset2 (iset-balance optimized)))
+                  ;; the equality check is only made in non-ascii mode
+                  (if (not (or ascii? (iset= iset1 iset2)))
+                      (fail-different iset1 iset2))
+                  (progress " writing\n")
+                  (write `(define ,name
+                            (immutable-char-set ,(iset->code iset2))))
+                  (newline)
+                  (newline))))))))
+
+  (let lp ((ls (cdr args)) (ascii? #f))
+    (cond
+     ((and (pair? ls) (option? (car ls)))
+      (if (member (car ls) '("-a" "--ascii"))
+          (lp (cdr ls) #t)
+          (error "unknown option" (car ls))))
+     ;; exactly one non-option argument must remain
+     ((or (null? ls) (pair? (cdr ls)))
+      (error "usage: optimize-char-sets.scm [--ascii] module.name"))
+     (else
+      (let ((mod (load-module (parse-module-name (car ls)))))
+        (for-each (lambda (name) (emit-export mod name ascii?))
+                  (module-exports mod)))))))