mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-18 21:29:19 +02:00
117 lines
4.3 KiB
Scheme
117 lines
4.3 KiB
Scheme
#!/usr/bin/env chibi-scheme
|
|
|
|
;; Build a table of special case (non 1:1) case mappings.
|
|
;;
|
|
;; Usage:
|
|
;; extract-special-casing.scm [options] CaseFolding.txt SpecialCasing.txt > out
|
|
|
|
(import (chibi) (srfi 1) (srfi 125) (srfi 95) (chibi io) (chibi string))
|
|
|
|
(define (warn . args)
|
|
(let ((err (current-error-port)))
|
|
(for-each (lambda (x) (display x err)) args)
|
|
(newline err)))
|
|
|
|
(define (useq->string str)
|
|
(let ((ls (map (lambda (s) (string->number s 16))
|
|
(map string-trim (string-split str #\space)))))
|
|
(and ls
|
|
(every integer? ls)
|
|
(list->string (map integer->char ls)))))
|
|
|
|
(define (extract-full-folds in)
|
|
(let ((res (make-hash-table equal?)))
|
|
(let lp ()
|
|
(let ((line (read-line in)))
|
|
(cond
|
|
((eof-object? line)
|
|
res)
|
|
(else
|
|
(let* ((line (substring-cursor line
|
|
(string-cursor-start line)
|
|
(string-find line #\#)))
|
|
(ls (map string-trim (string-split line #\;))))
|
|
(if (and (>= (length ls) 3)
|
|
(member (second ls) '("F")))
|
|
(let ((base (string->number (first ls) 16))
|
|
(folded (useq->string (third ls))))
|
|
(if (and base folded)
|
|
(hash-table-set! res base folded))))
|
|
(lp))))))))
|
|
|
|
(define (extract-special-cases in out folds)
|
|
;; TODO: handle folds not in cases (currently only #\x1e9e)
|
|
(define (write-cases cases out)
|
|
(write-string "(define special-cases\n ;; <code> <lower> <title> <upper> [<fold>]\n '#(" out)
|
|
(for-each
|
|
(lambda (x)
|
|
(if (not (eq? x (car cases)))
|
|
(write-string " " out))
|
|
(write-string "#(#x" out)
|
|
(write-string (number->string (first x) 16) out)
|
|
(write-char #\space out)
|
|
(write (second x) out)
|
|
(write-char #\space out)
|
|
(write (third x) out)
|
|
(write-char #\space out)
|
|
(write (fourth x) out)
|
|
(cond
|
|
((hash-table-ref/default folds (first x) #f)
|
|
=> (lambda (fold)
|
|
(write-char #\space out)
|
|
(write fold out))))
|
|
(write-string ")\n" out))
|
|
cases)
|
|
(write-string " ))\n" out))
|
|
(let lp ((cases '()))
|
|
(let ((line (read-line in)))
|
|
(cond
|
|
((eof-object? line)
|
|
(write-cases (sort cases < car) out))
|
|
((or (equal? line "") (eqv? #\# (string-ref line 0)))
|
|
(lp cases))
|
|
(else
|
|
(let* ((line (substring-cursor line
|
|
(string-cursor-start line)
|
|
(string-find line #\#)))
|
|
(ls (map string-trim (string-split line #\;))))
|
|
(if (and (>= (length ls) 4)
|
|
(or (= 4 (length ls))
|
|
(string-null? (list-ref ls 4))))
|
|
(let ((base (string->number (first ls) 16))
|
|
(lower (useq->string (second ls)))
|
|
(title (useq->string (third ls)))
|
|
(upper (useq->string (fourth ls))))
|
|
(if (and base lower title upper)
|
|
(lp (cons (list base lower title upper) cases))
|
|
(lp cases)))
|
|
(lp cases))))))))
|
|
|
|
(let ((args (command-line)))
|
|
(let lp ((ls (cdr args))
|
|
(min-count 26)
|
|
(max-char-sets #f)
|
|
(name "char-downcase-offsets")
|
|
(out (current-output-port)))
|
|
(cond
|
|
((and (pair? ls) (not (equal? "" (car ls)))
|
|
(eqv? #\- (string-ref (car ls) 0)))
|
|
(cond
|
|
((member (car ls) '("-o" "--output"))
|
|
(lp (cddr ls) min-count max-char-sets name
|
|
(open-output-file (cadr ls))))
|
|
(else
|
|
(error "unknown option: " (car ls)))))
|
|
((not (= 2 (length ls)))
|
|
(error "usage: extract-special-casing <CaseFolding.txt> <SpecialCasing.txt>"))
|
|
(else
|
|
(display ";; auto-generated by extract-special-casing.scm\n\n" out)
|
|
(let ((folds
|
|
(if (equal? "-" (car ls))
|
|
(extract-full-folds (current-input-port))
|
|
(call-with-input-file (car ls) extract-full-folds))))
|
|
(if (equal? "-" (cadr ls))
|
|
(extract-special-cases (current-input-port) out folds)
|
|
(call-with-input-file (cadr ls)
|
|
(lambda (in) (extract-special-cases in out folds)))))
|
|
(close-output-port out)))))
|