#!/usr/bin/env chibi-scheme ;; Build a table of special case (non 1:1) case mappings. ;; ;; Usage: ;; extract-special-casing.scm [options] CaseFolding.txt SpecialCasing.txt > out (import (chibi) (srfi 1) (srfi 125) (srfi 95) (chibi io) (chibi string)) (define (warn . args) (let ((err (current-error-port))) (for-each (lambda (x) (display x err)) args) (newline err))) (define (useq->string str) (let ((ls (map (lambda (s) (string->number s 16)) (map string-trim (string-split str #\space))))) (and ls (every integer? ls) (list->string (map integer->char ls))))) (define (extract-full-folds in) (let ((res (make-hash-table equal?))) (let lp () (let ((line (read-line in))) (cond ((eof-object? line) res) (else (let* ((line (substring-cursor line (string-cursor-start line) (string-find line #\#))) (ls (map string-trim (string-split line #\;)))) (if (and (>= (length ls) 3) (member (second ls) '("F"))) (let ((base (string->number (first ls) 16)) (folded (useq->string (third ls)))) (if (and base folded) (hash-table-set! res base folded)))) (lp)))))))) (define (extract-special-cases in out folds) ;; TODO: handle folds not in cases (currently only #\x1e9e) (define (write-cases cases out) (write-string "(define special-cases\n ;; <upper> [<fold>]\n '#(" out) (for-each (lambda (x) (if (not (eq? x (car cases))) (write-string " " out)) (write-string "#(#x" out) (write-string (number->string (first x) 16) out) (write-char #\space out) (write (second x) out) (write-char #\space out) (write (third x) out) (write-char #\space out) (write (fourth x) out) (cond ((hash-table-ref/default folds (first x) #f) => (lambda (fold) (write-char #\space out) (write fold out)))) (write-string ")\n" out)) cases) (write-string " ))\n" out)) (let lp ((cases '())) (let ((line (read-line in))) (cond ((eof-object? line) (write-cases (sort cases < car) out)) ((or (equal? line "") (eqv? #\# (string-ref line 0))) (lp cases)) (else (let* ((line (substring-cursor line (string-cursor-start line) (string-find line #\#))) (ls (map string-trim (string-split line #\;)))) (if (and (>= (length ls) 4) (or (= 4 (length ls)) (string-null? (list-ref ls 4)))) (let ((base (string->number (first ls) 16)) (lower (useq->string (second ls))) (title (useq->string (third ls))) (upper (useq->string (fourth ls)))) (if (and base lower title upper) (lp (cons (list base lower title upper) cases)) (lp cases))) (lp cases)))))))) (let ((args (command-line))) (let lp ((ls (cdr args)) (min-count 26) (max-char-sets #f) (name "char-downcase-offsets") (out (current-output-port))) (cond ((and (pair? ls) (not (equal? "" (car ls))) (eqv? #\- (string-ref (car ls) 0))) (cond ((member (car ls) '("-o" "--output")) (lp (cddr ls) min-count max-char-sets name (open-output-file (cadr ls)))) (else (error "unknown option: " (car ls))))) ((not (= 2 (length ls))) (error "usage: extract-special-casing <CaseFolding.txt> <SpecialCasing.txt>")) (else (display ";; auto-generated by extract-special-casing.scm\n\n" out) (let ((folds (if (equal? "-" (car ls)) (extract-full-folds (current-input-port)) (call-with-input-file (car ls) extract-full-folds)))) (if (equal? "-" (cadr ls)) (extract-special-cases (current-input-port) out folds) (call-with-input-file (cadr ls) (lambda (in) (extract-special-cases in out folds))))) (close-output-port out)))))