#!/usr/bin/env chibi-scheme

;; Build a table of special case (non 1:1) case mappings.
;;
;; Usage:
;;   extract-special-casing.scm [options] CaseFolding.txt SpecialCasing.txt > out

(import (chibi) (srfi 1) (srfi 125) (srfi 95) (chibi io) (chibi string))

(define (warn . args)
  (let ((err (current-error-port)))
    (for-each (lambda (x) (display x err)) args)
    (newline err)))

(define (useq->string str)
  (let ((ls (map (lambda (s) (string->number s 16))
                 (map string-trim (string-split str #\space)))))
    (and ls
         (every integer? ls)
         (list->string (map integer->char ls)))))

(define (extract-full-folds in)
  (let ((res (make-hash-table equal?)))
    (let lp ()
      (let ((line (read-line in)))
        (cond
         ((eof-object? line)
          res)
         (else
          (let* ((line (substring-cursor line
                                         (string-cursor-start line)
                                         (string-find line #\#)))
                 (ls (map string-trim (string-split line #\;))))
            (if (and (>= (length ls) 3)
                     (member (second ls) '("F")))
                (let ((base (string->number (first ls) 16))
                      (folded (useq->string (third ls))))
                  (if (and base folded)
                      (hash-table-set! res base folded))))
            (lp))))))))

(define (extract-special-cases in out folds)
  ;; TODO: handle folds not in cases (currently only #\x1e9e)
  (define (write-cases cases out)
    (write-string "(define special-cases\n  ;; <code> <lower> <title> <upper> [<fold>]\n  '#(" out)
    (for-each
     (lambda (x)
       (if (not (eq? x (car cases)))
           (write-string "     " out))
       (write-string "#(#x" out)
       (write-string (number->string (first x) 16) out)
       (write-char #\space out)
       (write (second x) out)
       (write-char #\space out)
       (write (third x) out)
       (write-char #\space out)
       (write (fourth x) out)
       (cond
        ((hash-table-ref/default folds (first x) #f)
         => (lambda (fold)
              (write-char #\space out)
              (write fold out))))
       (write-string ")\n" out))
     cases)
    (write-string "  ))\n" out))
  (let lp ((cases '()))
    (let ((line (read-line in)))
      (cond
       ((eof-object? line)
        (write-cases (sort cases < car) out))
       ((or (equal? line "") (eqv? #\# (string-ref line 0)))
        (lp cases))
       (else
        (let* ((line (substring-cursor line
                                       (string-cursor-start line)
                                       (string-find line #\#)))
               (ls (map string-trim (string-split line #\;))))
          (if (and (>= (length ls) 4)
                   (or (= 4 (length ls))
                       (string-null? (list-ref ls 4))))
              (let ((base (string->number (first ls) 16))
                    (lower (useq->string (second ls)))
                    (title (useq->string (third ls)))
                    (upper (useq->string (fourth ls))))
                (if (and base lower title upper)
                    (lp (cons (list base lower title upper) cases))
                    (lp cases)))
              (lp cases))))))))

(let ((args (command-line)))
  (let lp ((ls (cdr args))
           (min-count 26)
           (max-char-sets #f)
           (name "char-downcase-offsets")
           (out (current-output-port)))
    (cond
     ((and (pair? ls) (not (equal? "" (car ls)))
           (eqv? #\- (string-ref (car ls) 0)))
      (cond
       ((member (car ls) '("-o" "--output"))
        (lp (cddr ls) min-count max-char-sets name
            (open-output-file (cadr ls))))
       (else
        (error "unknown option: " (car ls)))))
     ((not (= 2 (length ls)))
      (error "usage: extract-special-casing <CaseFolding.txt> <SpecialCasing.txt>"))
     (else
      (display ";; auto-generated by extract-special-casing.scm\n\n" out)
      (let ((folds
             (if (equal? "-" (car ls))
                 (extract-full-folds (current-input-port))
                 (call-with-input-file (car ls) extract-full-folds))))
        (if (equal? "-" (cadr ls))
            (extract-special-cases (current-input-port) out folds)
            (call-with-input-file (cadr ls)
              (lambda (in) (extract-special-cases in out folds)))))
      (close-output-port out)))))