#! /usr/bin/env chibi-scheme

;;; The Computer Language Benchmarks Game
;;; http://shootout.alioth.debian.org/

;;; based on Racket version by Matthew Flatt

(import (srfi 69)
        (srfi 95)
        (chibi io))

(define (print . args)
  (for-each display args)
  (newline))

(define (string-copy! dst dstart src start end)
  (do ((i dstart (+ i 1))
       (j start (+ j 1)))
      ((>= j end))
    (string-set! dst i (string-ref src j))))

(define (string-upcase str)
  (let* ((len (string-length str))
         (res (make-string len)))
    (do ((i 0 (+ i 1)))
        ((>= i len) res)
      (string-set! res i (char-upcase (string-ref str i))))))

(define (all-counts len dna)
  (let ((table (make-hash-table eq?))
        (seq (make-string len)))
    (do ((s (- (string-length dna) len) ( - s 1)))
        ((< s 0) table)
      (string-copy! seq 0 dna s (+ s len))
      (let ((key (string->symbol seq)))
        (let ((cnt (hash-table-ref/default table key 0)))
          (hash-table-set! table key (+ cnt 1)))))))

(define (write-freqs table)
  (let* ((content (hash-table->alist table))
         (total (exact->inexact (apply + (map cdr content)))))
    (for-each
     (lambda (a)
       (print (car a) " "
              (/ (round (* 100000.0 (/ (cdr a) total))) 1000.0)))
     (sort content > cdr))))

(define (write-one-freq table key)
  (print (hash-table-ref/default table key 0) "\t" key))

(define dna
  (let ((in (current-input-port)))
    ;; Skip to ">THREE ..."
    (let lp ()
      (let ((line (read-line in)))
        (cond ((eof-object? line))
              ((and (>= (string-length line) 6)
                    (eqv? #\> (string-ref line 0))
                    (equal? (substring line 0 6) ">THREE")))
              (else (lp)))))
    (let ((out (open-output-string)))
      ;; Copy everything but newlines to out:
      (let lp ()
        (let ((line (read-line in)))
          (cond ((eof-object? line))
                (else
                 (display line out)
                 (lp)))))
      ;; Extract the string from out:
      (string-upcase (get-output-string out)))))

;; 1-nucleotide counts:
(write-freqs (all-counts 1 dna))
(newline)

;; 2-nucleotide counts:
(write-freqs (all-counts 2 dna))
(newline)

;; Specific sequences:
(for-each
 (lambda (seq)
   (write-one-freq (all-counts (string-length seq) dna)
                   (string->symbol seq)))
 '("GGT" "GGTA" "GGTATT" "GGTATTTTAATT" "GGTATTTTAATTTATAGT"))