chibi-scheme/benchmarks/shootout/knucleotide.chibi
2015-01-26 08:06:59 +09:00

86 lines
2.4 KiB
Text

#! /usr/bin/env chibi-scheme
;;; The Computer Language Benchmarks Game
;;; http://shootout.alioth.debian.org/
;;; based on Racket version by Matthew Flatt
(import (chibi)
(srfi 69)
(srfi 95)
(chibi io))
(define (print . args)
(for-each display args)
(newline))
(define (string-copy! dst dstart src start end)
(do ((i dstart (+ i 1))
(j start (+ j 1)))
((>= j end))
(string-set! dst i (string-ref src j))))
(define (string-upcase str)
(let* ((len (string-length str))
(res (make-string len)))
(do ((i 0 (+ i 1)))
((>= i len) res)
(string-set! res i (char-upcase (string-ref str i))))))
(define (all-counts len dna)
(let ((table (make-hash-table eq?))
(seq (make-string len)))
(do ((s (- (string-length dna) len) ( - s 1)))
((< s 0) table)
(string-copy! seq 0 dna s (+ s len))
(let ((key (string->symbol seq)))
(let ((cnt (hash-table-ref/default table key 0)))
(hash-table-set! table key (+ cnt 1)))))))
(define (write-freqs table)
(let* ((content (hash-table->alist table))
(total (exact->inexact (apply + (map cdr content)))))
(for-each
(lambda (a)
(print (car a) " "
(/ (round (* 100000.0 (/ (cdr a) total))) 1000.0)))
(sort content > cdr))))
(define (write-one-freq table key)
(print (hash-table-ref/default table key 0) "\t" key))
(define dna
(let ((in (current-input-port)))
;; Skip to ">THREE ..."
(let lp ()
(let ((line (read-line in)))
(cond ((eof-object? line))
((and (>= (string-length line) 6)
(eqv? #\> (string-ref line 0))
(equal? (substring line 0 6) ">THREE")))
(else (lp)))))
(let ((out (open-output-string)))
;; Copy everything but newlines to out:
(let lp ()
(let ((line (read-line in)))
(cond ((eof-object? line))
(else
(display line out)
(lp)))))
;; Extract the string from out:
(string-upcase (get-output-string out)))))
;; 1-nucleotide counts:
(write-freqs (all-counts 1 dna))
(newline)
;; 2-nucleotide counts:
(write-freqs (all-counts 2 dna))
(newline)
;; Specific sequences:
(for-each
(lambda (seq)
(write-one-freq (all-counts (string-length seq) dna)
(string->symbol seq)))
'("GGT" "GGTA" "GGTATT" "GGTATTTTAATT" "GGTATTTTAATTTATAGT"))