diff --git a/lib/chibi/base64.module b/lib/chibi/base64.module new file mode 100644 index 00000000..12324e1d --- /dev/null +++ b/lib/chibi/base64.module @@ -0,0 +1,7 @@ + +(define-module (chibi base64) + (export base64-encode base64-encode-string + base64-decode base64-decode-string + base64-encode-header) + (import-immutable (scheme) (srfi 33) (chibi io)) + (include "base64.scm")) diff --git a/lib/chibi/base64.scm b/lib/chibi/base64.scm new file mode 100644 index 00000000..3d95ad71 --- /dev/null +++ b/lib/chibi/base64.scm @@ -0,0 +1,351 @@ +;; Copyright (c) 2005-2009 Alex Shinn. All rights reserved. +;; BSD-style license: http://synthcode.com/license.txt + +;; Procedure: base64-encode-string str +;; Return a base64 encoded representation of string according to the +;; official base64 standard as described in RFC3548. + +;; Procedure: base64-decode-string str +;; Return a base64 decoded representation of string, also interpreting +;; the alternate 62 & 63 valued characters as described in RFC3548. +;; Other out-of-band characters are silently stripped, and = signals +;; the end of the encoded string. No errors will be raised. + +;; Procedure: base64-encode [port] +;; Procedure: base64-decode [port] +;; Variations of the above which read and write to ports. + +;; Procedure: base64-encode-header enc str [start-col max-col nl] +;; Return a base64 encoded representation of string as above, +;; wrapped in =?ENC?B?...?= as per RFC1522, split across multiple +;; MIME-header lines as needed to keep each lines length less than +;; MAX-COL. The string is encoded as is, and the encoding ENC is +;; just used for the prefix, i.e. you are responsible for ensuring +;; STR is already encoded according to ENC. The optional argument +;; NL is the newline separator, defaulting to CRLF. + +;; This API is compatible with the Gauche library rfc.base64. + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; string utils + +(define (string-chop str n) + (let ((len (string-length str))) + (let lp ((i 0) (res '())) + (let ((j (+ i n))) + (if (>= j len) + (reverse (cons (substring str i len) res)) + (lp j (cons (substring str i j) res))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; constants and tables + +(define *default-max-col* 76) + +(define *outside-char* 99) ; luft-balloons +(define *pad-char* 101) ; dalmations + +(define *base64-decode-table* + (let ((res (make-vector #x100 *outside-char*))) + (let lp ((i 0)) ; map letters + (cond + ((<= i 25) + (vector-set! res (+ i 65) i) + (vector-set! res (+ i 97) (+ i 26)) + (lp (+ i 1))))) + (let lp ((i 0)) ; map numbers + (cond + ((<= i 9) + (vector-set! res (+ i 48) (+ i 52)) + (lp (+ i 1))))) + ;; extras (be liberal for different common base64 formats) + (vector-set! res (char->integer #\+) 62) + (vector-set! res (char->integer #\-) 62) + (vector-set! res (char->integer #\/) 63) + (vector-set! res (char->integer #\_) 63) + (vector-set! res (char->integer #\~) 63) + (vector-set! res (char->integer #\=) *pad-char*) + res)) + +(define (base64-decode-char c) + (vector-ref *base64-decode-table* (char->integer c))) + +(define *base64-encode-table* + (let ((res (make-vector 64))) + (let lp ((i 0)) ; map letters + (cond + ((<= i 25) + (vector-set! res i (integer->char (+ i 65))) + (vector-set! res (+ i 26) (integer->char (+ i 97))) + (lp (+ i 1))))) + (let lp ((i 0)) ; map numbers + (cond + ((<= i 9) + (vector-set! res (+ i 52) (integer->char (+ i 48))) + (lp (+ i 1))))) + (vector-set! res 62 #\+) + (vector-set! res 63 #\/) + res)) + +(define (enc i) + (vector-ref *base64-encode-table* i)) + +;; try to match common boundaries +(define decode-src-length + (lcm 76 78)) + +(define decode-dst-length + (* 3 (arithmetic-shift (+ 3 decode-src-length) -2))) + +(define encode-src-length + (* 3 1024)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; decoding + +;; Create a result buffer with the maximum possible length for the +;; input, and pass it to the internal base64-decode-string! utility. +;; If the resulting length used is exact, we can return that buffer, +;; otherwise we return the appropriate substring. +(define (base64-decode-string src) + (let* ((len (string-length src)) + (dst-len (* 3 (arithmetic-shift (+ 3 len) -2))) + (dst (make-string dst-len))) + (base64-decode-string! + src 0 len dst + (lambda (src-offset res-len b1 b2 b3) + (let ((res-len (base64-decode-finish dst res-len b1 b2 b3))) + (if (= res-len dst-len) + dst + (substring dst 0 res-len))))))) + +;; This is a little funky. +;; +;; We want to skip over "outside" characters (e.g. newlines inside +;; base64-encoded data, as would be passed in mail clients and most +;; large base64 data). This would normally mean two nested loops - +;; one for overall processing the input, and one for looping until +;; we get to a valid character. However, many Scheme compilers are +;; really bad about optimizing nested loops of primitives, so we +;; flatten this into a single loop, using conditionals to determine +;; which character is currently being read. +(define (base64-decode-string! src start end dst kont) + (let lp ((i start) + (j 0) + (b1 *outside-char*) + (b2 *outside-char*) + (b3 *outside-char*)) + (if (>= i end) + (kont i j b1 b2 b3) + (let ((c (base64-decode-char (string-ref src i)))) + (cond + ((eqv? c *pad-char*) + (kont i j b1 b2 b3)) + ((eqv? c *outside-char*) + (lp (+ i 1) j b1 b2 b3)) + ((eqv? b1 *outside-char*) + (lp (+ i 1) j c b2 b3)) + ((eqv? b2 *outside-char*) + (lp (+ i 1) j b1 c b3)) + ((eqv? b3 *outside-char*) + (lp (+ i 1) j b1 b2 c)) + (else + (string-set! dst + j + (integer->char + (bitwise-ior (arithmetic-shift b1 2) + (extract-bit-field 2 4 b2)))) + (string-set! dst + (+ j 1) + (integer->char + (bitwise-ior + (arithmetic-shift (extract-bit-field 4 0 b2) 4) + (extract-bit-field 4 2 b3)))) + (string-set! dst + (+ j 2) + (integer->char + (bitwise-ior + (arithmetic-shift (extract-bit-field 2 0 b3) 6) + c))) + (lp (+ i 1) (+ j 3) + *outside-char* *outside-char* *outside-char*))))))) + +;; If requested, account for any "partial" results (i.e. trailing 2 or +;; 3 chars) by writing them into the destination (additional 1 or 2 +;; bytes) and returning the adjusted offset for how much data we've +;; written. +(define (base64-decode-finish dst j b1 b2 b3) + (cond + ((eqv? b1 *outside-char*) + j) + ((eqv? b2 *outside-char*) + (string-set! dst j (integer->char (arithmetic-shift b1 2))) + (+ j 1)) + (else + (string-set! dst + j + (integer->char + (bitwise-ior (arithmetic-shift b1 2) + (extract-bit-field 2 4 b2)))) + (cond + ((eqv? b3 *outside-char*) + (+ j 1)) + (else + (string-set! dst + (+ j 1) + (integer->char + (bitwise-ior + (arithmetic-shift (extract-bit-field 4 0 b2) 4) + (extract-bit-field 4 2 b3)))) + (+ j 2)))))) + +;; General port decoder: work in single blocks at a time to avoid +;; allocating memory (crucial for Scheme implementations that don't +;; allow large strings). +(define (base64-decode . o) + (let ((in (if (pair? o) (car o) (current-input-port))) + (out (if (and (pair? o) (pair? (cdr o))) + (cadr o) + (current-output-port)))) + (let ((src (make-string decode-src-length)) + (dst (make-string decode-dst-length))) + (let lp ((offset 0)) + (let ((src-len (+ offset + (read-string! decode-src-length src in offset)))) + (cond + ((= src-len decode-src-length) + ;; read a full chunk: decode, write and loop + (base64-decode-string! + src 0 decode-src-length dst + (lambda (src-offset dst-len b1 b2 b3) + (cond + ((and (< src-offset src-len) + (eqv? #\= (string-ref src src-offset))) + ;; done + (let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3))) + (write-string dst dst-len out))) + ((eqv? b1 *outside-char*) + (write-string dst dst-len out) + (lp 0)) + (else + (write-string dst dst-len out) + ;; one to three chars left in buffer + (string-set! src 0 (enc b1)) + (cond + ((eqv? b2 *outside-char*) + (lp 1)) + (else + (string-set! src 1 (enc b2)) + (cond + ((eqv? b3 *outside-char*) + (lp 2)) + (else + (string-set! src 2 (enc b3)) + (lp 3)))))))))) + (else + ;; end of source - just decode and write once + (base64-decode-string! + src 0 src-len dst + (lambda (src-offset dst-len b1 b2 b3) + (let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3))) + (write-string dst dst-len out))))))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; encoding + +(define (base64-encode-string str) + (let* ((len (string-length str)) + (quot (quotient len 3)) + (rem (- len (* quot 3))) + (res-len (arithmetic-shift (+ quot (if (zero? rem) 0 1)) 2)) + (res (make-string res-len))) + (base64-encode-string! str 0 len res) + res)) + +(define (base64-encode-string! str start end res) + (let* ((res-len (string-length res)) + (limit (- end 2))) + (let lp ((i start) (j 0)) + (if (>= i limit) + (case (- end i) + ((1) + (let ((b1 (char->integer (string-ref str i)))) + (string-set! res j (enc (arithmetic-shift b1 -2))) + (string-set! res + (+ j 1) + (enc (arithmetic-shift (bitwise-and #b11 b1) 4))) + (string-set! res (+ j 2) #\=) + (string-set! res (+ j 3) #\=))) + ((2) + (let ((b1 (char->integer (string-ref str i))) + (b2 (char->integer (string-ref str (+ i 1))))) + (string-set! res j (enc (arithmetic-shift b1 -2))) + (string-set! res + (+ j 1) + (enc (bitwise-ior + (arithmetic-shift (bitwise-and #b11 b1) 4) + (extract-bit-field 4 4 b2)))) + (string-set! res + (+ j 2) + (enc (arithmetic-shift (extract-bit-field 4 0 b2) + 2))) + (string-set! res (+ j 3) #\=)))) + (let ((b1 (char->integer (string-ref str i))) + (b2 (char->integer (string-ref str (+ i 1)))) + (b3 (char->integer (string-ref str (+ i 2))))) + (string-set! res j (enc (arithmetic-shift b1 -2))) + (string-set! res + (+ j 1) + (enc (bitwise-ior + (arithmetic-shift (bitwise-and #b11 b1) 4) + (extract-bit-field 4 4 b2)))) + (string-set! res + (+ j 2) + (enc (bitwise-ior + (arithmetic-shift (extract-bit-field 4 0 b2) 2) + (extract-bit-field 2 6 b3)))) + (string-set! res (+ j 3) (enc (bitwise-and #b111111 b3))) + (lp (+ i 3) (+ j 4))))))) + +(define (base64-encode . o) + (let ((in (if (pair? o) (car o) (current-input-port))) + (out (if (and (pair? o) (pair? (cdr o))) + (cadr o) + (current-output-port)))) + (let ((src (make-string encode-src-length)) + (dst (make-string + (arithmetic-shift (quotient encode-src-length 3) 2)))) + (let lp () + (let ((n (read-string! 2048 src in))) + (base64-encode-string! src 0 n dst) + (write-string dst (* 3 (quotient (+ n 3) 4)) out) + (if (= n 2048) + (lp))))))) + +(define (base64-encode-header encoding str . o) + (define (round4 i) (arithmetic-shift (arithmetic-shift i -2) 2)) + (let ((start-col (if (pair? o) (car o) 0)) + (max-col (if (and (pair? o) (pair? (cdr o))) + (car (cdr o)) + *default-max-col*)) + (nl (if (and (pair? o) (pair? (cdr o)) (pair? (cdr (cdr o)))) + (car (cdr (cdr o))) + "\r\n"))) + (let* ((prefix (string-append "=?" encoding "?B?")) + (prefix-length (+ 2 (string-length prefix))) + (effective-max-col (round4 (- max-col prefix-length))) + (first-max-col (round4 (- effective-max-col start-col))) + (str (base64-encode-string str)) + (len (string-length str))) + (if (<= len first-max-col) + (string-append prefix str "?=") + (string-append + (if (positive? first-max-col) + (string-append + prefix (substring str 0 first-max-col) "?=" nl "\t" prefix) + "") + (string-concatenate (string-chop (substring str first-max-col len) + effective-max-col) + (string-append "?=" nl "\t" prefix)) + "?="))))) + diff --git a/lib/chibi/mime.module b/lib/chibi/mime.module new file mode 100644 index 00000000..2c10dbd1 --- /dev/null +++ b/lib/chibi/mime.module @@ -0,0 +1,7 @@ + +(define-module (chibi mime) + (export mime-ref assoc-ref mime-header-fold mime-headers->list + mime-parse-content-type mime-decode-header + mime-message-fold mime-message->sxml) + (import-immutable (scheme) (chibi base64) (chibi quoted-printable) (chibi io)) + (include "mime.scm")) diff --git a/lib/chibi/mime.scm b/lib/chibi/mime.scm new file mode 100644 index 00000000..e712d7fa --- /dev/null +++ b/lib/chibi/mime.scm @@ -0,0 +1,410 @@ +;; mime.scm -- RFC2045 MIME library +;; Copyright (c) 2005-2009 Alex Shinn. All rights reserved. +;; BSD-style license: http://synthcode.com/license.txt + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; RFC2822 headers + +;; Procedure: mime-header-fold kons knil [source [limit [kons-from]]] +;; +;; Performs a fold operation on the MIME headers of source which can be +;; either a string or port, and defaults to current-input-port. kons +;; is called on the three values: +;; kons header value accumulator +;; where accumulator begins with knil. Neither the header nor the +;; value are modified, except wrapped lines are handled for the value. +;; +;; The optional procedure KONS-FROM is a procedure to be called when +;; the first line of the headers is an "From
" line, to +;; enable this procedure to be used as-is on mbox files and the like. +;; It defaults to KONS, and if such a line is found the fold will begin +;; with (KONS-FROM "%from"
(KONS-FROM "%date" KNIL)). +;; +;; The optional LIMIT gives a limit on the number of headers to read. + +;; Procedure: mime-headers->list [source] +;; Return an alist of the MIME headers from source with headers all +;; downcased. + +;; Procedure: mime-parse-content-type str +;; Parses STR as a Content-Type style-value returning the list +;; (type (attr . val) ...) +;; For example: +;; (mime-parse-content-type +;; "text/html; CHARSET=US-ASCII; filename=index.html") +;; => ("text/html" ("charset" . "US-ASCII") ("filename" . "index.html")) + +;; Procedure: mime-decode-header str +;; Replace all occurrences of RFC1522 =?ENC?...?= escapes in STR with +;; the appropriate decoded and charset converted value. + +;; Procedure: mime-ref headers str [default] +;; A case-insensitive assoc-ref. + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; RFC2045 MIME encoding + +;; Procedure: mime-message-fold src headers kons knil +;; Performs a fold operation on the given string or port SRC as a MIME +;; body corresponding to the headers give in HEADERS. KONS is called +;; on the successive values: +;; +;; KONS part-headers part-body accumulator +;; +;; where part-headers are the headers for the given MIME part (the +;; original headers for single-part MIME), part-body is the +;; appropriately decoded and charset-converted body of the message, +;; and the accumulator begins with KNIL. +;; +;; TODO: Extend mime-message-fold to (optionally?) pass KONS an +;; input-port instead of string for the body to handle very large bodies +;; (this is not much of an issue for SMTP since the messages are in +;; practice limited, but it could be problematic for large HTTP bodies). +;; +;; This does a depth-first search, folding in sequence. It should +;; probably be doing a tree-fold as in html-parser. + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(define mime-line-length-limit 4096) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; association lists + +(define (assoc* key ls . o) + (let ((eq (if (pair? o) (car o) equal?))) + (let lp ((ls ls)) + (cond + ((null? ls) #f) + ((eq key (caar ls)) (car ls)) + (else (lp (cdr ls))))))) + +(define (assoc-ref ls key . o) + (let ((default (and (pair? o) (car o))) + (eq (if (and (pair? o) (pair? (cdr o))) (car (cdr o)) equal?))) + (cond ((assoc* key ls eq) => cdr) + (else default)))) + +(define (mime-ref ls key . o) + (assoc-ref ls key (and (pair? o) (car o)) string-ci=?)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; simple matching instead of regexps + +(define (match-mbox-from-line line) + (let ((len (string-length line))) + (and (> len 5) + (string=? (substring line 0 5) "From ") + (let lp ((i 6)) + (cond + ((= i len) (list (substring line 5 len) "")) + ((memq (string-ref line i) '(#\space #\tab)) + (list (substring line 5 i) (substring line (+ i 1) len))) + (else (lp (+ i 1)))))))) + +(define (string-scan-colon-or-maybe-equal str) + (let ((len (string-length str))) + (let lp ((i 0) (best #f)) + (if (= i len) + best + (let ((c (string-ref str i))) + (cond ((or (char-alphabetic? c) + (char-numeric? c) + (memv c '(#\- #\_))) + (lp (+ i 1) best)) + ((eq? c #\:) + (if (= i 0) #f i)) + ((eqv? c #\=) + (lp (+ i 1) (or best i))) + (else + best))))))) + +(define (string-skip-white-space str i) + (let ((lim (string-length str))) + (let lp ((i i)) + (cond ((>= i lim) lim) + ((char-whitespace? (string-ref str i)) (lp (+ i 1))) + (else i))))) + +(define (match-mime-header-line line) + (let ((i (string-scan-colon-or-maybe-equal line))) + (and i + (let ((j (string-skip-white-space line (+ i 1)))) + (list (substring line 0 i) + (substring line j (string-length line))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; dummy encoder + +(define (ces-convert str . x) + str) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; some srfi-13 & string utils + +(define (string-copy! to tstart from . o) + (let ((start (if (pair? o) (car o) 0)) + (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length from)))) + (let lp ((i start) (j tstart)) + (cond + ((< i end) + (string-set! to j (string-ref from i)) + (lp (+ i 1) (+ j 1))))))) + +(define (string-concatenate-reverse ls) + (let lp ((ls ls) (rev '()) (len 0)) + (if (null? ls) + (let ((res (make-string len))) + (let lp ((ls rev) (i 0)) + (cond + ((null? ls) + res) + (else + (string-copy! res i (car ls)) + (lp (cdr ls) (+ i (string-length (car ls)))))))) + (lp (cdr ls) (cons (car ls) rev) (+ len (string-length (car ls))))))) + +(define (string-downcase s . o) + (let ((start (if (pair? o) (car o) 0)) + (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length s)))) + (let* ((len (- end start)) (s2 (make-string len))) + (let lp ((i start) (j 0)) + (cond + ((>= i end) + s2) + (else + (string-set! s2 j (char-downcase (string-ref s i))) + (lp (+ i 1) (+ j 1)))))))) + +(define (string-char-index str c . o) + (let ((start (if (pair? o) (car o) 0)) + (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str)))) + (let lp ((i start)) + (cond + ((= i end) #f) + ((eq? c (string-ref str i)) i) + (else (lp (+ i 1))))))) + +(define (string-trim-white-space s) + (let ((len (string-length s))) + (let lp ((i 0)) + (cond ((= i len) "") + ((char-whitespace? (string-ref s i)) (lp (+ i 1))) + (else + (let lp ((j (- len 1))) + (cond ((<= j i) "") + ((char-whitespace? (string-ref s j)) (lp (- j 1))) + (else (substring s i (+ j 1)))))))))) + +(define (string-split str ch) + (let ((len (string-length str))) + (let lp ((i 0) (res '())) + (let ((j (string-char-index str ch i))) + (if j + (lp (+ j 1) (cons (substring str i j) res)) + (reverse (cons (substring str i len) res))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; header parsing + +(define (mime-header-fold kons knil . o) + (let ((src (and (pair? o) (car o))) + (limit (and (pair? o) (pair? (cdr o)) (car (cdr o)))) + (kons-from (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o))) (caddr o) kons))) + ((if (string? src) mime-header-fold-string mime-header-fold-port) + kons knil (or src (current-input-port)) limit kons-from))) + +(define (mime-header-fold-string kons knil str limit kons-from) + (call-with-input-string str + (lambda (in) (mime-header-fold-port kons knil in limit kons-from)))) + +(define (mime-header-fold-port kons knil port limit kons-from) + (define (out line acc count) + (cond + ((or (and limit (> count limit)) (eof-object? line) (string=? line "")) + acc) + ((match-mime-header-line line) + => (lambda (m) (in (car m) (list (cadr m)) acc (+ count 1)))) + (else + ;;(warn "invalid header line: ~S\n" line) + (out (read-line port mime-line-length-limit) acc (+ count 1))))) + (define (in header value acc count) + (let ((line (read-line port mime-line-length-limit))) + (cond + ((and limit (> count limit)) + acc) + ((or (eof-object? line) (string=? line "")) + (kons header (string-concatenate-reverse value) acc)) + ((char-whitespace? (string-ref line 0)) + (in header (cons line value) acc (+ count 1))) + (else + (out line + (kons header (string-concatenate-reverse value) acc) + (+ count 1)))))) + (let ((first-line (read-line port mime-line-length-limit))) + (cond + ((eof-object? first-line) + knil) + ((and kons-from (match-mbox-from-line first-line)) + => (lambda (m) ; special case check on first line for mbox files + (out (read-line port mime-line-length-limit) + (kons-from "%from" (car m) + (kons-from "%date" (cadr m) knil)) + 0))) + (else + (out first-line knil 0))))) + +(define (mime-headers->list . o) + (reverse + (apply + mime-header-fold + (lambda (h v acc) (cons (cons (string-downcase h) v) acc)) + '() + o))) + +(define (mime-split-name+value s) + (let ((i (string-char-index s #\=))) + (if i + (cons (string-downcase (string-trim-white-space (substring s 0 i))) + (if (= i (string-length s)) + "" + (if (eqv? #\" (string-ref s (+ i 1))) + (substring s (+ i 2) (- (string-length s) 1)) + (substring s (+ i 1) (string-length s))))) + (cons (string-downcase (string-trim-white-space s)) "")))) + +(define (mime-parse-content-type str) + (map mime-split-name+value (string-split str #\;))) + +(define (mime-decode-header str) + (let* ((len (string-length str)) + (limit (- len 8))) ; need at least 8 chars: "=?Q?X??=" + (let lp ((i 0) (from 0) (res '())) + (if (>= i limit) + (string-concatenate (reverse (cons (substring str from len) res))) + (if (and (eqv? #\= (string-ref str i)) + (eqv? #\? (string-ref str (+ i 1)))) + (let* ((j (string-char-index str #\? (+ i 3))) + (k (string-char-index str #\? (+ j 3)))) + (if (and j k (< (+ k 1) len) + (eqv? #\? (string-ref str (+ j 2))) + (memq (string-ref str (+ j 1)) '(#\Q #\B #\q #\b)) + (eqv? #\= (string-ref str (+ k 1)))) + (let ((decode (if (memq (string-ref str (+ j 1)) '(#\Q #\q)) + quoted-printable-decode-string + base64-decode-string)) + (cset (substring str (+ i 2) j)) + (content (substring str (+ j 3) k)) + (k2 (+ k 2))) + (lp k2 k2 (cons (ces-convert (decode content) cset) + (cons (substring str from i) res)))) + (lp (+ i 2) from res))) + (lp (+ i 1) from res)))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; message parsing + +(define (mime-read-to-boundary port boundary next final) + (let ((final-boundary (and boundary (string-append boundary "--")))) + (let lp ((res '())) + (let ((line (read-line port mime-line-length-limit))) + (cond + ((or (eof-object? line) (equal? line final-boundary)) + (final (string-concatenate (reverse res) + (call-with-output-string newline)))) + ((equal? line boundary) + (next (string-concatenate (reverse res) + (call-with-output-string newline)))) + (else + (lp (cons line res)))))))) + +(define (mime-convert-part str cte enc) + (let ((str (cond + ((and (string? cte) (string-ci=? cte "quoted-printable")) + (quoted-printable-decode-string str)) + ((and (string? cte) (string-ci=? cte "base64")) + (base64-decode-string str)) + (else + str)))) + (if (string? enc) (ces-convert str enc) str))) + +(define (mime-read-part port cte enc boundary next final) + (mime-read-to-boundary + port boundary + (lambda (x) (next (mime-convert-part x cte enc))) + (lambda (x) (final (mime-convert-part x cte enc))))) + +;; (kons parent-headers part-headers part-body seed) +;; (start headers seed) +;; (end headers parent-seed seed) +(define (mime-message-fold src kons init-seed . o) + (let ((port (if (string? src) (open-input-string src) src))) + (let ((kons-start + (if (pair? o) (car o) (lambda (headers seed) '()))) + (kons-end + (if (and (pair? o) (pair? (cdr o))) + (car (cdr o)) + (lambda (headers parent-seed seed) + `((mime (^ ,@headers) + ,@(if (pair? seed) (reverse seed) seed)) + ,@parent-seed)))) + (headers + (if (and (pair? o) (pair? (cdr o)) (pair? (cdr (cdr o)))) + (car (cdr (cdr o))) + (mime-headers->list port)))) + (let tfold ((parent-headers '()) + (headers headers) + (seed init-seed) + (boundary #f) + (next (lambda (x) x)) + (final (lambda (x) x))) + (let* ((ctype (mime-parse-content-type + (mime-ref headers "Content-Type" "text/plain"))) + (type (string-trim-white-space (caar ctype))) + (enc (string-trim-white-space + (or (mime-ref ctype "charset") + (mime-ref headers "charset" "ASCII")))) + (cte (string-trim-white-space + (or (mime-ref headers "Content-Transfer-Encoding") + (mime-ref headers "Encoding" "7-bit"))))) + (cond + ((and (string-ci=? type "multipart/") + (mime-ref ctype "boundary")) + => (lambda (boundary2) + (let ((boundary2 (string-append "--" boundary2))) + ;; skip preamble + (mime-read-to-boundary port boundary2 (lambda (x) x) (lambda (x) x)) + (let lp ((part-seed (kons-start headers seed))) + (let ((part-headers (mime-headers->list port))) + (tfold parent-headers part-headers + part-seed boundary2 + lp + (lambda (x) + ;; skip epilogue + (if boundary + (mime-read-to-boundary port boundary + (lambda (x) x) (lambda (x) x))) + (next (kons-end headers seed x))) + )))))) + (else + (mime-read-part + port cte enc boundary + (lambda (x) (next (kons parent-headers headers x seed))) + (lambda (x) (final (kons parent-headers headers x seed))))))))))) + +;; (mime (^ (header . value) ...) parts ...) +(define (mime-message->sxml . o) + (car + (apply + mime-message-fold + (if (pair? o) (car o) (current-input-port)) + (lambda (parent-headers headers body seed) + `((mime (^ ,@headers) ,body) ,@seed)) + '() + (lambda (headers seed) '()) + (lambda (headers parent-seed seed) + `((mime (^ ,@headers) + ,@(if (pair? seed) (reverse seed) seed)) + ,@parent-seed)) + (if (pair? o) (cdr o) '())))) + diff --git a/lib/chibi/net.module b/lib/chibi/net.module index 41cdafe4..845a7aa8 100644 --- a/lib/chibi/net.module +++ b/lib/chibi/net.module @@ -1,6 +1,7 @@ (define-module (chibi net) - (export sockaddr? address-info? get-address-info socket connect with-net-io + (export sockaddr? address-info? get-address-info socket connect + with-net-io open-net-io address-info-family address-info-socket-type address-info-protocol address-info-address address-info-address-length address-info-next) (import-immutable (scheme)) diff --git a/lib/chibi/net.scm b/lib/chibi/net.scm index 85ed756a..5f912cb5 100644 --- a/lib/chibi/net.scm +++ b/lib/chibi/net.scm @@ -1,9 +1,13 @@ ;; net.scm -- the high-level network interface -;; Copyright (c) 2009 Alex Shinn. All rights reserved. +;; Copyright (c) 2010 Alex Shinn. All rights reserved. ;; BSD-style license: http://synthcode.com/license.txt -(define (with-net-io host service proc) - (let lp ((addr (get-address-info host service #f))) +(define (open-net-io host service) + (let lp ((addr (get-address-info host + (if (integer? service) + (number->string service) + service) + #f))) (if (not addr) (error "couldn't find address" host service) (let ((sock (socket (address-info-family addr) @@ -16,8 +20,13 @@ (address-info-address addr) (address-info-address-length addr))) (lp (address-info-next addr)) - (let ((in (open-input-file-descriptor sock)) - (out (open-output-file-descriptor sock))) - (let ((res (proc in out))) - (close-input-port in) - res)))))))) + (list (open-input-file-descriptor sock) + (open-output-file-descriptor sock)))))))) + +(define (with-net-io host service proc) + (let ((io (open-net-io host service))) + (if (not (pair? io)) + (error "couldn't find address" host service) + (let ((res (proc (car io) (car (cdr io))))) + (close-input-port (car io)) + res)))) diff --git a/lib/chibi/net/http.module b/lib/chibi/net/http.module new file mode 100644 index 00000000..352bf7b4 --- /dev/null +++ b/lib/chibi/net/http.module @@ -0,0 +1,7 @@ + +(define-module (chibi net http) + (export http-get call-with-input-url with-input-from-url + http-parse-request http-parse-form) + (import-immutable (scheme) (srfi 39) (chibi net) (chibi io) + (chibi uri) (chibi mime)) + (include "http.scm")) diff --git a/lib/chibi/net/http.scm b/lib/chibi/net/http.scm new file mode 100644 index 00000000..37cac5e6 --- /dev/null +++ b/lib/chibi/net/http.scm @@ -0,0 +1,180 @@ +;; http.scm -- http client +;; Copyright (c) 2009 Alex Shinn. All rights reserved. +;; BSD-style license: http://synthcode.com/license.txt + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; string utils + +(define (string-char-index str c . o) + (let ((start (if (pair? o) (car o) 0)) + (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str)))) + (let lp ((i start)) + (cond + ((= i end) #f) + ((eq? c (string-ref str i)) i) + (else (lp (+ i 1))))))) + +(define (string-split str ch) + (let ((len (string-length str))) + (let lp ((i 0) (res '())) + (let ((j (string-char-index str ch i))) + (if j + (lp (+ j 1) (cons (substring str i j) res)) + (reverse (cons (substring str i len) res))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; client utils + +(define http-user-agent "chibi") + +(define http-redirect-limit 10) +(define http-chunked-buffer-size 4096) +(define http-chunked-size-limit 409600) + +(define (string-scan str ch . o) + (let ((start (if (pair? o) (car o) 0)) + (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str)))) + (let lp ((i start)) + (and (< i end) + (if (eqv? ch (string-ref str i)) + i + (lp (+ i 1))))))) + +(define (http-parse-response line) + (let* ((len (string-length line)) + (i (or (string-scan line #\space 0 len) len)) + (j (or (string-scan line #\space (+ i 1) len) len)) + (n (and (< i j) (string->number (substring line (+ i 1) j))))) + (if (not (integer? n)) + (error "bad response" line i j) + (list (substring line 0 i) + n + (if (>= j len) "" (substring line (+ j 1) len)))))) + +(define (http-wrap-chunked-input-port in) + (define (read-chunk in) + (let* ((line (read-line in)) + (n (and (string? line) (string->number line 16)))) + (display "read-chunk ") (write line) (newline) + (cond + ((not (and (integer? n) (<= 0 n http-chunked-size-limit))) + (error "invalid chunked size line" line)) + ((zero? n) "") + (else (read-string n in))))) + (make-generated-input-port + (lambda () (read-chunk in)))) + +(define (http-get/raw url in-headers limit) + (if (<= limit 0) + (error "http-get: redirect limit reached" url) + (let* ((uri (if (uri? url) url (string->uri url))) + (host (and uri (uri-host uri)))) + (if (not host) + (error "invalid url" url) + (let* ((io (open-net-io + host + (or (uri-port uri) + (if (eq? 'https (uri-scheme uri)) 443 80)))) + (in (car io)) + (out (car (cdr io)))) + (display "GET " out) + (display (or (uri-path uri) "/") out) + (display " HTTP/1.0\r\n" out) + (display "Host: " out) (display host out) (display "\r\n" out) + (cond + ((not (mime-ref in-headers "user-agent")) + (display "User-Agent: " out) + (display http-user-agent out) + (display "\r\n" out))) + (for-each + (lambda (x) + (display (car x) out) (display ": " out) + (display (cdr x) out) (display "\r\n" out)) + in-headers) + (display "Connection: close\r\n\r\n" out) + (flush-output out) + (let* ((resp (http-parse-response (read-line in))) + (headers (mime-headers->list in)) + (status (quotient (cadr resp) 100))) + (case status + ((2) + (let ((enc (mime-ref headers "transfer-encoding"))) + (cond + ((equal? enc "chunked") + (http-wrap-chunked-input-port in)) + (else + in)))) + ((3) + (close-input-port in) + (close-output-port out) + (let ((url2 (mime-ref headers "location"))) + (if url2 + (http-get/raw url2 in-headers (- limit 1)) + (error "redirect with no location header")))) + (else + (close-input-port in) + (close-output-port out) + (error "couldn't retrieve url" url resp))))))))) + +(define (http-get url . headers) + (http-get/raw url + (if (pair? headers) (car headers) '()) + http-redirect-limit)) + +(define (call-with-input-url url proc) + (let* ((p (http-get url)) + (res (proc p))) + (close-input-port p) + res)) + +(define (with-input-from-url url thunk) + (let ((p (http-get url))) + (let ((res (parameterize ((current-input-port p)) (thunk)))) + (close-input-port p) + res))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; server utils + +;; read and parse a request line +(define (http-parse-request . o) + (let ((line (string-split + (read-line (if (pair? o) (car o) (current-input-port)) 4096)))) + (cons (string->symbol (car line)) (cdr line)))) + +;; Parse a form body with a given URI and MIME headers (as parsed with +;; mime-headers->list). Returns an alist of (name . value) for every +;; query or form parameter. +(define (http-parse-form uri headers . o) + (let* ((in (if (pair? o) (car o) (current-input-port))) + (type (mime-ref headers + "content-type" + "application/x-www-form-urlencoded")) + (query0 (or (uri-query (if (string? uri) (string->uri uri) uri)) '())) + (query (if (string? query0) (uri-query->alist query0) query0))) + (cond + ((string-ci=? "multipart/" type) + (let ((mime (mime-message->sxml in headers))) + (append + (let lp ((ls (cddr mime)) + (res '())) + (cond + ((null? ls) + res) + ((and (pair? (car ls)) + (eq? 'mime (caar ls)) + (pair? (cdar ls)) + (pair? (cadar ls)) + (memq (caadar ls) '(^ @))) + (let* ((disp0 (mime-ref (cdadar ls) "content-disposition" "")) + (disp (mime-parse-content-type disp0)) + (name (mime-ref disp "name"))) + (if name + (lp (cdr ls) (cons (cons name (caddar ls)) res)) + (lp (cdr ls) res)))) + (else + (lp (cdr ls) res)))) + query))) + (else + query)))) + diff --git a/lib/chibi/quoted-printable.module b/lib/chibi/quoted-printable.module new file mode 100644 index 00000000..9cbec430 --- /dev/null +++ b/lib/chibi/quoted-printable.module @@ -0,0 +1,7 @@ + +(define-module (chibi quoted-printable) + (export quoted-printable-encode quoted-printable-encode-string + quoted-printable-encode-header + quoted-printable-decode quoted-printable-decode-string) + (import-immutable (scheme) (srfi 33) (chibi io)) + (include "quoted-printable.scm")) diff --git a/lib/chibi/quoted-printable.scm b/lib/chibi/quoted-printable.scm new file mode 100644 index 00000000..80709026 --- /dev/null +++ b/lib/chibi/quoted-printable.scm @@ -0,0 +1,157 @@ +;; quoted-printable.scm -- RFC2045 implementation +;; Copyright (c) 2005-2009 Alex Shinn. All rights reserved. +;; BSD-style license: http://synthcode.com/license.txt + +;; Procedure: quoted-printable-encode-string str [start-col max-col] +;; Return a quoted-printable encoded representation of string +;; according to the official standard as described in RFC2045. +;; +;; ? and _ are always encoded for compatibility with RFC1522 encoding, +;; and soft newlines are inserted as necessary to keep each lines +;; length less than MAX-COL (default 76). The starting column may be +;; overridden with START-COL (default 0). + +;; Procedure: quoted-printable-decode-string str [mime?] +;; Return a quoted-printable decoded representation of string. If +;; MIME? is specified and true, _ will be decoded as as space in +;; accordance with RFC1522. No errors will be raised on invalid +;; input. + +;; Procedure: quoted-printable-encode [port start-col max-col] +;; Procedure: quoted-printable-decode [port start-col max-col] +;; Variations of the above which read and write to ports. + +;; Procedure: quoted-printable-encode-header enc str [start-col max-col] +;; Return a quoted-printable encoded representation of string as +;; above, wrapped in =?ENC?Q?...?= as per RFC1522, split across +;; multiple MIME-header lines as needed to keep each lines length less +;; than MAX-COL. The string is encoded as is, and the encoding ENC is +;; just used for the prefix, i.e. you are responsible for ensuring STR +;; is already encoded according to ENC. + +;; Example: + +;; (define (mime-encode-header header value charset) +;; (let ((prefix (string-append header ": ")) +;; (str (ces-convert value "UTF8" charset))) +;; (string-append +;; prefix +;; (quoted-printable-encode-header charset str (string-length prefix))))) + +;; This API is backwards compatible with the Gauche library +;; rfc.quoted-printable. + +(define *default-max-col* 76) + +;; Allow for RFC1522 quoting for headers by always escaping ? and _ +(define (qp-encode str start-col max-col separator) + (define (hex i) (integer->char (+ i (if (<= i 9) 48 55)))) + (let ((end (string-length str)) + (buf (make-string max-col))) + (let lp ((i 0) (col start-col) (res '())) + (cond + ((= i end) + (if (pair? res) + (string-concatenate (reverse (cons (substring buf 0 col) res)) + separator) + (substring buf start-col col))) + ((>= col (- max-col 3)) + (lp i 0 (cons (substring buf (if (pair? res) 0 start-col) col) res))) + (else + (let ((c (char->integer (string-ref str i)))) + (cond + ((and (<= 33 c 126) (not (memq c '(61 63 95)))) + (string-set! buf col (integer->char c)) + (lp (+ i 1) (+ col 1) res)) + (else + (string-set! buf col #\=) + (string-set! buf (+ col 1) (hex (arithmetic-shift c -4))) + (string-set! buf (+ col 2) (hex (bitwise-and c #b1111))) + (lp (+ i 1) (+ col 3) res))))))))) + +(define (quoted-printable-encode-string . o) + (let ((src (if (pair? o) (car o) (current-input-port))) + (start-col (if (and (pair? o) (pair? (cdr o))) (cadr o) 0)) + (max-col (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o))) + (caddr o) + *default-max-col*))) + (qp-encode (if (string? src) src (read-string #f src)) + start-col max-col "=\r\n"))) + +(define (quoted-printable-encode . o) + (display (apply (quoted-printable-encode-string o)))) + +(define (quoted-printable-encode-header encoding . o) + (let ((src (if (pair? o) (car o) (current-input-port))) + (start-col (if (and (pair? o) (pair? (cdr o))) (cadr o) 0)) + (max-col (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o))) + (caddr o) + *default-max-col*)) + (nl (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o)) (pair? (cdddr o))) + (cadddr o) + "\r\n"))) + (let* ((prefix (string-append "=?" encoding "?Q?")) + (prefix-length (+ 2 (string-length prefix))) + (separator (string-append "?=" nl "\t" prefix)) + (effective-max-col (- max-col prefix-length))) + (string-append prefix + (qp-encode (if (string? src) src (read-string #f src)) + start-col effective-max-col separator) + "?=")))) + +(define (quoted-printable-decode-string . o) + (define (hex? c) (or (char-numeric? c) (<= 65 (char->integer c) 70))) + (define (unhex1 c) + (let ((i (char->integer c))) (if (>= i 65) (- i 55) (- i 48)))) + (define (unhex c1 c2) + (integer->char (+ (arithmetic-shift (unhex1 c1) 4) (unhex1 c2)))) + (let ((src (if (pair? o) (car o) (current-input-port))) + (mime-header? (and (pair? o) (pair? (cdr o)) (car (cdr o))))) + (let* ((str (if (string? src) src (read-string #f src))) + (end (string-length str))) + (call-with-output-string + (lambda (out) + (let lp ((i 0)) + (cond + ((< i end) + (let ((c (string-ref str i))) + (case c + ((#\=) ; = escapes + (cond + ((< (+ i 2) end) + (let ((c2 (string-ref str (+ i 1)))) + (cond + ((eq? c2 #\newline) (lp (+ i 2))) + ((eq? c2 #\return) + (lp (if (eq? (string-ref str (+ i 2)) #\newline) + (+ i 3) + (+ i 2)))) + ((hex? c2) + (let ((c3 (string-ref str (+ i 2)))) + (if (hex? c3) (write-char (unhex c2 c3) out)) + (lp (+ i 3)))) + (else (lp (+ i 3)))))))) + ((#\_) ; maybe translate _ to space + (write-char (if mime-header? #\space c) out) + (lp (+ i 1))) + ((#\space #\tab) ; strip trailing whitespace + (let lp2 ((j (+ i 1))) + (cond + ((not (= j end)) + (case (string-ref str j) + ((#\space #\tab) (lp2 (+ j 1))) + ((#\newline) + (lp (+ j 1))) + ((#\return) + (let ((k (+ j 1))) + (lp (if (and (< k end) + (eqv? #\newline (string-ref str k))) + (+ k 1) k)))) + (else (display (substring str i j) out) (lp j))))))) + (else ; a literal char + (write-char c out) + (lp (+ i 1))))))))))))) + +(define (quoted-printable-decode . o) + (display (apply quoted-printable-decode-string o))) + diff --git a/lib/chibi/uri.module b/lib/chibi/uri.module index 2456dd9f..46f9e6a6 100644 --- a/lib/chibi/uri.module +++ b/lib/chibi/uri.module @@ -1,7 +1,7 @@ (define-module (chibi uri) - (export uri->string make-uri string->uri - uri-scheme uri-user uri-host uri-path uri-query uri-fragment + (export uri? uri->string make-uri string->uri + uri-scheme uri-user uri-host uri-port uri-path uri-query uri-fragment uri-with-scheme uri-with-user uri-with-host uri-with-path uri-with-query uri-with-fragment uri-encode uri-decode uri-query->alist uri-alist->query) diff --git a/lib/srfi/33/bitwise.scm b/lib/srfi/33/bitwise.scm index d0ac59f1..4ae8840f 100644 --- a/lib/srfi/33/bitwise.scm +++ b/lib/srfi/33/bitwise.scm @@ -38,7 +38,7 @@ -1 (integer-length (- i (bit-and i (- i 1)))))) -(define (mask len) (bitwise-not (arithmetic-shift -1 len))) +(define (mask len) (- (arithmetic-shift 1 len) 1)) (define (bitwise-merge mask n m) (bit-ior (bit-and mask n) (bit-and (bitwise-not mask) m)))