;; mime.scm -- RFC2045 MIME library ;; Copyright (c) 2005-2009 Alex Shinn. All rights reserved. ;; BSD-style license: http://synthcode.com/license.txt ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; RFC2822 headers ;; Procedure: mime-header-fold kons knil [source [limit [kons-from]]] ;; ;; Performs a fold operation on the MIME headers of source which can be ;; either a string or port, and defaults to current-input-port. kons ;; is called on the three values: ;; kons header value accumulator ;; where accumulator begins with knil. Neither the header nor the ;; value are modified, except wrapped lines are handled for the value. ;; ;; The optional procedure KONS-FROM is a procedure to be called when ;; the first line of the headers is an "From
" line, to ;; enable this procedure to be used as-is on mbox files and the like. ;; It defaults to KONS, and if such a line is found the fold will begin ;; with (KONS-FROM "%from"
(KONS-FROM "%date" KNIL)). ;; ;; The optional LIMIT gives a limit on the number of headers to read. ;; Procedure: mime-headers->list [source] ;; Return an alist of the MIME headers from source with headers all ;; downcased. ;; Procedure: mime-parse-content-type str ;; Parses STR as a Content-Type style-value returning the list ;; (type (attr . val) ...) ;; For example: ;; (mime-parse-content-type ;; "text/html; CHARSET=US-ASCII; filename=index.html") ;; => ("text/html" ("charset" . "US-ASCII") ("filename" . "index.html")) ;; Procedure: mime-decode-header str ;; Replace all occurrences of RFC1522 =?ENC?...?= escapes in STR with ;; the appropriate decoded and charset converted value. ;; Procedure: mime-ref headers str [default] ;; A case-insensitive assoc-ref. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; RFC2045 MIME encoding ;; Procedure: mime-message-fold src headers kons knil ;; Performs a fold operation on the given string or port SRC as a MIME ;; body corresponding to the headers give in HEADERS. KONS is called ;; on the successive values: ;; ;; KONS part-headers part-body accumulator ;; ;; where part-headers are the headers for the given MIME part (the ;; original headers for single-part MIME), part-body is the ;; appropriately decoded and charset-converted body of the message, ;; and the accumulator begins with KNIL. ;; ;; TODO: Extend mime-message-fold to (optionally?) pass KONS an ;; input-port instead of string for the body to handle very large bodies ;; (this is not much of an issue for SMTP since the messages are in ;; practice limited, but it could be problematic for large HTTP bodies). ;; ;; This does a depth-first search, folding in sequence. It should ;; probably be doing a tree-fold as in html-parser. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define mime-line-length-limit 4096) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; association lists (define (assoc* key ls . o) (let ((eq (if (pair? o) (car o) equal?))) (let lp ((ls ls)) (cond ((null? ls) #f) ((eq key (caar ls)) (car ls)) (else (lp (cdr ls))))))) (define (assoc-ref ls key . o) (let ((default (and (pair? o) (car o))) (eq (if (and (pair? o) (pair? (cdr o))) (car (cdr o)) equal?))) (cond ((assoc* key ls eq) => cdr) (else default)))) (define (mime-ref ls key . o) (assoc-ref ls key (and (pair? o) (car o)) string-ci=?)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; simple matching instead of regexps (define (match-mbox-from-line line) (let ((len (string-length line))) (and (> len 5) (string=? (substring line 0 5) "From ") (let lp ((i 6)) (cond ((= i len) (list (substring line 5 len) "")) ((memq (string-ref line i) '(#\space #\tab)) (list (substring line 5 i) (substring line (+ i 1) len))) (else (lp (+ i 1)))))))) (define (string-scan-colon-or-maybe-equal str) (let ((len (string-length str))) (let lp ((i 0) (best #f)) (if (= i len) best (let ((c (string-ref str i))) (cond ((or (char-alphabetic? c) (char-numeric? c) (memv c '(#\- #\_))) (lp (+ i 1) best)) ((eq? c #\:) (if (= i 0) #f i)) ((eqv? c #\=) (lp (+ i 1) (or best i))) (else best))))))) (define (string-skip-white-space str i) (let ((lim (string-length str))) (let lp ((i i)) (cond ((>= i lim) lim) ((char-whitespace? (string-ref str i)) (lp (+ i 1))) (else i))))) (define (match-mime-header-line line) (let ((i (string-scan-colon-or-maybe-equal line))) (and i (let ((j (string-skip-white-space line (+ i 1)))) (list (substring line 0 i) (substring line j (string-length line))))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; dummy encoder (define (ces-convert str . x) str) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; some srfi-13 & string utils (define (string-copy! to tstart from . o) (let ((start (if (pair? o) (car o) 0)) (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length from)))) (let lp ((i start) (j tstart)) (cond ((< i end) (string-set! to j (string-ref from i)) (lp (+ i 1) (+ j 1))))))) (define (string-concatenate-reverse ls) (let lp ((ls ls) (rev '()) (len 0)) (if (null? ls) (let ((res (make-string len))) (let lp ((ls rev) (i 0)) (cond ((null? ls) res) (else (string-copy! res i (car ls)) (lp (cdr ls) (+ i (string-length (car ls)))))))) (lp (cdr ls) (cons (car ls) rev) (+ len (string-length (car ls))))))) (define (string-downcase s . o) (let ((start (if (pair? o) (car o) 0)) (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length s)))) (let* ((len (- end start)) (s2 (make-string len))) (let lp ((i start) (j 0)) (cond ((>= i end) s2) (else (string-set! s2 j (char-downcase (string-ref s i))) (lp (+ i 1) (+ j 1)))))))) (define (string-char-index str c . o) (let ((start (if (pair? o) (car o) 0)) (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str)))) (let lp ((i start)) (cond ((= i end) #f) ((eq? c (string-ref str i)) i) (else (lp (+ i 1))))))) (define (string-trim-white-space s) (let ((len (string-length s))) (let lp ((i 0)) (cond ((= i len) "") ((char-whitespace? (string-ref s i)) (lp (+ i 1))) (else (let lp ((j (- len 1))) (cond ((<= j i) "") ((char-whitespace? (string-ref s j)) (lp (- j 1))) (else (substring s i (+ j 1)))))))))) (define (string-split str ch) (let ((len (string-length str))) (let lp ((i 0) (res '())) (let ((j (string-char-index str ch i))) (if j (lp (+ j 1) (cons (substring str i j) res)) (reverse (cons (substring str i len) res))))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; header parsing (define (mime-header-fold kons knil . o) (let ((src (and (pair? o) (car o))) (limit (and (pair? o) (pair? (cdr o)) (car (cdr o)))) (kons-from (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o))) (caddr o) kons))) ((if (string? src) mime-header-fold-string mime-header-fold-port) kons knil (or src (current-input-port)) limit kons-from))) (define (mime-header-fold-string kons knil str limit kons-from) (call-with-input-string str (lambda (in) (mime-header-fold-port kons knil in limit kons-from)))) (define (mime-header-fold-port kons knil port limit kons-from) (define (out line acc count) (cond ((or (and limit (> count limit)) (eof-object? line) (string=? line "")) acc) ((match-mime-header-line line) => (lambda (m) (in (car m) (list (cadr m)) acc (+ count 1)))) (else ;;(warn "invalid header line: ~S\n" line) (out (read-line port mime-line-length-limit) acc (+ count 1))))) (define (in header value acc count) (let ((line (read-line port mime-line-length-limit))) (cond ((and limit (> count limit)) acc) ((or (eof-object? line) (string=? line "")) (kons header (string-concatenate-reverse value) acc)) ((char-whitespace? (string-ref line 0)) (in header (cons line value) acc (+ count 1))) (else (out line (kons header (string-concatenate-reverse value) acc) (+ count 1)))))) (let ((first-line (read-line port mime-line-length-limit))) (cond ((eof-object? first-line) knil) ((and kons-from (match-mbox-from-line first-line)) => (lambda (m) ; special case check on first line for mbox files (out (read-line port mime-line-length-limit) (kons-from "%from" (car m) (kons-from "%date" (cadr m) knil)) 0))) (else (out first-line knil 0))))) (define (mime-headers->list . o) (reverse (apply mime-header-fold (lambda (h v acc) (cons (cons (string-downcase h) v) acc)) '() o))) (define (mime-split-name+value s) (let ((i (string-char-index s #\=))) (if i (cons (string-downcase (string-trim-white-space (substring s 0 i))) (if (= i (string-length s)) "" (if (eqv? #\" (string-ref s (+ i 1))) (substring s (+ i 2) (- (string-length s) 1)) (substring s (+ i 1) (string-length s))))) (cons (string-downcase (string-trim-white-space s)) "")))) (define (mime-parse-content-type str) (map mime-split-name+value (string-split str #\;))) (define (mime-decode-header str) (let* ((len (string-length str)) (limit (- len 8))) ; need at least 8 chars: "=?Q?X??=" (let lp ((i 0) (from 0) (res '())) (if (>= i limit) (string-concatenate (reverse (cons (substring str from len) res))) (if (and (eqv? #\= (string-ref str i)) (eqv? #\? (string-ref str (+ i 1)))) (let* ((j (string-char-index str #\? (+ i 3))) (k (string-char-index str #\? (+ j 3)))) (if (and j k (< (+ k 1) len) (eqv? #\? (string-ref str (+ j 2))) (memq (string-ref str (+ j 1)) '(#\Q #\B #\q #\b)) (eqv? #\= (string-ref str (+ k 1)))) (let ((decode (if (memq (string-ref str (+ j 1)) '(#\Q #\q)) quoted-printable-decode-string base64-decode-string)) (cset (substring str (+ i 2) j)) (content (substring str (+ j 3) k)) (k2 (+ k 2))) (lp k2 k2 (cons (ces-convert (decode content) cset) (cons (substring str from i) res)))) (lp (+ i 2) from res))) (lp (+ i 1) from res)))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; message parsing (define (mime-read-to-boundary port boundary next final) (let ((final-boundary (and boundary (string-append boundary "--")))) (let lp ((res '())) (let ((line (read-line port mime-line-length-limit))) (cond ((or (eof-object? line) (equal? line final-boundary)) (final (string-concatenate (reverse res) (call-with-output-string newline)))) ((equal? line boundary) (next (string-concatenate (reverse res) (call-with-output-string newline)))) (else (lp (cons line res)))))))) (define (mime-convert-part str cte enc) (let ((str (cond ((and (string? cte) (string-ci=? cte "quoted-printable")) (quoted-printable-decode-string str)) ((and (string? cte) (string-ci=? cte "base64")) (base64-decode-string str)) (else str)))) (if (string? enc) (ces-convert str enc) str))) (define (mime-read-part port cte enc boundary next final) (mime-read-to-boundary port boundary (lambda (x) (next (mime-convert-part x cte enc))) (lambda (x) (final (mime-convert-part x cte enc))))) ;; (kons parent-headers part-headers part-body seed) ;; (start headers seed) ;; (end headers parent-seed seed) (define (mime-message-fold src kons init-seed . o) (let ((port (if (string? src) (open-input-string src) src))) (let ((kons-start (if (pair? o) (car o) (lambda (headers seed) '()))) (kons-end (if (and (pair? o) (pair? (cdr o))) (car (cdr o)) (lambda (headers parent-seed seed) `((mime (^ ,@headers) ,@(if (pair? seed) (reverse seed) seed)) ,@parent-seed)))) (headers (if (and (pair? o) (pair? (cdr o)) (pair? (cdr (cdr o)))) (car (cdr (cdr o))) (mime-headers->list port)))) (let tfold ((parent-headers '()) (headers headers) (seed init-seed) (boundary #f) (next (lambda (x) x)) (final (lambda (x) x))) (let* ((ctype (mime-parse-content-type (mime-ref headers "Content-Type" "text/plain"))) (type (string-trim-white-space (caar ctype))) (enc (string-trim-white-space (or (mime-ref ctype "charset") (mime-ref headers "charset" "ASCII")))) (cte (string-trim-white-space (or (mime-ref headers "Content-Transfer-Encoding") (mime-ref headers "Encoding" "7-bit"))))) (cond ((and (string-ci=? type "multipart/") (mime-ref ctype "boundary")) => (lambda (boundary2) (let ((boundary2 (string-append "--" boundary2))) ;; skip preamble (mime-read-to-boundary port boundary2 (lambda (x) x) (lambda (x) x)) (let lp ((part-seed (kons-start headers seed))) (let ((part-headers (mime-headers->list port))) (tfold parent-headers part-headers part-seed boundary2 lp (lambda (x) ;; skip epilogue (if boundary (mime-read-to-boundary port boundary (lambda (x) x) (lambda (x) x))) (next (kons-end headers seed x))) )))))) (else (mime-read-part port cte enc boundary (lambda (x) (next (kons parent-headers headers x seed))) (lambda (x) (final (kons parent-headers headers x seed))))))))))) ;; (mime (^ (header . value) ...) parts ...) (define (mime-message->sxml . o) (car (apply mime-message-fold (if (pair? o) (car o) (current-input-port)) (lambda (parent-headers headers body seed) `((mime (^ ,@headers) ,body) ,@seed)) '() (lambda (headers seed) '()) (lambda (headers parent-seed seed) `((mime (^ ,@headers) ,@(if (pair? seed) (reverse seed) seed)) ,@parent-seed)) (if (pair? o) (cdr o) '()))))