;; mime.scm -- RFC2045 MIME library ;; Copyright (c) 2005-2011 Alex Shinn. All rights reserved. ;; BSD-style license: http://synthcode.com/license.txt ;;> A library to parse MIME headers and bodies into SXML. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define mime-line-length-limit 4096) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; association lists (define (assoc* key ls . o) (let ((eq (if (pair? o) (car o) equal?))) (let lp ((ls ls)) (cond ((null? ls) #f) ((eq key (caar ls)) (car ls)) (else (lp (cdr ls))))))) (define (assoc-ref ls key . o) (let ((default (and (pair? o) (car o))) (eq (if (and (pair? o) (pair? (cdr o))) (car (cdr o)) equal?))) (cond ((assoc* key ls eq) => cdr) (else default)))) ;;> @subsubsubsection{@scheme{(mime-ref headers str [default])}} ;;> A case-insensitive @scheme{assoc-ref}. (define (mime-ref ls key . o) (assoc-ref ls key (and (pair? o) (car o)) string-ci=?)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; simple matching instead of regexps (define (match-mbox-from-line line) (let ((len (string-length line))) (and (> len 5) (string=? (substring line 0 5) "From ") (let lp ((i 6)) (cond ((= i len) (list (substring line 5 len) "")) ((memq (string-ref line i) '(#\space #\tab)) (list (substring line 5 i) (substring line (+ i 1) len))) (else (lp (+ i 1)))))))) (define (string-scan-colon-or-maybe-equal str) (let ((len (string-length str))) (let lp ((i 0) (best #f)) (if (= i len) best (let ((c (string-ref str i))) (cond ((or (char-alphabetic? c) (char-numeric? c) (memv c '(#\- #\_))) (lp (+ i 1) best)) ((eq? c #\:) (if (= i 0) #f i)) ((eqv? c #\=) (lp (+ i 1) (or best i))) (else best))))))) (define (string-skip-white-space str i) (let ((lim (string-length str))) (let lp ((i i)) (cond ((>= i lim) lim) ((char-whitespace? (string-ref str i)) (lp (+ i 1))) (else i))))) (define (match-mime-header-line line) (let ((i (string-scan-colon-or-maybe-equal line))) (and i (let ((j (string-skip-white-space line (+ i 1)))) (list (substring line 0 i) (substring line j (string-length line))))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; dummy encoder (define (ces-convert str . x) str) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; some srfi-13 & string utils (define (string-copy! to tstart from . o) (let ((start (if (pair? o) (car o) 0)) (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length from)))) (let lp ((i start) (j tstart)) (cond ((< i end) (string-set! to j (string-ref from i)) (lp (+ i 1) (+ j 1))))))) (define (string-concatenate-reverse ls) (let lp ((ls ls) (rev '()) (len 0)) (if (null? ls) (let ((res (make-string len))) (let lp ((ls rev) (i 0)) (cond ((null? ls) res) (else (string-copy! res i (car ls)) (lp (cdr ls) (+ i (string-length (car ls)))))))) (lp (cdr ls) (cons (car ls) rev) (+ len (string-length (car ls))))))) (define (string-downcase s . o) (let ((start (if (pair? o) (car o) 0)) (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length s)))) (let* ((len (- end start)) (s2 (make-string len))) (let lp ((i start) (j 0)) (cond ((>= i end) s2) (else (string-set! s2 j (char-downcase (string-ref s i))) (lp (+ i 1) (+ j 1)))))))) (define (string-char-index str c . o) (let ((start (if (pair? o) (car o) 0)) (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str)))) (let lp ((i start)) (cond ((= i end) #f) ((eq? c (string-ref str i)) i) (else (lp (+ i 1))))))) (define (string-trim-white-space s) (let ((len (string-length s))) (let lp ((i 0)) (cond ((= i len) "") ((char-whitespace? (string-ref s i)) (lp (+ i 1))) (else (let lp ((j (- len 1))) (cond ((<= j i) "") ((char-whitespace? (string-ref s j)) (lp (- j 1))) (else (substring s i (+ j 1)))))))))) (define (string-split str ch) (let ((len (string-length str))) (let lp ((i 0) (res '())) (let ((j (string-char-index str ch i))) (if j (lp (+ j 1) (cons (substring str i j) res)) (reverse (cons (substring str i len) res))))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;> @subsubsection{RFC2822 Headers} ;;> @subsubsubsection{@scheme{(mime-header-fold kons knil [source [limit [kons-from]]])}} ;;> ;;> Performs a fold operation on the MIME headers of source which can be ;;> either a string or port, and defaults to current-input-port. @var{kons} ;;> is called on the three values: ;;> @scheme{(kons header value accumulator)} ;;> where accumulator begins with @var{knil}. Neither the header nor the ;;> value are modified, except wrapped lines are handled for the value. ;;> ;;> The optional procedure @var{kons-from} is a procedure to be called when ;;> the first line of the headers is an "From
" line, to ;;> enable this procedure to be used as-is on mbox files and the like. ;;> It defaults to @var{kons}, and if such a line is found the fold will begin ;;> with @scheme{(kons-from "%from"
(kons-from "%date" knil))}. ;;> ;;> The optional @var{limit} gives a limit on the number of headers to read. (define (mime-header-fold kons knil . o) (let ((src (and (pair? o) (car o))) (limit (and (pair? o) (pair? (cdr o)) (car (cdr o)))) (kons-from (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o))) (car (cddr o)) kons))) ((if (string? src) mime-header-fold-string mime-header-fold-port) kons knil (or src (current-input-port)) limit kons-from))) (define (mime-header-fold-string kons knil str limit kons-from) (call-with-input-string str (lambda (in) (mime-header-fold-port kons knil in limit kons-from)))) (define (mime-header-fold-port kons knil port limit kons-from) (define (out line acc count) (cond ((or (and limit (> count limit)) (eof-object? line) (string=? line "")) acc) ((match-mime-header-line line) => (lambda (m) (in (car m) (list (cadr m)) acc (+ count 1)))) (else ;;(warn "invalid header line: ~S\n" line) (out (read-line port mime-line-length-limit) acc (+ count 1))))) (define (in header value acc count) (let ((line (read-line port mime-line-length-limit))) (cond ((and limit (> count limit)) acc) ((or (eof-object? line) (string=? line "")) (kons header (string-concatenate-reverse value) acc)) ((char-whitespace? (string-ref line 0)) (in header (cons line value) acc (+ count 1))) (else (out line (kons header (string-concatenate-reverse value) acc) (+ count 1)))))) (let ((first-line (read-line port mime-line-length-limit))) (cond ((eof-object? first-line) knil) ((and kons-from (match-mbox-from-line first-line)) => (lambda (m) ; special case check on first line for mbox files (out (read-line port mime-line-length-limit) (kons-from "%from" (car m) (kons-from "%date" (cadr m) knil)) 0))) (else (out first-line knil 0))))) ;;> @subsubsubsection{@scheme{(mime-headers->list [source])}} ;;> Return an alist of the MIME headers from source with headers all ;;> downcased. (define (mime-headers->list . o) (reverse (apply mime-header-fold (lambda (h v acc) (cons (cons (string-downcase h) v) acc)) '() o))) (define (mime-split-name+value s) (let ((i (string-char-index s #\=))) (if i (cons (string-downcase (string-trim-white-space (substring s 0 i))) (if (= i (string-length s)) "" (if (eqv? #\" (string-ref s (+ i 1))) (substring s (+ i 2) (- (string-length s) 1)) (substring s (+ i 1) (string-length s))))) (cons (string-downcase (string-trim-white-space s)) "")))) ;;> @subsubsubsection{@scheme{(mime-parse-content-type str)}} ;;> Parses @var{str} as a Content-Type style-value returning the list ;;> @scheme{(type (attr . val) ...)}. ;;> @example{ ;;> (mime-parse-content-type "text/html; CHARSET=UTF-8; filename=index.html") ;;> } (define (mime-parse-content-type str) (map mime-split-name+value (string-split str #\;))) ;;> @subsubsubsection{@scheme{(mime-decode-header str)}} ;;> Replace all occurrences of RFC1522 =?ENC?...?= escapes in @var{str} with ;;> the appropriate decoded and charset converted value. (define (mime-decode-header str) (let* ((len (string-length str)) (limit (- len 8))) ; need at least 8 chars: "=?Q?X??=" (let lp ((i 0) (from 0) (res '())) (if (>= i limit) (string-concatenate (reverse (cons (substring str from len) res))) (if (and (eqv? #\= (string-ref str i)) (eqv? #\? (string-ref str (+ i 1)))) (let* ((j (string-char-index str #\? (+ i 3))) (k (string-char-index str #\? (+ j 3)))) (if (and j k (< (+ k 1) len) (eqv? #\? (string-ref str (+ j 2))) (memq (string-ref str (+ j 1)) '(#\Q #\B #\q #\b)) (eqv? #\= (string-ref str (+ k 1)))) (let ((decode (if (memq (string-ref str (+ j 1)) '(#\Q #\q)) quoted-printable-decode-string base64-decode-string)) (cset (substring str (+ i 2) j)) (content (substring str (+ j 3) k)) (k2 (+ k 2))) (lp k2 k2 (cons (ces-convert (decode content) cset) (cons (substring str from i) res)))) (lp (+ i 2) from res))) (lp (+ i 1) from res)))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; message parsing (define (mime-read-to-boundary port boundary next final) (let ((final-boundary (and boundary (string-append boundary "--")))) (let lp ((res '())) (let ((line (read-line port mime-line-length-limit))) (cond ((or (eof-object? line) (equal? line final-boundary)) (final (string-concatenate (reverse res) (call-with-output-string newline)))) ((equal? line boundary) (next (string-concatenate (reverse res) (call-with-output-string newline)))) (else (lp (cons line res)))))))) (define (mime-convert-part str cte enc) (let ((str (cond ((and (string? cte) (string-ci=? cte "quoted-printable")) (quoted-printable-decode-string str)) ((and (string? cte) (string-ci=? cte "base64")) (base64-decode-string str)) (else str)))) (if (string? enc) (ces-convert str enc) str))) (define (mime-read-part port cte enc boundary next final) (mime-read-to-boundary port boundary (lambda (x) (next (mime-convert-part x cte enc))) (lambda (x) (final (mime-convert-part x cte enc))))) ;;> @subsubsection{RFC2045 MIME Encoding} ;;> @subsubsubsection{@scheme{(mime-message-fold src kons knil [start end headers])}} ;;> Performs a fold operation on the given string or port @var{src} as a ;;> MIME body corresponding to the headers give in @var{headers}. @var{kons} ;;> is called on the successive values: ;;> ;;> @schemeblock{(kons parent-headers part-headers part-body accumulator)} ;;> ;;> where @var{part-headers} are the headers for the given MIME part (the ;;> original headers for single-part MIME), @var{part-body} is the ;;> appropriately decoded and charset-converted body of the message, ;;> and the @var{accumulator} begins with @var{knil}. (define (mime-message-fold src kons init-seed . o) (let ((port (if (string? src) (open-input-string src) src))) (let ((kons-start (if (pair? o) (car o) (lambda (headers seed) '()))) (kons-end (if (and (pair? o) (pair? (cdr o))) (car (cdr o)) (lambda (headers parent-seed seed) `((mime (^ ,@headers) ,@(if (pair? seed) (reverse seed) seed)) ,@parent-seed)))) (headers (if (and (pair? o) (pair? (cdr o)) (pair? (cdr (cdr o)))) (car (cdr (cdr o))) (mime-headers->list port)))) (let tfold ((parent-headers '()) (headers headers) (seed init-seed) (boundary #f) (next (lambda (x) x)) (final (lambda (x) x))) (let* ((ctype (mime-parse-content-type (mime-ref headers "Content-Type" "text/plain"))) (type (string-trim-white-space (caar ctype))) (enc (string-trim-white-space (or (mime-ref ctype "charset") (mime-ref headers "charset" "ASCII")))) (cte (string-trim-white-space (or (mime-ref headers "Content-Transfer-Encoding") (mime-ref headers "Encoding" "7-bit"))))) (cond ((and (string-ci=? type "multipart/") (mime-ref ctype "boundary")) => (lambda (boundary2) (let ((boundary2 (string-append "--" boundary2))) ;; skip preamble (mime-read-to-boundary port boundary2 (lambda (x) x) (lambda (x) x)) (let lp ((part-seed (kons-start headers seed))) (let ((part-headers (mime-headers->list port))) (tfold parent-headers part-headers part-seed boundary2 lp (lambda (x) ;; skip epilogue (if boundary (mime-read-to-boundary port boundary (lambda (x) x) (lambda (x) x))) (next (kons-end headers seed x))) )))))) (else (mime-read-part port cte enc boundary (lambda (x) (next (kons parent-headers headers x seed))) (lambda (x) (final (kons parent-headers headers x seed))))))))))) ;;> @subsubsubsection{@scheme{(mime-message->sxml [src])}} ;;> ;;> Parse the given source as a MIME message and return ;;> the result as an SXML object of the form: ;;> @scheme{(mime (^ (header . value) ...) parts ...)}. (define (mime-message->sxml . o) (car (apply mime-message-fold (if (pair? o) (car o) (current-input-port)) (lambda (parent-headers headers body seed) `((mime (^ ,@headers) ,body) ,@seed)) '() (lambda (headers seed) '()) (lambda (headers parent-seed seed) `((mime (^ ,@headers) ,@(if (pair? seed) (reverse seed) seed)) ,@parent-seed)) (if (pair? o) (cdr o) '()))))