mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-18 21:29:19 +02:00
In `servlet-respond', the server defaults to Content-Type "text/html; charset=UTF-8" for extensions that don't appear in `mime-type-from-extension'. This meant that CSS files, JavaScript files, and various types of image files were getting the wrong Content-Type, which was fine when they were served directly, but caused clients to close the connection when they were loaded over HTTP/1.1, which can deliver back-to-back resources on the same connection. Sort the types in the `mime-type-from-extension' list now that it is longer. It's still not long enough for the order to be material for performance.
469 lines
18 KiB
Scheme
469 lines
18 KiB
Scheme
;; mime.scm -- RFC2045 MIME library
|
|
;; Copyright (c) 2005-2013 Alex Shinn. All rights reserved.
|
|
;; BSD-style license: http://synthcode.com/license.txt
|
|
|
|
;;> A library to parse MIME headers and bodies into SXML.
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
(define mime-line-length-limit 4096)
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; association lists
|
|
|
|
;;> \procedure{(assq-ref ls key [default])}
|
|
;;> Returns the \scheme{cdr} of the cell in \var{ls} whose
|
|
;;> \scheme{car} is \scheme{eq?} to \var{key}, or \var{default}
|
|
;;> if not found. Useful for retrieving values associated with
|
|
;;> MIME headers.
|
|
|
|
(define (assq-ref ls key . o)
|
|
(cond ((and (pair? ls) (pair? (car ls)) (assq key ls)) => cdr)
|
|
(else (and (pair? o) (car o)))))
|
|
|
|
;; most of these are plain text for easier viewing in the browser
|
|
(define (mime-type-from-extension ext)
|
|
(assq-ref
|
|
'((c . "text/plain; charset=utf-8")
|
|
(css . "text/css; charset=utf-8")
|
|
(gif . "image/gif")
|
|
(h . "text/plain; charset=utf-8")
|
|
(htm . "text/html; charset=utf-8")
|
|
(html . "text/html; charset=utf-8")
|
|
(jpeg . "image/jpeg")
|
|
(jpg . "image/jpeg")
|
|
(js . "application/javascript; charset=utf-8")
|
|
(json . "application/json; charset=utf-8")
|
|
(md . "text/plain; charset=utf-8")
|
|
(mp3 . "audio/mpeg")
|
|
(org . "text/plain; charset=utf-8")
|
|
(pdf . "application/pdf")
|
|
(png . "image/png")
|
|
(scm . "text/plain; charset=utf-8")
|
|
(sld . "text/plain; charset=utf-8")
|
|
(svg . "image/svg+xml")
|
|
(txt . "text/plain; charset=utf-8"))
|
|
(and (string? ext) (string->symbol ext))))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; simple matching instead of regexps
|
|
|
|
(define (match-mbox-from-line line)
|
|
(let ((len (string-length line)))
|
|
(and (> len 5)
|
|
(string=? (substring line 0 5) "From ")
|
|
(let lp ((i 6))
|
|
(cond
|
|
((= i len) (list (substring line 5 len) ""))
|
|
((memq (string-ref line i) '(#\space #\tab))
|
|
(list (substring line 5 i) (substring line (+ i 1) len)))
|
|
(else (lp (+ i 1))))))))
|
|
|
|
(define (string-scan-colon-or-maybe-equal str)
|
|
(let ((len (string-length str)))
|
|
(let lp ((i 0) (best #f))
|
|
(if (= i len)
|
|
best
|
|
(let ((c (string-ref str i)))
|
|
(cond ((or (char-alphabetic? c)
|
|
(char-numeric? c)
|
|
(memv c '(#\- #\_)))
|
|
(lp (+ i 1) best))
|
|
((eq? c #\:)
|
|
(if (= i 0) #f i))
|
|
((eqv? c #\=)
|
|
(lp (+ i 1) (or best i)))
|
|
(else
|
|
best)))))))
|
|
|
|
(define (string-skip-white-space str i)
|
|
(let ((lim (string-length str)))
|
|
(let lp ((i i))
|
|
(cond ((>= i lim) lim)
|
|
((char-whitespace? (string-ref str i)) (lp (+ i 1)))
|
|
(else i)))))
|
|
|
|
(define (match-mime-header-line line)
|
|
(let ((i (string-scan-colon-or-maybe-equal line)))
|
|
(and i
|
|
(let ((j (string-skip-white-space line (+ i 1))))
|
|
(list (string->symbol (string-downcase-ascii (substring line 0 i)))
|
|
(substring line j (string-length line)))))))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; dummy encoder
|
|
|
|
(define (ces-convert bv . o)
|
|
(let ((enc (if (pair? o) (car o) "utf8")))
|
|
;; TODO: add conversion routines for non-utf8 encodings
|
|
(utf8->string bv)))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;;> \section{RFC2822 Headers}
|
|
|
|
;;> \procedure{(mime-header-fold kons knil [source [limit [kons-from]]])}
|
|
;;>
|
|
;;> Performs a fold operation on the MIME headers of source which can be
|
|
;;> either a string or port, and defaults to current-input-port. \var{kons}
|
|
;;> is called on the three values:
|
|
;;> \scheme{(kons header value accumulator)}
|
|
;;> where accumulator begins with \var{knil}. Neither the header nor the
|
|
;;> value are modified, except wrapped lines are handled for the value.
|
|
;;>
|
|
;;> The optional procedure \var{kons-from} is a procedure to be called when
|
|
;;> the first line of the headers is an "From <address> <date>" line, to
|
|
;;> enable this procedure to be used as-is on mbox files and the like.
|
|
;;> It defaults to \var{kons}, and if such a line is found the fold will begin
|
|
;;> with \scheme{(kons-from '%from <address> (kons-from '%date <date> knil))}.
|
|
;;>
|
|
;;> The optional \var{limit} gives a limit on the number of headers to read.
|
|
|
|
(define (mime-header-fold kons knil . o)
|
|
(let ((src (and (pair? o) (car o)))
|
|
(limit (and (pair? o) (pair? (cdr o)) (car (cdr o))))
|
|
(kons-from (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o))) (car (cddr o)) kons)))
|
|
((if (string? src) mime-header-fold-string mime-header-fold-port)
|
|
kons knil (or src (current-input-port)) limit kons-from)))
|
|
|
|
(define (mime-header-fold-string kons knil str limit kons-from)
|
|
(call-with-input-string str
|
|
(lambda (in) (mime-header-fold-port kons knil in limit kons-from))))
|
|
|
|
(define (mime-header-fold-port kons knil port limit kons-from)
|
|
(define (out line acc count)
|
|
(cond
|
|
((or (and limit (> count limit)) (eof-object? line) (string=? line ""))
|
|
acc)
|
|
((match-mime-header-line line)
|
|
=> (lambda (m) (in (car m) (list (cadr m)) acc (+ count 1))))
|
|
(else
|
|
;;(warn "invalid header line: ~S\n" line)
|
|
(out (read-line port mime-line-length-limit) acc (+ count 1)))))
|
|
(define (in header value acc count)
|
|
(let ((line (read-line port mime-line-length-limit)))
|
|
(cond
|
|
((and limit (> count limit))
|
|
acc)
|
|
((or (eof-object? line) (string=? line ""))
|
|
(kons header (string-join (reverse value)) acc))
|
|
((char-whitespace? (string-ref line 0))
|
|
(in header (cons line value) acc (+ count 1)))
|
|
(else
|
|
(out line
|
|
(kons header (string-join (reverse value)) acc)
|
|
(+ count 1))))))
|
|
(let ((first-line (read-line port mime-line-length-limit)))
|
|
(cond
|
|
((eof-object? first-line)
|
|
knil)
|
|
((and kons-from (match-mbox-from-line first-line))
|
|
=> (lambda (m) ; special case check on first line for mbox files
|
|
(out (read-line port mime-line-length-limit)
|
|
(kons-from '%from (car m)
|
|
(kons-from '%date (cadr m) knil))
|
|
0)))
|
|
(else
|
|
(out first-line knil 0)))))
|
|
|
|
;;> \procedure{(mime-headers->list [source])}
|
|
;;> Return an alist of the MIME headers from source with headers all
|
|
;;> downcased.
|
|
|
|
(define (mime-headers->list . o)
|
|
(reverse
|
|
(apply
|
|
mime-header-fold
|
|
(lambda (h v acc) (cons (cons h v) acc))
|
|
'()
|
|
o)))
|
|
|
|
(define (mime-split-name+value s)
|
|
(let ((i (string-find s #\=))
|
|
(start (string-cursor-start s))
|
|
(end (string-cursor-end s)))
|
|
(if (string-cursor<? i end)
|
|
(cons (string->symbol
|
|
(string-downcase-ascii
|
|
(string-trim (substring-cursor s start i))))
|
|
(if (string-cursor=? (string-cursor-next s i) end)
|
|
""
|
|
(if (eqv? #\" (string-cursor-ref s (string-cursor-next s i)))
|
|
(substring-cursor s
|
|
(string-cursor-forward s i 2)
|
|
(string-cursor-prev s end))
|
|
(substring-cursor s (string-cursor-next s i) end))))
|
|
(cons (string->symbol (string-downcase-ascii (string-trim s))) ""))))
|
|
|
|
;;> \procedure{(mime-parse-content-type str)}
|
|
;;> Parses \var{str} as a Content-Type style-value returning the list
|
|
;;> \scheme{(type (attr . val) ...)}.
|
|
|
|
;;> \example{
|
|
;;> (mime-parse-content-type "text/html; CHARSET=UTF-8; filename=index.html")
|
|
;;> }
|
|
|
|
(define (mime-parse-content-type str)
|
|
(let ((res (map mime-split-name+value
|
|
(string-split str (lambda (ch)
|
|
(or (eqv? ch #\;) (eqv? ch #\,)))))))
|
|
(if (and (pair? res) (pair? (car res)) (equal? "" (cdar res)))
|
|
(cons (caar res) (cdr res))
|
|
res)))
|
|
|
|
;;> \procedure{(mime-decode-header str)}
|
|
;;> Replace all occurrences of RFC1522 =?ENC?...?= escapes in \var{str} with
|
|
;;> the appropriate decoded and charset converted value.
|
|
|
|
(define (mime-decode-header str)
|
|
(let* ((end (string-cursor-end str))
|
|
;; need at least 8 chars: "=?Q?X??="
|
|
(limit (string-cursor-back end 8))
|
|
(start (string-cursor-start str)))
|
|
(let lp ((i start) (from start) (res '()))
|
|
(cond
|
|
((string-cursor>=? i limit)
|
|
(string-join (reverse (cons (substring-cursor str from end) res))))
|
|
((and (eqv? #\= (string-cursor-ref str i))
|
|
(eqv? #\? (string-cursor-ref str (string-cursor-next str i))))
|
|
(let* ((j (string-find str #\? (string-cursor-forward str i 3)))
|
|
(k (string-find str #\? (string-cursor-forward str j 3))))
|
|
(if (and j k (string-cursor<? (string-cursor-next str k) end)
|
|
(eqv? #\?
|
|
(string-cursor-ref str
|
|
(string-cursor-forward str j 2)))
|
|
(memq (string-cursor-ref str (string-cursor-next str j))
|
|
'(#\Q #\B #\q #\b))
|
|
(eqv? #\=
|
|
(string-cursor-ref str (string-cursor-next str k))))
|
|
(let ((decode
|
|
(if (memq (string-cursor-ref str
|
|
(string-cursor-next str j))
|
|
'(#\Q #\q))
|
|
quoted-printable-decode-string
|
|
base64-decode-string))
|
|
(cset
|
|
(substring-cursor str (string-cursor-forward str i 2) j))
|
|
(content
|
|
(substring-cursor str (string-cursor-forward str j 3) k))
|
|
(k2 (string-cursor-forward k 2)))
|
|
(lp k2 k2 (cons (ces-convert (decode content) cset)
|
|
(cons (substring-cursor str from i) res))))
|
|
(lp (string-cursor-forward str i 2) from res))))
|
|
(else
|
|
(lp (string-cursor-forward str i 1) from res))))))
|
|
|
|
;;> Write out an alist of headers in mime format.
|
|
|
|
(define (mime-write-headers headers out)
|
|
(for-each
|
|
(lambda (h)
|
|
(write-string (car h) out) (write-string ": " out)
|
|
(write-string (cdr h) out) (write-string "\r\n" out))
|
|
headers))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; message parsing
|
|
|
|
(define (read-line/binary in)
|
|
(let ((out (open-output-bytevector)))
|
|
(let lp ()
|
|
(let ((ch (read-u8 in)))
|
|
(cond ((eof-object? ch)
|
|
(let ((res (get-output-bytevector out)))
|
|
(if (zero? (bytevector-length res))
|
|
ch
|
|
res)))
|
|
((eqv? ch 10)
|
|
(get-output-bytevector out))
|
|
(else
|
|
(write-u8 ch out)
|
|
(lp)))))))
|
|
|
|
(define (bv-length-before-cr bv)
|
|
(let ((len (bytevector-length bv)))
|
|
(if (and (> len 0) (= 13 (bytevector-u8-ref bv (- len 1))))
|
|
(- len 1)
|
|
len)))
|
|
|
|
(define (mime-read-to-boundary/binary port boundary next final)
|
|
(let* ((boundary (if (string? boundary) (string->utf8 boundary) boundary))
|
|
(boundary-cr (and boundary (bytevector-append boundary #u8(13))))
|
|
(final-boundary
|
|
(and boundary (bytevector-append boundary #u8(45 45))))
|
|
(final-boundary-cr
|
|
(and final-boundary (bytevector-append final-boundary #u8(13))))
|
|
(out (open-output-bytevector)))
|
|
(let lp ((prev #f))
|
|
(let ((line (read-line/binary port)))
|
|
(cond
|
|
((or (eof-object? line)
|
|
(equal? line final-boundary)
|
|
(equal? line final-boundary-cr))
|
|
(if prev
|
|
(write-bytevector prev out 0 (bv-length-before-cr prev)))
|
|
(final (get-output-bytevector out)))
|
|
((or (equal? line boundary) (equal? line boundary-cr))
|
|
(if prev
|
|
(write-bytevector prev out 0 (bv-length-before-cr prev)))
|
|
(next (get-output-bytevector out)))
|
|
(else
|
|
(cond (prev
|
|
(write-bytevector prev out)
|
|
(write-u8 10 out)))
|
|
(lp line)))))))
|
|
|
|
(define (mime-read-to-boundary/text port boundary next final)
|
|
(let ((final-boundary (and boundary (string-append boundary "--"))))
|
|
(let lp ((res '()))
|
|
(let ((line (read-line port mime-line-length-limit)))
|
|
(cond
|
|
((or (eof-object? line) (equal? line final-boundary))
|
|
(final (string-join (reverse res)
|
|
(call-with-output-string newline))))
|
|
((equal? line boundary)
|
|
(next (string-join (reverse res)
|
|
(call-with-output-string newline))))
|
|
(else
|
|
(lp (cons line res))))))))
|
|
|
|
(define (mime-read-to-boundary port boundary next final)
|
|
((if (binary-port? port)
|
|
mime-read-to-boundary/binary
|
|
mime-read-to-boundary/text)
|
|
port boundary next final))
|
|
|
|
(define (mime-convert-part part text? cte enc)
|
|
(let ((res (cond
|
|
((and (string? cte) (string-ci=? cte "quoted-printable"))
|
|
(if text?
|
|
(quoted-printable-decode-string part)
|
|
(quoted-printable-decode-bytevector
|
|
(if (string? part) (string->utf8 part) part))))
|
|
((and (string? cte) (string-ci=? cte "base64"))
|
|
(if text?
|
|
(base64-decode-string part)
|
|
(base64-decode-bytevector
|
|
(if (string? part) (string->utf8 part) part))))
|
|
((and (not text?) (string? part))
|
|
(string->utf8 part))
|
|
(else
|
|
part))))
|
|
(cond
|
|
((and text? (bytevector? res)) (ces-convert res enc))
|
|
(else res))))
|
|
|
|
(define (mime-read-part port type cte enc boundary next final)
|
|
(let ((text? (and (symbol? type)
|
|
(string-prefix? "text/" (symbol->string type)))))
|
|
(mime-read-to-boundary
|
|
port boundary
|
|
(lambda (x) (next (mime-convert-part x text? cte enc)))
|
|
(lambda (x) (final (mime-convert-part x text? cte enc))))))
|
|
|
|
;;> \section{RFC2045 MIME Encoding}
|
|
|
|
;;> \procedure{(mime-message-fold src kons knil [down up headers])}
|
|
;;> Performs a tree fold operation on the given string or port
|
|
;;> \var{src} as a MIME body corresponding to the headers give in
|
|
;;> \var{headers}. If \var{headers} are false or not provided they
|
|
;;> are first read from \var{src}.
|
|
;;>
|
|
;;> \var{kons} is called on the successive values:
|
|
;;>
|
|
;;> \schemeblock{(kons parent-headers part-headers part-body accumulator)}
|
|
;;>
|
|
;;> where \var{part-headers} are the headers for the given MIME part (the
|
|
;;> original headers for single-part MIME), \var{part-body} is the
|
|
;;> appropriately decoded and charset-converted body of the message,
|
|
;;> and the \var{accumulator} begins with \var{knil}.
|
|
;;>
|
|
;;> If a multipart body is found, then a tree fold is performed,
|
|
;;> calling \var{down} once to get a new accumulator to pass to
|
|
;;> \var{kons}, and \var{up} on the result when returning. Their
|
|
;;> signatures are:
|
|
;;>
|
|
;;> \schemeblock{(down headers seed)}
|
|
;;> \schemeblock{(up headers parent-seed seed)}
|
|
;;>
|
|
;;> The default \var{down} simply returns null, and the default
|
|
;;> \var{up} wraps the seed in the following sxml:
|
|
;;>
|
|
;;> \schemeblock{
|
|
;;> ((mime (@ headers ...)
|
|
;;> seed ...)
|
|
;;> parent-seed ...)
|
|
;;> }
|
|
|
|
(define (mime-message-fold src kons init-seed . o)
|
|
(let ((port (if (string? src) (open-input-string src) src)))
|
|
(let ((kons-down
|
|
(or (and (pair? o) (car o)) (lambda (headers seed) '())))
|
|
(kons-up
|
|
(or (and (pair? o) (pair? (cdr o)) (car (cdr o)))
|
|
(lambda (headers parent-seed seed)
|
|
`((mime (@ ,@headers)
|
|
,@(if (pair? seed) (reverse seed) seed))
|
|
,@parent-seed))))
|
|
(headers
|
|
(or (and (pair? o) (pair? (cdr o)) (pair? (cdr (cdr o)))
|
|
(car (cdr (cdr o))))
|
|
(mime-headers->list port))))
|
|
(let tfold ((parent-headers '())
|
|
(headers headers)
|
|
(seed init-seed)
|
|
(boundary #f)
|
|
(next (lambda (x) x))
|
|
(final (lambda (x) x)))
|
|
(let* ((ctype (mime-parse-content-type
|
|
(assq-ref headers 'content-type "text/plain")))
|
|
(type (car ctype))
|
|
(enc (string-trim
|
|
(or (assq-ref ctype 'charset)
|
|
(assq-ref headers 'charset "ascii"))))
|
|
(cte (string-trim
|
|
(or (assq-ref headers 'content-transfer-encoding)
|
|
(assq-ref headers 'encoding "7-bit")))))
|
|
(cond
|
|
((and (symbol? type)
|
|
(string-prefix? "multipart/" (symbol->string type))
|
|
(assq-ref (cdr ctype) 'boundary))
|
|
=> (lambda (boundary2)
|
|
(let ((boundary2 (string-append "--" boundary2)))
|
|
;; skip preamble
|
|
(mime-read-to-boundary port boundary2 (lambda (x) x) (lambda (x) x))
|
|
(let lp ((part-seed (kons-down headers seed)))
|
|
(let ((part-headers (mime-headers->list port)))
|
|
(flush-output-port (current-error-port))
|
|
(tfold headers part-headers
|
|
part-seed boundary2
|
|
lp
|
|
(lambda (x)
|
|
;; skip epilogue
|
|
(if boundary
|
|
(mime-read-to-boundary
|
|
port boundary
|
|
(lambda (x) x) (lambda (x) x)))
|
|
(next (kons-up headers seed x)))))))))
|
|
(else
|
|
(mime-read-part
|
|
port type cte enc boundary
|
|
(lambda (x) (next (kons parent-headers headers x seed)))
|
|
(lambda (x) (final (kons parent-headers headers x seed)))))))))))
|
|
|
|
;;> \procedure{(mime-message->sxml [src [headers]])}
|
|
;;>
|
|
;;> Parse the given source as a MIME message and return
|
|
;;> the result as an SXML object of the form:
|
|
;;> \scheme{(mime (@ (header . value) ...) parts ...)}.
|
|
|
|
(define (mime-message->sxml . o)
|
|
(car
|
|
(apply
|
|
mime-message-fold
|
|
(if (pair? o) (car o) (current-input-port))
|
|
(lambda (parent-headers headers body seed)
|
|
;; Discard empty bodies.
|
|
(if (and (equal? body "") (null? headers))
|
|
seed
|
|
`((mime (@ ,@headers) ,body) ,@seed)))
|
|
'() #f #f (if (pair? o) (cdr o) '()))))
|