Adding bytevector support to quoted printable.

This commit is contained in:
Alex Shinn 2014-03-21 21:30:54 +09:00
parent f425126a11
commit e36b71a75f

View file

@ -18,30 +18,30 @@
(define *default-max-col* 76) (define *default-max-col* 76)
;; Allow for RFC1522 quoting for headers by always escaping ? and _ ;; Allow for RFC1522 quoting for headers by always escaping ? and _
(define (qp-encode str start-col max-col separator) (define (qp-encode bv start-col max-col separator)
(define (hex i) (integer->char (+ i (if (<= i 9) 48 55)))) (define (hex i) (+ i (if (<= i 9) 48 55)))
(let ((end (string-length str)) (let ((end (bytevector-length bv))
(buf (make-string max-col))) (buf (make-bytevector max-col))
(let lp ((i 0) (col start-col) (res '())) (out (open-output-bytevector)))
(let lp ((i 0) (col start-col))
(cond (cond
((= i end) ((= i end)
(if (pair? res) (write-bytevector (bytevector-copy buf 0 col) out)
(string-concatenate (reverse (cons (substring buf 0 col) res)) (get-output-bytevector out))
separator)
(substring buf start-col col)))
((>= col (- max-col 3)) ((>= col (- max-col 3))
(lp i 0 (cons (substring buf (if (pair? res) 0 start-col) col) res))) (write-bytevector (bytevector-copy buf 0 col) out)
(lp i 0))
(else (else
(let ((c (char->integer (string-ref str i)))) (let ((c (bytevector-u8-ref bv i)))
(cond (cond
((and (<= 33 c 126) (not (memq c '(61 63 95)))) ((and (<= 33 c 126) (not (memq c '(61 63 95))))
(string-set! buf col (integer->char c)) (bytevector-u8-set! buf col c)
(lp (+ i 1) (+ col 1) res)) (lp (+ i 1) (+ col 1)))
(else (else
(string-set! buf col #\=) (bytevector-u8-set! buf col (char->integer #\=))
(string-set! buf (+ col 1) (hex (arithmetic-shift c -4))) (bytevector-u8-set! buf (+ col 1) (hex (arithmetic-shift c -4)))
(string-set! buf (+ col 2) (hex (bitwise-and c #b1111))) (bytevector-u8-set! buf (+ col 2) (hex (bitwise-and c #b1111)))
(lp (+ i 1) (+ col 3) res))))))))) (lp (+ i 1) (+ col 3))))))))))
;;> Return a quoted-printable encoded representation of the input ;;> Return a quoted-printable encoded representation of the input
;;> according to the official standard as described in RFC2045. ;;> according to the official standard as described in RFC2045.
@ -51,19 +51,29 @@
;;> lines length less than \var{max-col} (default 76). The starting ;;> lines length less than \var{max-col} (default 76). The starting
;;> column may be overridden with \var{start-col} (default 0). ;;> column may be overridden with \var{start-col} (default 0).
(define (quoted-printable-encode-string . o) (define (quoted-printable-encode-string src . o)
(let ((src (if (pair? o) (car o) (current-input-port))) (if (string? src)
(start-col (if (and (pair? o) (pair? (cdr o))) (cadr o) 0)) (utf8->string
(max-col (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o))) (apply quoted-printable-encode-bytevector
(car (cddr o)) (string->utf8 src)
*default-max-col*))) o))
(qp-encode (if (string? src) src (read-string #f src)) (apply quoted-printable-encode-bytevector src o)))
start-col max-col "=\r\n")))
(define (quoted-printable-encode-bytevector . o)
(let* ((src (if (pair? o) (car o) (current-input-port)))
(o (if (pair? o) (cdr o) '()))
(start-col (if (pair? o) (car o) 0))
(o (if (pair? o) (cdr o) '()))
(max-col (if (pair? o) (car o) *default-max-col*))
(o (if (pair? o) (cdr o) '()))
(sep (if (pair? o) (car o) (string->utf8 "=\r\n"))))
(qp-encode (if (bytevector? src) src (read-bytevector 1000000000 src))
start-col max-col sep)))
;;> Variation of the above to read and write to ports. ;;> Variation of the above to read and write to ports.
(define (quoted-printable-encode . o) (define (quoted-printable-encode . o)
(display (apply quoted-printable-encode-string o))) (write-string (apply quoted-printable-encode-string o)))
;;> Return a quoted-printable encoded representation of string as ;;> Return a quoted-printable encoded representation of string as
;;> above, wrapped in =?ENC?Q?...?= as per RFC1522, split across ;;> above, wrapped in =?ENC?Q?...?= as per RFC1522, split across
@ -74,82 +84,95 @@
;;> \var{enc}. ;;> \var{enc}.
(define (quoted-printable-encode-header encoding . o) (define (quoted-printable-encode-header encoding . o)
(let ((src (if (pair? o) (car o) (current-input-port))) (let* ((src (if (pair? o) (car o) (current-input-port)))
(start-col (if (and (pair? o) (pair? (cdr o))) (cadr o) 0)) (o (if (pair? o) (cdr o) '()))
(max-col (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o))) (start-col (if (pair? o) (car o) 0))
(car (cddr o)) (o (if (pair? o) (cdr o) '()))
*default-max-col*)) (max-col (if (pair? o) (car o) *default-max-col*))
(nl (if (and (pair? o) (pair? (cdr o)) (pair? (cddr o)) (pair? (cdr (cddr o)))) (o (if (pair? o) (cdr o) '()))
(cadr (cddr o)) (nl (if (pair? o) (car o) "\r\n")))
"\r\n")))
(let* ((prefix (string-append "=?" encoding "?Q?")) (let* ((prefix (string-append "=?" encoding "?Q?"))
(prefix-length (+ 2 (string-length prefix))) (prefix-length (+ 2 (string-length prefix)))
(separator (string-append "?=" nl "\t" prefix)) (separator (string->utf8 (string-append "?=" nl "\t" prefix)))
(effective-max-col (- max-col prefix-length))) (effective-max-col (- max-col prefix-length)))
(string-append prefix (bytevector-append
(string->utf8 prefix)
(qp-encode (if (string? src) src (read-string #f src)) (qp-encode (if (string? src) src (read-string #f src))
start-col effective-max-col separator) start-col effective-max-col separator)
"?=")))) (string->utf8 "?=")))))
;;> Return a quoted-printable decoded representation of \var{str}. If ;;> Return a quoted-printable decoded representation of \var{str}. If
;;> \var{mime-header?} is specified and true, _ will be decoded as as ;;> \var{mime-header?} is specified and true, _ will be decoded as as
;;> space in accordance with RFC1522. No errors will be raised on ;;> space in accordance with RFC1522. No errors will be raised on
;;> invalid input. ;;> invalid input.
(define (quoted-printable-decode-string . o) (define (quoted-printable-decode-string src . o)
(define (hex? c) (or (char-numeric? c) (<= 65 (char->integer c) 70))) (if (string? src)
(define (unhex1 c) (utf8->string
(let ((i (char->integer c))) (if (>= i 65) (- i 55) (- i 48)))) (apply quoted-printable-decode-bytevector
(string->utf8 src)
o))
(apply quoted-printable-decode-bytevector src o)))
(define (quoted-printable-decode-bytevector . o)
(define (hex? c)
(or (char<=? #\0 (integer->char c) #\9)
(char<=? #\A (integer->char c) #\F)))
(define (unhex1 i)
(if (>= i 65) (- i 55) (- i 48)))
(define (unhex c1 c2) (define (unhex c1 c2)
(integer->char (+ (arithmetic-shift (unhex1 c1) 4) (unhex1 c2)))) (+ (arithmetic-shift (unhex1 c1) 4) (unhex1 c2)))
(let ((src (if (pair? o) (car o) (current-input-port))) (let ((src (if (pair? o) (car o) (current-input-port)))
(mime-header? (and (pair? o) (pair? (cdr o)) (car (cdr o))))) (mime-header? (and (pair? o) (pair? (cdr o)) (car (cdr o)))))
(let* ((str (if (string? src) src (read-string #f src))) (let* ((bv (if (bytevector? src) src (read-bytevector 1000000000 src)))
(end (string-length str))) (end (bytevector-length bv))
(call-with-output-string (out (open-output-bytevector)))
(lambda (out)
(let lp ((i 0)) (let lp ((i 0))
(cond (cond
((< i end) ((>= i end)
(let ((c (string-ref str i))) (get-output-bytevector out))
(else
(let ((c (bytevector-u8-ref bv i)))
(case c (case c
((#\=) ; = escapes ((61) ; = escapes
(cond (cond
((< (+ i 2) end) ((< (+ i 2) end)
(let ((c2 (string-ref str (+ i 1)))) (let ((c2 (bytevector-u8-ref bv (+ i 1))))
(cond (cond
((eq? c2 #\newline) (lp (+ i 2))) ((eq? c2 10) (lp (+ i 2)))
((eq? c2 #\return) ((eq? c2 13)
(lp (if (eq? (string-ref str (+ i 2)) #\newline) (lp (if (eq? 10 (bytevector-u8-ref bv (+ i 2)))
(+ i 3) (+ i 3)
(+ i 2)))) (+ i 2))))
((hex? c2) ((hex? c2)
(let ((c3 (string-ref str (+ i 2)))) (let ((c3 (bytevector-u8-ref bv (+ i 2))))
(if (hex? c3) (write-char (unhex c2 c3) out)) (if (hex? c3) (write-u8 (unhex c2 c3) out))
(lp (+ i 3)))) (lp (+ i 3))))
(else (lp (+ i 3)))))))) (else (lp (+ i 3))))))))
((#\_) ; maybe translate _ to space ((95) ; maybe translate _ to space
(write-char (if mime-header? #\space c) out) (write-u8 (if mime-header? 32 c) out)
(lp (+ i 1))) (lp (+ i 1)))
((#\space #\tab) ; strip trailing whitespace ((32 9) ; strip trailing whitespace
(let lp2 ((j (+ i 1))) (let lp2 ((j (+ i 1)))
(cond (cond
((not (= j end)) ((not (= j end))
(case (string-ref str j) (case (bytevector-u8-ref bv j)
((#\space #\tab) (lp2 (+ j 1))) ((32 9) (lp2 (+ j 1)))
((#\newline) ((10)
(lp (+ j 1))) (lp (+ j 1)))
((#\return) ((13)
(let ((k (+ j 1))) (let ((k (+ j 1)))
(lp (if (and (< k end) (lp (if (and (< k end)
(eqv? #\newline (string-ref str k))) (eq? 10 (bytevector-u8-ref bv k)))
(+ k 1) k)))) (+ k 1) k))))
(else (display (substring str i j) out) (lp j))))))) (else
(write-bytevector (bytevector-copy bv i j) out)
(lp j)))))))
(else ; a literal char (else ; a literal char
(write-char c out) (write-u8 c out)
(lp (+ i 1))))))))))))) (lp (+ i 1)))))))))))
;;> Variation of the above to read and write to ports. ;;> Variation of the above to read and write to ports.
(define (quoted-printable-decode . o) (define (quoted-printable-decode . o)
(display (apply quoted-printable-decode-string o))) (write-string (apply quoted-printable-decode-string o)))