mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-18 21:29:19 +02:00
371 lines
14 KiB
Scheme
371 lines
14 KiB
Scheme
;; Copyright (c) 2005-2014 Alex Shinn. All rights reserved.
|
|
;; BSD-style license: http://synthcode.com/license.txt
|
|
|
|
;;> RFC 3548 base64 encoding and decoding utilities.
|
|
;;> This API is compatible with the Gauche library rfc.base64.
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; string utils
|
|
|
|
(define (string-chop str n)
|
|
(let ((len (string-length str)))
|
|
(let lp ((i 0) (res '()))
|
|
(let ((j (+ i n)))
|
|
(if (>= j len)
|
|
(reverse (cons (substring str i len) res))
|
|
(lp j (cons (substring str i j) res)))))))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; constants and tables
|
|
|
|
(define *default-max-col* 76)
|
|
|
|
(define *outside-char* 99) ; luft-balloons
|
|
(define *pad-char* 101) ; dalmations
|
|
|
|
(define *base64-decode-table*
|
|
(let ((res (make-vector #x100 *outside-char*)))
|
|
(let lp ((i 0)) ; map letters
|
|
(cond
|
|
((<= i 25)
|
|
(vector-set! res (+ i 65) i)
|
|
(vector-set! res (+ i 97) (+ i 26))
|
|
(lp (+ i 1)))))
|
|
(let lp ((i 0)) ; map numbers
|
|
(cond
|
|
((<= i 9)
|
|
(vector-set! res (+ i 48) (+ i 52))
|
|
(lp (+ i 1)))))
|
|
;; extras (be liberal for different common base64 formats)
|
|
(vector-set! res (char->integer #\+) 62)
|
|
(vector-set! res (char->integer #\-) 62)
|
|
(vector-set! res (char->integer #\/) 63)
|
|
(vector-set! res (char->integer #\_) 63)
|
|
(vector-set! res (char->integer #\~) 63)
|
|
(vector-set! res (char->integer #\=) *pad-char*)
|
|
res))
|
|
|
|
(define (base64-decode-u8 u8)
|
|
(vector-ref *base64-decode-table* u8))
|
|
|
|
(define *base64-encode-table*
|
|
(let ((res (make-vector 64)))
|
|
(let lp ((i 0)) ; map letters
|
|
(cond
|
|
((<= i 25)
|
|
(vector-set! res i (+ i 65))
|
|
(vector-set! res (+ i 26) (+ i 97))
|
|
(lp (+ i 1)))))
|
|
(let lp ((i 0)) ; map numbers
|
|
(cond
|
|
((<= i 9)
|
|
(vector-set! res (+ i 52) (+ i 48))
|
|
(lp (+ i 1)))))
|
|
(vector-set! res 62 (char->integer #\+))
|
|
(vector-set! res 63 (char->integer #\/))
|
|
res))
|
|
|
|
(define (enc i)
|
|
(vector-ref *base64-encode-table* i))
|
|
|
|
;; try to match common boundaries
|
|
(define decode-src-length
|
|
(lcm 76 78))
|
|
|
|
(define decode-dst-length
|
|
(* 3 (arithmetic-shift (+ 3 decode-src-length) -2)))
|
|
|
|
(define encode-src-length
|
|
(* 3 1024))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; decoding
|
|
|
|
;;> Return a base64 decoded representation of string, also interpreting
|
|
;;> the alternate 62 & 63 valued characters as described in RFC3548.
|
|
;;> Other out-of-band characters are silently stripped, and = signals
|
|
;;> the end of the encoded string. No errors will be raised.
|
|
|
|
;; Create a result buffer with the maximum possible length for the
|
|
;; input, and pass it to the internal base64-decode-string! utility.
|
|
;; If the resulting length used is exact, we can return that buffer,
|
|
;; otherwise we return the appropriate substring.
|
|
|
|
(define (base64-decode-string str)
|
|
(utf8->string (base64-decode-bytevector (string->utf8 str))))
|
|
|
|
(define (base64-decode-bytevector src)
|
|
(let* ((len (bytevector-length src))
|
|
(dst-len (* 3 (arithmetic-shift (+ 3 len) -2)))
|
|
(dst (make-bytevector dst-len)))
|
|
(base64-decode-bytevector!
|
|
src 0 len dst
|
|
(lambda (src-offset res-len b1 b2 b3)
|
|
(let ((res-len (base64-decode-finish dst res-len b1 b2 b3)))
|
|
(if (= res-len dst-len)
|
|
dst
|
|
(bytevector-copy dst 0 res-len)))))))
|
|
|
|
;; This is a little funky.
|
|
;;
|
|
;; We want to skip over "outside" characters (e.g. newlines inside
|
|
;; base64-encoded data, as would be passed in mail clients and most
|
|
;; large base64 data). This would normally mean two nested loops -
|
|
;; one for overall processing the input, and one for looping until
|
|
;; we get to a valid character. However, many Scheme compilers are
|
|
;; really bad about optimizing nested loops of primitives, so we
|
|
;; flatten this into a single loop, using conditionals to determine
|
|
;; which character is currently being read.
|
|
(define (base64-decode-bytevector! src start end dst kont)
|
|
(let lp ((i start)
|
|
(j 0)
|
|
(b1 *outside-char*)
|
|
(b2 *outside-char*)
|
|
(b3 *outside-char*))
|
|
(if (>= i end)
|
|
(kont i j b1 b2 b3)
|
|
(let ((c (base64-decode-u8 (bytevector-u8-ref src i))))
|
|
(cond
|
|
((eqv? c *pad-char*)
|
|
(kont i j b1 b2 b3))
|
|
((eqv? c *outside-char*)
|
|
(lp (+ i 1) j b1 b2 b3))
|
|
((eqv? b1 *outside-char*)
|
|
(lp (+ i 1) j c b2 b3))
|
|
((eqv? b2 *outside-char*)
|
|
(lp (+ i 1) j b1 c b3))
|
|
((eqv? b3 *outside-char*)
|
|
(lp (+ i 1) j b1 b2 c))
|
|
(else
|
|
(bytevector-u8-set!
|
|
dst
|
|
j
|
|
(bitwise-ior (arithmetic-shift b1 2)
|
|
(bit-field b2 4 6)))
|
|
(bytevector-u8-set!
|
|
dst
|
|
(+ j 1)
|
|
(bitwise-ior
|
|
(arithmetic-shift (bit-field b2 0 4) 4)
|
|
(bit-field b3 2 6)))
|
|
(bytevector-u8-set!
|
|
dst
|
|
(+ j 2)
|
|
(bitwise-ior
|
|
(arithmetic-shift (bit-field b3 0 2) 6)
|
|
c))
|
|
(lp (+ i 1) (+ j 3)
|
|
*outside-char* *outside-char* *outside-char*)))))))
|
|
|
|
;; If requested, account for any "partial" results (i.e. trailing 2 or
|
|
;; 3 chars) by writing them into the destination (additional 1 or 2
|
|
;; bytes) and returning the adjusted offset for how much data we've
|
|
;; written.
|
|
(define (base64-decode-finish dst j b1 b2 b3)
|
|
(cond
|
|
((eqv? b1 *outside-char*)
|
|
j)
|
|
((eqv? b2 *outside-char*)
|
|
(bytevector-u8-set! dst j (arithmetic-shift b1 2))
|
|
(+ j 1))
|
|
(else
|
|
(bytevector-u8-set! dst
|
|
j
|
|
(bitwise-ior (arithmetic-shift b1 2)
|
|
(bit-field b2 4 6)))
|
|
(cond
|
|
((eqv? b3 *outside-char*)
|
|
(+ j 1))
|
|
(else
|
|
(bytevector-u8-set! dst
|
|
(+ j 1)
|
|
(bitwise-ior
|
|
(arithmetic-shift (bit-field b2 0 4) 4)
|
|
(bit-field b3 2 6)))
|
|
(+ j 2))))))
|
|
|
|
;;> Variation of the above to read and write to ports.
|
|
|
|
(define (base64-decode . o)
|
|
(let ((in (if (pair? o) (car o) (current-input-port)))
|
|
(out (if (and (pair? o) (pair? (cdr o)))
|
|
(cadr o)
|
|
(current-output-port))))
|
|
(cond
|
|
((not (binary-port? in))
|
|
(let ((str (port->string in)))
|
|
(write-string (base64-decode-string str) out)))
|
|
(else
|
|
(let ((src (make-bytevector decode-src-length))
|
|
(dst (make-bytevector decode-dst-length)))
|
|
(let lp ((offset 0))
|
|
(let ((src-len
|
|
(+ offset
|
|
(read-bytevector! src in offset decode-src-length))))
|
|
(cond
|
|
((= src-len decode-src-length)
|
|
;; read a full chunk: decode, write and loop
|
|
(base64-decode-bytevector!
|
|
src 0 decode-src-length dst
|
|
(lambda (src-offset dst-len b1 b2 b3)
|
|
(cond
|
|
((and (< src-offset src-len)
|
|
(eqv? #x3D (bytevector-u8-ref src src-offset)))
|
|
;; done
|
|
(let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3)))
|
|
(write-bytevector dst out 0 dst-len)))
|
|
((eqv? b1 *outside-char*)
|
|
(write-bytevector dst out 0 dst-len)
|
|
(lp 0))
|
|
(else
|
|
(write-bytevector dst out 0 dst-len)
|
|
;; one to three chars left in buffer
|
|
(bytevector-u8-set! src 0 (enc b1))
|
|
(cond
|
|
((eqv? b2 *outside-char*)
|
|
(lp 1))
|
|
(else
|
|
(bytevector-u8-set! src 1 (enc b2))
|
|
(cond
|
|
((eqv? b3 *outside-char*)
|
|
(lp 2))
|
|
(else
|
|
(bytevector-u8-set! src 2 (enc b3))
|
|
(lp 3))))))))))
|
|
(else
|
|
;; end of source - just decode and write once
|
|
(base64-decode-bytevector!
|
|
src 0 src-len dst
|
|
(lambda (src-offset dst-len b1 b2 b3)
|
|
(let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3)))
|
|
(write-bytevector dst out 0 dst-len)))))))))))))
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
;; encoding
|
|
|
|
;;> Return a base64 encoded representation of string according to the
|
|
;;> official base64 standard as described in RFC3548.
|
|
|
|
(define (base64-encode-string str)
|
|
(utf8->string (base64-encode-bytevector (string->utf8 str))))
|
|
|
|
(define (base64-encode-bytevector bv)
|
|
(let* ((len (bytevector-length bv))
|
|
(quot (quotient len 3))
|
|
(rem (- len (* quot 3)))
|
|
(res-len (arithmetic-shift (+ quot (if (zero? rem) 0 1)) 2))
|
|
(res (make-bytevector res-len)))
|
|
(base64-encode-bytevector! bv 0 len res)
|
|
res))
|
|
|
|
(define (base64-encode-bytevector! bv start end res)
|
|
(let ((limit (- end 2)))
|
|
(let lp ((i start) (j 0))
|
|
(if (>= i limit)
|
|
(case (- end i)
|
|
((1)
|
|
(let ((b1 (bytevector-u8-ref bv i)))
|
|
(bytevector-u8-set! res j (enc (arithmetic-shift b1 -2)))
|
|
(bytevector-u8-set!
|
|
res
|
|
(+ j 1)
|
|
(enc (arithmetic-shift (bitwise-and #b11 b1) 4)))
|
|
(bytevector-u8-set! res (+ j 2) (char->integer #\=))
|
|
(bytevector-u8-set! res (+ j 3) (char->integer #\=))
|
|
(+ j 4)))
|
|
((2)
|
|
(let ((b1 (bytevector-u8-ref bv i))
|
|
(b2 (bytevector-u8-ref bv (+ i 1))))
|
|
(bytevector-u8-set! res j (enc (arithmetic-shift b1 -2)))
|
|
(bytevector-u8-set!
|
|
res
|
|
(+ j 1)
|
|
(enc (bitwise-ior
|
|
(arithmetic-shift (bitwise-and #b11 b1) 4)
|
|
(bit-field b2 4 8))))
|
|
(bytevector-u8-set!
|
|
res
|
|
(+ j 2)
|
|
(enc (arithmetic-shift (bit-field b2 0 4) 2)))
|
|
(bytevector-u8-set! res (+ j 3) (char->integer #\=))
|
|
(+ j 4)))
|
|
(else
|
|
j))
|
|
(let ((b1 (bytevector-u8-ref bv i))
|
|
(b2 (bytevector-u8-ref bv (+ i 1)))
|
|
(b3 (bytevector-u8-ref bv (+ i 2))))
|
|
(bytevector-u8-set! res j (enc (arithmetic-shift b1 -2)))
|
|
(bytevector-u8-set!
|
|
res
|
|
(+ j 1)
|
|
(enc (bitwise-ior
|
|
(arithmetic-shift (bitwise-and #b11 b1) 4)
|
|
(bit-field b2 4 8))))
|
|
(bytevector-u8-set!
|
|
res
|
|
(+ j 2)
|
|
(enc (bitwise-ior
|
|
(arithmetic-shift (bit-field b2 0 4) 2)
|
|
(bit-field b3 6 8))))
|
|
(bytevector-u8-set! res (+ j 3) (enc (bitwise-and #b111111 b3)))
|
|
(lp (+ i 3) (+ j 4)))))))
|
|
|
|
;;> Variation of the above to read and write to ports.
|
|
|
|
(define (base64-encode . o)
|
|
(let ((in (if (pair? o) (car o) (current-input-port)))
|
|
(out (if (and (pair? o) (pair? (cdr o)))
|
|
(cadr o)
|
|
(current-output-port))))
|
|
(cond
|
|
((not (binary-port? in))
|
|
(let ((str (port->string in)))
|
|
(write-string (base64-encode-string str) out)))
|
|
(else
|
|
(let ((src (make-bytevector encode-src-length))
|
|
(dst (make-bytevector
|
|
(arithmetic-shift (quotient encode-src-length 3) 2))))
|
|
(let lp ()
|
|
(let ((n (read-bytevector! src in 0 2048)))
|
|
(base64-encode-bytevector! src 0 n dst)
|
|
(write-bytevector dst out 0 (* 4 (quotient (+ n 2) 3)))
|
|
(if (= n 2048)
|
|
(lp)
|
|
(flush-output-port out)))))))))
|
|
|
|
;;> Return a base64 encoded representation of the string \var{str} as
|
|
;;> above, wrapped in =?ENC?B?...?= as per RFC1522, split across
|
|
;;> multiple MIME-header lines as needed to keep each lines length
|
|
;;> less than \var{max-col}. The string is encoded as is, and the
|
|
;;> encoding \var{enc} is just used for the prefix, i.e. you are
|
|
;;> responsible for ensuring \var{str} is already encoded according to
|
|
;;> \var{enc}. The optional argument \var{nl} is the newline
|
|
;;> separator, defaulting to \var{crlf}.
|
|
|
|
(define (base64-encode-header encoding str . o)
|
|
(define (round4 i) (arithmetic-shift (arithmetic-shift i -2) 2))
|
|
(let ((start-col (if (pair? o) (car o) 0))
|
|
(max-col (if (and (pair? o) (pair? (cdr o)))
|
|
(car (cdr o))
|
|
*default-max-col*))
|
|
(nl (if (and (pair? o) (pair? (cdr o)) (pair? (cdr (cdr o))))
|
|
(car (cdr (cdr o)))
|
|
"\r\n")))
|
|
(let* ((prefix (string-append "=?" encoding "?B?"))
|
|
(prefix-length (+ 2 (string-length prefix)))
|
|
(effective-max-col (round4 (- max-col prefix-length)))
|
|
(first-max-col (round4 (- effective-max-col start-col)))
|
|
(str (base64-encode-string str))
|
|
(len (string-length str)))
|
|
(if (<= len first-max-col)
|
|
(string-append prefix str "?=")
|
|
(string-append
|
|
(if (positive? first-max-col)
|
|
(string-append
|
|
prefix (substring str 0 first-max-col) "?=" nl "\t" prefix)
|
|
"")
|
|
(string-join (string-chop (substring str first-max-col len)
|
|
effective-max-col)
|
|
(string-append "?=" nl "\t" prefix))
|
|
"?=")))))
|
|
|