Adding bytevector support to base64 lib.

This commit is contained in:
Alex Shinn 2014-03-21 17:38:49 +09:00
parent 1e06cd215a
commit f425126a11
3 changed files with 193 additions and 133 deletions

View file

@ -45,24 +45,24 @@
(vector-set! res (char->integer #\=) *pad-char*) (vector-set! res (char->integer #\=) *pad-char*)
res)) res))
(define (base64-decode-char c) (define (base64-decode-u8 u8)
(vector-ref *base64-decode-table* (char->integer c))) (vector-ref *base64-decode-table* u8))
(define *base64-encode-table* (define *base64-encode-table*
(let ((res (make-vector 64))) (let ((res (make-vector 64)))
(let lp ((i 0)) ; map letters (let lp ((i 0)) ; map letters
(cond (cond
((<= i 25) ((<= i 25)
(vector-set! res i (integer->char (+ i 65))) (vector-set! res i (+ i 65))
(vector-set! res (+ i 26) (integer->char (+ i 97))) (vector-set! res (+ i 26) (+ i 97))
(lp (+ i 1))))) (lp (+ i 1)))))
(let lp ((i 0)) ; map numbers (let lp ((i 0)) ; map numbers
(cond (cond
((<= i 9) ((<= i 9)
(vector-set! res (+ i 52) (integer->char (+ i 48))) (vector-set! res (+ i 52) (+ i 48))
(lp (+ i 1))))) (lp (+ i 1)))))
(vector-set! res 62 #\+) (vector-set! res 62 (char->integer #\+))
(vector-set! res 63 #\/) (vector-set! res 63 (char->integer #\/))
res)) res))
(define (enc i) (define (enc i)
@ -90,17 +90,21 @@
;; input, and pass it to the internal base64-decode-string! utility. ;; input, and pass it to the internal base64-decode-string! utility.
;; If the resulting length used is exact, we can return that buffer, ;; If the resulting length used is exact, we can return that buffer,
;; otherwise we return the appropriate substring. ;; otherwise we return the appropriate substring.
(define (base64-decode-string src)
(let* ((len (string-length src)) (define (base64-decode-string str)
(utf8->string (base64-decode-bytevector (string->utf8 str))))
(define (base64-decode-bytevector src)
(let* ((len (bytevector-length src))
(dst-len (* 3 (arithmetic-shift (+ 3 len) -2))) (dst-len (* 3 (arithmetic-shift (+ 3 len) -2)))
(dst (make-string dst-len))) (dst (make-bytevector dst-len)))
(base64-decode-string! (base64-decode-bytevector!
src 0 len dst src 0 len dst
(lambda (src-offset res-len b1 b2 b3) (lambda (src-offset res-len b1 b2 b3)
(let ((res-len (base64-decode-finish dst res-len b1 b2 b3))) (let ((res-len (base64-decode-finish dst res-len b1 b2 b3)))
(if (= res-len dst-len) (if (= res-len dst-len)
dst dst
(substring dst 0 res-len))))))) (bytevector-copy dst 0 res-len)))))))
;; This is a little funky. ;; This is a little funky.
;; ;;
@ -112,7 +116,7 @@
;; really bad about optimizing nested loops of primitives, so we ;; really bad about optimizing nested loops of primitives, so we
;; flatten this into a single loop, using conditionals to determine ;; flatten this into a single loop, using conditionals to determine
;; which character is currently being read. ;; which character is currently being read.
(define (base64-decode-string! src start end dst kont) (define (base64-decode-bytevector! src start end dst kont)
(let lp ((i start) (let lp ((i start)
(j 0) (j 0)
(b1 *outside-char*) (b1 *outside-char*)
@ -120,7 +124,7 @@
(b3 *outside-char*)) (b3 *outside-char*))
(if (>= i end) (if (>= i end)
(kont i j b1 b2 b3) (kont i j b1 b2 b3)
(let ((c (base64-decode-char (string-ref src i)))) (let ((c (base64-decode-u8 (bytevector-u8-ref src i))))
(cond (cond
((eqv? c *pad-char*) ((eqv? c *pad-char*)
(kont i j b1 b2 b3)) (kont i j b1 b2 b3))
@ -133,23 +137,23 @@
((eqv? b3 *outside-char*) ((eqv? b3 *outside-char*)
(lp (+ i 1) j b1 b2 c)) (lp (+ i 1) j b1 b2 c))
(else (else
(string-set! dst (bytevector-u8-set!
j dst
(integer->char j
(bitwise-ior (arithmetic-shift b1 2) (bitwise-ior (arithmetic-shift b1 2)
(extract-bit-field 2 4 b2)))) (extract-bit-field 2 4 b2)))
(string-set! dst (bytevector-u8-set!
(+ j 1) dst
(integer->char (+ j 1)
(bitwise-ior (bitwise-ior
(arithmetic-shift (extract-bit-field 4 0 b2) 4) (arithmetic-shift (extract-bit-field 4 0 b2) 4)
(extract-bit-field 4 2 b3)))) (extract-bit-field 4 2 b3)))
(string-set! dst (bytevector-u8-set!
(+ j 2) dst
(integer->char (+ j 2)
(bitwise-ior (bitwise-ior
(arithmetic-shift (extract-bit-field 2 0 b3) 6) (arithmetic-shift (extract-bit-field 2 0 b3) 6)
c))) c))
(lp (+ i 1) (+ j 3) (lp (+ i 1) (+ j 3)
*outside-char* *outside-char* *outside-char*))))))) *outside-char* *outside-char* *outside-char*)))))))
@ -162,24 +166,22 @@
((eqv? b1 *outside-char*) ((eqv? b1 *outside-char*)
j) j)
((eqv? b2 *outside-char*) ((eqv? b2 *outside-char*)
(string-set! dst j (integer->char (arithmetic-shift b1 2))) (bytevector-u8-set! dst j (arithmetic-shift b1 2))
(+ j 1)) (+ j 1))
(else (else
(string-set! dst (bytevector-u8-set! dst
j j
(integer->char (bitwise-ior (arithmetic-shift b1 2)
(bitwise-ior (arithmetic-shift b1 2) (extract-bit-field 2 4 b2)))
(extract-bit-field 2 4 b2))))
(cond (cond
((eqv? b3 *outside-char*) ((eqv? b3 *outside-char*)
(+ j 1)) (+ j 1))
(else (else
(string-set! dst (bytevector-u8-set! dst
(+ j 1) (+ j 1)
(integer->char (bitwise-ior
(bitwise-ior (arithmetic-shift (extract-bit-field 4 0 b2) 4)
(arithmetic-shift (extract-bit-field 4 0 b2) 4) (extract-bit-field 4 2 b3)))
(extract-bit-field 4 2 b3))))
(+ j 2)))))) (+ j 2))))))
;;> Variation of the above to read and write to ports. ;;> Variation of the above to read and write to ports.
@ -189,48 +191,53 @@
(out (if (and (pair? o) (pair? (cdr o))) (out (if (and (pair? o) (pair? (cdr o)))
(cadr o) (cadr o)
(current-output-port)))) (current-output-port))))
(let ((src (make-string decode-src-length)) (cond
(dst (make-string decode-dst-length))) ((not (binary-port? in))
(let lp ((offset 0)) (write-string (base64-decode-string (port->string in)) out))
(let ((src-len (+ offset (else
(read-string! decode-src-length src in offset)))) (let ((src (make-bytevector decode-src-length))
(cond (dst (make-bytevector decode-dst-length)))
((= src-len decode-src-length) (let lp ((offset 0))
;; read a full chunk: decode, write and loop (let ((src-len
(base64-decode-string! (+ offset
src 0 decode-src-length dst (read-bytevector! decode-src-length src in offset))))
(lambda (src-offset dst-len b1 b2 b3) (cond
(cond ((= src-len decode-src-length)
((and (< src-offset src-len) ;; read a full chunk: decode, write and loop
(eqv? #\= (string-ref src src-offset))) (base64-decode-bytevector!
;; done src 0 decode-src-length dst
(let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3))) (lambda (src-offset dst-len b1 b2 b3)
(write-string dst out 0 dst-len)))
((eqv? b1 *outside-char*)
(write-string dst out 0 dst-len)
(lp 0))
(else
(write-string dst out 0 dst-len)
;; one to three chars left in buffer
(string-set! src 0 (enc b1))
(cond (cond
((eqv? b2 *outside-char*) ((and (< src-offset src-len)
(lp 1)) (eqv? #\= (string-ref src src-offset)))
;; done
(let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3)))
(write-bytevector dst out 0 dst-len)))
((eqv? b1 *outside-char*)
(write-string dst out 0 dst-len)
(lp 0))
(else (else
(string-set! src 1 (enc b2)) (write-bytevector dst out 0 dst-len)
;; one to three chars left in buffer
(bytevector-u8-set! src 0 (enc b1))
(cond (cond
((eqv? b3 *outside-char*) ((eqv? b2 *outside-char*)
(lp 2)) (lp 1))
(else (else
(string-set! src 2 (enc b3)) (bytevector-u8-set! src 1 (enc b2))
(lp 3)))))))))) (cond
(else ((eqv? b3 *outside-char*)
;; end of source - just decode and write once (lp 2))
(base64-decode-string! (else
src 0 src-len dst (bytevector-u8-set! src 2 (enc b3))
(lambda (src-offset dst-len b1 b2 b3) (lp 3))))))))))
(let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3))) (else
(write-string dst out 0 dst-len))))))))))) ;; end of source - just decode and write once
(base64-decode-bytevector!
src 0 src-len dst
(lambda (src-offset dst-len b1 b2 b3)
(let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3)))
(write-string dst out 0 dst-len)))))))))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; encoding ;; encoding
@ -239,57 +246,65 @@
;;> official base64 standard as described in RFC3548. ;;> official base64 standard as described in RFC3548.
(define (base64-encode-string str) (define (base64-encode-string str)
(let* ((len (string-length str)) (utf8->string (base64-encode-bytevector (string->utf8 str))))
(define (base64-encode-bytevector bv)
(let* ((len (bytevector-length bv))
(quot (quotient len 3)) (quot (quotient len 3))
(rem (- len (* quot 3))) (rem (- len (* quot 3)))
(res-len (arithmetic-shift (+ quot (if (zero? rem) 0 1)) 2)) (res-len (arithmetic-shift (+ quot (if (zero? rem) 0 1)) 2))
(res (make-string res-len))) (res (make-bytevector res-len)))
(base64-encode-string! str 0 len res) (base64-encode-bytevector! bv 0 len res)
res)) res))
(define (base64-encode-string! str start end res) (define (base64-encode-bytevector! bv start end res)
(let* ((res-len (string-length res)) (let* ((res-len (bytevector-length res))
(limit (- end 2))) (limit (- end 2)))
(let lp ((i start) (j 0)) (let lp ((i start) (j 0))
(if (>= i limit) (if (>= i limit)
(case (- end i) (case (- end i)
((1) ((1)
(let ((b1 (char->integer (string-ref str i)))) (let ((b1 (bytevector-u8-ref bv i)))
(string-set! res j (enc (arithmetic-shift b1 -2))) (bytevector-u8-set! res j (enc (arithmetic-shift b1 -2)))
(string-set! res (bytevector-u8-set!
(+ j 1) res
(enc (arithmetic-shift (bitwise-and #b11 b1) 4))) (+ j 1)
(string-set! res (+ j 2) #\=) (enc (arithmetic-shift (bitwise-and #b11 b1) 4)))
(string-set! res (+ j 3) #\=))) (bytevector-u8-set! res (+ j 2) (char->integer #\=))
(bytevector-u8-set! res (+ j 3) (char->integer #\=))))
((2) ((2)
(let ((b1 (char->integer (string-ref str i))) (let ((b1 (bytevector-u8-ref bv i))
(b2 (char->integer (string-ref str (+ i 1))))) (b2 (bytevector-u8-ref bv (+ i 1))))
(string-set! res j (enc (arithmetic-shift b1 -2))) (bytevector-u8-set! res j (enc (arithmetic-shift b1 -2)))
(string-set! res (bytevector-u8-set!
(+ j 1) res
(enc (bitwise-ior (+ j 1)
(arithmetic-shift (bitwise-and #b11 b1) 4) (enc (bitwise-ior
(extract-bit-field 4 4 b2)))) (arithmetic-shift (bitwise-and #b11 b1) 4)
(string-set! res (extract-bit-field 4 4 b2))))
(+ j 2) (bytevector-u8-set!
(enc (arithmetic-shift (extract-bit-field 4 0 b2) res
2))) (+ j 2)
(string-set! res (+ j 3) #\=)))) (enc (arithmetic-shift (extract-bit-field 4 0 b2)
(let ((b1 (char->integer (string-ref str i))) 2)))
(b2 (char->integer (string-ref str (+ i 1)))) (bytevector-u8-set! res (+ j 3) (char->integer #\=)))))
(b3 (char->integer (string-ref str (+ i 2))))) (let ((b1 (bytevector-u8-ref bv i))
(string-set! res j (enc (arithmetic-shift b1 -2))) (b2 (bytevector-u8-ref bv (+ i 1)))
(string-set! res (b3 (bytevector-u8-ref bv (+ i 2))))
(+ j 1) (bytevector-u8-set! res j (enc (arithmetic-shift b1 -2)))
(enc (bitwise-ior (bytevector-u8-set!
(arithmetic-shift (bitwise-and #b11 b1) 4) res
(extract-bit-field 4 4 b2)))) (+ j 1)
(string-set! res (enc (bitwise-ior
(+ j 2) (arithmetic-shift (bitwise-and #b11 b1) 4)
(enc (bitwise-ior (extract-bit-field 4 4 b2))))
(arithmetic-shift (extract-bit-field 4 0 b2) 2) (bytevector-u8-set!
(extract-bit-field 2 6 b3)))) res
(string-set! res (+ j 3) (enc (bitwise-and #b111111 b3))) (+ j 2)
(enc (bitwise-ior
(arithmetic-shift (extract-bit-field 4 0 b2) 2)
(extract-bit-field 2 6 b3))))
(bytevector-u8-set! res (+ j 3) (enc (bitwise-and #b111111 b3)))
(lp (+ i 3) (+ j 4))))))) (lp (+ i 3) (+ j 4)))))))
;;> Variation of the above to read and write to ports. ;;> Variation of the above to read and write to ports.
@ -299,15 +314,19 @@
(out (if (and (pair? o) (pair? (cdr o))) (out (if (and (pair? o) (pair? (cdr o)))
(cadr o) (cadr o)
(current-output-port)))) (current-output-port))))
(let ((src (make-string encode-src-length)) (cond
(dst (make-string ((not (binary-port? in))
(arithmetic-shift (quotient encode-src-length 3) 2)))) (write-string (base64-encode-string (port->string in)) out))
(let lp () (else
(let ((n (read-string! 2048 src in))) (let ((src (make-string encode-src-length))
(base64-encode-string! src 0 n dst) (dst (make-string
(write-string dst out 0 (* 3 (quotient (+ n 3) 4))) (arithmetic-shift (quotient encode-src-length 3) 2))))
(if (= n 2048) (let lp ()
(lp))))))) (let ((n (read-bytevector! src in 0 2048)))
(base64-encode-bytevector! src 0 n dst)
(write-bytevector dst out 0 (* 3 (quotient (+ n 3) 4)))
(if (= n 2048)
(lp)))))))))
;;> Return a base64 encoded representation of the string \var{str} as ;;> Return a base64 encoded representation of the string \var{str} as
;;> above, wrapped in =?ENC?B?...?= as per RFC1522, split across ;;> above, wrapped in =?ENC?B?...?= as per RFC1522, split across

View file

@ -1,7 +1,8 @@
(define-library (chibi base64) (define-library (chibi base64)
(export base64-encode base64-encode-string (export base64-encode base64-encode-string base64-encode-bytevector
base64-decode base64-decode-string base64-decode base64-decode-string base64-decode-bytevector
base64-encode-header) base64-encode-header)
(import (chibi) (srfi 33) (chibi io)) (import (scheme base) (srfi 33) (chibi io)
(only (chibi) string-concatenate))
(include "base64.scm")) (include "base64.scm"))

40
tests/base64-tests.scm Normal file
View file

@ -0,0 +1,40 @@
(import (chibi) (chibi base64) (chibi test))
(test-begin "base64")
(test "YW55IGNhcm5hbCBwbGVhc3VyZS4="
(base64-encode-string "any carnal pleasure."))
(test "YW55IGNhcm5hbCBwbGVhc3VyZQ=="
(base64-encode-string "any carnal pleasure"))
(test "YW55IGNhcm5hbCBwbGVhc3Vy"
(base64-encode-string "any carnal pleasur"))
(test "YW55IGNhcm5hbCBwbGVhc3U="
(base64-encode-string "any carnal pleasu"))
(test "YW55IGNhcm5hbCBwbGVhcw=="
(base64-encode-string "any carnal pleas"))
(test "any carnal pleas"
(base64-decode-string "YW55IGNhcm5hbCBwbGVhcw=="))
(test "any carnal pleasu"
(base64-decode-string "YW55IGNhcm5hbCBwbGVhc3U="))
(test "any carnal pleasur"
(base64-decode-string "YW55IGNhcm5hbCBwbGVhc3Vy"))
(test "any carnal pleas"
(base64-decode-string "YW55IGNhcm5hbCBwbGVhcw"))
(test "any carnal pleasu"
(base64-decode-string "YW55IGNhcm5hbCBwbGVhc3U"))
(test "YW55IGNhcm5hbCBwbGVhc3VyZS4="
(call-with-output-string
(lambda (out)
(call-with-input-string "any carnal pleasure."
(lambda (in) (base64-encode in out))))))
(test "any carnal pleasure."
(call-with-output-string
(lambda (out)
(call-with-input-string "YW55IGNhcm5hbCBwbGVhc3VyZS4="
(lambda (in) (base64-decode in out))))))
(test-end)