From f425126a11c7a7e018c9d4ba7631f63276150bd3 Mon Sep 17 00:00:00 2001
From: Alex Shinn <ashinn@users.noreply.github.com>
Date: Fri, 21 Mar 2014 17:38:49 +0900
Subject: [PATCH] Adding bytevector support to base64 lib.

---
 lib/chibi/base64.scm   | 279 ++++++++++++++++++++++-------------------
 lib/chibi/base64.sld   |   7 +-
 tests/base64-tests.scm |  40 ++++++
 3 files changed, 193 insertions(+), 133 deletions(-)
 create mode 100644 tests/base64-tests.scm

diff --git a/lib/chibi/base64.scm b/lib/chibi/base64.scm
index 142bb653..0d4b10e8 100644
--- a/lib/chibi/base64.scm
+++ b/lib/chibi/base64.scm
@@ -45,24 +45,24 @@
     (vector-set! res (char->integer #\=) *pad-char*)
     res))
 
-(define (base64-decode-char c)
-  (vector-ref *base64-decode-table* (char->integer c)))
+(define (base64-decode-u8 u8)
+  (vector-ref *base64-decode-table* u8))
 
 (define *base64-encode-table*
   (let ((res (make-vector 64)))
     (let lp ((i 0)) ; map letters
       (cond
        ((<= i 25)
-        (vector-set! res i (integer->char (+ i 65)))
-        (vector-set! res (+ i 26) (integer->char (+ i 97)))
+        (vector-set! res i (+ i 65))
+        (vector-set! res (+ i 26) (+ i 97))
         (lp (+ i 1)))))
     (let lp ((i 0)) ; map numbers
       (cond
        ((<= i 9)
-        (vector-set! res (+ i 52) (integer->char (+ i 48)))
+        (vector-set! res (+ i 52) (+ i 48))
         (lp (+ i 1)))))
-    (vector-set! res 62 #\+)
-    (vector-set! res 63 #\/)
+    (vector-set! res 62 (char->integer #\+))
+    (vector-set! res 63 (char->integer #\/))
     res))
 
 (define (enc i)
@@ -90,17 +90,21 @@
 ;; input, and pass it to the internal base64-decode-string! utility.
 ;; If the resulting length used is exact, we can return that buffer,
 ;; otherwise we return the appropriate substring.
-(define (base64-decode-string src)
-  (let* ((len (string-length src))
+
+(define (base64-decode-string str)
+  (utf8->string (base64-decode-bytevector (string->utf8 str))))
+
+(define (base64-decode-bytevector src)
+  (let* ((len (bytevector-length src))
          (dst-len (* 3 (arithmetic-shift (+ 3 len) -2)))
-         (dst (make-string dst-len)))
-    (base64-decode-string!
+         (dst (make-bytevector dst-len)))
+    (base64-decode-bytevector!
      src 0 len dst
      (lambda (src-offset res-len b1 b2 b3)
        (let ((res-len (base64-decode-finish dst res-len b1 b2 b3)))
          (if (= res-len dst-len)
              dst
-             (substring dst 0 res-len)))))))
+             (bytevector-copy dst 0 res-len)))))))
 
 ;; This is a little funky.
 ;;
@@ -112,7 +116,7 @@
 ;;   really bad about optimizing nested loops of primitives, so we
 ;;   flatten this into a single loop, using conditionals to determine
 ;;   which character is currently being read.
-(define (base64-decode-string! src start end dst kont)
+(define (base64-decode-bytevector! src start end dst kont)
   (let lp ((i start)
            (j 0)
            (b1 *outside-char*)
@@ -120,7 +124,7 @@
            (b3 *outside-char*))
     (if (>= i end)
         (kont i j b1 b2 b3)
-        (let ((c (base64-decode-char (string-ref src i))))
+        (let ((c (base64-decode-u8 (bytevector-u8-ref src i))))
           (cond
            ((eqv? c *pad-char*)
             (kont i j b1 b2 b3))
@@ -133,23 +137,23 @@
            ((eqv? b3 *outside-char*)
             (lp (+ i 1) j b1 b2 c))
            (else
-            (string-set! dst
-                         j
-                         (integer->char
-                          (bitwise-ior (arithmetic-shift b1 2)
-                                       (extract-bit-field 2 4 b2))))
-            (string-set! dst
-                         (+ j 1)
-                         (integer->char
-                          (bitwise-ior
-                           (arithmetic-shift (extract-bit-field 4 0 b2) 4)
-                           (extract-bit-field 4 2 b3))))
-            (string-set! dst
-                         (+ j 2)
-                         (integer->char
-                          (bitwise-ior
-                           (arithmetic-shift (extract-bit-field 2 0 b3) 6)
-                           c)))
+            (bytevector-u8-set!
+             dst
+             j
+             (bitwise-ior (arithmetic-shift b1 2)
+                          (extract-bit-field 2 4 b2)))
+            (bytevector-u8-set!
+             dst
+             (+ j 1)
+             (bitwise-ior
+              (arithmetic-shift (extract-bit-field 4 0 b2) 4)
+              (extract-bit-field 4 2 b3)))
+            (bytevector-u8-set!
+             dst
+             (+ j 2)
+             (bitwise-ior
+              (arithmetic-shift (extract-bit-field 2 0 b3) 6)
+              c))
             (lp (+ i 1) (+ j 3)
                 *outside-char* *outside-char* *outside-char*)))))))
 
@@ -162,24 +166,22 @@
    ((eqv? b1 *outside-char*)
     j)
    ((eqv? b2 *outside-char*)
-    (string-set! dst j (integer->char (arithmetic-shift b1 2)))
+    (bytevector-u8-set! dst j (arithmetic-shift b1 2))
     (+ j 1))
    (else
-    (string-set! dst
-                 j
-                 (integer->char
-                  (bitwise-ior (arithmetic-shift b1 2)
-                               (extract-bit-field 2 4 b2))))
+    (bytevector-u8-set! dst
+                        j
+                        (bitwise-ior (arithmetic-shift b1 2)
+                                     (extract-bit-field 2 4 b2)))
     (cond
      ((eqv? b3 *outside-char*)
       (+ j 1))
      (else
-      (string-set! dst
-                   (+ j 1)
-                   (integer->char
-                    (bitwise-ior
-                     (arithmetic-shift (extract-bit-field 4 0 b2) 4)
-                     (extract-bit-field 4 2 b3))))
+      (bytevector-u8-set! dst
+                          (+ j 1)
+                          (bitwise-ior
+                           (arithmetic-shift (extract-bit-field 4 0 b2) 4)
+                           (extract-bit-field 4 2 b3)))
       (+ j 2))))))
 
 ;;>  Variation of the above to read and write to ports.
@@ -189,48 +191,53 @@
         (out (if (and (pair? o) (pair? (cdr o)))
                  (cadr o)
                  (current-output-port))))
-    (let ((src (make-string decode-src-length))
-          (dst (make-string decode-dst-length)))
-      (let lp ((offset 0))
-        (let ((src-len (+ offset
-                          (read-string! decode-src-length src in offset))))
-          (cond
-           ((= src-len decode-src-length)
-            ;; read a full chunk: decode, write and loop
-            (base64-decode-string!
-             src 0 decode-src-length dst
-             (lambda (src-offset dst-len b1 b2 b3)
-               (cond
-                ((and (< src-offset src-len)
-                      (eqv? #\= (string-ref src src-offset)))
-                 ;; done
-                 (let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3)))
-                   (write-string dst out 0 dst-len)))
-                ((eqv? b1 *outside-char*)
-                 (write-string dst out 0 dst-len)
-                 (lp 0))
-                (else
-                 (write-string dst out 0 dst-len)
-                 ;; one to three chars left in buffer
-                 (string-set! src 0 (enc b1))
+    (cond
+     ((not (binary-port? in))
+      (write-string (base64-decode-string (port->string in)) out))
+     (else
+      (let ((src (make-bytevector decode-src-length))
+            (dst (make-bytevector decode-dst-length)))
+        (let lp ((offset 0))
+          (let ((src-len
+                 (+ offset
+                    (read-bytevector! decode-src-length src in offset))))
+            (cond
+             ((= src-len decode-src-length)
+              ;; read a full chunk: decode, write and loop
+              (base64-decode-bytevector!
+               src 0 decode-src-length dst
+               (lambda (src-offset dst-len b1 b2 b3)
                  (cond
-                  ((eqv? b2 *outside-char*)
-                   (lp 1))
+                  ((and (< src-offset src-len)
+                        (eqv? #\= (string-ref src src-offset)))
+                   ;; done
+                   (let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3)))
+                     (write-bytevector dst out 0 dst-len)))
+                  ((eqv? b1 *outside-char*)
+                   (write-string dst out 0 dst-len)
+                   (lp 0))
                   (else
-                   (string-set! src 1 (enc b2))
+                   (write-bytevector dst out 0 dst-len)
+                   ;; one to three chars left in buffer
+                   (bytevector-u8-set! src 0 (enc b1))
                    (cond
-                    ((eqv? b3 *outside-char*)
-                     (lp 2))
+                    ((eqv? b2 *outside-char*)
+                     (lp 1))
                     (else
-                     (string-set! src 2 (enc b3))
-                     (lp 3))))))))))
-           (else
-            ;; end of source - just decode and write once
-            (base64-decode-string!
-             src 0 src-len dst
-             (lambda (src-offset dst-len b1 b2 b3)
-               (let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3)))
-                 (write-string dst out 0 dst-len)))))))))))
+                     (bytevector-u8-set! src 1 (enc b2))
+                     (cond
+                      ((eqv? b3 *outside-char*)
+                       (lp 2))
+                      (else
+                       (bytevector-u8-set! src 2 (enc b3))
+                       (lp 3))))))))))
+             (else
+              ;; end of source - just decode and write once
+              (base64-decode-bytevector!
+               src 0 src-len dst
+               (lambda (src-offset dst-len b1 b2 b3)
+                 (let ((dst-len (base64-decode-finish dst dst-len b1 b2 b3)))
+                   (write-string dst out 0 dst-len)))))))))))))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; encoding
@@ -239,57 +246,65 @@
 ;;> official base64 standard as described in RFC3548.
 
 (define (base64-encode-string str)
-  (let* ((len (string-length str))
+  (utf8->string (base64-encode-bytevector (string->utf8 str))))
+
+(define (base64-encode-bytevector bv)
+  (let* ((len (bytevector-length bv))
          (quot (quotient len 3))
          (rem (- len (* quot 3)))
          (res-len (arithmetic-shift (+ quot (if (zero? rem) 0 1)) 2))
-         (res (make-string res-len)))
-    (base64-encode-string! str 0 len res)
+         (res (make-bytevector res-len)))
+    (base64-encode-bytevector! bv 0 len res)
     res))
 
-(define (base64-encode-string! str start end res)
-  (let* ((res-len (string-length res))
+(define (base64-encode-bytevector! bv start end res)
+  (let* ((res-len (bytevector-length res))
          (limit (- end 2)))
     (let lp ((i start) (j 0))
       (if (>= i limit)
           (case (- end i)
             ((1)
-             (let ((b1 (char->integer (string-ref str i))))
-               (string-set! res j (enc (arithmetic-shift b1 -2)))
-               (string-set! res
-                            (+ j 1)
-                            (enc (arithmetic-shift (bitwise-and #b11 b1) 4)))
-               (string-set! res (+ j 2) #\=)
-               (string-set! res (+ j 3) #\=)))
+             (let ((b1 (bytevector-u8-ref bv i)))
+               (bytevector-u8-set! res j (enc (arithmetic-shift b1 -2)))
+               (bytevector-u8-set!
+                res
+                (+ j 1)
+                (enc (arithmetic-shift (bitwise-and #b11 b1) 4)))
+               (bytevector-u8-set! res (+ j 2) (char->integer #\=))
+               (bytevector-u8-set! res (+ j 3) (char->integer #\=))))
             ((2)
-             (let ((b1 (char->integer (string-ref str i)))
-                   (b2 (char->integer (string-ref str (+ i 1)))))
-               (string-set! res j (enc (arithmetic-shift b1 -2)))
-               (string-set! res
-                            (+ j 1)
-                            (enc (bitwise-ior
-                                  (arithmetic-shift (bitwise-and #b11 b1) 4)
-                                  (extract-bit-field 4 4 b2))))
-               (string-set! res
-                            (+ j 2)
-                            (enc (arithmetic-shift (extract-bit-field 4 0 b2)
-                                                   2)))
-               (string-set! res (+ j 3) #\=))))
-          (let ((b1 (char->integer (string-ref str i)))
-                (b2 (char->integer (string-ref str (+ i 1))))
-                (b3 (char->integer (string-ref str (+ i 2)))))
-            (string-set! res j (enc (arithmetic-shift b1 -2)))
-            (string-set! res
-                         (+ j 1)
-                         (enc (bitwise-ior
-                               (arithmetic-shift (bitwise-and #b11 b1) 4)
-                               (extract-bit-field 4 4 b2))))
-            (string-set! res
-                         (+ j 2)
-                         (enc (bitwise-ior
-                               (arithmetic-shift (extract-bit-field 4 0 b2) 2)
-                               (extract-bit-field 2 6 b3))))
-            (string-set! res (+ j 3) (enc (bitwise-and #b111111 b3)))
+             (let ((b1 (bytevector-u8-ref bv i))
+                   (b2 (bytevector-u8-ref bv (+ i 1))))
+               (bytevector-u8-set! res j (enc (arithmetic-shift b1 -2)))
+               (bytevector-u8-set!
+                res
+                (+ j 1)
+                (enc (bitwise-ior
+                      (arithmetic-shift (bitwise-and #b11 b1) 4)
+                      (extract-bit-field 4 4 b2))))
+               (bytevector-u8-set!
+                res
+                (+ j 2)
+                (enc (arithmetic-shift (extract-bit-field 4 0 b2)
+                                       2)))
+               (bytevector-u8-set! res (+ j 3) (char->integer #\=)))))
+          (let ((b1 (bytevector-u8-ref bv i))
+                (b2 (bytevector-u8-ref bv (+ i 1)))
+                (b3 (bytevector-u8-ref bv (+ i 2))))
+            (bytevector-u8-set! res j (enc (arithmetic-shift b1 -2)))
+            (bytevector-u8-set!
+             res
+             (+ j 1)
+             (enc (bitwise-ior
+                   (arithmetic-shift (bitwise-and #b11 b1) 4)
+                   (extract-bit-field 4 4 b2))))
+            (bytevector-u8-set!
+             res
+             (+ j 2)
+             (enc (bitwise-ior
+                   (arithmetic-shift (extract-bit-field 4 0 b2) 2)
+                   (extract-bit-field 2 6 b3))))
+            (bytevector-u8-set! res (+ j 3) (enc (bitwise-and #b111111 b3)))
             (lp (+ i 3) (+ j 4)))))))
 
 ;;>  Variation of the above to read and write to ports.
@@ -299,15 +314,19 @@
         (out (if (and (pair? o) (pair? (cdr o)))
                  (cadr o)
                  (current-output-port))))
-    (let ((src (make-string encode-src-length))
-          (dst (make-string
-                (arithmetic-shift (quotient encode-src-length 3) 2))))
-      (let lp ()
-        (let ((n (read-string! 2048 src in)))
-          (base64-encode-string! src 0 n dst)
-          (write-string dst out 0 (* 3 (quotient (+ n 3) 4)))
-          (if (= n 2048)
-              (lp)))))))
+    (cond
+     ((not (binary-port? in))
+      (write-string (base64-encode-string (port->string in)) out))
+     (else
+      (let ((src (make-string encode-src-length))
+            (dst (make-string
+                  (arithmetic-shift (quotient encode-src-length 3) 2))))
+        (let lp ()
+          (let ((n (read-bytevector! src in 0 2048)))
+            (base64-encode-bytevector! src 0 n dst)
+            (write-bytevector dst out 0 (* 3 (quotient (+ n 3) 4)))
+            (if (= n 2048)
+                (lp)))))))))
 
 ;;> Return a base64 encoded representation of the string \var{str} as
 ;;> above, wrapped in =?ENC?B?...?= as per RFC1522, split across
diff --git a/lib/chibi/base64.sld b/lib/chibi/base64.sld
index fe43b202..a55f3bd5 100644
--- a/lib/chibi/base64.sld
+++ b/lib/chibi/base64.sld
@@ -1,7 +1,8 @@
 
 (define-library (chibi base64)
-  (export base64-encode base64-encode-string
-          base64-decode base64-decode-string
+  (export base64-encode base64-encode-string base64-encode-bytevector
+          base64-decode base64-decode-string base64-decode-bytevector
           base64-encode-header)
-  (import (chibi) (srfi 33) (chibi io))
+  (import (scheme base) (srfi 33) (chibi io)
+          (only (chibi) string-concatenate))
   (include "base64.scm"))
diff --git a/tests/base64-tests.scm b/tests/base64-tests.scm
new file mode 100644
index 00000000..2ecb461f
--- /dev/null
+++ b/tests/base64-tests.scm
@@ -0,0 +1,40 @@
+
+(import (chibi) (chibi base64) (chibi test))
+
+(test-begin "base64")
+
+(test "YW55IGNhcm5hbCBwbGVhc3VyZS4="
+    (base64-encode-string "any carnal pleasure."))
+(test "YW55IGNhcm5hbCBwbGVhc3VyZQ=="
+    (base64-encode-string "any carnal pleasure"))
+(test "YW55IGNhcm5hbCBwbGVhc3Vy"
+    (base64-encode-string "any carnal pleasur"))
+(test "YW55IGNhcm5hbCBwbGVhc3U="
+    (base64-encode-string "any carnal pleasu"))
+(test "YW55IGNhcm5hbCBwbGVhcw=="
+    (base64-encode-string "any carnal pleas"))
+
+(test "any carnal pleas"
+    (base64-decode-string "YW55IGNhcm5hbCBwbGVhcw=="))
+(test "any carnal pleasu"
+    (base64-decode-string "YW55IGNhcm5hbCBwbGVhc3U="))
+(test "any carnal pleasur"
+    (base64-decode-string "YW55IGNhcm5hbCBwbGVhc3Vy"))
+(test "any carnal pleas"
+    (base64-decode-string "YW55IGNhcm5hbCBwbGVhcw"))
+(test "any carnal pleasu"
+    (base64-decode-string "YW55IGNhcm5hbCBwbGVhc3U"))
+
+(test "YW55IGNhcm5hbCBwbGVhc3VyZS4="
+    (call-with-output-string
+      (lambda (out)
+        (call-with-input-string "any carnal pleasure."
+          (lambda (in) (base64-encode in out))))))
+
+(test "any carnal pleasure."
+    (call-with-output-string
+      (lambda (out)
+        (call-with-input-string "YW55IGNhcm5hbCBwbGVhc3VyZS4="
+          (lambda (in) (base64-decode in out))))))
+
+(test-end)