From e608bec8665bae8b6876ca1355e7b83b7f19a3b3 Mon Sep 17 00:00:00 2001
From: Alex Shinn <ashinn@users.noreply.github.com>
Date: Sat, 26 Nov 2011 13:04:51 +0900
Subject: [PATCH] using result length from fread; string-count optimized and
 safe for invalid strings.

---
 eval.c               |  8 ++++----
 include/chibi/eval.h |  7 +++++++
 lib/chibi/io.sld     |  2 +-
 lib/chibi/io/io.scm  |  9 +--------
 lib/chibi/io/io.stub |  4 +++-
 lib/chibi/io/port.c  | 33 +++++++++++++++++++++++++++++++++
 tools/chibi-ffi      | 37 +++++++++++++++++++++++--------------
 7 files changed, 72 insertions(+), 28 deletions(-)

diff --git a/eval.c b/eval.c
index 23845b51..f426ed95 100644
--- a/eval.c
+++ b/eval.c
@@ -1419,20 +1419,20 @@ sexp sexp_string_cmp_op (sexp ctx, sexp self, sexp_sint_t n, sexp str1, sexp str
 
 #if SEXP_USE_UTF8_STRINGS
 
-static int sexp_utf8_initial_byte_count (int c) {
+int sexp_utf8_initial_byte_count (int c) {
   if (c < 0xC0) return 1;
   if (c < 0xE0) return 2;
   return ((c>>4)&1)+3;
 }
 
-static int sexp_utf8_char_byte_count (int c) {
+int sexp_utf8_char_byte_count (int c) {
   if (c < 0x80) return 1;
   if (c < 0x800) return 2;
   if (c < 0x10000) return 3;
   return 4;
 }
 
-static int sexp_string_utf8_length (unsigned char *p, int len) {
+int sexp_string_utf8_length (unsigned char *p, int len) {
   unsigned char *q = p+len;
   int i;
   for (i=0; p<q; i++)
@@ -1440,7 +1440,7 @@ static int sexp_string_utf8_length (unsigned char *p, int len) {
   return i;
 }
 
-static char* sexp_string_utf8_prev (unsigned char *p) {
+char* sexp_string_utf8_prev (unsigned char *p) {
   while ((*--p)>>6 == 2)
     ;
   return (char*)p;
diff --git a/include/chibi/eval.h b/include/chibi/eval.h
index 158c1090..afe6c5f6 100644
--- a/include/chibi/eval.h
+++ b/include/chibi/eval.h
@@ -90,6 +90,13 @@ SEXP_API sexp sexp_define_foreign_aux (sexp ctx, sexp env, const char *name, int
 #if SEXP_USE_GREEN_THREADS
 SEXP_API sexp sexp_dk (sexp ctx, sexp self, sexp_uint_t n, sexp val);
 #endif
+#if SEXP_USE_UTF8_STRINGS
+SEXP_API int sexp_utf8_initial_byte_count (int c);
+SEXP_API int sexp_utf8_char_byte_count (int c);
+SEXP_API int sexp_string_utf8_length (unsigned char *p, int len);
+SEXP_API char* sexp_string_utf8_prev (unsigned char *p);
+SEXP_API sexp sexp_string_utf8_ref (sexp ctx, sexp str, sexp i);
+#endif
 
 #if SEXP_USE_NATIVE_X86
 SEXP_API sexp sexp_write_char_op(sexp ctx, sexp self, sexp_sint_t n, sexp ch, sexp out);
diff --git a/lib/chibi/io.sld b/lib/chibi/io.sld
index 42322da7..d8fc87eb 100644
--- a/lib/chibi/io.sld
+++ b/lib/chibi/io.sld
@@ -7,7 +7,7 @@
           make-custom-input-port make-custom-output-port
           make-null-output-port make-broadcast-port make-concatenated-port
           make-generated-input-port make-filtered-output-port
-          make-filtered-input-port
+          make-filtered-input-port string-count
           open-input-bytevector open-output-bytevector get-output-bytevector
           string->utf8 utf8->string
           write-u8 read-u8 peek-u8)
diff --git a/lib/chibi/io/io.scm b/lib/chibi/io/io.scm
index 94b351eb..ddb5f083 100644
--- a/lib/chibi/io/io.scm
+++ b/lib/chibi/io/io.scm
@@ -14,13 +14,6 @@
       ((>= i to))
     (string-set! dst j (string-ref src i))))
 
-(define (string-count ch str . o)
-  (let ((start (if (pair? o) (car o) 0))
-        (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
-    (do ((i start (+ i 1))
-         (c 0 (if (eqv? ch (string-ref str i)) (+ c 1) c)))
-        ((>= i end) c))))
-
 (define (utf8->string vec)
   (string-copy (utf8->string! vec)))
 
@@ -74,7 +67,7 @@
            ((if (pair? res) (= 0 (car res)) #t)
             eof)
            (else
-            (port-line-set! in (+ (string-count #\newline (cadr res))
+            (port-line-set! in (+ (string-count #\newline (cadr res) 0)
                                   (port-line in)))
             (cadr res)))))))
 
diff --git a/lib/chibi/io/io.stub b/lib/chibi/io/io.stub
index f7cafddd..307dd0b3 100644
--- a/lib/chibi/io/io.stub
+++ b/lib/chibi/io/io.stub
@@ -3,7 +3,7 @@
   ((result (array char arg1)) int (default (current-input-port) input-port)))
 
 (define-c size_t (%read-string "fread")
-  ((result (array char arg2)) (value 1 size_t) size_t (default (current-input-port) input-port)))
+  ((result (array char (result arg2))) (value 1 size_t) size_t (default (current-input-port) input-port)))
 
 (define-c size_t (%read-string! "fread")
   (string (value 1 size_t) size_t (default (current-input-port) input-port)))
@@ -33,6 +33,8 @@
 (define-c sexp (get-output-bytevector "sexp_get_output_bytevector")
   ((value ctx sexp) (value self sexp) sexp))
 
+(define-c sexp (string-count "sexp_string_count")
+  ((value ctx sexp) (value self sexp) sexp sexp sexp (default NULL sexp)))
 (define-c sexp (string->utf8 "sexp_string_to_utf8")
   ((value ctx sexp) (value self sexp) sexp))
 (define-c sexp (utf8->string! "sexp_utf8_to_string_x")
diff --git a/lib/chibi/io/port.c b/lib/chibi/io/port.c
index 58f5c539..1d5b33b6 100644
--- a/lib/chibi/io/port.c
+++ b/lib/chibi/io/port.c
@@ -248,6 +248,39 @@ sexp sexp_get_output_bytevector (sexp ctx, sexp self, sexp port) {
   return res;
 }
 
+sexp sexp_string_count (sexp ctx, sexp self, sexp ch, sexp str, sexp start, sexp end) {
+  const unsigned char *s, *e;
+  sexp_sint_t c, count = 0;
+#if SEXP_USE_UTF8_STRINGS
+  sexp_sint_t i;
+#endif
+  sexp_assert_type(ctx, sexp_charp, SEXP_CHAR, ch);
+  sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
+  sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, start);
+  if (sexp_not(end)) end = sexp_make_fixnum(sexp_string_length(str));
+  else sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, end);
+  c = sexp_unbox_character(ch);
+#if SEXP_USE_UTF8_STRINGS
+  if (c < 128) {
+#endif
+    s = (unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(start);
+    e = (unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(end);
+    if (e > (unsigned char*)sexp_string_data(str) + sexp_string_length(str))
+      return sexp_user_exception(ctx, self, "string-count: end index out of range", end);
+    /* fast case for ASCII chars */
+    while (s < e) if (*s++ == c) count++;
+#if SEXP_USE_UTF8_STRINGS
+  } else {
+    /* decode utf8 chars */
+    s = (unsigned char*)sexp_string_data(str);
+    for (i = sexp_unbox_fixnum(start); i < sexp_unbox_fixnum(end);
+         i += sexp_utf8_initial_byte_count(s[i]))
+      if (sexp_string_utf8_ref(ctx, str, sexp_make_fixnum(i)) == ch) count++;
+  }
+#endif
+  return sexp_make_fixnum(count);
+}
+
 sexp sexp_string_to_utf8 (sexp ctx, sexp self, sexp str) {
   sexp res;
   sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
diff --git a/tools/chibi-ffi b/tools/chibi-ffi
index ec917425..571f69f0 100755
--- a/tools/chibi-ffi
+++ b/tools/chibi-ffi
@@ -456,6 +456,9 @@
    (else
     (type-id-value type))))
 
+(define (c-array-length type)
+  (or (get-array-length #f type) "-1"))
+
 (define (c->scheme-converter type val . o)
   (let ((base (type-base type)))
     (cond
@@ -475,14 +478,15 @@
       (cat "sexp_make_flonum(ctx, " val ")"))
      ((eq? base 'char)
       (if (type-array type)
-          (cat "sexp_c_string(ctx, " val ", -1)")
+          (cat "sexp_c_string(ctx, " val ", " (c-array-length type) ")")
           (cat "sexp_make_character(ctx, " val ")")))
      ((eq? 'env-string base)
       (cat "(p=strchr(" val ", '=') ? "
-           "sexp_cons(ctx, str=sexp_c_string(ctx, " val ", p - " val "), str=sexp_c_string(ctx, p, -1))"
+           "sexp_cons(ctx, str=sexp_c_string(ctx, " val
+           ", p - " val "), str=sexp_c_string(ctx, p, -1))"
            " : sexp_cons(ctx, str=" val ", SEXP_FALSE)"))
      ((string-type? base)
-      (cat "sexp_c_string(ctx, " val ", -1)"))
+      (cat "sexp_c_string(ctx, " val ", " (c-array-length type) ")"))
      ((eq? 'input-port base)
       (cat "sexp_make_non_null_input_port(ctx, " val ", SEXP_FALSE)"))
      ((eq? 'output-port base)
@@ -641,17 +645,22 @@
   (let ((len (if (pair? (type-array x))
                  (car (reverse (type-array x)))
                  (type-array x))))
-    (if (number? len)
-        len
-        (and (symbol? len)
-             (let* ((str (symbol->string len))
-                    (len2 (string-length str)))
-               (and (> len2 3)
-                    (string=? "arg" (substring str 0 3))
-                    (let ((i (string->number (substring str 3 len2))))
-                      (if i
-                          (let ((y (list-ref (func-c-args func) i)))
-                            (or (type-value y) len))))))))))
+    (cond
+     ((number? len)
+      len)
+     ((memq 'result (type-array x))
+      "sexp_unbox_fixnum(res)")
+     (else
+      (and func
+           (symbol? len)
+           (let* ((str (symbol->string len))
+                  (len2 (string-length str)))
+             (and (> len2 3)
+                  (string=? "arg" (substring str 0 3))
+                  (let ((i (string->number (substring str 3 len2))))
+                    (if i
+                        (let ((y (list-ref (func-c-args func) i)))
+                          (or (type-value y) len)))))))))))
 
 (define (write-locals func)
   (define (arg-res x)