diff --git a/eval.c b/eval.c index 23845b51..f426ed95 100644 --- a/eval.c +++ b/eval.c @@ -1419,20 +1419,20 @@ sexp sexp_string_cmp_op (sexp ctx, sexp self, sexp_sint_t n, sexp str1, sexp str #if SEXP_USE_UTF8_STRINGS -static int sexp_utf8_initial_byte_count (int c) { +int sexp_utf8_initial_byte_count (int c) { if (c < 0xC0) return 1; if (c < 0xE0) return 2; return ((c>>4)&1)+3; } -static int sexp_utf8_char_byte_count (int c) { +int sexp_utf8_char_byte_count (int c) { if (c < 0x80) return 1; if (c < 0x800) return 2; if (c < 0x10000) return 3; return 4; } -static int sexp_string_utf8_length (unsigned char *p, int len) { +int sexp_string_utf8_length (unsigned char *p, int len) { unsigned char *q = p+len; int i; for (i=0; p>6 == 2) ; return (char*)p; diff --git a/include/chibi/eval.h b/include/chibi/eval.h index 158c1090..afe6c5f6 100644 --- a/include/chibi/eval.h +++ b/include/chibi/eval.h @@ -90,6 +90,13 @@ SEXP_API sexp sexp_define_foreign_aux (sexp ctx, sexp env, const char *name, int #if SEXP_USE_GREEN_THREADS SEXP_API sexp sexp_dk (sexp ctx, sexp self, sexp_uint_t n, sexp val); #endif +#if SEXP_USE_UTF8_STRINGS +SEXP_API int sexp_utf8_initial_byte_count (int c); +SEXP_API int sexp_utf8_char_byte_count (int c); +SEXP_API int sexp_string_utf8_length (unsigned char *p, int len); +SEXP_API char* sexp_string_utf8_prev (unsigned char *p); +SEXP_API sexp sexp_string_utf8_ref (sexp ctx, sexp str, sexp i); +#endif #if SEXP_USE_NATIVE_X86 SEXP_API sexp sexp_write_char_op(sexp ctx, sexp self, sexp_sint_t n, sexp ch, sexp out); diff --git a/lib/chibi/io.sld b/lib/chibi/io.sld index 42322da7..d8fc87eb 100644 --- a/lib/chibi/io.sld +++ b/lib/chibi/io.sld @@ -7,7 +7,7 @@ make-custom-input-port make-custom-output-port make-null-output-port make-broadcast-port make-concatenated-port make-generated-input-port make-filtered-output-port - make-filtered-input-port + make-filtered-input-port string-count open-input-bytevector open-output-bytevector get-output-bytevector string->utf8 utf8->string write-u8 read-u8 peek-u8) diff --git a/lib/chibi/io/io.scm b/lib/chibi/io/io.scm index 94b351eb..ddb5f083 100644 --- a/lib/chibi/io/io.scm +++ b/lib/chibi/io/io.scm @@ -14,13 +14,6 @@ ((>= i to)) (string-set! dst j (string-ref src i)))) -(define (string-count ch str . o) - (let ((start (if (pair? o) (car o) 0)) - (end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str)))) - (do ((i start (+ i 1)) - (c 0 (if (eqv? ch (string-ref str i)) (+ c 1) c))) - ((>= i end) c)))) - (define (utf8->string vec) (string-copy (utf8->string! vec))) @@ -74,7 +67,7 @@ ((if (pair? res) (= 0 (car res)) #t) eof) (else - (port-line-set! in (+ (string-count #\newline (cadr res)) + (port-line-set! in (+ (string-count #\newline (cadr res) 0) (port-line in))) (cadr res))))))) diff --git a/lib/chibi/io/io.stub b/lib/chibi/io/io.stub index f7cafddd..307dd0b3 100644 --- a/lib/chibi/io/io.stub +++ b/lib/chibi/io/io.stub @@ -3,7 +3,7 @@ ((result (array char arg1)) int (default (current-input-port) input-port))) (define-c size_t (%read-string "fread") - ((result (array char arg2)) (value 1 size_t) size_t (default (current-input-port) input-port))) + ((result (array char (result arg2))) (value 1 size_t) size_t (default (current-input-port) input-port))) (define-c size_t (%read-string! "fread") (string (value 1 size_t) size_t (default (current-input-port) input-port))) @@ -33,6 +33,8 @@ (define-c sexp (get-output-bytevector "sexp_get_output_bytevector") ((value ctx sexp) (value self sexp) sexp)) +(define-c sexp (string-count "sexp_string_count") + ((value ctx sexp) (value self sexp) sexp sexp sexp (default NULL sexp))) (define-c sexp (string->utf8 "sexp_string_to_utf8") ((value ctx sexp) (value self sexp) sexp)) (define-c sexp (utf8->string! "sexp_utf8_to_string_x") diff --git a/lib/chibi/io/port.c b/lib/chibi/io/port.c index 58f5c539..1d5b33b6 100644 --- a/lib/chibi/io/port.c +++ b/lib/chibi/io/port.c @@ -248,6 +248,39 @@ sexp sexp_get_output_bytevector (sexp ctx, sexp self, sexp port) { return res; } +sexp sexp_string_count (sexp ctx, sexp self, sexp ch, sexp str, sexp start, sexp end) { + const unsigned char *s, *e; + sexp_sint_t c, count = 0; +#if SEXP_USE_UTF8_STRINGS + sexp_sint_t i; +#endif + sexp_assert_type(ctx, sexp_charp, SEXP_CHAR, ch); + sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str); + sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, start); + if (sexp_not(end)) end = sexp_make_fixnum(sexp_string_length(str)); + else sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, end); + c = sexp_unbox_character(ch); +#if SEXP_USE_UTF8_STRINGS + if (c < 128) { +#endif + s = (unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(start); + e = (unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(end); + if (e > (unsigned char*)sexp_string_data(str) + sexp_string_length(str)) + return sexp_user_exception(ctx, self, "string-count: end index out of range", end); + /* fast case for ASCII chars */ + while (s < e) if (*s++ == c) count++; +#if SEXP_USE_UTF8_STRINGS + } else { + /* decode utf8 chars */ + s = (unsigned char*)sexp_string_data(str); + for (i = sexp_unbox_fixnum(start); i < sexp_unbox_fixnum(end); + i += sexp_utf8_initial_byte_count(s[i])) + if (sexp_string_utf8_ref(ctx, str, sexp_make_fixnum(i)) == ch) count++; + } +#endif + return sexp_make_fixnum(count); +} + sexp sexp_string_to_utf8 (sexp ctx, sexp self, sexp str) { sexp res; sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str); diff --git a/tools/chibi-ffi b/tools/chibi-ffi index ec917425..571f69f0 100755 --- a/tools/chibi-ffi +++ b/tools/chibi-ffi @@ -456,6 +456,9 @@ (else (type-id-value type)))) +(define (c-array-length type) + (or (get-array-length #f type) "-1")) + (define (c->scheme-converter type val . o) (let ((base (type-base type))) (cond @@ -475,14 +478,15 @@ (cat "sexp_make_flonum(ctx, " val ")")) ((eq? base 'char) (if (type-array type) - (cat "sexp_c_string(ctx, " val ", -1)") + (cat "sexp_c_string(ctx, " val ", " (c-array-length type) ")") (cat "sexp_make_character(ctx, " val ")"))) ((eq? 'env-string base) (cat "(p=strchr(" val ", '=') ? " - "sexp_cons(ctx, str=sexp_c_string(ctx, " val ", p - " val "), str=sexp_c_string(ctx, p, -1))" + "sexp_cons(ctx, str=sexp_c_string(ctx, " val + ", p - " val "), str=sexp_c_string(ctx, p, -1))" " : sexp_cons(ctx, str=" val ", SEXP_FALSE)")) ((string-type? base) - (cat "sexp_c_string(ctx, " val ", -1)")) + (cat "sexp_c_string(ctx, " val ", " (c-array-length type) ")")) ((eq? 'input-port base) (cat "sexp_make_non_null_input_port(ctx, " val ", SEXP_FALSE)")) ((eq? 'output-port base) @@ -641,17 +645,22 @@ (let ((len (if (pair? (type-array x)) (car (reverse (type-array x))) (type-array x)))) - (if (number? len) - len - (and (symbol? len) - (let* ((str (symbol->string len)) - (len2 (string-length str))) - (and (> len2 3) - (string=? "arg" (substring str 0 3)) - (let ((i (string->number (substring str 3 len2)))) - (if i - (let ((y (list-ref (func-c-args func) i))) - (or (type-value y) len)))))))))) + (cond + ((number? len) + len) + ((memq 'result (type-array x)) + "sexp_unbox_fixnum(res)") + (else + (and func + (symbol? len) + (let* ((str (symbol->string len)) + (len2 (string-length str))) + (and (> len2 3) + (string=? "arg" (substring str 0 3)) + (let ((i (string->number (substring str 3 len2)))) + (if i + (let ((y (list-ref (func-c-args func) i))) + (or (type-value y) len))))))))))) (define (write-locals func) (define (arg-res x)