using result length from fread; string-count optimized and safe for invalid strings.

This commit is contained in:
Alex Shinn 2011-11-26 13:04:51 +09:00
parent 1bc31ff9c1
commit e608bec866
7 changed files with 72 additions and 28 deletions

8
eval.c
View file

@ -1419,20 +1419,20 @@ sexp sexp_string_cmp_op (sexp ctx, sexp self, sexp_sint_t n, sexp str1, sexp str
#if SEXP_USE_UTF8_STRINGS
static int sexp_utf8_initial_byte_count (int c) {
int sexp_utf8_initial_byte_count (int c) {
if (c < 0xC0) return 1;
if (c < 0xE0) return 2;
return ((c>>4)&1)+3;
}
static int sexp_utf8_char_byte_count (int c) {
int sexp_utf8_char_byte_count (int c) {
if (c < 0x80) return 1;
if (c < 0x800) return 2;
if (c < 0x10000) return 3;
return 4;
}
static int sexp_string_utf8_length (unsigned char *p, int len) {
int sexp_string_utf8_length (unsigned char *p, int len) {
unsigned char *q = p+len;
int i;
for (i=0; p<q; i++)
@ -1440,7 +1440,7 @@ static int sexp_string_utf8_length (unsigned char *p, int len) {
return i;
}
static char* sexp_string_utf8_prev (unsigned char *p) {
char* sexp_string_utf8_prev (unsigned char *p) {
while ((*--p)>>6 == 2)
;
return (char*)p;

View file

@ -90,6 +90,13 @@ SEXP_API sexp sexp_define_foreign_aux (sexp ctx, sexp env, const char *name, int
#if SEXP_USE_GREEN_THREADS
SEXP_API sexp sexp_dk (sexp ctx, sexp self, sexp_uint_t n, sexp val);
#endif
#if SEXP_USE_UTF8_STRINGS
SEXP_API int sexp_utf8_initial_byte_count (int c);
SEXP_API int sexp_utf8_char_byte_count (int c);
SEXP_API int sexp_string_utf8_length (unsigned char *p, int len);
SEXP_API char* sexp_string_utf8_prev (unsigned char *p);
SEXP_API sexp sexp_string_utf8_ref (sexp ctx, sexp str, sexp i);
#endif
#if SEXP_USE_NATIVE_X86
SEXP_API sexp sexp_write_char_op(sexp ctx, sexp self, sexp_sint_t n, sexp ch, sexp out);

View file

@ -7,7 +7,7 @@
make-custom-input-port make-custom-output-port
make-null-output-port make-broadcast-port make-concatenated-port
make-generated-input-port make-filtered-output-port
make-filtered-input-port
make-filtered-input-port string-count
open-input-bytevector open-output-bytevector get-output-bytevector
string->utf8 utf8->string
write-u8 read-u8 peek-u8)

View file

@ -14,13 +14,6 @@
((>= i to))
(string-set! dst j (string-ref src i))))
(define (string-count ch str . o)
(let ((start (if (pair? o) (car o) 0))
(end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
(do ((i start (+ i 1))
(c 0 (if (eqv? ch (string-ref str i)) (+ c 1) c)))
((>= i end) c))))
(define (utf8->string vec)
(string-copy (utf8->string! vec)))
@ -74,7 +67,7 @@
((if (pair? res) (= 0 (car res)) #t)
eof)
(else
(port-line-set! in (+ (string-count #\newline (cadr res))
(port-line-set! in (+ (string-count #\newline (cadr res) 0)
(port-line in)))
(cadr res)))))))

View file

@ -3,7 +3,7 @@
((result (array char arg1)) int (default (current-input-port) input-port)))
(define-c size_t (%read-string "fread")
((result (array char arg2)) (value 1 size_t) size_t (default (current-input-port) input-port)))
((result (array char (result arg2))) (value 1 size_t) size_t (default (current-input-port) input-port)))
(define-c size_t (%read-string! "fread")
(string (value 1 size_t) size_t (default (current-input-port) input-port)))
@ -33,6 +33,8 @@
(define-c sexp (get-output-bytevector "sexp_get_output_bytevector")
((value ctx sexp) (value self sexp) sexp))
(define-c sexp (string-count "sexp_string_count")
((value ctx sexp) (value self sexp) sexp sexp sexp (default NULL sexp)))
(define-c sexp (string->utf8 "sexp_string_to_utf8")
((value ctx sexp) (value self sexp) sexp))
(define-c sexp (utf8->string! "sexp_utf8_to_string_x")

View file

@ -248,6 +248,39 @@ sexp sexp_get_output_bytevector (sexp ctx, sexp self, sexp port) {
return res;
}
sexp sexp_string_count (sexp ctx, sexp self, sexp ch, sexp str, sexp start, sexp end) {
const unsigned char *s, *e;
sexp_sint_t c, count = 0;
#if SEXP_USE_UTF8_STRINGS
sexp_sint_t i;
#endif
sexp_assert_type(ctx, sexp_charp, SEXP_CHAR, ch);
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, start);
if (sexp_not(end)) end = sexp_make_fixnum(sexp_string_length(str));
else sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, end);
c = sexp_unbox_character(ch);
#if SEXP_USE_UTF8_STRINGS
if (c < 128) {
#endif
s = (unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(start);
e = (unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(end);
if (e > (unsigned char*)sexp_string_data(str) + sexp_string_length(str))
return sexp_user_exception(ctx, self, "string-count: end index out of range", end);
/* fast case for ASCII chars */
while (s < e) if (*s++ == c) count++;
#if SEXP_USE_UTF8_STRINGS
} else {
/* decode utf8 chars */
s = (unsigned char*)sexp_string_data(str);
for (i = sexp_unbox_fixnum(start); i < sexp_unbox_fixnum(end);
i += sexp_utf8_initial_byte_count(s[i]))
if (sexp_string_utf8_ref(ctx, str, sexp_make_fixnum(i)) == ch) count++;
}
#endif
return sexp_make_fixnum(count);
}
sexp sexp_string_to_utf8 (sexp ctx, sexp self, sexp str) {
sexp res;
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);

View file

@ -456,6 +456,9 @@
(else
(type-id-value type))))
(define (c-array-length type)
(or (get-array-length #f type) "-1"))
(define (c->scheme-converter type val . o)
(let ((base (type-base type)))
(cond
@ -475,14 +478,15 @@
(cat "sexp_make_flonum(ctx, " val ")"))
((eq? base 'char)
(if (type-array type)
(cat "sexp_c_string(ctx, " val ", -1)")
(cat "sexp_c_string(ctx, " val ", " (c-array-length type) ")")
(cat "sexp_make_character(ctx, " val ")")))
((eq? 'env-string base)
(cat "(p=strchr(" val ", '=') ? "
"sexp_cons(ctx, str=sexp_c_string(ctx, " val ", p - " val "), str=sexp_c_string(ctx, p, -1))"
"sexp_cons(ctx, str=sexp_c_string(ctx, " val
", p - " val "), str=sexp_c_string(ctx, p, -1))"
" : sexp_cons(ctx, str=" val ", SEXP_FALSE)"))
((string-type? base)
(cat "sexp_c_string(ctx, " val ", -1)"))
(cat "sexp_c_string(ctx, " val ", " (c-array-length type) ")"))
((eq? 'input-port base)
(cat "sexp_make_non_null_input_port(ctx, " val ", SEXP_FALSE)"))
((eq? 'output-port base)
@ -641,17 +645,22 @@
(let ((len (if (pair? (type-array x))
(car (reverse (type-array x)))
(type-array x))))
(if (number? len)
len
(and (symbol? len)
(let* ((str (symbol->string len))
(len2 (string-length str)))
(and (> len2 3)
(string=? "arg" (substring str 0 3))
(let ((i (string->number (substring str 3 len2))))
(if i
(let ((y (list-ref (func-c-args func) i)))
(or (type-value y) len))))))))))
(cond
((number? len)
len)
((memq 'result (type-array x))
"sexp_unbox_fixnum(res)")
(else
(and func
(symbol? len)
(let* ((str (symbol->string len))
(len2 (string-length str)))
(and (> len2 3)
(string=? "arg" (substring str 0 3))
(let ((i (string->number (substring str 3 len2))))
(if i
(let ((y (list-ref (func-c-args func) i)))
(or (type-value y) len)))))))))))
(define (write-locals func)
(define (arg-res x)