mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-20 06:09:18 +02:00
using result length from fread; string-count optimized and safe for invalid strings.
This commit is contained in:
parent
1bc31ff9c1
commit
e608bec866
7 changed files with 72 additions and 28 deletions
8
eval.c
8
eval.c
|
@ -1419,20 +1419,20 @@ sexp sexp_string_cmp_op (sexp ctx, sexp self, sexp_sint_t n, sexp str1, sexp str
|
||||||
|
|
||||||
#if SEXP_USE_UTF8_STRINGS
|
#if SEXP_USE_UTF8_STRINGS
|
||||||
|
|
||||||
static int sexp_utf8_initial_byte_count (int c) {
|
int sexp_utf8_initial_byte_count (int c) {
|
||||||
if (c < 0xC0) return 1;
|
if (c < 0xC0) return 1;
|
||||||
if (c < 0xE0) return 2;
|
if (c < 0xE0) return 2;
|
||||||
return ((c>>4)&1)+3;
|
return ((c>>4)&1)+3;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int sexp_utf8_char_byte_count (int c) {
|
int sexp_utf8_char_byte_count (int c) {
|
||||||
if (c < 0x80) return 1;
|
if (c < 0x80) return 1;
|
||||||
if (c < 0x800) return 2;
|
if (c < 0x800) return 2;
|
||||||
if (c < 0x10000) return 3;
|
if (c < 0x10000) return 3;
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int sexp_string_utf8_length (unsigned char *p, int len) {
|
int sexp_string_utf8_length (unsigned char *p, int len) {
|
||||||
unsigned char *q = p+len;
|
unsigned char *q = p+len;
|
||||||
int i;
|
int i;
|
||||||
for (i=0; p<q; i++)
|
for (i=0; p<q; i++)
|
||||||
|
@ -1440,7 +1440,7 @@ static int sexp_string_utf8_length (unsigned char *p, int len) {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char* sexp_string_utf8_prev (unsigned char *p) {
|
char* sexp_string_utf8_prev (unsigned char *p) {
|
||||||
while ((*--p)>>6 == 2)
|
while ((*--p)>>6 == 2)
|
||||||
;
|
;
|
||||||
return (char*)p;
|
return (char*)p;
|
||||||
|
|
|
@ -90,6 +90,13 @@ SEXP_API sexp sexp_define_foreign_aux (sexp ctx, sexp env, const char *name, int
|
||||||
#if SEXP_USE_GREEN_THREADS
|
#if SEXP_USE_GREEN_THREADS
|
||||||
SEXP_API sexp sexp_dk (sexp ctx, sexp self, sexp_uint_t n, sexp val);
|
SEXP_API sexp sexp_dk (sexp ctx, sexp self, sexp_uint_t n, sexp val);
|
||||||
#endif
|
#endif
|
||||||
|
#if SEXP_USE_UTF8_STRINGS
|
||||||
|
SEXP_API int sexp_utf8_initial_byte_count (int c);
|
||||||
|
SEXP_API int sexp_utf8_char_byte_count (int c);
|
||||||
|
SEXP_API int sexp_string_utf8_length (unsigned char *p, int len);
|
||||||
|
SEXP_API char* sexp_string_utf8_prev (unsigned char *p);
|
||||||
|
SEXP_API sexp sexp_string_utf8_ref (sexp ctx, sexp str, sexp i);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if SEXP_USE_NATIVE_X86
|
#if SEXP_USE_NATIVE_X86
|
||||||
SEXP_API sexp sexp_write_char_op(sexp ctx, sexp self, sexp_sint_t n, sexp ch, sexp out);
|
SEXP_API sexp sexp_write_char_op(sexp ctx, sexp self, sexp_sint_t n, sexp ch, sexp out);
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
make-custom-input-port make-custom-output-port
|
make-custom-input-port make-custom-output-port
|
||||||
make-null-output-port make-broadcast-port make-concatenated-port
|
make-null-output-port make-broadcast-port make-concatenated-port
|
||||||
make-generated-input-port make-filtered-output-port
|
make-generated-input-port make-filtered-output-port
|
||||||
make-filtered-input-port
|
make-filtered-input-port string-count
|
||||||
open-input-bytevector open-output-bytevector get-output-bytevector
|
open-input-bytevector open-output-bytevector get-output-bytevector
|
||||||
string->utf8 utf8->string
|
string->utf8 utf8->string
|
||||||
write-u8 read-u8 peek-u8)
|
write-u8 read-u8 peek-u8)
|
||||||
|
|
|
@ -14,13 +14,6 @@
|
||||||
((>= i to))
|
((>= i to))
|
||||||
(string-set! dst j (string-ref src i))))
|
(string-set! dst j (string-ref src i))))
|
||||||
|
|
||||||
(define (string-count ch str . o)
|
|
||||||
(let ((start (if (pair? o) (car o) 0))
|
|
||||||
(end (if (and (pair? o) (pair? (cdr o))) (cadr o) (string-length str))))
|
|
||||||
(do ((i start (+ i 1))
|
|
||||||
(c 0 (if (eqv? ch (string-ref str i)) (+ c 1) c)))
|
|
||||||
((>= i end) c))))
|
|
||||||
|
|
||||||
(define (utf8->string vec)
|
(define (utf8->string vec)
|
||||||
(string-copy (utf8->string! vec)))
|
(string-copy (utf8->string! vec)))
|
||||||
|
|
||||||
|
@ -74,7 +67,7 @@
|
||||||
((if (pair? res) (= 0 (car res)) #t)
|
((if (pair? res) (= 0 (car res)) #t)
|
||||||
eof)
|
eof)
|
||||||
(else
|
(else
|
||||||
(port-line-set! in (+ (string-count #\newline (cadr res))
|
(port-line-set! in (+ (string-count #\newline (cadr res) 0)
|
||||||
(port-line in)))
|
(port-line in)))
|
||||||
(cadr res)))))))
|
(cadr res)))))))
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
((result (array char arg1)) int (default (current-input-port) input-port)))
|
((result (array char arg1)) int (default (current-input-port) input-port)))
|
||||||
|
|
||||||
(define-c size_t (%read-string "fread")
|
(define-c size_t (%read-string "fread")
|
||||||
((result (array char arg2)) (value 1 size_t) size_t (default (current-input-port) input-port)))
|
((result (array char (result arg2))) (value 1 size_t) size_t (default (current-input-port) input-port)))
|
||||||
|
|
||||||
(define-c size_t (%read-string! "fread")
|
(define-c size_t (%read-string! "fread")
|
||||||
(string (value 1 size_t) size_t (default (current-input-port) input-port)))
|
(string (value 1 size_t) size_t (default (current-input-port) input-port)))
|
||||||
|
@ -33,6 +33,8 @@
|
||||||
(define-c sexp (get-output-bytevector "sexp_get_output_bytevector")
|
(define-c sexp (get-output-bytevector "sexp_get_output_bytevector")
|
||||||
((value ctx sexp) (value self sexp) sexp))
|
((value ctx sexp) (value self sexp) sexp))
|
||||||
|
|
||||||
|
(define-c sexp (string-count "sexp_string_count")
|
||||||
|
((value ctx sexp) (value self sexp) sexp sexp sexp (default NULL sexp)))
|
||||||
(define-c sexp (string->utf8 "sexp_string_to_utf8")
|
(define-c sexp (string->utf8 "sexp_string_to_utf8")
|
||||||
((value ctx sexp) (value self sexp) sexp))
|
((value ctx sexp) (value self sexp) sexp))
|
||||||
(define-c sexp (utf8->string! "sexp_utf8_to_string_x")
|
(define-c sexp (utf8->string! "sexp_utf8_to_string_x")
|
||||||
|
|
|
@ -248,6 +248,39 @@ sexp sexp_get_output_bytevector (sexp ctx, sexp self, sexp port) {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sexp sexp_string_count (sexp ctx, sexp self, sexp ch, sexp str, sexp start, sexp end) {
|
||||||
|
const unsigned char *s, *e;
|
||||||
|
sexp_sint_t c, count = 0;
|
||||||
|
#if SEXP_USE_UTF8_STRINGS
|
||||||
|
sexp_sint_t i;
|
||||||
|
#endif
|
||||||
|
sexp_assert_type(ctx, sexp_charp, SEXP_CHAR, ch);
|
||||||
|
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
||||||
|
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, start);
|
||||||
|
if (sexp_not(end)) end = sexp_make_fixnum(sexp_string_length(str));
|
||||||
|
else sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, end);
|
||||||
|
c = sexp_unbox_character(ch);
|
||||||
|
#if SEXP_USE_UTF8_STRINGS
|
||||||
|
if (c < 128) {
|
||||||
|
#endif
|
||||||
|
s = (unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(start);
|
||||||
|
e = (unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(end);
|
||||||
|
if (e > (unsigned char*)sexp_string_data(str) + sexp_string_length(str))
|
||||||
|
return sexp_user_exception(ctx, self, "string-count: end index out of range", end);
|
||||||
|
/* fast case for ASCII chars */
|
||||||
|
while (s < e) if (*s++ == c) count++;
|
||||||
|
#if SEXP_USE_UTF8_STRINGS
|
||||||
|
} else {
|
||||||
|
/* decode utf8 chars */
|
||||||
|
s = (unsigned char*)sexp_string_data(str);
|
||||||
|
for (i = sexp_unbox_fixnum(start); i < sexp_unbox_fixnum(end);
|
||||||
|
i += sexp_utf8_initial_byte_count(s[i]))
|
||||||
|
if (sexp_string_utf8_ref(ctx, str, sexp_make_fixnum(i)) == ch) count++;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return sexp_make_fixnum(count);
|
||||||
|
}
|
||||||
|
|
||||||
sexp sexp_string_to_utf8 (sexp ctx, sexp self, sexp str) {
|
sexp sexp_string_to_utf8 (sexp ctx, sexp self, sexp str) {
|
||||||
sexp res;
|
sexp res;
|
||||||
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
||||||
|
|
|
@ -456,6 +456,9 @@
|
||||||
(else
|
(else
|
||||||
(type-id-value type))))
|
(type-id-value type))))
|
||||||
|
|
||||||
|
(define (c-array-length type)
|
||||||
|
(or (get-array-length #f type) "-1"))
|
||||||
|
|
||||||
(define (c->scheme-converter type val . o)
|
(define (c->scheme-converter type val . o)
|
||||||
(let ((base (type-base type)))
|
(let ((base (type-base type)))
|
||||||
(cond
|
(cond
|
||||||
|
@ -475,14 +478,15 @@
|
||||||
(cat "sexp_make_flonum(ctx, " val ")"))
|
(cat "sexp_make_flonum(ctx, " val ")"))
|
||||||
((eq? base 'char)
|
((eq? base 'char)
|
||||||
(if (type-array type)
|
(if (type-array type)
|
||||||
(cat "sexp_c_string(ctx, " val ", -1)")
|
(cat "sexp_c_string(ctx, " val ", " (c-array-length type) ")")
|
||||||
(cat "sexp_make_character(ctx, " val ")")))
|
(cat "sexp_make_character(ctx, " val ")")))
|
||||||
((eq? 'env-string base)
|
((eq? 'env-string base)
|
||||||
(cat "(p=strchr(" val ", '=') ? "
|
(cat "(p=strchr(" val ", '=') ? "
|
||||||
"sexp_cons(ctx, str=sexp_c_string(ctx, " val ", p - " val "), str=sexp_c_string(ctx, p, -1))"
|
"sexp_cons(ctx, str=sexp_c_string(ctx, " val
|
||||||
|
", p - " val "), str=sexp_c_string(ctx, p, -1))"
|
||||||
" : sexp_cons(ctx, str=" val ", SEXP_FALSE)"))
|
" : sexp_cons(ctx, str=" val ", SEXP_FALSE)"))
|
||||||
((string-type? base)
|
((string-type? base)
|
||||||
(cat "sexp_c_string(ctx, " val ", -1)"))
|
(cat "sexp_c_string(ctx, " val ", " (c-array-length type) ")"))
|
||||||
((eq? 'input-port base)
|
((eq? 'input-port base)
|
||||||
(cat "sexp_make_non_null_input_port(ctx, " val ", SEXP_FALSE)"))
|
(cat "sexp_make_non_null_input_port(ctx, " val ", SEXP_FALSE)"))
|
||||||
((eq? 'output-port base)
|
((eq? 'output-port base)
|
||||||
|
@ -641,9 +645,14 @@
|
||||||
(let ((len (if (pair? (type-array x))
|
(let ((len (if (pair? (type-array x))
|
||||||
(car (reverse (type-array x)))
|
(car (reverse (type-array x)))
|
||||||
(type-array x))))
|
(type-array x))))
|
||||||
(if (number? len)
|
(cond
|
||||||
len
|
((number? len)
|
||||||
(and (symbol? len)
|
len)
|
||||||
|
((memq 'result (type-array x))
|
||||||
|
"sexp_unbox_fixnum(res)")
|
||||||
|
(else
|
||||||
|
(and func
|
||||||
|
(symbol? len)
|
||||||
(let* ((str (symbol->string len))
|
(let* ((str (symbol->string len))
|
||||||
(len2 (string-length str)))
|
(len2 (string-length str)))
|
||||||
(and (> len2 3)
|
(and (> len2 3)
|
||||||
|
@ -651,7 +660,7 @@
|
||||||
(let ((i (string->number (substring str 3 len2))))
|
(let ((i (string->number (substring str 3 len2))))
|
||||||
(if i
|
(if i
|
||||||
(let ((y (list-ref (func-c-args func) i)))
|
(let ((y (list-ref (func-c-args func) i)))
|
||||||
(or (type-value y) len))))))))))
|
(or (type-value y) len)))))))))))
|
||||||
|
|
||||||
(define (write-locals func)
|
(define (write-locals func)
|
||||||
(define (arg-res x)
|
(define (arg-res x)
|
||||||
|
|
Loading…
Add table
Reference in a new issue