mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-19 05:39:18 +02:00
add compile-time option to store precomputed index->cursor tables for strings
This commit is contained in:
parent
677ccdce68
commit
9569460a58
5 changed files with 88 additions and 9 deletions
2
eval.c
2
eval.c
|
@ -1947,6 +1947,8 @@ void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) {
|
||||||
sexp_string_size(str) += new_len - old_len;
|
sexp_string_size(str) += new_len - old_len;
|
||||||
}
|
}
|
||||||
sexp_utf8_encode_char(p, new_len, c);
|
sexp_utf8_encode_char(p, new_len, c);
|
||||||
|
if (old_len != new_len)
|
||||||
|
sexp_update_string_index_lookup(ctx, str);
|
||||||
}
|
}
|
||||||
|
|
||||||
sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch) {
|
sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch) {
|
||||||
|
|
|
@ -210,12 +210,14 @@
|
||||||
/* Making them immutable allows for packed UTF-8 strings. */
|
/* Making them immutable allows for packed UTF-8 strings. */
|
||||||
/* #define SEXP_USE_MUTABLE_STRINGS 0 */
|
/* #define SEXP_USE_MUTABLE_STRINGS 0 */
|
||||||
|
|
||||||
/* uncomment this to make string cursors just fixnum offsets */
|
/* uncomment this to enable precomputed index->cursor tables for strings */
|
||||||
/* The default when using UTF-8 is to have a disjoint string */
|
/* This makes string-ref faster at the expense of making string */
|
||||||
/* cursor type. This is an immediate type with no loss in */
|
/* construction (including string-append and I/O) slower. */
|
||||||
/* performance, and prevents confusion mixing indexes and */
|
/* You can configure with SEXP_STRING_INDEX_TABLE_CHUNK_SIZE below, */
|
||||||
/* cursors. */
|
/* the default is caching every 64th index (<=12.5% string overhead). */
|
||||||
/* #define SEXP_USE_DISJOINT_STRING_CURSORS 0 */
|
/* With a minimum of 1 you'd have up to 8x string overhead, and */
|
||||||
|
/* string-ref would still be slightly slower than string-cursors. */
|
||||||
|
/* #define SEXP_USE_STRING_INDEX_TABLE 1 */
|
||||||
|
|
||||||
/* uncomment this to disable automatic closing of ports */
|
/* uncomment this to disable automatic closing of ports */
|
||||||
/* If enabled, the underlying FILE* for file ports will be */
|
/* If enabled, the underlying FILE* for file ports will be */
|
||||||
|
@ -647,6 +649,18 @@
|
||||||
#define SEXP_USE_PACKED_STRINGS 1
|
#define SEXP_USE_PACKED_STRINGS 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Packed strings cannot carry an index table, so the option is forced */
/* off for them.  The #undef avoids an incompatible macro redefinition */
/* when the user enabled the option explicitly (e.g. with -D...=1). */
#if SEXP_USE_PACKED_STRINGS
#undef SEXP_USE_STRING_INDEX_TABLE
#define SEXP_USE_STRING_INDEX_TABLE 0
#endif
/* disabled by default */
#ifndef SEXP_USE_STRING_INDEX_TABLE
#define SEXP_USE_STRING_INDEX_TABLE 0
#endif

/* for every chunk_size indexes store the precomputed offset */
#ifndef SEXP_STRING_INDEX_TABLE_CHUNK_SIZE
#define SEXP_STRING_INDEX_TABLE_CHUNK_SIZE 64
#endif
|
||||||
|
|
||||||
#ifndef SEXP_USE_DISJOINT_STRING_CURSORS
|
#ifndef SEXP_USE_DISJOINT_STRING_CURSORS
|
||||||
#define SEXP_USE_DISJOINT_STRING_CURSORS SEXP_USE_UTF8_STRINGS
|
#define SEXP_USE_DISJOINT_STRING_CURSORS SEXP_USE_UTF8_STRINGS
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -443,6 +443,9 @@ struct sexp_struct {
|
||||||
#else
|
#else
|
||||||
sexp_uint_t offset, length;
|
sexp_uint_t offset, length;
|
||||||
sexp bytes;
|
sexp bytes;
|
||||||
|
#if SEXP_USE_STRING_INDEX_TABLE
|
||||||
|
sexp charlens;
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
} string;
|
} string;
|
||||||
struct {
|
struct {
|
||||||
|
@ -1122,7 +1125,8 @@ enum sexp_uniform_vector_type {
|
||||||
#define sexp_bit_ref(u1v, i) (((sexp_uvector_data(u1v)[i/8])>>(i%8))&1)
|
#define sexp_bit_ref(u1v, i) (((sexp_uvector_data(u1v)[i/8])>>(i%8))&1)
|
||||||
#define sexp_bit_set(u1v, i, x) (x ? (sexp_uvector_data(u1v)[i/8]|=(1<<(i%8))) : (sexp_uvector_data(u1v)[i/8]&=~(1<<(i%8))))
|
#define sexp_bit_set(u1v, i, x) (x ? (sexp_uvector_data(u1v)[i/8]|=(1<<(i%8))) : (sexp_uvector_data(u1v)[i/8]&=~(1<<(i%8))))
|
||||||
|
|
||||||
#define sexp_string_size(x) (sexp_field(x, string, SEXP_STRING, length))
|
#define sexp_string_size(x) (sexp_field(x, string, SEXP_STRING, length))
|
||||||
|
/* Accessor for a string's index->cursor table: a bytevector read as an */
/* array of sexp_sint_t byte offsets, or NULL when no table was built. */
/* The underlying field is only declared when SEXP_USE_STRING_INDEX_TABLE */
/* is enabled, so only use this macro under that condition. */
#define sexp_string_charlens(x) (sexp_field(x, string, SEXP_STRING, charlens))
|
||||||
#if SEXP_USE_PACKED_STRINGS
|
#if SEXP_USE_PACKED_STRINGS
|
||||||
#define sexp_string_data(x) (sexp_field(x, string, SEXP_STRING, data))
|
#define sexp_string_data(x) (sexp_field(x, string, SEXP_STRING, data))
|
||||||
#define sexp_string_bytes(x) (x)
|
#define sexp_string_bytes(x) (x)
|
||||||
|
@ -1722,6 +1726,12 @@ SEXP_API int sexp_write_utf8_char (sexp ctx, int c, sexp out);
|
||||||
#define sexp_substring_cursor(ctx, s, i, j) sexp_substring_op(ctx, NULL, 3, s, i, j)
|
#define sexp_substring_cursor(ctx, s, i, j) sexp_substring_op(ctx, NULL, 3, s, i, j)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Rebuilds the index->cursor table of string s after its contents change. */
/* When the feature is disabled the call expands to a no-op expression */
/* (rather than to nothing), so it remains a well-formed statement when */
/* used as the sole body of an if/else and does not trigger empty-body */
/* warnings. */
#if SEXP_USE_STRING_INDEX_TABLE
SEXP_API void sexp_update_string_index_lookup(sexp ctx, sexp s);
#else
#define sexp_update_string_index_lookup(ctx, s) ((void)0)
#endif
|
||||||
|
|
||||||
#if SEXP_USE_GREEN_THREADS
|
#if SEXP_USE_GREEN_THREADS
|
||||||
SEXP_API int sexp_maybe_block_port (sexp ctx, sexp in, int forcep);
|
SEXP_API int sexp_maybe_block_port (sexp ctx, sexp in, int forcep);
|
||||||
SEXP_API void sexp_maybe_unblock_port (sexp ctx, sexp in);
|
SEXP_API void sexp_maybe_unblock_port (sexp ctx, sexp in);
|
||||||
|
|
56
sexp.c
56
sexp.c
|
@ -262,7 +262,7 @@ static struct sexp_type_struct _sexp_type_specs[] = {
|
||||||
#if SEXP_USE_PACKED_STRINGS
|
#if SEXP_USE_PACKED_STRINGS
|
||||||
{SEXP_STRING, 0, 0, 0, 0, 0, sexp_sizeof(string)+1, sexp_offsetof(string, length), 1, 0, 0, 0, 0, 0, 0, (sexp)"String", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
{SEXP_STRING, 0, 0, 0, 0, 0, sexp_sizeof(string)+1, sexp_offsetof(string, length), 1, 0, 0, 0, 0, 0, 0, (sexp)"String", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
||||||
#else
|
#else
|
||||||
{SEXP_STRING, sexp_offsetof(string, bytes), 1, 1, 0, 0, sexp_sizeof(string), 0, 0, 0, 0, 0, 0, 0, 0, (sexp)"String", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
{SEXP_STRING, sexp_offsetof(string, bytes), 1, 1+SEXP_USE_STRING_INDEX_TABLE, 0, 0, sexp_sizeof(string), 0, 0, 0, 0, 0, 0, 0, 0, (sexp)"String", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
||||||
#endif
|
#endif
|
||||||
{SEXP_VECTOR, sexp_offsetof(vector, data), 0, 0, sexp_offsetof(vector, length), 1, sexp_sizeof(vector), sexp_offsetof(vector, length), sizeof(sexp), 0, 0, 0, 0, 0, 0, (sexp)"Vector", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
{SEXP_VECTOR, sexp_offsetof(vector, data), 0, 0, sexp_offsetof(vector, length), 1, sexp_sizeof(vector), sexp_offsetof(vector, length), sizeof(sexp), 0, 0, 0, 0, 0, 0, (sexp)"Vector", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
||||||
{SEXP_FLONUM, 0, 0, 0, 0, 0, sexp_sizeof(flonum), 0, 0, 0, 0, 0, 0, 0, 0, (sexp)"Flonum", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
{SEXP_FLONUM, 0, 0, 0, 0, 0, sexp_sizeof(flonum), 0, 0, 0, 0, 0, 0, 0, 0, (sexp)"Flonum", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
||||||
|
@ -1198,13 +1198,31 @@ void sexp_utf8_encode_char (unsigned char* p, int len, int c) {
|
||||||
}
|
}
|
||||||
|
|
||||||
sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp index) {
|
sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp index) {
|
||||||
|
#if SEXP_USE_STRING_INDEX_TABLE
|
||||||
|
sexp charlens;
|
||||||
|
sexp_sint_t* chunklens;
|
||||||
|
sexp_sint_t chunk;
|
||||||
|
#endif
|
||||||
sexp_sint_t i, j, limit;
|
sexp_sint_t i, j, limit;
|
||||||
unsigned char *p;
|
unsigned char *p;
|
||||||
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
||||||
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, index);
|
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, index);
|
||||||
p = (unsigned char*)sexp_string_data(str);
|
p = (unsigned char*)sexp_string_data(str);
|
||||||
limit = sexp_string_size(str);
|
limit = sexp_string_size(str);
|
||||||
for (j=0, i=sexp_unbox_fixnum(index); i>0 && j<limit; i--)
|
i = sexp_unbox_fixnum(index);
|
||||||
|
j = 0;
|
||||||
|
#if SEXP_USE_STRING_INDEX_TABLE
|
||||||
|
if (i > SEXP_STRING_INDEX_TABLE_CHUNK_SIZE) {
|
||||||
|
charlens = sexp_string_charlens(str);
|
||||||
|
if (charlens) {
|
||||||
|
chunklens = (sexp_sint_t*)sexp_bytes_data(charlens);
|
||||||
|
chunk = i / SEXP_STRING_INDEX_TABLE_CHUNK_SIZE - 1;
|
||||||
|
j = chunklens[chunk];
|
||||||
|
i -= (chunk+1) * SEXP_STRING_INDEX_TABLE_CHUNK_SIZE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for ( ; i>0 && j<limit; i--)
|
||||||
j += sexp_utf8_initial_byte_count(p[j]);
|
j += sexp_utf8_initial_byte_count(p[j]);
|
||||||
if (i != 0)
|
if (i != 0)
|
||||||
return sexp_user_exception(ctx, self, "string-index->cursor: index out of range", index);
|
return sexp_user_exception(ctx, self, "string-index->cursor: index out of range", index);
|
||||||
|
@ -1227,6 +1245,36 @@ sexp sexp_string_cursor_offset (sexp ctx, sexp self, sexp_sint_t n, sexp cur) {
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if SEXP_USE_STRING_INDEX_TABLE
|
||||||
|
/* Rebuild the precomputed index->cursor table of string s.
 *
 * Entry k of the table holds the byte offset of character index
 * (k+1)*SEXP_STRING_INDEX_TABLE_CHUNK_SIZE.  The lookup in
 * sexp_string_index_to_cursor reads entry (i/CHUNK_SIZE - 1) for every
 * index i > CHUNK_SIZE, and i may equal the character length (the end
 * cursor), so the table needs len/CHUNK_SIZE entries and every one of
 * them must be initialized.
 *
 * Strings too short to benefit get a NULL table, which the lookup
 * treats as "scan from the start".
 *
 * ctx: context used for allocation and GC protection.
 * s:   the string whose table is (re)built; its data and size must
 *      already be final.
 */
void sexp_update_string_index_lookup(sexp ctx, sexp s) {
  unsigned char *data, *p, *end;
  sexp_sint_t numchunks, len, i, filled, *chunks;
  sexp_gc_var1(tmp);
  if (sexp_string_size(s) < SEXP_STRING_INDEX_TABLE_CHUNK_SIZE*1.2) {
    sexp_string_charlens(s) = NULL; /* don't build table for just a few chars */
    return;
  }
  len = sexp_string_utf8_length((unsigned char*) sexp_string_data(s), sexp_string_size(s));
  if (len <= SEXP_STRING_INDEX_TABLE_CHUNK_SIZE) {
    /* many bytes but few characters: the lookup never consults the */
    /* table for indexes <= chunk size, so there is nothing to store */
    sexp_string_charlens(s) = NULL;
    return;
  }
  numchunks = len / SEXP_STRING_INDEX_TABLE_CHUNK_SIZE;
  sexp_gc_preserve1(ctx, tmp);
  tmp = s;                      /* keep s alive across the allocation */
  /* NOTE(review): the allocation result is not checked for failure, */
  /* same as before - confirm how out-of-memory should be handled here */
  sexp_string_charlens(s) =
    sexp_make_bytes_op(ctx, NULL, 2, sexp_make_fixnum(numchunks * sizeof(sexp_sint_t)), SEXP_VOID);
  chunks = (sexp_sint_t*)sexp_bytes_data(sexp_string_charlens(s));
  /* walk unsigned bytes, as the index->cursor lookup does, so that */
  /* lead bytes >= 0x80 are not seen as negative values */
  data = (unsigned char*) sexp_string_data(s);
  end = data + sexp_string_size(s);
  filled = 0;
  i = 0;
  for (p = data; filled < numchunks && p < end; ) {
    p += sexp_utf8_initial_byte_count(*p);
    if (++i % SEXP_STRING_INDEX_TABLE_CHUNK_SIZE == 0)
      chunks[filled++] = p - data;
  }
  /* only reachable if the walk disagrees with the counted length */
  /* (e.g. malformed UTF-8): never leave an entry uninitialized */
  while (filled < numchunks)
    chunks[filled++] = sexp_string_size(s);
  sexp_gc_release1(ctx);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
sexp sexp_make_string_op (sexp ctx, sexp self, sexp_sint_t n, sexp len, sexp ch)
|
sexp sexp_make_string_op (sexp ctx, sexp self, sexp_sint_t n, sexp len, sexp ch)
|
||||||
{
|
{
|
||||||
sexp i = (sexp_charp(ch) ? sexp_make_fixnum(sexp_unbox_character(ch)) : ch);
|
sexp i = (sexp_charp(ch) ? sexp_make_fixnum(sexp_unbox_character(ch)) : ch);
|
||||||
|
@ -1259,6 +1307,7 @@ sexp sexp_make_string_op (sexp ctx, sexp self, sexp_sint_t n, sexp len, sexp ch)
|
||||||
sexp_string_bytes(s) = b;
|
sexp_string_bytes(s) = b;
|
||||||
sexp_string_offset(s) = 0;
|
sexp_string_offset(s) = 0;
|
||||||
sexp_string_size(s) = sexp_bytes_length(b);
|
sexp_string_size(s) = sexp_bytes_length(b);
|
||||||
|
sexp_update_string_index_lookup(ctx, s);
|
||||||
sexp_gc_release2(ctx);
|
sexp_gc_release2(ctx);
|
||||||
return s;
|
return s;
|
||||||
#endif
|
#endif
|
||||||
|
@ -1273,6 +1322,7 @@ sexp sexp_c_string (sexp ctx, const char *str, sexp_sint_t slen) {
|
||||||
if (sexp_exceptionp(s)) return s;
|
if (sexp_exceptionp(s)) return s;
|
||||||
memcpy(sexp_string_data(s), str, len);
|
memcpy(sexp_string_data(s), str, len);
|
||||||
sexp_string_data(s)[len] = '\0';
|
sexp_string_data(s)[len] = '\0';
|
||||||
|
sexp_update_string_index_lookup(ctx, s);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1294,6 +1344,7 @@ sexp sexp_substring_op (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp start
|
||||||
sexp_string_data(str)+sexp_unbox_string_cursor(start),
|
sexp_string_data(str)+sexp_unbox_string_cursor(start),
|
||||||
sexp_string_size(res));
|
sexp_string_size(res));
|
||||||
sexp_string_data(res)[sexp_string_size(res)] = '\0';
|
sexp_string_data(res)[sexp_string_size(res)] = '\0';
|
||||||
|
sexp_update_string_index_lookup(ctx, res);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1360,6 +1411,7 @@ sexp sexp_string_concatenate_op (sexp ctx, sexp self, sexp_sint_t n, sexp str_ls
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*p = '\0';
|
*p = '\0';
|
||||||
|
sexp_update_string_index_lookup(ctx, res);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,5 +33,6 @@ CPPFLAGS=-DSEXP_USE_UTF8_STRINGS=0
|
||||||
CPPFLAGS=-DSEXP_USE_DISJOINT_STRING_CURSORS=0
|
CPPFLAGS=-DSEXP_USE_DISJOINT_STRING_CURSORS=0
|
||||||
CFLAGS=-DSEXP_USE_STATIC_LIBS_NO_INCLUDE=0;CPPFLAGS=-DSEXP_USE_STATIC_LIBS=1
|
CFLAGS=-DSEXP_USE_STATIC_LIBS_NO_INCLUDE=0;CPPFLAGS=-DSEXP_USE_STATIC_LIBS=1
|
||||||
CPPFLAGS=-DSEXP_USE_MUTABLE_STRINGS=0
|
CPPFLAGS=-DSEXP_USE_MUTABLE_STRINGS=0
|
||||||
|
CPPFLAGS=-DSEXP_USE_STRING_INDEX_TABLE=1
|
||||||
CPPFLAGS=-DSEXP_USE_STRICT_TOPLEVEL_BINDINGS=1
|
CPPFLAGS=-DSEXP_USE_STRICT_TOPLEVEL_BINDINGS=1
|
||||||
CPPFLAGS=-DSEXP_USE_NO_FEATURES=1
|
CPPFLAGS=-DSEXP_USE_NO_FEATURES=1
|
||||||
|
|
Loading…
Add table
Reference in a new issue