mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-18 21:29:19 +02:00
add compile-time option to store precomputed index->cursor tables for strings
This commit is contained in:
parent
677ccdce68
commit
9569460a58
5 changed files with 88 additions and 9 deletions
2
eval.c
2
eval.c
|
@ -1947,6 +1947,8 @@ void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) {
|
|||
sexp_string_size(str) += new_len - old_len;
|
||||
}
|
||||
sexp_utf8_encode_char(p, new_len, c);
|
||||
if (old_len != new_len)
|
||||
sexp_update_string_index_lookup(ctx, str);
|
||||
}
|
||||
|
||||
sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch) {
|
||||
|
|
|
@ -210,12 +210,14 @@
|
|||
/* Making them immutable allows for packed UTF-8 strings. */
|
||||
/* #define SEXP_USE_MUTABLE_STRINGS 0 */
|
||||
|
||||
/* uncomment this to make string cursors just fixnum offsets */
|
||||
/* The default when using UTF-8 is to have a disjoint string */
|
||||
/* cursor type. This is an immediate type with no loss in */
|
||||
/* performance, and prevents confusion mixing indexes and */
|
||||
/* cursors. */
|
||||
/* #define SEXP_USE_DISJOINT_STRING_CURSORS 0 */
|
||||
/* uncomment this to enable precomputed index->cursor tables for strings */
|
||||
/* This makes string-ref faster at the expense of making string */
|
||||
/* construction (including string-append and I/O) slower. */
|
||||
/* You can configure with SEXP_STRING_INDEX_TABLE_CHUNK_SIZE below, */
|
||||
/* the default is caching every 64th index (<=12.5% string overhead). */
|
||||
/* With a minimum of 1 you'd have up to 8x string overhead, and */
|
||||
/* string-ref would still be slightly slower than string-cursors. */
|
||||
/* #define SEXP_USE_STRING_INDEX_TABLE 1 */
|
||||
|
||||
/* uncomment this to disable automatic closing of ports */
|
||||
/* If enabled, the underlying FILE* for file ports will be */
|
||||
|
@ -647,6 +649,18 @@
|
|||
#define SEXP_USE_PACKED_STRINGS 1
|
||||
#endif
|
||||
|
||||
/* The index table is force-disabled for packed strings (presumably */
/* because the table field only exists in the non-packed layout).   */
/* #undef first so that a user-supplied -DSEXP_USE_STRING_INDEX_TABLE=1 */
/* is overridden cleanly instead of triggering an incompatible macro */
/* redefinition. */
#if SEXP_USE_PACKED_STRINGS
#undef SEXP_USE_STRING_INDEX_TABLE
#define SEXP_USE_STRING_INDEX_TABLE 0
#endif

/* off by default */
#ifndef SEXP_USE_STRING_INDEX_TABLE
#define SEXP_USE_STRING_INDEX_TABLE 0
#endif
|
||||
|
||||
/* for every chunk_size indexes store the precomputed offset */
|
||||
#ifndef SEXP_STRING_INDEX_TABLE_CHUNK_SIZE
|
||||
#define SEXP_STRING_INDEX_TABLE_CHUNK_SIZE 64
|
||||
#endif
|
||||
|
||||
#ifndef SEXP_USE_DISJOINT_STRING_CURSORS
|
||||
#define SEXP_USE_DISJOINT_STRING_CURSORS SEXP_USE_UTF8_STRINGS
|
||||
#endif
|
||||
|
|
|
@ -443,6 +443,9 @@ struct sexp_struct {
|
|||
#else
|
||||
sexp_uint_t offset, length;
|
||||
sexp bytes;
|
||||
#if SEXP_USE_STRING_INDEX_TABLE
|
||||
sexp charlens;
|
||||
#endif
|
||||
#endif
|
||||
} string;
|
||||
struct {
|
||||
|
@ -1123,6 +1126,7 @@ enum sexp_uniform_vector_type {
|
|||
#define sexp_bit_set(u1v, i, x) (x ? (sexp_uvector_data(u1v)[i/8]|=(1<<(i%8))) : (sexp_uvector_data(u1v)[i/8]&=~(1<<(i%8))))
|
||||
|
||||
#define sexp_string_size(x) (sexp_field(x, string, SEXP_STRING, length))
|
||||
#define sexp_string_charlens(x) (sexp_field(x, string, SEXP_STRING, charlens))
|
||||
#if SEXP_USE_PACKED_STRINGS
|
||||
#define sexp_string_data(x) (sexp_field(x, string, SEXP_STRING, data))
|
||||
#define sexp_string_bytes(x) (x)
|
||||
|
@ -1722,6 +1726,12 @@ SEXP_API int sexp_write_utf8_char (sexp ctx, int c, sexp out);
|
|||
#define sexp_substring_cursor(ctx, s, i, j) sexp_substring_op(ctx, NULL, 3, s, i, j)
|
||||
#endif
|
||||
|
||||
#if SEXP_USE_STRING_INDEX_TABLE
/* Rebuild the precomputed index->cursor table of string s.  Must be */
/* called whenever the byte contents or size of s change. */
SEXP_API void sexp_update_string_index_lookup(sexp ctx, sexp s);
#else
/* No table to maintain: expand to a harmless expression statement so */
/* that `if (cond) sexp_update_string_index_lookup(ctx, s);` does not */
/* degenerate into an empty-body `if`. */
#define sexp_update_string_index_lookup(ctx, s) ((void)0)
#endif
|
||||
|
||||
#if SEXP_USE_GREEN_THREADS
|
||||
SEXP_API int sexp_maybe_block_port (sexp ctx, sexp in, int forcep);
|
||||
SEXP_API void sexp_maybe_unblock_port (sexp ctx, sexp in);
|
||||
|
|
56
sexp.c
56
sexp.c
|
@ -262,7 +262,7 @@ static struct sexp_type_struct _sexp_type_specs[] = {
|
|||
#if SEXP_USE_PACKED_STRINGS
|
||||
{SEXP_STRING, 0, 0, 0, 0, 0, sexp_sizeof(string)+1, sexp_offsetof(string, length), 1, 0, 0, 0, 0, 0, 0, (sexp)"String", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
||||
#else
|
||||
{SEXP_STRING, sexp_offsetof(string, bytes), 1, 1, 0, 0, sexp_sizeof(string), 0, 0, 0, 0, 0, 0, 0, 0, (sexp)"String", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
||||
{SEXP_STRING, sexp_offsetof(string, bytes), 1, 1+SEXP_USE_STRING_INDEX_TABLE, 0, 0, sexp_sizeof(string), 0, 0, 0, 0, 0, 0, 0, 0, (sexp)"String", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
||||
#endif
|
||||
{SEXP_VECTOR, sexp_offsetof(vector, data), 0, 0, sexp_offsetof(vector, length), 1, sexp_sizeof(vector), sexp_offsetof(vector, length), sizeof(sexp), 0, 0, 0, 0, 0, 0, (sexp)"Vector", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
||||
{SEXP_FLONUM, 0, 0, 0, 0, 0, sexp_sizeof(flonum), 0, 0, 0, 0, 0, 0, 0, 0, (sexp)"Flonum", SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, SEXP_FALSE, NULL, NULL, NULL, NULL},
|
||||
|
@ -1198,13 +1198,31 @@ void sexp_utf8_encode_char (unsigned char* p, int len, int c) {
|
|||
}
|
||||
|
||||
sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp index) {
|
||||
#if SEXP_USE_STRING_INDEX_TABLE
|
||||
sexp charlens;
|
||||
sexp_sint_t* chunklens;
|
||||
sexp_sint_t chunk;
|
||||
#endif
|
||||
sexp_sint_t i, j, limit;
|
||||
unsigned char *p;
|
||||
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
||||
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, index);
|
||||
p = (unsigned char*)sexp_string_data(str);
|
||||
limit = sexp_string_size(str);
|
||||
for (j=0, i=sexp_unbox_fixnum(index); i>0 && j<limit; i--)
|
||||
i = sexp_unbox_fixnum(index);
|
||||
j = 0;
|
||||
#if SEXP_USE_STRING_INDEX_TABLE
|
||||
if (i > SEXP_STRING_INDEX_TABLE_CHUNK_SIZE) {
|
||||
charlens = sexp_string_charlens(str);
|
||||
if (charlens) {
|
||||
chunklens = (sexp_sint_t*)sexp_bytes_data(charlens);
|
||||
chunk = i / SEXP_STRING_INDEX_TABLE_CHUNK_SIZE - 1;
|
||||
j = chunklens[chunk];
|
||||
i -= (chunk+1) * SEXP_STRING_INDEX_TABLE_CHUNK_SIZE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for ( ; i>0 && j<limit; i--)
|
||||
j += sexp_utf8_initial_byte_count(p[j]);
|
||||
if (i != 0)
|
||||
return sexp_user_exception(ctx, self, "string-index->cursor: index out of range", index);
|
||||
|
@ -1227,6 +1245,36 @@ sexp sexp_string_cursor_offset (sexp ctx, sexp self, sexp_sint_t n, sexp cur) {
|
|||
|
||||
#endif
|
||||
|
||||
#if SEXP_USE_STRING_INDEX_TABLE
|
||||
/*
 * Rebuild the precomputed index->cursor table of string s.
 *
 * Entry k of the table holds the byte offset of character index
 * (k+1)*SEXP_STRING_INDEX_TABLE_CHUNK_SIZE, which is the layout the
 * index->cursor lookup reads back.  The table is left NULL (meaning
 * "scan linearly") when the string is short, when it holds fewer than
 * one full chunk of characters, or when the table cannot be allocated.
 *
 * ctx: context used for the allocation.
 * s:   the string whose table is rebuilt; its bytes are not modified.
 */
void sexp_update_string_index_lookup(sexp ctx, sexp s) {
  unsigned char *data, *p, *end;
  sexp_sint_t numchunks, len, i, chunk, *chunks;
  sexp table;
  sexp_gc_var1(tmp);
  /* drop any stale table before doing anything else */
  sexp_string_charlens(s) = NULL;
  if (sexp_string_size(s) < SEXP_STRING_INDEX_TABLE_CHUNK_SIZE*1.2)
    return;            /* don't build table for just a few chars */
  len = sexp_string_utf8_length((unsigned char*) sexp_string_data(s), sexp_string_size(s));
  /* one entry per complete chunk, so that every index in */
  /* [CHUNK_SIZE, len] (including the end index) has an entry */
  numchunks = len / SEXP_STRING_INDEX_TABLE_CHUNK_SIZE;
  if (numchunks <= 0)
    return;            /* many bytes but few (multi-byte) characters */
  sexp_gc_preserve1(ctx, tmp);
  tmp = s;             /* keep s alive across the allocation below */
  table = sexp_make_bytes_op(ctx, NULL, 2, sexp_make_fixnum(numchunks * sizeof(sexp_sint_t)), SEXP_VOID);
  if (!sexp_exceptionp(table)) {
    chunks = (sexp_sint_t*)sexp_bytes_data(table);
    /* unsigned bytes: lead bytes >= 0x80 must not be seen as negative */
    data = (unsigned char*)sexp_string_data(s);
    end = data + sexp_string_size(s);
    p = data;
    i = 0;
    chunk = 0;
    while (chunk < numchunks) {
      /* advance one character, never stepping past the string's bytes */
      if (p < end)
        p += sexp_utf8_initial_byte_count(*p);
      if (p > end)
        p = end;
      if (++i % SEXP_STRING_INDEX_TABLE_CHUNK_SIZE == 0)
        chunks[chunk++] = p - data;
    }
    /* publish the table only once it is completely filled in */
    sexp_string_charlens(s) = table;
  }
  sexp_gc_release1(ctx);
}
|
||||
#endif
|
||||
|
||||
sexp sexp_make_string_op (sexp ctx, sexp self, sexp_sint_t n, sexp len, sexp ch)
|
||||
{
|
||||
sexp i = (sexp_charp(ch) ? sexp_make_fixnum(sexp_unbox_character(ch)) : ch);
|
||||
|
@ -1259,6 +1307,7 @@ sexp sexp_make_string_op (sexp ctx, sexp self, sexp_sint_t n, sexp len, sexp ch)
|
|||
sexp_string_bytes(s) = b;
|
||||
sexp_string_offset(s) = 0;
|
||||
sexp_string_size(s) = sexp_bytes_length(b);
|
||||
sexp_update_string_index_lookup(ctx, s);
|
||||
sexp_gc_release2(ctx);
|
||||
return s;
|
||||
#endif
|
||||
|
@ -1273,6 +1322,7 @@ sexp sexp_c_string (sexp ctx, const char *str, sexp_sint_t slen) {
|
|||
if (sexp_exceptionp(s)) return s;
|
||||
memcpy(sexp_string_data(s), str, len);
|
||||
sexp_string_data(s)[len] = '\0';
|
||||
sexp_update_string_index_lookup(ctx, s);
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -1294,6 +1344,7 @@ sexp sexp_substring_op (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp start
|
|||
sexp_string_data(str)+sexp_unbox_string_cursor(start),
|
||||
sexp_string_size(res));
|
||||
sexp_string_data(res)[sexp_string_size(res)] = '\0';
|
||||
sexp_update_string_index_lookup(ctx, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -1360,6 +1411,7 @@ sexp sexp_string_concatenate_op (sexp ctx, sexp self, sexp_sint_t n, sexp str_ls
|
|||
}
|
||||
}
|
||||
*p = '\0';
|
||||
sexp_update_string_index_lookup(ctx, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
|
@ -33,5 +33,6 @@ CPPFLAGS=-DSEXP_USE_UTF8_STRINGS=0
|
|||
CPPFLAGS=-DSEXP_USE_DISJOINT_STRING_CURSORS=0
|
||||
CFLAGS=-DSEXP_USE_STATIC_LIBS_NO_INCLUDE=0;CPPFLAGS=-DSEXP_USE_STATIC_LIBS=1
|
||||
CPPFLAGS=-DSEXP_USE_MUTABLE_STRINGS=0
|
||||
CPPFLAGS=-DSEXP_USE_STRING_INDEX_TABLE=1
|
||||
CPPFLAGS=-DSEXP_USE_STRICT_TOPLEVEL_BINDINGS=1
|
||||
CPPFLAGS=-DSEXP_USE_NO_FEATURES=1
|
||||
|
|
Loading…
Add table
Reference in a new issue