mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-18 21:29:19 +02:00
Merge pull request #793 from dpk/string-ref-cache
Add a feature to cache the most recent string index->cursor result
This commit is contained in:
commit
d67fa42d0c
4 changed files with 76 additions and 4 deletions
8
eval.c
8
eval.c
|
@ -2026,8 +2026,14 @@ void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) {
|
||||||
sexp_copy_on_writep(str) = 0;
|
sexp_copy_on_writep(str) = 0;
|
||||||
}
|
}
|
||||||
sexp_utf8_encode_char(p, new_len, c);
|
sexp_utf8_encode_char(p, new_len, c);
|
||||||
if (old_len != new_len)
|
if (old_len != new_len) {
|
||||||
|
#if SEXP_USE_STRING_INDEX_TABLE
|
||||||
sexp_update_string_index_lookup(ctx, str);
|
sexp_update_string_index_lookup(ctx, str);
|
||||||
|
#elif SEXP_USE_STRING_REF_CACHE
|
||||||
|
sexp_cached_char_idx(str) = 0;
|
||||||
|
sexp_cached_cursor(str) = sexp_make_string_cursor(0);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch) {
|
sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch) {
|
||||||
|
|
|
@ -256,6 +256,12 @@
|
||||||
/* */
|
/* */
|
||||||
/* #define SEXP_USE_STRING_INDEX_TABLE 1 */
|
/* #define SEXP_USE_STRING_INDEX_TABLE 1 */
|
||||||
|
|
||||||
|
/* uncomment this to cache a string cursor for string-ref calls */
|
||||||
|
/* The default is not to use a cache. The goal of caching is to */
|
||||||
|
/* soften the performance impact of repeated O(n) string-ref */
|
||||||
|
/* operations on the same string. */
|
||||||
|
/* #define SEXP_USE_STRING_REF_CACHE 1 */
|
||||||
|
|
||||||
/* uncomment this to disable automatic closing of ports */
|
/* uncomment this to disable automatic closing of ports */
|
||||||
/* If enabled, the underlying FILE* for file ports will be */
|
/* If enabled, the underlying FILE* for file ports will be */
|
||||||
/* automatically closed when they're garbage collected. Doesn't */
|
/* automatically closed when they're garbage collected. Doesn't */
|
||||||
|
|
|
@ -480,6 +480,9 @@ struct sexp_struct {
|
||||||
sexp bytes;
|
sexp bytes;
|
||||||
#if SEXP_USE_STRING_INDEX_TABLE
|
#if SEXP_USE_STRING_INDEX_TABLE
|
||||||
sexp charlens;
|
sexp charlens;
|
||||||
|
#elif SEXP_USE_STRING_REF_CACHE
|
||||||
|
sexp_uint_t cached_char_idx;
|
||||||
|
sexp cached_cursor;
|
||||||
#endif
|
#endif
|
||||||
sexp_uint_t offset, length;
|
sexp_uint_t offset, length;
|
||||||
#endif
|
#endif
|
||||||
|
@ -1204,6 +1207,10 @@ enum sexp_uniform_vector_type {
|
||||||
#define sexp_string_offset(x) (sexp_field(x, string, SEXP_STRING, offset))
|
#define sexp_string_offset(x) (sexp_field(x, string, SEXP_STRING, offset))
|
||||||
#define sexp_string_data(x) (sexp_bytes_data(sexp_string_bytes(x))+sexp_string_offset(x))
|
#define sexp_string_data(x) (sexp_bytes_data(sexp_string_bytes(x))+sexp_string_offset(x))
|
||||||
#endif
|
#endif
|
||||||
|
#if SEXP_USE_STRING_REF_CACHE
|
||||||
|
#define sexp_cached_char_idx(x) (sexp_field(x, string, SEXP_STRING, cached_char_idx))
|
||||||
|
#define sexp_cached_cursor(x) (sexp_field(x, string, SEXP_STRING, cached_cursor))
|
||||||
|
#endif
|
||||||
#define sexp_string_maybe_null_data(x) (sexp_not(x) ? NULL : sexp_string_data(x))
|
#define sexp_string_maybe_null_data(x) (sexp_not(x) ? NULL : sexp_string_data(x))
|
||||||
|
|
||||||
#if SEXP_USE_PACKED_STRINGS
|
#if SEXP_USE_PACKED_STRINGS
|
||||||
|
|
59
sexp.c
59
sexp.c
|
@ -500,6 +500,9 @@ static const char* sexp_initial_features[] = {
|
||||||
#if SEXP_USE_STRING_INDEX_TABLE
|
#if SEXP_USE_STRING_INDEX_TABLE
|
||||||
"string-index",
|
"string-index",
|
||||||
#endif
|
#endif
|
||||||
|
#if SEXP_USE_STRING_REF_CACHE
|
||||||
|
"string-ref-cache",
|
||||||
|
#endif
|
||||||
#if SEXP_USE_GREEN_THREADS
|
#if SEXP_USE_GREEN_THREADS
|
||||||
"threads",
|
"threads",
|
||||||
#endif
|
#endif
|
||||||
|
@ -1254,8 +1257,12 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str,
|
||||||
sexp_sint_t* chunklens;
|
sexp_sint_t* chunklens;
|
||||||
sexp_sint_t chunk;
|
sexp_sint_t chunk;
|
||||||
#endif
|
#endif
|
||||||
|
sexp cursor;
|
||||||
sexp_sint_t i, j, limit;
|
sexp_sint_t i, j, limit;
|
||||||
unsigned char *p;
|
unsigned char *p;
|
||||||
|
#if SEXP_USE_STRING_REF_CACHE
|
||||||
|
unsigned char *q;
|
||||||
|
#endif
|
||||||
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
||||||
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, index);
|
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, index);
|
||||||
p = (unsigned char*)sexp_string_data(str);
|
p = (unsigned char*)sexp_string_data(str);
|
||||||
|
@ -1272,12 +1279,37 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str,
|
||||||
i -= (chunk+1) * SEXP_STRING_INDEX_TABLE_CHUNK_SIZE;
|
i -= (chunk+1) * SEXP_STRING_INDEX_TABLE_CHUNK_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#elif SEXP_USE_STRING_REF_CACHE
|
||||||
|
if (i > (sexp_cached_char_idx(str) + ((sexp_string_length(str) - sexp_cached_char_idx(str)) >> 1))) {
|
||||||
|
j = sexp_string_size(str);
|
||||||
|
i = -(sexp_string_length(str) - i);
|
||||||
|
} else if (i > (sexp_cached_char_idx(str) >> 1)) {
|
||||||
|
j = sexp_unbox_string_cursor(sexp_cached_cursor(str));
|
||||||
|
i -= sexp_cached_char_idx(str);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
for ( ; i>0 && j<limit; i--)
|
|
||||||
j += sexp_utf8_initial_byte_count(p[j]);
|
#if SEXP_USE_STRING_REF_CACHE
|
||||||
|
if (i >= 0) {
|
||||||
|
#endif
|
||||||
|
for ( ; i>0 && j<limit; i--)
|
||||||
|
j += sexp_utf8_initial_byte_count(p[j]);
|
||||||
|
#if SEXP_USE_STRING_REF_CACHE
|
||||||
|
} else {
|
||||||
|
for (q=p+j; i<0 && q>=p; i++)
|
||||||
|
q = (unsigned char*)sexp_string_utf8_prev(q);
|
||||||
|
j = q - p;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (i != 0)
|
if (i != 0)
|
||||||
return sexp_user_exception(ctx, self, "string-index->cursor: index out of range", index);
|
return sexp_user_exception(ctx, self, "string-index->cursor: index out of range", index);
|
||||||
return sexp_make_string_cursor(j);
|
cursor = sexp_make_string_cursor(j);
|
||||||
|
#if SEXP_USE_STRING_REF_CACHE
|
||||||
|
sexp_cached_char_idx(str) = sexp_unbox_fixnum(index);
|
||||||
|
sexp_cached_cursor(str) = cursor;
|
||||||
|
#endif
|
||||||
|
return cursor;
|
||||||
}
|
}
|
||||||
|
|
||||||
sexp sexp_string_cursor_to_index (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp offset) {
|
sexp sexp_string_cursor_to_index (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp offset) {
|
||||||
|
@ -1286,7 +1318,24 @@ sexp sexp_string_cursor_to_index (sexp ctx, sexp self, sexp_sint_t n, sexp str,
|
||||||
sexp_assert_type(ctx, sexp_string_cursorp, SEXP_STRING_CURSOR, offset);
|
sexp_assert_type(ctx, sexp_string_cursorp, SEXP_STRING_CURSOR, offset);
|
||||||
if (off < 0 || off > (sexp_sint_t)sexp_string_size(str))
|
if (off < 0 || off > (sexp_sint_t)sexp_string_size(str))
|
||||||
return sexp_user_exception(ctx, self, "string-cursor->index: offset out of range", offset);
|
return sexp_user_exception(ctx, self, "string-cursor->index: offset out of range", offset);
|
||||||
|
#if SEXP_USE_STRING_REF_CACHE
|
||||||
|
sexp_uint_t cached_idx = sexp_cached_char_idx(str);
|
||||||
|
sexp_sint_t cached_off = sexp_unbox_string_cursor(sexp_cached_cursor(str));
|
||||||
|
unsigned char* string_data = (unsigned char*)sexp_string_data(str);
|
||||||
|
sexp_sint_t idx_delta;
|
||||||
|
if (off >= cached_off) {
|
||||||
|
idx_delta = sexp_string_utf8_length(string_data+cached_off, off-cached_off);
|
||||||
|
} else {
|
||||||
|
idx_delta = 0 - sexp_string_utf8_length(string_data+off, cached_off-off);
|
||||||
|
}
|
||||||
|
|
||||||
|
sexp_uint_t new_idx = cached_idx + idx_delta;
|
||||||
|
sexp_cached_char_idx(str) = new_idx;
|
||||||
|
sexp_cached_cursor(str) = offset;
|
||||||
|
return sexp_make_fixnum(new_idx);
|
||||||
|
#else
|
||||||
return sexp_make_fixnum(sexp_string_utf8_length((unsigned char*)sexp_string_data(str), off));
|
return sexp_make_fixnum(sexp_string_utf8_length((unsigned char*)sexp_string_data(str), off));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
sexp sexp_string_cursor_offset (sexp ctx, sexp self, sexp_sint_t n, sexp cur) {
|
sexp sexp_string_cursor_offset (sexp ctx, sexp self, sexp_sint_t n, sexp cur) {
|
||||||
|
@ -1358,6 +1407,10 @@ sexp sexp_make_string_op (sexp ctx, sexp self, sexp_sint_t n, sexp len, sexp ch)
|
||||||
sexp_string_bytes(s) = b;
|
sexp_string_bytes(s) = b;
|
||||||
sexp_string_offset(s) = 0;
|
sexp_string_offset(s) = 0;
|
||||||
sexp_string_size(s) = sexp_bytes_length(b);
|
sexp_string_size(s) = sexp_bytes_length(b);
|
||||||
|
#if SEXP_USE_STRING_REF_CACHE
|
||||||
|
sexp_cached_char_idx(s) = 0;
|
||||||
|
sexp_cached_cursor(s) = sexp_make_string_cursor(0);
|
||||||
|
#endif
|
||||||
sexp_update_string_index_lookup(ctx, s);
|
sexp_update_string_index_lookup(ctx, s);
|
||||||
sexp_gc_release2(ctx);
|
sexp_gc_release2(ctx);
|
||||||
return s;
|
return s;
|
||||||
|
|
Loading…
Add table
Reference in a new issue