mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-18 21:29:19 +02:00
Add a feature to cache the most recent string index->cursor result
This is lighter-weight than building a full index->cursor table for the string, adding a constant two words to the memory required to store a string, as opposed to one word for every n characters. The cached cursor is used for any string-ref operation requesting an index at or after the most recently requested index, making potentially quadratic repeated string-ref procedures run in linear time. In theory, it could also use a heuristic to speed up moving backwards through the string when it thinks that moving the old cursor backwards would be faster than starting again at the start of the string. In practice, my logging of when the cached cursor is actually reused during the Chibi compilation and startup process shows that the most common case of moving backwards is going back to the start of the string anyway. Benchmarks to follow.
This commit is contained in:
parent
3080087d8c
commit
c09897c449
4 changed files with 39 additions and 2 deletions
8
eval.c
8
eval.c
|
@ -1988,8 +1988,14 @@ void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) {
|
|||
sexp_string_size(str) += new_len - old_len;
|
||||
}
|
||||
sexp_utf8_encode_char(p, new_len, c);
|
||||
if (old_len != new_len)
|
||||
if (old_len != new_len) {
|
||||
#if SEXP_USE_STRING_INDEX_TABLE
|
||||
sexp_update_string_index_lookup(ctx, str);
|
||||
#elif SEXP_USE_STRING_REF_CACHE
|
||||
sexp_cached_char_idx(str) = 0;
|
||||
sexp_cached_cursor(str) = sexp_make_string_cursor(0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch) {
|
||||
|
|
|
@ -252,6 +252,12 @@
|
|||
/* */
|
||||
/* #define SEXP_USE_STRING_INDEX_TABLE 1 */
|
||||
|
||||
/* uncomment this to cache a string cursor for string-ref calls */
|
||||
/* The default is not to use a cache. The goal of caching is to */
|
||||
/* soften the performance impact of repeated O(n) string-ref */
|
||||
/* operations on the same string. */
|
||||
/* #define SEXP_USE_STRING_REF_CACHE 1 */
|
||||
|
||||
/* uncomment this to disable automatic closing of ports */
|
||||
/* If enabled, the underlying FILE* for file ports will be */
|
||||
/* automatically closed when they're garbage collected. Doesn't */
|
||||
|
|
|
@ -481,6 +481,9 @@ struct sexp_struct {
|
|||
sexp bytes;
|
||||
#if SEXP_USE_STRING_INDEX_TABLE
|
||||
sexp charlens;
|
||||
#elif SEXP_USE_STRING_REF_CACHE
|
||||
sexp_uint_t cached_char_idx;
|
||||
sexp cached_cursor;
|
||||
#endif
|
||||
sexp_uint_t offset, length;
|
||||
#endif
|
||||
|
@ -1198,6 +1201,10 @@ enum sexp_uniform_vector_type {
|
|||
#define sexp_string_offset(x) (sexp_field(x, string, SEXP_STRING, offset))
|
||||
#define sexp_string_data(x) (sexp_bytes_data(sexp_string_bytes(x))+sexp_string_offset(x))
|
||||
#endif
|
||||
#if SEXP_USE_STRING_REF_CACHE
|
||||
#define sexp_cached_char_idx(x) (sexp_field(x, string, SEXP_STRING, cached_char_idx))
|
||||
#define sexp_cached_cursor(x) (sexp_field(x, string, SEXP_STRING, cached_cursor))
|
||||
#endif
|
||||
#define sexp_string_maybe_null_data(x) (sexp_not(x) ? NULL : sexp_string_data(x))
|
||||
|
||||
#if SEXP_USE_PACKED_STRINGS
|
||||
|
|
20
sexp.c
20
sexp.c
|
@ -500,6 +500,9 @@ static const char* sexp_initial_features[] = {
|
|||
#if SEXP_USE_STRING_INDEX_TABLE
|
||||
"string-index",
|
||||
#endif
|
||||
#if SEXP_USE_STRING_REF_CACHE
|
||||
"string-ref-cache",
|
||||
#endif
|
||||
#if SEXP_USE_GREEN_THREADS
|
||||
"threads",
|
||||
#endif
|
||||
|
@ -1254,6 +1257,7 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str,
|
|||
sexp_sint_t* chunklens;
|
||||
sexp_sint_t chunk;
|
||||
#endif
|
||||
sexp cursor;
|
||||
sexp_sint_t i, j, limit;
|
||||
unsigned char *p;
|
||||
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
||||
|
@ -1272,12 +1276,22 @@ sexp sexp_string_index_to_cursor (sexp ctx, sexp self, sexp_sint_t n, sexp str,
|
|||
i -= (chunk+1) * SEXP_STRING_INDEX_TABLE_CHUNK_SIZE;
|
||||
}
|
||||
}
|
||||
#elif SEXP_USE_STRING_REF_CACHE
|
||||
if (i >= sexp_cached_char_idx(str)) {
|
||||
j = sexp_unbox_string_cursor(sexp_cached_cursor(str));
|
||||
i -= sexp_cached_char_idx(str);
|
||||
}
|
||||
#endif
|
||||
for ( ; i>0 && j<limit; i--)
|
||||
j += sexp_utf8_initial_byte_count(p[j]);
|
||||
if (i != 0)
|
||||
return sexp_user_exception(ctx, self, "string-index->cursor: index out of range", index);
|
||||
return sexp_make_string_cursor(j);
|
||||
cursor = sexp_make_string_cursor(j);
|
||||
#if SEXP_USE_STRING_REF_CACHE
|
||||
sexp_cached_char_idx(str) = sexp_unbox_fixnum(index);
|
||||
sexp_cached_cursor(str) = cursor;
|
||||
#endif
|
||||
return cursor;
|
||||
}
|
||||
|
||||
sexp sexp_string_cursor_to_index (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp offset) {
|
||||
|
@ -1358,6 +1372,10 @@ sexp sexp_make_string_op (sexp ctx, sexp self, sexp_sint_t n, sexp len, sexp ch)
|
|||
sexp_string_bytes(s) = b;
|
||||
sexp_string_offset(s) = 0;
|
||||
sexp_string_size(s) = sexp_bytes_length(b);
|
||||
#if SEXP_USE_STRING_REF_CACHE
|
||||
sexp_cached_char_idx(s) = 0;
|
||||
sexp_cached_cursor(s) = sexp_make_string_cursor(0);
|
||||
#endif
|
||||
sexp_update_string_index_lookup(ctx, s);
|
||||
sexp_gc_release2(ctx);
|
||||
return s;
|
||||
|
|
Loading…
Add table
Reference in a new issue