From f5787184dae2e98a736f63225aafe90a68923514 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Tue, 7 Nov 2017 18:18:56 +0000 Subject: [PATCH] WIP - string-set! --- runtime.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/runtime.c b/runtime.c index e7792702..f129899a 100644 --- a/runtime.c +++ b/runtime.c @@ -2117,8 +2117,10 @@ object Cyc_string_byte_length(void *data, object str) object Cyc_string_set(void *data, object str, object k, object chr) { + char buf[5]; char *raw; - int idx, len; + int idx, len, buf_len; + char_type input_char; Cyc_check_str(data, str); Cyc_check_num(data, k); @@ -2127,6 +2129,10 @@ object Cyc_string_set(void *data, object str, object k, object chr) Cyc_rt_raise2(data, "Expected char but received", chr); } + input_char = obj_obj2char(chr); + Cyc_utf8_encode_char(buf, 5, input_char); + buf_len = strlen(buf); + raw = string_str(str); idx = unbox_number(k); len = string_len(str); @@ -2134,8 +2140,7 @@ object Cyc_string_set(void *data, object str, object k, object chr) Cyc_check_bounds(data, "string-set!", len, idx); // Take fast path if all chars are just 1 byte - if (string_num_cp(str) == string_len(str)) { - // TODO: not good enough, chr could be multi-byte + if (string_num_cp(str) == string_len(str) && buf_len == 1) { raw[idx] = obj_obj2char(chr); } else { fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), len); @@ -2148,9 +2153,10 @@ fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), le char *tmp = raw; char_type codepoint; uint32_t state = 0; - int i = 0, count, start_len = 0, start_cp = 0; + int i = 0, count, start_len = 0, start_cp = 0, bytes = 0; for (count = 0; *tmp; ++tmp){ + bytes++; if (!Cyc_utf8_decode(&state, &codepoint, (uint8_t)*tmp)){ if (count < idx) { start_len = i; @@ -2159,6 +2165,7 @@ fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), le break; } count += 1; + bytes = 0; } i++; } @@ -2171,6 
+2178,13 @@ fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), le // and we know the codepoint to be replaced. by calculating its length // we can compute where the end portion starts, and by using str we can // figure out how many remaining bytes/codepoints are in end + // + // 3 cases: + // - buf_len = bytes, just straight replace + // - buf_len > bytes, will need to allocate more memory (!!) + // - buf_len < bytes, just replace, but pad with NUL chars. + // in this case need to ensure string_len is not + // reduced because original value still matters for GC purposes } return str;