WIP - string-set!

This commit is contained in:
Justin Ethier 2017-11-07 18:18:56 +00:00
parent cfdec73d78
commit f5787184da

View file

@ -2117,8 +2117,10 @@ object Cyc_string_byte_length(void *data, object str)
object Cyc_string_set(void *data, object str, object k, object chr) object Cyc_string_set(void *data, object str, object k, object chr)
{ {
char buf[5];
char *raw; char *raw;
int idx, len; int idx, len, buf_len;
char_type input_char;
Cyc_check_str(data, str); Cyc_check_str(data, str);
Cyc_check_num(data, k); Cyc_check_num(data, k);
@ -2127,6 +2129,10 @@ object Cyc_string_set(void *data, object str, object k, object chr)
Cyc_rt_raise2(data, "Expected char but received", chr); Cyc_rt_raise2(data, "Expected char but received", chr);
} }
input_char = obj_obj2char(chr);
Cyc_utf8_encode_char(buf, 5, input_char);
buf_len = strlen(buf);
raw = string_str(str); raw = string_str(str);
idx = unbox_number(k); idx = unbox_number(k);
len = string_len(str); len = string_len(str);
@ -2134,8 +2140,7 @@ object Cyc_string_set(void *data, object str, object k, object chr)
Cyc_check_bounds(data, "string-set!", len, idx); Cyc_check_bounds(data, "string-set!", len, idx);
// Take fast path if all chars are just 1 byte // Take fast path if all chars are just 1 byte
if (string_num_cp(str) == string_len(str)) { if (string_num_cp(str) == string_len(str) && buf_len == 1) {
// TODO: not good enough, chr could be multi-byte
raw[idx] = obj_obj2char(chr); raw[idx] = obj_obj2char(chr);
} else { } else {
fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), len); fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), len);
@ -2148,9 +2153,10 @@ fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), le
char *tmp = raw; char *tmp = raw;
char_type codepoint; char_type codepoint;
uint32_t state = 0; uint32_t state = 0;
int i = 0, count, start_len = 0, start_cp = 0; int i = 0, count, start_len = 0, start_cp = 0, bytes = 0;
for (count = 0; *tmp; ++tmp){ for (count = 0; *tmp; ++tmp){
bytes++;
if (!Cyc_utf8_decode(&state, &codepoint, (uint8_t)*tmp)){ if (!Cyc_utf8_decode(&state, &codepoint, (uint8_t)*tmp)){
if (count < idx) { if (count < idx) {
start_len = i; start_len = i;
@ -2159,6 +2165,7 @@ fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), le
break; break;
} }
count += 1; count += 1;
bytes = 0;
} }
i++; i++;
} }
@ -2171,6 +2178,13 @@ fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), le
// and we know the codepoint to be replaced. by calculating its length // and we know the codepoint to be replaced. by calculating its length
// we can compute where the end portion starts, and by using str we can // we can compute where the end portion starts, and by using str we can
// figure out how many remaining bytes/codepoints are in end // figure out how many remaining bytes/codepoints are in end
//
// 3 cases:
// - buf_len == bytes, just a straight replace
// - buf_len > bytes, will need to allocate more memory (!!)
// - buf_len < bytes, just replace, but pad with NUL ('\0') bytes.
// in this case need to ensure string_len is not
// reduced, because the original value still matters for GC purposes
} }
return str; return str;