WIP - string-set!

2025-07-15 16:57:35 +02:00 · 2017-11-07 18:18:56 +00:00 · 2017-11-07 18:18:56 +00:00 · f5787184da
commit f5787184da
parent cfdec73d78
1 changed files with 18 additions and 4 deletions
--- a/runtime.c
+++ b/runtime.c
@ -2117,8 +2117,10 @@ object Cyc_string_byte_length(void *data, object str)
 object Cyc_string_set(void *data, object str, object k, object chr)
 {
  char buf[5];
  char *raw;
-  int idx, len;
+  int idx, len, buf_len;
  char_type input_char;
  Cyc_check_str(data, str);
  Cyc_check_num(data, k);
@ -2127,6 +2129,10 @@ object Cyc_string_set(void *data, object str, object k, object chr)
    Cyc_rt_raise2(data, "Expected char but received", chr);
  }
  input_char = obj_obj2char(chr);
  Cyc_utf8_encode_char(buf, 5, input_char);
  buf_len = strlen(buf);
  raw = string_str(str);
  idx = unbox_number(k);
  len = string_len(str);
@ -2134,8 +2140,7 @@ object Cyc_string_set(void *data, object str, object k, object chr)
  Cyc_check_bounds(data, "string-set!", len, idx);
  // Take fast path if all chars are just 1 byte
-  if (string_num_cp(str) == string_len(str)) {
+  if (string_num_cp(str) == string_len(str) && buf_len == 1) {
    // TODO: not good enough, chr could be multi-byte
    raw[idx] = obj_obj2char(chr);
  } else {
 fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), len);
@ -2148,9 +2153,10 @@ fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), le
    char *tmp = raw;
    char_type codepoint;
    uint32_t state = 0;
-    int i = 0, count, start_len = 0, start_cp = 0;
+    int i = 0, count, start_len = 0, start_cp = 0, bytes = 0;
    for (count = 0; *tmp; ++tmp){
      bytes++;
      if (!Cyc_utf8_decode(&state, &codepoint, (uint8_t)*tmp)){
        if (count < idx) {
          start_len = i;
@ -2159,6 +2165,7 @@ fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), le
          break;
        }
        count += 1;
        bytes = 0;
      }
      i++;
    }
@ -2171,6 +2178,13 @@ fprintf(stderr, "DEBUG %s, num_cp = %d, len = %d\n", raw, string_num_cp(str), le
    // and we know the codepoint to be replaced. by calculating its length
    // we can compute where the end portion starts, and by using str we can
    // figure out how many remaining bytes/codepoints are in end
    //
    // 3 cases:
    // - buf_len == bytes, just a straight replace
    // - buf_len > bytes, will need to allocate more memory (!!)
    // - buf_len < bytes, just replace, but pad with NUL chars.
    //                    In this case we need to ensure string_len is not
    //                    reduced, because the original value still matters for GC purposes
  }
  return str;