From cb1bfef031e9768f6a27550e6cd65de1605f74a7 Mon Sep 17 00:00:00 2001
From: Justin Ethier <justin.ethier@gmail.com>
Date: Mon, 23 Oct 2017 18:47:01 -0400
Subject: [PATCH] WIP - string-set!

---
 runtime.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/runtime.c b/runtime.c
index 607f935d..5387dbf9 100644
--- a/runtime.c
+++ b/runtime.c
@@ -2101,7 +2101,45 @@ object Cyc_string_set(void *data, object str, object k, object chr)
   len = string_len(str);
 
   Cyc_check_bounds(data, "string-set!", len, idx);
-  raw[idx] = obj_obj2char(chr);
+
+  // Take fast path if all chars are just 1 byte
+  if (string_num_cp(str) == string_len(str)) {
+    raw[idx] = obj_obj2char(chr);
+  } else {
+    // TODO: UTF-8 support.
+    // Find the codepoint at k and figure out how many bytes it occupies,
+    // then allocate a new string: (start) + chr + (end).
+    // Alternatively, do not allocate if chr uses as many or fewer bytes
+    // than the codepoint it is replacing.
+
+    char *tmp = raw;
+    char_type codepoint;
+    uint32_t state = 0;
+    int i = 0, count, start_len = 0, start_cp = 0;
+
+    for (count = 0; *tmp; ++tmp){
+      if (!Cyc_utf8_decode(&state, &codepoint, *tmp)){
+        if (count < idx) {
+          start_len = i;
+          start_cp = count;
+        } else if (count == idx) {
+          break;
+        }
+        count += 1;
+      }
+      i++;
+    }
+    if (state != CYC_UTF8_ACCEPT)
+       Cyc_rt_raise2(data, "string-set! - invalid character at index", k);
+
+    // TODO: perform the actual mutation.
+    //
+    // Now we know the length of start (both in codepoints and bytes),
+    // and we know the codepoint to be replaced. By calculating its length
+    // we can compute where the end portion starts, and by using str we can
+    // figure out how many remaining bytes/codepoints are in end.
+
+  }
   return str;
 }