From 8b817966e82aaae8b8ed0987a14c718f34b89227 Mon Sep 17 00:00:00 2001
From: Justin Ethier
Date: Mon, 23 Oct 2017 13:26:29 +0000
Subject: [PATCH] WIP

---
Review notes:

 * Purpose: first step of UTF-8 support. Adds char_type (a 32-bit code
   point), re-enables the length-taking string constructors under
   make_utf8_* names, and makes string-ref index by code point instead of
   by byte.

 * make_utf8_string_with_len: the code-point argument is named
   num_code_points, not num_cp. A macro parameter called num_cp is also
   substituted into the member access `cs.num_cp`, so any call whose
   argument is not literally `num_cp` would expand to a nonexistent member.

 * String append macro: the code-point total is read from the string
   objects (str1 / tmp). str[i] holds the `->str` text pointer of each
   argument, so passing it to string_num_cp() would reinterpret character
   data as a string_type.

 * Cyc_string_length is intentionally left untouched so that it keeps
   returning string_num_cp(). Cyc_string_ref below bounds-checks against
   string_num_cp(), and the two must agree for multi-byte strings.

 * Cyc_string_ref: codepoint is zero-initialised so it is never read
   indeterminate, and each byte is converted to uint8_t before decoding.
   The cast only matters if Cyc_utf8_decode takes a parameter wider than
   char on a signed-char target; its prototype is not part of this patch.

 * Known limitation: make_utf8_string_noalloc still sets num_cp to the
   byte length, which is only correct when every character is one byte.

 * Leading whitespace on context lines in this copy may not match the
   tree byte-for-byte; if a hunk is rejected, retry with
   `git apply --ignore-whitespace`.

 include/cyclone/types.h | 52 +++++++++++++++++++++++------------------
 runtime.c               | 24 ++++++++++++++-----
 2 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/include/cyclone/types.h b/include/cyclone/types.h
index 776aada6..3667d661 100644
--- a/include/cyclone/types.h
+++ b/include/cyclone/types.h
@@ -465,6 +465,12 @@ void clear_mutations(void *data);
 /** Minimum allowed value of a fixnum */
 #define CYC_FIXNUM_MIN -1073741824
 
+/**
+ * Explicit character type now that we are using UTF-8.
+ * Chars are still value types though
+ */
+typedef uint32_t char_type;
+
 /**
  * Determine if an object is an integer.
  */
@@ -778,29 +784,29 @@ typedef struct {
   cs.str = alloca(sizeof(char) * (len + 1)); \
   memcpy(cs.str, s, len + 1);}
 
-///**
-// * Create a new string with the given length
-// * (so it does not need to be computed)
-// */
-//#define make_string_with_len(cs, s, length) string_type cs; \
-//{ int len = length; \
-//  cs.hdr.mark = gc_color_red; \
-//  cs.hdr.grayed = 0; \
-//  cs.tag = string_tag; cs.len = len; \
-//  cs.num_cp = len; \
-//  cs.str = alloca(sizeof(char) * (len + 1)); \
-//  memcpy(cs.str, s, len); \
-//  cs.str[len] = '\0';}
-//
-///**
-// * Create a string object using the given C string and length.
-// * No allocation is done for the given C string. 
-// */
-//#define make_string_noalloc(cs, s, length) string_type cs; \
-//{ cs.hdr.mark = gc_color_red; cs.hdr.grayed = 0; \
-//  cs.tag = string_tag; cs.len = length; \
-//  cs.num_cp = length; \
-//  cs.str = s; }
+/**
+ * Create a new string with the given length
+ * (so it does not need to be computed)
+ */
+#define make_utf8_string_with_len(cs, s, length, num_code_points) string_type cs; \
+{ int len = length; \
+  cs.hdr.mark = gc_color_red; \
+  cs.hdr.grayed = 0; \
+  cs.tag = string_tag; cs.len = len; \
+  cs.num_cp = num_code_points; \
+  cs.str = alloca(sizeof(char) * (len + 1)); \
+  memcpy(cs.str, s, len); \
+  cs.str[len] = '\0';}
+
+/**
+ * Create a string object using the given C string and length.
+ * No allocation is done for the given C string.
+ */
+#define make_utf8_string_noalloc(cs, s, length) string_type cs; \
+{ cs.hdr.mark = gc_color_red; cs.hdr.grayed = 0; \
+  cs.tag = string_tag; cs.len = length; \
+  cs.num_cp = length; \
+  cs.str = s; }
 
 /** Get the length of a string, in characters (code points) */
 #define string_num_cp(x) (((string_type *) x)->num_cp)
diff --git a/runtime.c b/runtime.c
index c75c8ae1..4b228cd6 100644
--- a/runtime.c
+++ b/runtime.c
@@ -2041,7 +2041,7 @@ object Cyc_string_cmp(void *data, object str1, object str2)
       str[i] = ((string_type *)str1)->str; \
       len[i] = string_len((str1)); \
       total_len += len[i]; \
-      total_cp += string_num_cp(str1); \
+      total_cp += string_num_cp((str1)); \
     } \
     for (i = 1; i < argc; i++) { \
       tmp = va_arg(ap, object); \
@@ -2049,7 +2049,7 @@ object Cyc_string_cmp(void *data, object str1, object str2)
       str[i] = ((string_type *)tmp)->str; \
       len[i] = string_len((tmp)); \
       total_len += len[i]; \
-      total_cp += string_num_cp(tmp); \
+      total_cp += string_num_cp((tmp)); \
     } \
     buffer = bufferp = alloca(sizeof(char) * total_len); \
     for (i = 0; i < argc; i++) { \
@@ -2058,7 +2058,7 @@ object Cyc_string_cmp(void *data, object str1, object str2)
     } \
     *bufferp = '\0'; \
     make_string(result, buffer); \
-    string_num_cp(result) = total_cp; \
+    string_num_cp((&result)) = total_cp; \
     va_end(ap); \
     _return_closcall1(data, cont, &result); \
 }
@@ -2115,13 +2115,27 @@ object Cyc_string_ref(void *data, object str, object k)
 
   raw = string_str(str);
   idx = unbox_number(k);
-  len = string_len(str);
+  len = string_num_cp(str);
 
   if (idx < 0 || idx >= len) {
     Cyc_rt_raise2(data, "string-ref - invalid index", k);
   }
 
-  return obj_char2obj(raw[idx]);
+  {
+    char_type codepoint = 0;
+    uint32_t state = 0;
+    int count;
+
+    for (count = 0; *raw; ++raw){
+      if (!Cyc_utf8_decode(&state, &codepoint, (uint8_t)*raw)){
+        if (count == idx) break; // Reached requested index
+        count += 1;
+      }
+    }
+    if (state != CYC_UTF8_ACCEPT)
+      Cyc_rt_raise2(data, "string-ref - invalid character at index", k);
+    return obj_char2obj(codepoint);
+  }
 }
 
 object Cyc_substring(void *data, object cont, object str, object start,