diff --git a/include/cyclone/runtime.h b/include/cyclone/runtime.h
index dbbfb8d9..21b204c5 100644
--- a/include/cyclone/runtime.h
+++ b/include/cyclone/runtime.h
@@ -716,7 +716,7 @@ void Cyc_set_globals_changed(gc_thread_data *thd);
 #define CYC_UTF8_ACCEPT 0
 #define CYC_UTF8_REJECT 1
 uint32_t Cyc_utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte);
-int Cyc_utf8_count_code_points(uint8_t* s, size_t* count);
+int Cyc_utf8_count_code_points(uint8_t* s);
 uint32_t Cyc_utf8_validate_stream(uint32_t *state, char *str, size_t len);
 uint32_t Cyc_utf8_validate(char *str, size_t len);
 /**@}*/
diff --git a/include/cyclone/types.h b/include/cyclone/types.h
index f6fb2b1e..776aada6 100644
--- a/include/cyclone/types.h
+++ b/include/cyclone/types.h
@@ -764,17 +764,27 @@ typedef struct {
   cs.num_cp = length; \
   cs.str = s; }
 
-///** Create a new string in the nursery */
-//#define make_string(cs, s) string_type cs; \
-//{ int len = strlen(s); \
-//  cs.hdr.mark = gc_color_red; \
-//  cs.hdr.grayed = 0; \
-//  cs.tag = string_tag; \
-//  cs.num_cp = len; \
-//  cs.len = len; \
-//  cs.str = alloca(sizeof(char) * (len + 1)); \
-//  memcpy(cs.str, s, len + 1);}
-//
+/**
+ * Create a new string in the nursery (character storage comes from alloca).
+ * Declares string object `cs` and copies the NUL-terminated C string `s`
+ * into it, recording the byte length in `len` and the number of UTF-8
+ * code points in `num_cp`. If the code point count is negative, the error
+ * "Invalid UTF-8 characters in string" is raised via Cyc_rt_raise_msg
+ * using `data`. `s` is expanded more than once, so it must be an
+ * expression without side effects.
+ */
+#define make_utf8_string(data, cs, s) string_type cs; \
+{ int len = strlen(s); \
+  cs.hdr.mark = gc_color_red; \
+  cs.hdr.grayed = 0; \
+  cs.tag = string_tag; \
+  cs.num_cp = Cyc_utf8_count_code_points((uint8_t *)(s)); \
+  if (cs.num_cp < 0) { \
+    Cyc_rt_raise_msg(data, "Invalid UTF-8 characters in string"); \
+  } \
+  cs.len = len; \
+  cs.str = alloca(sizeof(char) * (len + 1)); \
+  memcpy(cs.str, s, len + 1);}
+
 ///**
 // * Create a new string with the given length
 // * (so it does not need to be computed)
diff --git a/runtime.c b/runtime.c
index a0662e81..ef2bafb0 100644
--- a/runtime.c
+++ b/runtime.c
@@ -6403,15 +6403,22 @@ uint32_t Cyc_utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
  * Count the number of code points in a string.
  * Based on example code from Bjoern Hoehrmann.
+ *
+ * @param s  NUL-terminated sequence of bytes to scan
+ * @return   Number of code points found in `s`, or -1 if the decoder is
+ *           not in the CYC_UTF8_ACCEPT state when the terminator is reached
  */
-int Cyc_utf8_count_code_points(uint8_t* s, size_t* count) {
+int Cyc_utf8_count_code_points(uint8_t* s) {
   uint32_t codepoint;
   uint32_t state = 0;
+  int count;
 
-  for (*count = 0; *s; ++s)
+  for (count = 0; *s; ++s)
     if (!Cyc_utf8_decode(&state, &codepoint, *s))
-      *count += 1;
+      count += 1;
 
-  return state != CYC_UTF8_ACCEPT;
+  if (state != CYC_UTF8_ACCEPT)
+    return -1;
+  return count;
 }
 
 // TODO: index into X codepoint in a string