This commit is contained in:
Justin Ethier 2017-10-23 13:26:29 +00:00
parent 14626f15c4
commit 8b817966e8
2 changed files with 49 additions and 29 deletions

View file

@ -465,6 +465,12 @@ void clear_mutations(void *data);
/** Minimum allowed value of a fixnum */ /** Minimum allowed value of a fixnum */
#define CYC_FIXNUM_MIN -1073741824 #define CYC_FIXNUM_MIN -1073741824
/**
 * Explicit character type now that strings are encoded as UTF-8.
 * One character may be decoded from several bytes of a string, so it is
 * stored in an unsigned 32-bit integer rather than a plain char.
 * Chars are still value types, though.
 */
typedef uint32_t char_type;
/** /**
* Determine if an object is an integer. * Determine if an object is an integer.
*/ */
@ -778,29 +784,29 @@ typedef struct {
cs.str = alloca(sizeof(char) * (len + 1)); \ cs.str = alloca(sizeof(char) * (len + 1)); \
memcpy(cs.str, s, len + 1);} memcpy(cs.str, s, len + 1);}
///** /**
// * Create a new string with the given length * Create a new string with the given length
// * (so it does not need to be computed) * (so it does not need to be computed)
// */ */
//#define make_string_with_len(cs, s, length) string_type cs; \ #define make_utf8_string_with_len(cs, s, length, num_cp) string_type cs; \
//{ int len = length; \ { int len = length; \
// cs.hdr.mark = gc_color_red; \ cs.hdr.mark = gc_color_red; \
// cs.hdr.grayed = 0; \ cs.hdr.grayed = 0; \
// cs.tag = string_tag; cs.len = len; \ cs.tag = string_tag; cs.len = len; \
// cs.num_cp = len; \ cs.num_cp = num_cp; \
// cs.str = alloca(sizeof(char) * (len + 1)); \ cs.str = alloca(sizeof(char) * (len + 1)); \
// memcpy(cs.str, s, len); \ memcpy(cs.str, s, len); \
// cs.str[len] = '\0';} cs.str[len] = '\0';}
//
///** /**
// * Create a string object using the given C string and length. * Create a string object using the given C string and length.
// * No allocation is done for the given C string. * No allocation is done for the given C string.
// */ */
//#define make_string_noalloc(cs, s, length) string_type cs; \ #define make_utf8_string_noalloc(cs, s, length) string_type cs; \
//{ cs.hdr.mark = gc_color_red; cs.hdr.grayed = 0; \ { cs.hdr.mark = gc_color_red; cs.hdr.grayed = 0; \
// cs.tag = string_tag; cs.len = length; \ cs.tag = string_tag; cs.len = length; \
// cs.num_cp = length; \ cs.num_cp = length; \
// cs.str = s; } cs.str = s; }
/** Get the length of a string, in characters (code points) */ /** Get the length of a string, in characters (code points) */
#define string_num_cp(x) (((string_type *) x)->num_cp) #define string_num_cp(x) (((string_type *) x)->num_cp)

View file

@ -2041,7 +2041,7 @@ object Cyc_string_cmp(void *data, object str1, object str2)
str[i] = ((string_type *)str1)->str; \ str[i] = ((string_type *)str1)->str; \
len[i] = string_len((str1)); \ len[i] = string_len((str1)); \
total_len += len[i]; \ total_len += len[i]; \
total_cp += string_num_cp(str1); \ total_cp += string_num_cp((str[i])); \
} \ } \
for (i = 1; i < argc; i++) { \ for (i = 1; i < argc; i++) { \
tmp = va_arg(ap, object); \ tmp = va_arg(ap, object); \
@ -2049,7 +2049,7 @@ object Cyc_string_cmp(void *data, object str1, object str2)
str[i] = ((string_type *)tmp)->str; \ str[i] = ((string_type *)tmp)->str; \
len[i] = string_len((tmp)); \ len[i] = string_len((tmp)); \
total_len += len[i]; \ total_len += len[i]; \
total_cp += string_num_cp(tmp); \ total_cp += string_num_cp((str[i])); \
} \ } \
buffer = bufferp = alloca(sizeof(char) * total_len); \ buffer = bufferp = alloca(sizeof(char) * total_len); \
for (i = 0; i < argc; i++) { \ for (i = 0; i < argc; i++) { \
@ -2058,7 +2058,7 @@ object Cyc_string_cmp(void *data, object str1, object str2)
} \ } \
*bufferp = '\0'; \ *bufferp = '\0'; \
make_string(result, buffer); \ make_string(result, buffer); \
string_num_cp(result) = total_cp; \ string_num_cp((&result)) = total_cp; \
va_end(ap); \ va_end(ap); \
_return_closcall1(data, cont, &result); \ _return_closcall1(data, cont, &result); \
} }
@ -2081,7 +2081,7 @@ object Cyc_string_append(void *data, object cont, int _argc, object str1, ...)
object Cyc_string_length(void *data, object str) object Cyc_string_length(void *data, object str)
{ {
Cyc_check_str(data, str); Cyc_check_str(data, str);
return obj_int2obj(string_num_cp(str)); return obj_int2obj(string_len(str));
} }
object Cyc_string_set(void *data, object str, object k, object chr) object Cyc_string_set(void *data, object str, object k, object chr)
@ -2115,13 +2115,27 @@ object Cyc_string_ref(void *data, object str, object k)
raw = string_str(str); raw = string_str(str);
idx = unbox_number(k); idx = unbox_number(k);
len = string_len(str); len = string_num_cp(str);
if (idx < 0 || idx >= len) { if (idx < 0 || idx >= len) {
Cyc_rt_raise2(data, "string-ref - invalid index", k); Cyc_rt_raise2(data, "string-ref - invalid index", k);
} }
return obj_char2obj(raw[idx]); {
char_type codepoint;
uint32_t state = 0;
int count;
for (count = 0; *raw; ++raw){
if (!Cyc_utf8_decode(&state, &codepoint, *raw)){
if (count == idx) break; // Reached requested index
count += 1;
}
}
if (state != CYC_UTF8_ACCEPT)
Cyc_rt_raise2(data, "string-ref - invalid character at index", k);
return obj_char2obj(codepoint);
}
} }
object Cyc_substring(void *data, object cont, object str, object start, object Cyc_substring(void *data, object cont, object str, object start,