This commit is contained in:
Justin Ethier 2017-10-23 13:26:29 +00:00
parent 14626f15c4
commit 8b817966e8
2 changed files with 49 additions and 29 deletions

View file

@ -465,6 +465,12 @@ void clear_mutations(void *data);
/**
 * Minimum allowed value of a fixnum.
 * The negative literal is parenthesized so the macro always expands as a
 * single operand, whatever expression it is placed in.
 */
#define CYC_FIXNUM_MIN (-1073741824)
/**
 * Explicit character type, introduced now that strings are UTF-8 encoded.
 * It is a 32-bit unsigned integer rather than a plain `char`.
 * Chars are still value types, though.
 * NOTE(review): presumably holds a single decoded code point - confirm
 * against the callers that box/unbox characters.
 */
typedef uint32_t char_type;
/**
* Determine if an object is an integer.
*/
@ -778,29 +784,29 @@ typedef struct {
cs.str = alloca(sizeof(char) * (len + 1)); \
memcpy(cs.str, s, len + 1);}
///**
// * Create a new string with the given length
// * (so it does not need to be computed)
// */
//#define make_string_with_len(cs, s, length) string_type cs; \
//{ int len = length; \
// cs.hdr.mark = gc_color_red; \
// cs.hdr.grayed = 0; \
// cs.tag = string_tag; cs.len = len; \
// cs.num_cp = len; \
// cs.str = alloca(sizeof(char) * (len + 1)); \
// memcpy(cs.str, s, len); \
// cs.str[len] = '\0';}
//
///**
// * Create a string object using the given C string and length.
// * No allocation is done for the given C string.
// */
//#define make_string_noalloc(cs, s, length) string_type cs; \
//{ cs.hdr.mark = gc_color_red; cs.hdr.grayed = 0; \
// cs.tag = string_tag; cs.len = length; \
// cs.num_cp = length; \
// cs.str = s; }
/**
 * Declare a string object named `cs` and fill it with a copy of the first
 * `length` bytes of `s`. Both the byte length and the code point count are
 * supplied by the caller, so neither needs to be computed here.
 *
 * cs              - name of the string_type variable to declare
 * s               - source bytes; exactly `length` bytes are copied
 * length          - byte count, excluding the terminator; evaluated once
 * num_code_points - value stored in cs.num_cp
 *
 * The copy is allocated with alloca() and is always NUL-terminated.
 *
 * The last parameter must NOT be spelled `num_cp`: a macro parameter is
 * substituted everywhere in the replacement list, including after `cs.`,
 * which would rewrite the member access itself. The local is given an
 * unusual name so that arguments mentioning `len` are not captured by it.
 */
#define make_utf8_string_with_len(cs, s, length, num_code_points) string_type cs; \
{ int str_byte_len_ = (length); \
  cs.hdr.mark = gc_color_red; \
  cs.hdr.grayed = 0; \
  cs.tag = string_tag; cs.len = str_byte_len_; \
  cs.num_cp = (num_code_points); \
  cs.str = alloca(sizeof(char) * (str_byte_len_ + 1)); \
  memcpy(cs.str, (s), str_byte_len_); \
  cs.str[str_byte_len_] = '\0';}
/**
 * Declare a string object named `cs` that refers directly to the given
 * C string. No allocation or copy is done: cs.str is set to `s` itself.
 *
 * cs     - name of the string_type variable to declare
 * s      - buffer the new string object will point at
 * length - stored as both cs.len and cs.num_cp; evaluated exactly once
 *
 * NOTE(review): the byte length is also used as the code point count.
 * The two only coincide when every character occupies a single byte, so
 * callers passing multi-byte UTF-8 text presumably need to set num_cp
 * themselves afterwards - confirm against call sites.
 */
#define make_utf8_string_noalloc(cs, s, length) string_type cs; \
{ int str_byte_len_ = (length); \
  cs.hdr.mark = gc_color_red; cs.hdr.grayed = 0; \
  cs.tag = string_tag; cs.len = str_byte_len_; \
  cs.num_cp = str_byte_len_; \
  cs.str = (s); }
/**
 * Get the length of a string, in characters (code points).
 * The argument is parenthesized before the cast so that any pointer-valued
 * expression can be passed, not just a plain identifier. The expansion is
 * an lvalue, so it can also be used as the target of an assignment.
 */
#define string_num_cp(x) (((string_type *) (x))->num_cp)

View file

@ -2041,7 +2041,7 @@ object Cyc_string_cmp(void *data, object str1, object str2)
str[i] = ((string_type *)str1)->str; \
len[i] = string_len((str1)); \
total_len += len[i]; \
total_cp += string_num_cp(str1); \
total_cp += string_num_cp((str[i])); \
} \
for (i = 1; i < argc; i++) { \
tmp = va_arg(ap, object); \
@ -2049,7 +2049,7 @@ object Cyc_string_cmp(void *data, object str1, object str2)
str[i] = ((string_type *)tmp)->str; \
len[i] = string_len((tmp)); \
total_len += len[i]; \
total_cp += string_num_cp(tmp); \
total_cp += string_num_cp((str[i])); \
} \
buffer = bufferp = alloca(sizeof(char) * total_len); \
for (i = 0; i < argc; i++) { \
@ -2058,7 +2058,7 @@ object Cyc_string_cmp(void *data, object str1, object str2)
} \
*bufferp = '\0'; \
make_string(result, buffer); \
string_num_cp(result) = total_cp; \
string_num_cp((&result)) = total_cp; \
va_end(ap); \
_return_closcall1(data, cont, &result); \
}
@ -2081,7 +2081,7 @@ object Cyc_string_append(void *data, object cont, int _argc, object str1, ...)
/**
 * Return the length of a string, in characters (code points), as a boxed
 * integer.
 *
 * data - passed through to Cyc_check_str
 * str  - the string object to measure
 *
 * A second, unreachable `return` based on the byte length used to follow
 * the first one; it has been removed. The code point count is the measure
 * Cyc_string_ref bounds-checks its index against, so the two stay
 * consistent.
 */
object Cyc_string_length(void *data, object str)
{
  /* presumably raises an error when str is not a string - confirm */
  Cyc_check_str(data, str);
  return obj_int2obj(string_num_cp(str));
}
object Cyc_string_set(void *data, object str, object k, object chr)
@ -2115,13 +2115,27 @@ object Cyc_string_ref(void *data, object str, object k)
raw = string_str(str);
idx = unbox_number(k);
len = string_len(str);
len = string_num_cp(str);
if (idx < 0 || idx >= len) {
Cyc_rt_raise2(data, "string-ref - invalid index", k);
}
return obj_char2obj(raw[idx]);
{
char_type codepoint;
uint32_t state = 0;
int count;
for (count = 0; *raw; ++raw){
if (!Cyc_utf8_decode(&state, &codepoint, *raw)){
if (count == idx) break; // Reached requested index
count += 1;
}
}
if (state != CYC_UTF8_ACCEPT)
Cyc_rt_raise2(data, "string-ref - invalid character at index", k);
return obj_char2obj(codepoint);
}
}
object Cyc_substring(void *data, object cont, object str, object start,