Refactoring, added make_utf8_string

This commit is contained in:
Justin Ethier 2017-10-20 16:29:56 +00:00
parent 0ca396f8fa
commit ac8b280578
3 changed files with 22 additions and 16 deletions

View file

@ -716,7 +716,7 @@ void Cyc_set_globals_changed(gc_thread_data *thd);
#define CYC_UTF8_ACCEPT 0 #define CYC_UTF8_ACCEPT 0
#define CYC_UTF8_REJECT 1 #define CYC_UTF8_REJECT 1
uint32_t Cyc_utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte); uint32_t Cyc_utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte);
int Cyc_utf8_count_code_points(uint8_t* s, size_t* count); int Cyc_utf8_count_code_points(uint8_t* s);
uint32_t Cyc_utf8_validate_stream(uint32_t *state, char *str, size_t len); uint32_t Cyc_utf8_validate_stream(uint32_t *state, char *str, size_t len);
uint32_t Cyc_utf8_validate(char *str, size_t len); uint32_t Cyc_utf8_validate(char *str, size_t len);
/**@}*/ /**@}*/

View file

@ -764,17 +764,20 @@ typedef struct {
cs.num_cp = length; \ cs.num_cp = length; \
cs.str = s; } cs.str = s; }
///** Create a new string in the nursery */ /** Create a new string in the nursery */
//#define make_string(cs, s) string_type cs; \ #define make_utf8_string(data, cs, s) string_type cs; \
//{ int len = strlen(s); \ { int len = strlen(s); \
// cs.hdr.mark = gc_color_red; \ cs.hdr.mark = gc_color_red; \
// cs.hdr.grayed = 0; \ cs.hdr.grayed = 0; \
// cs.tag = string_tag; \ cs.tag = string_tag; \
// cs.num_cp = len; \ cs.num_cp = Cyc_utf8_count_code_points(s); \
// cs.len = len; \ if (cs.num_cp < 0) { \
// cs.str = alloca(sizeof(char) * (len + 1)); \ Cyc_rt_raise_msg(data, "Invalid UTF-8 characters in string"); \
// memcpy(cs.str, s, len + 1);} } \
// cs.len = len; \
cs.str = alloca(sizeof(char) * (len + 1)); \
memcpy(cs.str, s, len + 1);}
///** ///**
// * Create a new string with the given length // * Create a new string with the given length
// * (so it does not need to be computed) // * (so it does not need to be computed)

View file

@ -6403,15 +6403,18 @@ uint32_t Cyc_utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
* Count the number of code points in a string. * Count the number of code points in a string.
* Based on example code from Bjoern Hoehrmann. * Based on example code from Bjoern Hoehrmann.
*/ */
int Cyc_utf8_count_code_points(uint8_t* s, size_t* count) { int Cyc_utf8_count_code_points(uint8_t* s) {
uint32_t codepoint; uint32_t codepoint;
uint32_t state = 0; uint32_t state = 0;
int count;
for (*count = 0; *s; ++s) for (count = 0; *s; ++s)
if (!Cyc_utf8_decode(&state, &codepoint, *s)) if (!Cyc_utf8_decode(&state, &codepoint, *s))
*count += 1; count += 1;
return state != CYC_UTF8_ACCEPT; if (state != CYC_UTF8_ACCEPT)
return -1;
return count;
} }
// TODO: index into X codepoint in a string // TODO: index into X codepoint in a string