Refactoring, added make_utf8_string

This commit is contained in:
Justin Ethier 2017-10-20 16:29:56 +00:00
parent 0ca396f8fa
commit ac8b280578
3 changed files with 22 additions and 16 deletions

View file

@ -716,7 +716,7 @@ void Cyc_set_globals_changed(gc_thread_data *thd);
/* Decoder state value that Cyc_utf8_count_code_points compares against once
 * the whole string has been consumed; any other final state makes it report
 * failure. */
#define CYC_UTF8_ACCEPT 0
/* NOTE(review): not referenced in the code visible here; presumably the
 * decoder's "invalid input" state -- confirm against Cyc_utf8_decode. */
#define CYC_UTF8_REJECT 1
/* Feeds one byte to the decoder. `state` is carried by the caller between
 * calls; Cyc_utf8_count_code_points treats a zero return as "one code point
 * completed". Presumably `codep` receives the decoded code point -- confirm
 * against the definition. */
uint32_t Cyc_utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte);
/* NOTE(review): the next two lines declare the same function with different
 * parameter lists; they look like the before/after forms of this change.
 * The first reports the count through `count`; the second returns it. */
int Cyc_utf8_count_code_points(uint8_t* s, size_t* count);
int Cyc_utf8_count_code_points(uint8_t* s);
/* NOTE(review): definitions of the two validators are not visible here;
 * presumably they check `len` bytes of `str`, the stream form continuing
 * from a caller-held `state` -- confirm. */
uint32_t Cyc_utf8_validate_stream(uint32_t *state, char *str, size_t len);
uint32_t Cyc_utf8_validate(char *str, size_t len);
/**@}*/

View file

@ -764,17 +764,20 @@ typedef struct {
cs.num_cp = length; \
cs.str = s; }
///** Create a new string in the nursery */
//#define make_string(cs, s) string_type cs; \
//{ int len = strlen(s); \
// cs.hdr.mark = gc_color_red; \
// cs.hdr.grayed = 0; \
// cs.tag = string_tag; \
// cs.num_cp = len; \
// cs.len = len; \
// cs.str = alloca(sizeof(char) * (len + 1)); \
// memcpy(cs.str, s, len + 1);}
//
/**
 * Create a new UTF-8 string in the nursery.
 *
 * Declares a `string_type` variable named `cs` in the enclosing scope and
 * fills it from the NUL-terminated C string `s`:
 *   - `cs.len`    is the byte length reported by strlen();
 *   - `cs.num_cp` is the value returned by Cyc_utf8_count_code_points();
 *   - `cs.str`    is an alloca()'d copy of `s`, terminator included, so the
 *                 copy lives in the stack frame of the function that expands
 *                 this macro.
 *
 * If the code-point count comes back negative, Cyc_rt_raise_msg() is called
 * with `data` and the message "Invalid UTF-8 characters in string".
 *
 * `s` is evaluated exactly once and may be any pointer to char-sized data;
 * it is cast explicitly to the `uint8_t *` that the counter expects. The
 * negative check is done on the counter's signed return value rather than on
 * `cs.num_cp`, so it does not depend on that field's type.
 *
 * Note: this expands to a declaration followed by a block, so it must be used
 * where a declaration is allowed (not as the body of an unbraced `if`).
 */
#define make_utf8_string(data, cs, s) string_type cs; \
{ const char *cyc_utf8_src_ = (const char *)(s); \
  int cyc_utf8_len_ = strlen(cyc_utf8_src_); \
  int cyc_utf8_ncp_ = Cyc_utf8_count_code_points((uint8_t *)cyc_utf8_src_); \
  cs.hdr.mark = gc_color_red; \
  cs.hdr.grayed = 0; \
  cs.tag = string_tag; \
  cs.num_cp = cyc_utf8_ncp_; \
  if (cyc_utf8_ncp_ < 0) { \
    Cyc_rt_raise_msg(data, "Invalid UTF-8 characters in string"); \
  } \
  cs.len = cyc_utf8_len_; \
  cs.str = alloca(sizeof(char) * (cyc_utf8_len_ + 1)); \
  memcpy(cs.str, cyc_utf8_src_, cyc_utf8_len_ + 1);}
///**
// * Create a new string with the given length
// * (so it does not need to be computed)

View file

@ -6403,15 +6403,18 @@ uint32_t Cyc_utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
* Count the number of code points in a string.
* Based on example code from Bjoern Hoehrmann.
*/
/* NOTE(review): this span shows two variants of the same function interleaved
 * line by line -- one that writes the count through an out-parameter and
 * returns a status, and one that returns the count itself. They appear to be
 * the before/after of this change; confirm against the real file. */
int Cyc_utf8_count_code_points(uint8_t* s, size_t* count) {
int Cyc_utf8_count_code_points(uint8_t* s) {
uint32_t codepoint;
/* 0 is CYC_UTF8_ACCEPT: decoding starts in the accept state. */
uint32_t state = 0;
int count;
/* Both loop headers walk `s` one byte at a time until the NUL terminator. */
for (*count = 0; *s; ++s)
for (count = 0; *s; ++s)
/* A zero result from the decoder is counted as one finished code point. */
if (!Cyc_utf8_decode(&state, &codepoint, *s))
*count += 1;
count += 1;
/* Out-parameter variant: 0 if the string ended in the accept state, else 1. */
return state != CYC_UTF8_ACCEPT;
/* Return-value variant: -1 if the string did not end in the accept state,
 * otherwise the number of code points counted. */
if (state != CYC_UTF8_ACCEPT)
return -1;
return count;
}
// TODO: index into X codepoint in a string