mirror of
https://github.com/justinethier/cyclone.git
synced 2025-05-24 04:25:06 +02:00
Updated Cyc_io_read_line to prevent truncation
Ensure the last code point is fully read before returning
This commit is contained in:
parent
6910e3e4cb
commit
d431b2af1c
2 changed files with 23 additions and 11 deletions
|
@ -725,7 +725,7 @@ void Cyc_set_globals_changed(gc_thread_data *thd);
|
||||||
int Cyc_utf8_encode(char *dest, int sz, uint32_t *src, int srcsz);
|
int Cyc_utf8_encode(char *dest, int sz, uint32_t *src, int srcsz);
|
||||||
uint32_t Cyc_utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte);
|
uint32_t Cyc_utf8_decode(uint32_t* state, uint32_t* codep, uint32_t byte);
|
||||||
int Cyc_utf8_count_code_points(uint8_t* s);
|
int Cyc_utf8_count_code_points(uint8_t* s);
|
||||||
int Cyc_utf8_count_code_points_and_bytes(uint8_t* s, int *cpts, int *bytes);
|
int Cyc_utf8_count_code_points_and_bytes(uint8_t* s, char_type *codepoint, int *cpts, int *bytes);
|
||||||
uint32_t Cyc_utf8_validate_stream(uint32_t *state, char *str, size_t len);
|
uint32_t Cyc_utf8_validate_stream(uint32_t *state, char *str, size_t len);
|
||||||
uint32_t Cyc_utf8_validate(char *str, size_t len);
|
uint32_t Cyc_utf8_validate(char *str, size_t len);
|
||||||
/**@}*/
|
/**@}*/
|
||||||
|
|
32
runtime.c
32
runtime.c
|
@ -6365,8 +6365,10 @@ object Cyc_io_read_char(void *data, object cont, object port)
|
||||||
object Cyc_io_read_line(void *data, object cont, object port)
|
object Cyc_io_read_line(void *data, object cont, object port)
|
||||||
{
|
{
|
||||||
FILE *stream = ((port_type *) port)->fp;
|
FILE *stream = ((port_type *) port)->fp;
|
||||||
char buf[1024];
|
char buf[1027];
|
||||||
int len, num_cp;
|
int len, num_cp, i = 0;
|
||||||
|
char_type codepoint;
|
||||||
|
uint32_t state;
|
||||||
|
|
||||||
Cyc_check_port(data, port);
|
Cyc_check_port(data, port);
|
||||||
if (stream == NULL) {
|
if (stream == NULL) {
|
||||||
|
@ -6375,10 +6377,21 @@ object Cyc_io_read_line(void *data, object cont, object port)
|
||||||
set_thread_blocked(data, cont);
|
set_thread_blocked(data, cont);
|
||||||
errno = 0;
|
errno = 0;
|
||||||
if (fgets(buf, 1023, stream) != NULL) {
|
if (fgets(buf, 1023, stream) != NULL) {
|
||||||
// TODO: not good enough for UTF-8, what if we stopped reading in the middle of a code point?
|
state = Cyc_utf8_count_code_points_and_bytes((uint8_t *)buf, &codepoint, &num_cp, &len);
|
||||||
// should reserve 3 extra bytes and, if last code point is not complete, read one byte at a
|
// Check if we stopped reading in the middle of a code point and
|
||||||
// time until it has been read
|
// if so, read one byte at a time until that code point is finished.
|
||||||
Cyc_utf8_count_code_points_and_bytes((uint8_t *)buf, &num_cp, &len);
|
while (state != CYC_UTF8_ACCEPT && i < 3) {
|
||||||
|
int c = fgetc(stream);
|
||||||
|
buf[len] = c;
|
||||||
|
len++;
|
||||||
|
Cyc_utf8_decode(&state, &codepoint, (uint8_t)c);
|
||||||
|
if (state == CYC_UTF8_ACCEPT) {
|
||||||
|
num_cp++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// Remove any trailing CR / newline chars
|
// Remove any trailing CR / newline chars
|
||||||
while (len > 0 && (buf[len - 1] == '\n' ||
|
while (len > 0 && (buf[len - 1] == '\n' ||
|
||||||
|
@ -6596,19 +6609,18 @@ int Cyc_utf8_count_code_points(uint8_t* s) {
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Cyc_utf8_count_code_points_and_bytes(uint8_t* s, int *cpts, int *bytes) {
|
int Cyc_utf8_count_code_points_and_bytes(uint8_t* s, char_type *codepoint, int *cpts, int *bytes) {
|
||||||
uint32_t codepoint;
|
|
||||||
uint32_t state = 0;
|
uint32_t state = 0;
|
||||||
*cpts = 0;
|
*cpts = 0;
|
||||||
*bytes = 0;
|
*bytes = 0;
|
||||||
for (; *s; ++s){
|
for (; *s; ++s){
|
||||||
*bytes += 1;
|
*bytes += 1;
|
||||||
if (!Cyc_utf8_decode(&state, &codepoint, *s))
|
if (!Cyc_utf8_decode(&state, codepoint, *s))
|
||||||
*cpts += 1;
|
*cpts += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (state != CYC_UTF8_ACCEPT)
|
if (state != CYC_UTF8_ACCEPT)
|
||||||
return -1;
|
return state;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue