From 5a50814a610d4baaf81cce126b8bc07002c2dd44 Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Wed, 14 Jul 2021 13:40:32 -0400 Subject: [PATCH] WIP for read_line slow --- runtime.c | 111 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 87 insertions(+), 24 deletions(-) diff --git a/runtime.c b/runtime.c index c24c942e..6adc2c3e 100644 --- a/runtime.c +++ b/runtime.c @@ -7890,13 +7890,14 @@ object Cyc_io_read_u8(void *data, object cont, object port) /* TODO: this function needs some work, but approximates what is needed */ object Cyc_io_read_line(void *data, object cont, object port) { - FILE *stream = ((port_type *) port)->fp; + FILE *stream; char buf[1027]; int len, num_cp, i = 0; char_type codepoint; uint32_t state; Cyc_check_port(data, port); + stream = ((port_type *) port)->fp; if (stream == NULL) { Cyc_rt_raise2(data, "Unable to read from closed port: ", port); } @@ -7942,34 +7943,96 @@ object Cyc_io_read_line(void *data, object cont, object port) return NULL; } -/* -char *fgets_port(char *buf, int size, port_type *p) -{ - // TODO: keep reading until fgets conditions are true - // also ensure we read a full code point for the last char - do { - _read_next_char(data, NULL, p); - c = p->mem_buf[p->buf_idx++]; - if (c == EOF) break; - } while(Cyc_utf8_decode(&state, &codepoint, (uint8_t)c)); -} -*/ - // WIP - a version of read_line that uses our internal port buffer. // This will be compatible with other I/O function such as read-char +// +// TODO: instead of fgets, read up to 1023 chars from our port buffer. per fgets, +// we stop when either (n-1) characters are read, the newline character is read, +// or the end-of-file is reached, whichever comes first +// +// other difference is we read one char at a time until final code point is completely read +// +// development in 2 phases: +// 1) build out so we can replace Cyc_io_read_line with this "slow" version, verifying everything works +// 2) integrate slow back into fast such that we can revert to this function if +// the buffer contains chars, else we use the fast read_line function +/* object Cyc_io_read_line_slow(void *data, object cont, object port) { - // TODO: instead of fgets, read up to 1023 chars from our port buffer. per fgets, - // we stop when either (n-1) characters are read, the newline character is read, - // or the end-of-file is reached, whichever comes first - // - // other difference is we read one char at a time until final code point is completely read - // - // development in 2 phases: - // 1) build out so we can replace Cyc_io_read_line with this "slow" version, verifying everything works - // 2) integrate slow back into fast such that we can revert to this function if - // the buffer contains chars, else we use the fast read_line function + FILE *stream; + port_type *p; + char buf[1027]; + int i, limit = 1024; // Ensure last code point is fully-read + + Cyc_check_port(data, port); + stream = ((port_type *) port)->fp; + if (stream == NULL) { + Cyc_rt_raise2(data, "Unable to read from closed port: ", port); + } + set_thread_blocked(data, cont); + + p = (port_type *)port; + for (i = 0; i < limit; i++) { + //_read_next_char(data, NULL, p); + if (p->mem_buf_len == 0 || p->mem_buf_len == p->buf_idx) { + int rv = read_from_port(p); + if (!rv) { + if (i == 0) { + return_thread_runnable_with_obj(data, Cyc_EOF, p); + } else { + break; // Handle buf contents below + } + } + } + buf[i] = p->mem_buf[p->buf_idx++]; + if (buf[i] == EOF) { + if (i == 0) { + return_thread_runnable_with_obj(data, Cyc_EOF, p); + } else { + break; // Handle buf contents below + } + } + if (buf[i] == '\n') { + break; + } + } + + // ensure we fully-read last code point + { + int c, len, num_cp, ii = 0; + char_type codepoint; + uint32_t state; + + state = Cyc_utf8_count_code_points_and_bytes((uint8_t *)buf, &codepoint, &num_cp, &len); + while (state != CYC_UTF8_ACCEPT && ii < 3) { + if (p->mem_buf_len == 0 || p->mem_buf_len == p->buf_idx) { + int rv = read_from_port(p); + if (!rv) { + break; // At EOF, return what we've got so far + // TODO: likely better to fall back to last fully-read codepoint. + // in that case we might still return EOF + } + } + c = p->mem_buf[p->buf_idx++]; + + buf[len] = c; + len++; + Cyc_utf8_decode(&state, &codepoint, (uint8_t)c); + if (state == CYC_UTF8_ACCEPT) { + num_cp++; + break; + } + ii++; + } + + // TODO: code section from read_line to + // + // Remove any trailing CR / newline chars + // + // this also returns the string to thread cont + } } +*/ /** * @brief Read next token from the input port.