mirror of
https://github.com/justinethier/cyclone.git
synced 2025-07-13 15:57:36 +02:00
String reading
This commit is contained in:
parent
d4836eb36d
commit
d0c0c18d74
2 changed files with 89 additions and 47 deletions
3
read.scm
3
read.scm
|
@ -737,7 +737,8 @@
|
|||
(let loop ((lis '())
|
||||
(t (parse2 fp)))
|
||||
(cond
|
||||
;; TODO: EOF
|
||||
((eof-object? t)
|
||||
(error "missing closing parenthesis"))
|
||||
((eq? t #\))
|
||||
(reverse lis))
|
||||
(else
|
||||
|
|
133
runtime.c
133
runtime.c
|
@ -5692,6 +5692,14 @@ int read_from_port(port_type *p)
|
|||
return rv;
|
||||
}
|
||||
|
||||
void _read_error(void *data, port_type *p, const char *msg)
|
||||
{
|
||||
char buf[1024];
|
||||
snprintf(buf, 1023, "Error (line %d, column %d): %s",
|
||||
p->line_num, p->col_num, msg);
|
||||
Cyc_rt_raise_msg(data, buf);
|
||||
}
|
||||
|
||||
void _read_line_comment(port_type *p)
|
||||
{
|
||||
while(1) {
|
||||
|
@ -5733,33 +5741,88 @@ void _read_whitespace(port_type *p)
|
|||
}
|
||||
}
|
||||
|
||||
void _read_error(void *data, port_type *p, const char *msg)
|
||||
static void _read_add_to_tok_buf(port_type *p, char c)
|
||||
{
|
||||
char buf[1024];
|
||||
snprintf(buf, 1023, "Error (line %d, column %d): %s",
|
||||
p->line_num, p->col_num, msg);
|
||||
Cyc_rt_raise_msg(data, buf);
|
||||
// FUTURE: more efficient to try and use mem_buf directly??
|
||||
// complicates things with more edge cases though
|
||||
if ((p->tok_end + 1) == p->tok_buf_len) { // +1 for trailing \0 later on
|
||||
p->tok_buf_len *= 2;
|
||||
p->tok_buf = realloc(p->tok_buf, p->tok_buf_len);
|
||||
if (!p->tok_buf) {
|
||||
fprintf(stderr, "Unable to grow token buffer!\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
p->tok_buf[p->tok_end++] = c;
|
||||
}
|
||||
|
||||
void _read_string(void *data, object cont, port_type *p)
|
||||
{
|
||||
char c;
|
||||
int escaped = 0;
|
||||
while(1) {
|
||||
// Read more data into buffer, if needed
|
||||
if (p->buf_idx == p->mem_buf_len) {
|
||||
if (!read_from_port(p)){
|
||||
_read_error(data, p, "Missing closing double-quote");
|
||||
}
|
||||
}
|
||||
c = p->mem_buf[p->buf_idx++];
|
||||
p->col_num++;
|
||||
|
||||
if (escaped) {
|
||||
escaped = 0;
|
||||
switch (c) {
|
||||
case '"':
|
||||
case '\'':
|
||||
case '?':
|
||||
case '|':
|
||||
case '\\':
|
||||
_read_add_to_tok_buf(p, c);
|
||||
break;
|
||||
case 'a':
|
||||
_read_add_to_tok_buf(p, '\a');
|
||||
break;
|
||||
case 'b':
|
||||
_read_add_to_tok_buf(p, '\b');
|
||||
break;
|
||||
case 'n':
|
||||
_read_add_to_tok_buf(p, '\n');
|
||||
break;
|
||||
case 'r':
|
||||
_read_add_to_tok_buf(p, '\r');
|
||||
break;
|
||||
case 't':
|
||||
_read_add_to_tok_buf(p, '\t');
|
||||
break;
|
||||
case 'x':
|
||||
// TODO: read hex scalar value
|
||||
break;
|
||||
default:
|
||||
_read_error(data, p, "invalid escape character in string"); // TODO: char
|
||||
break;
|
||||
}
|
||||
} else if (c == '"') {
|
||||
p->tok_buf[p->tok_end] = '\0'; // TODO: what if buffer is full?
|
||||
p->tok_end = 0; // Reset for next atom
|
||||
{
|
||||
make_string(str, p->tok_buf);
|
||||
return_closcall1(data, cont, &str);
|
||||
}
|
||||
} else if (c == '\\') {
|
||||
escaped = 1;
|
||||
} else if (c == '\n') {
|
||||
p->line_num++;
|
||||
p->col_num = 0;
|
||||
_read_add_to_tok_buf(p, c);
|
||||
} else {
|
||||
_read_add_to_tok_buf(p, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int _read_is_numeric(const char *tok)
|
||||
{
|
||||
/* Equivalent to:
|
||||
(define (sign? c)
|
||||
(or
|
||||
(equal? c #\+)
|
||||
(equal? c #\-)))
|
||||
|
||||
;; token-numeric? -> [chars] -> boolean
|
||||
(define (token-numeric? a)
|
||||
(or (char-numeric? (car a))
|
||||
(and (> (length a) 1)
|
||||
(eq? #\. (car a))
|
||||
(char-numeric? (cadr a)))
|
||||
(and (> (length a) 1)
|
||||
(or (char-numeric? (cadr a))
|
||||
(eq? #\. (cadr a)))
|
||||
(sign? (car a)))))
|
||||
*/
|
||||
int len = strlen(tok);
|
||||
return (len &&
|
||||
((isdigit(tok[0])) ||
|
||||
|
@ -5778,23 +5841,6 @@ void _read_return_atom(void *data, object cont, port_type *p)
|
|||
p->tok_buf[p->tok_end] = '\0'; // TODO: what if buffer is full?
|
||||
p->tok_end = 0; // Reset for next atom
|
||||
|
||||
/*
|
||||
TODO: not good enough, could be a number.
|
||||
logic from existing read is:
|
||||
|
||||
|
||||
;; parse-atom -> [chars] -> literal
|
||||
(define (parse-atom a)
|
||||
(if (token-numeric? a)
|
||||
(string->number (list->string a))
|
||||
(if (or (equal? a '(#\+ #\i #\n #\f #\. #\0))
|
||||
(equal? a '(#\- #\i #\n #\f #\. #\0)))
|
||||
(expt 2.0 1000000)
|
||||
(if (or (equal? a '(#\+ #\n #\a #\n #\. #\0))
|
||||
(equal? a '(#\- #\n #\a #\n #\. #\0)))
|
||||
(/ 0.0 0.0)
|
||||
(string->symbol (list->string a))))))
|
||||
*/
|
||||
if (_read_is_numeric(p->tok_buf)) {
|
||||
make_string(str, p->tok_buf);
|
||||
Cyc_string2number_(data, cont, &str);
|
||||
|
@ -5850,17 +5896,12 @@ void Cyc_io_read_token(void *data, object cont, object port)
|
|||
|
||||
} else if (c == '"') {
|
||||
if (p->tok_end) _read_return_atom(data, cont, p);
|
||||
Cyc_rt_raise_msg(data, "TODO: parsing for strings");
|
||||
_read_string(data, cont, p);
|
||||
} else if (c == '#' && !p->tok_end) {
|
||||
Cyc_rt_raise_msg(data, "TODO: parsing for #");
|
||||
} else {
|
||||
// No special meaning, add char to current token (an atom)
|
||||
|
||||
// TODO: need to realloc if tok_end == (tok_buf_len - 1)
|
||||
// this accounts for the \0 that needs to be appended when parsing the atom
|
||||
// FUTURE: more efficient to try and use mem_buf directly??
|
||||
// complicates things with more edge cases though
|
||||
p->tok_buf[p->tok_end++] = c;
|
||||
_read_add_to_tok_buf(p, c);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue