From 62a54a50d5a1dce13abfc1a23fa5fc1e228ae12d Mon Sep 17 00:00:00 2001 From: Justin Ethier Date: Tue, 15 Aug 2017 19:02:47 -0400 Subject: [PATCH] WIP, not quite right with differentiating chars --- read.scm | 41 +++++++++++++++++++++++------------------ runtime.c | 8 +++++++- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/read.scm b/read.scm index ec0eac44..3cd43a47 100644 --- a/read.scm +++ b/read.scm @@ -701,6 +701,9 @@ (parse2 (open-input-file "test.scm"))) ;(read-token (open-input-file "test.scm"))) +TODO: getting there, but still not parsed correctly: +(eq? c #\)) + ;; Notes on writing a fast parser: ; - Interface to the user is (read). This needs to be fast ; - Could read input a line at a time, but then need to buffer in the port_type object @@ -732,17 +735,6 @@ (let ((token (read-token fp))) ;; TODO: this will be a C call ;(write `(token ,token)) (cond - ;; Open paren, start read loop - ((eq? token #\() - (let loop ((lis '()) - (t (parse2 fp))) - (cond - ((eof-object? t) - (error "missing closing parenthesis")) - ((eq? t #\)) - (reverse lis)) - (else - (loop (cons t lis) (parse2 fp)))))) ((vector? token) (cond ((= (vector-length token) 2) ;; Special case: number @@ -752,7 +744,26 @@ (exact (string->number (vector-ref token 0) (vector-ref token 1))) (inexact (string->number (vector-ref token 0) (vector-ref token 1))))) ((= (vector-length token) 1) ;; Special case: error - (error (vector-ref token 0))) + (cond + ;; Open paren, start read loop + ((eq? (vector-ref token 0) #\() + (let loop ((lis '()) + (t (parse2 fp))) + (cond + ((eof-object? t) + (error "missing closing parenthesis")) + ((eq? t #\)) TODO: here and below, need to wait for the vectorized close? + (reverse lis)) + (else + (loop (cons t lis) (parse2 fp)))))) + ((eq? (vector-ref token 0) #\') + (list 'quote (parse2 fp))) + ((eq? (vector-ref token 0) #\`) + (list 'quasiquote (parse2 fp))) + ((eq? (vector-ref token 0) #\,) + (list 'unquote (parse2 fp))) + (else + (error (vector-ref token 0))))) (else (let loop ((lis '()) (t (parse2 fp))) @@ -773,12 +784,6 @@ (apply bytevector (reverse lis))) (else (loop (cons t lis) (parse2 fp)))))) - ((eq? token #\') - (list 'quote (parse2 fp))) - ((eq? token #\`) - (list 'quasiquote (parse2 fp))) - ((eq? token #\,) - (list 'unquote (parse2 fp))) ((eq? token (string->symbol ",@")) (list 'unquote-splicing (parse2 fp))) ((eq? token (string->symbol "#;")) diff --git a/runtime.c b/runtime.c index 13701ca1..f0a6965e 100644 --- a/runtime.c +++ b/runtime.c @@ -6106,7 +6106,13 @@ void Cyc_io_read_token(void *data, object cont, object port) _read_whitespace(p); } else if (c == '(' || c == ')' || c == '\'' || c == '`') { if (p->tok_end) _read_return_atom(data, cont, p); - return_thread_runnable(data, obj_char2obj(c)); + //return_thread_runnable(data, obj_char2obj(c)); + // Encode within a vector so we can distinguish between these and chars such as #\( + make_empty_vector(vec); + vec.num_elements = 1; + vec.elements = (object *) alloca(sizeof(object) * vec.num_elements); + vec.elements[0] = obj_char2obj(c); + return_thread_runnable(data, &vec); } else if (c == ',') { if (p->tok_end) _read_return_atom(data, cont, p);