;; Cyclone Scheme ;; Copyright (c) 2014, Justin Ethier ;; All rights reserved. ;; ;; This module contains the s-expression parser and supporting functions. ;; ;; The code in this module is used both by the compiler and at runtime, so ;; when bootstrapping from a Scheme, keep in mind the code in this module ;; cannot use features that are not also provided by Cyclone. ;; ;; FUTURE: if this was a module/library, would probably only want to export ;; read and read-all ;; ;; Extended information for each input port (define *in-port-table* '()) (define (reg-port fp) (let ((r (assoc fp *in-port-table*))) (cond ((not r) ;(write `(ADDED NEW ENTRY TO in port table!!)) (set! r (list fp #f ; Buffered char, if any 1 ; Line number 0)) ; Char number (set! *in-port-table* (cons r *in-port-table*)) r) (else r)))) ;; TODO: unreg-port - delete fp entry from *in-port-table* ;; would want to do this when port is closed (define (in-port:read-buf! ptbl) (let ((result (cadr ptbl))) (in-port:set-buf! ptbl #f) result)) (define (in-port:get-buf ptbl) (cadr ptbl)) (define (in-port:set-buf! ptbl buf) (set-car! (cdr ptbl) buf)) (define (in-port:get-lnum ptbl) (caddr ptbl)) (define (in-port:set-lnum! ptbl lnum) (set-car! (cddr ptbl) lnum)) (define (in-port:get-cnum ptbl) (cadddr ptbl)) (define (in-port:set-cnum! ptbl cnum) (set-car! (cdddr ptbl) cnum)) ;; END input port table ;; Helper functions (define (add-tok tok toks quotes) (define (loop i) (if (= quotes i) tok (cons 'quote (cons (loop (+ i 1)) '())))) (if quotes (cons (loop 0) toks) (cons tok toks))) ;; Get completed list of tokens (define (get-toks tok toks quotes) (if (null? tok) toks (add-tok (->tok tok) toks quotes))) ;; Add a token to the list, quoting it if necessary (define (->tok lst) (parse-atom (reverse lst))) ;; Did we read a dotted list (define (dotted? lst) (and (> (length lst) 2) (equal? (cadr (reverse lst)) (string->symbol ".")))) ;; Convert a list read by the reader into an improper list (define (->dotted-list lst) (cond ((null? lst) '()) ((equal? (car lst) (string->symbol ".")) (cadr lst)) (else (cons (car lst) (->dotted-list (cdr lst)))))) (define (parse-error msg lnum cnum) (error (string-append "Error (line " (number->string lnum) ", char " (number->string cnum) "): " msg))) ;; Add finished token, if there is one, and continue parsing (define (parse/tok fp tok toks all? comment? quotes parens ptbl curr-char) (cond ((null? tok) (parse fp '() toks all? comment? quotes parens ptbl)) (all? (parse fp '() (add-tok (->tok tok) toks quotes) all? comment? #f ; read tok, no more quote parens ptbl)) (else ;; Reached a terminating char, return current token and ;; save term char for the next (read). ;; Note: never call set-buf! if in "all?" mode, since ;; that mode builds a list of tokens (in-port:set-buf! ptbl curr-char) ;(write `(DEBUG ,tok ,ptbl)) ;(write "\n") (car (add-tok (->tok tok) toks quotes))))) ;; Parse input from stream ;; ;; Input: ;; - Port object ;; - Current token ;; - List of tokens read (if applicable) ;; - Bool - Read-all mode, or just read the next object? ;; - Bool - Are we inside a comment? ;; - Quote level ;; - Level of nested parentheses ;; - Entry in the in-port table for this port ;; ;; Output: next object, or list of objects (if read-all mode) ;; (define (parse fp tok toks all? comment? quotes parens ptbl) (in-port:set-cnum! ptbl (+ 1 (in-port:get-cnum ptbl))) (let ((c (if (in-port:get-buf ptbl) (in-port:read-buf! ptbl) ;; Already buffered (read-char fp)))) ;; DEBUGGING ;(write `(DEBUG read ,tok ,c)) ;(write (newline)) ;; END DEBUG (cond ((eof-object? c) (if (> parens 0) (parse-error "missing closing parenthesis" (in-port:get-lnum ptbl) (in-port:get-cnum ptbl))) (if all? (reverse (get-toks tok toks quotes)) (let ((last (get-toks tok toks quotes))) (if (> (length last) 0) (car last) c)))) ;; EOF (comment? (if (eq? c #\newline) (begin (in-port:set-lnum! ptbl (+ 1 (in-port:get-lnum ptbl))) (in-port:set-cnum! ptbl 0) (parse fp '() toks all? #f quotes parens ptbl)) (parse fp '() toks all? #t quotes parens ptbl))) ((char-whitespace? c) (if (equal? c #\newline) (in-port:set-lnum! ptbl (+ 1 (in-port:get-lnum ptbl)))) (if (equal? c #\newline) (in-port:set-cnum! ptbl 0)) (parse/tok fp tok toks all? #f quotes parens ptbl c)) ((eq? c #\;) (parse/tok fp tok toks all? #t quotes parens ptbl c)) ((eq? c #\') (cond ((and (not all?) (not quotes) (not (null? tok))) ;; Reached a terminal char, read out previous token ;; TODO: would also need to do this if previous char was ;; not a quote! ;; EG: 'a'b ==> (quote a) (quote b), NOT (quote (quote b)) (in-port:set-buf! ptbl c) (car (add-tok (->tok tok) toks quotes))) (else (let ((quote-level (if quotes (+ quotes 1) 1))) (cond ((null? tok) (parse fp '() toks all? comment? quote-level parens ptbl)) (else (parse fp '() (add-tok (->tok tok) toks quotes) all? comment? quote-level parens ptbl))))))) ((eq? c #\() ;(write `(DEBUG read open paren ,tok)) (cond ((and (not all?) (not (null? tok))) ;; Reached a terminal char, read out previous token (in-port:set-buf! ptbl c) (car (add-tok (->tok tok) toks quotes))) (else (let ((sub ;(_cyc-read-all fp (+ parens 1))) (parse fp '() '() #t #f #f (+ parens 1) ptbl)) (toks* (get-toks tok toks quotes))) (define new-toks (add-tok (if (and (pair? sub) (dotted? sub)) (->dotted-list sub) sub) toks* quotes)) ;(write `(DEBUG incrementing paren level ,parens ,sub)) (if all? (parse fp '() new-toks all? #f #f parens ptbl) (car new-toks)))))) ((eq? c #\)) ;(write `(DEBUG decrementing paren level ,parens)) (if (= parens 0) (parse-error "unexpected closing parenthesis" (in-port:get-lnum ptbl) (in-port:get-cnum ptbl))) (reverse (get-toks tok toks quotes))) ((eq? c #\") (cond ((and (not all?) (not (null? tok))) ;; Reached a terminal char, read out previous token (in-port:set-buf! ptbl c) (car (add-tok (->tok tok) toks quotes))) (else (let ((str (read-str fp '() ptbl)) (toks* (get-toks tok toks quotes))) (define new-toks (add-tok str toks* quotes)) (if all? (parse fp '() new-toks all? #f #f parens ptbl) (car new-toks)))))) ((eq? c #\#) (if (null? tok) ;; # reader (let ((next-c (read-char fp))) (in-port:set-cnum! ptbl (+ 1 (in-port:get-cnum ptbl))) (cond ;; Do not use add-tok below, no need to quote a bool ((eq? #\t next-c) (parse fp '() (cons #t toks) all? #f #f parens ptbl)) ((eq? #\f next-c) (parse fp '() (cons #f toks) all? #f #f parens ptbl)) ((eq? #\\ next-c) (let ((new-toks (cons (read-pound fp ptbl) toks))) (if all? (parse fp '() new-toks all? #f #f parens ptbl) (car new-toks)))) (else (parse-error "Unhandled input sequence" (in-port:get-lnum ptbl) (in-port:get-cnum ptbl))))) ;; just another char... (parse fp (cons c tok) toks all? #f quotes parens ptbl))) (else (parse fp (cons c tok) toks all? #f quotes parens ptbl))))) ;; Read chars past a leading #\ (define (read-pound fp ptbl) (define (done raw-buf) (let ((buf (reverse raw-buf))) (cond ((= 0 (length buf)) (parse-error "missing character" (in-port:get-lnum ptbl) (in-port:get-cnum ptbl))) ((= 1 (length buf)) (car buf)) ((equal? buf '(#\a #\l #\a #\r #\m)) (integer->char 7)) ((equal? buf '(#\b #\a #\c #\k #\s #\p #\a #\c #\e)) (integer->char 8)) ((equal? buf '(#\d #\e #\l #\e #\t #\e)) (integer->char 127)) ((equal? buf '(#\e #\s #\c #\a #\p #\e)) (integer->char 27)) ((equal? buf '(#\n #\e #\w #\l #\i #\n #\e)) (integer->char 10)) ((equal? buf '(#\n #\u #\l #\l)) (integer->char 0)) ((equal? buf '(#\r #\e #\t #\u #\r #\n)) (integer->char 13)) ((equal? buf '(#\s #\p #\a #\c #\e)) (integer->char 32)) ((equal? buf '(#\t #\a #\b)) (integer->char 9)) (else (parse-error (string-append "unable to parse character: " (list->string buf)) (in-port:get-lnum ptbl) (in-port:get-cnum ptbl)))))) (define (loop buf) (let ((c (peek-char fp))) (if (or (eof-object? c) (char-whitespace? c) (and (> (length buf) 0) (equal? c #\)))) (done buf) (loop (cons (read-char fp) buf))))) (loop '())) (define (read-str fp buf ptbl) (let ((c (read-char fp))) (cond ((eof-object? c) (parse-error "missing closing double-quote" (in-port:get-lnum ptbl) (in-port:get-cnum ptbl))) ((equal? #\\ c) (read-str fp (read-str-esc fp buf ptbl) ptbl)) ((equal? #\" c) (list->string (reverse buf))) (else (read-str fp (cons c buf) ptbl))))) ;; Read an escaped character within a string ;; The escape '\' has already been read at this point (define (read-str-esc fp buf ptbl) (let ((c (read-char fp))) (cond ((eof-object? c) (parse-error "missing escaped character within string" (in-port:get-lnum ptbl) (in-port:get-cnum ptbl))) ((or (equal? #\" c) (equal? #\\ c)) (cons c buf)) ((equal? #\n c) (cons #\newline buf)) (else (parse-error "invalid escape character in string" (in-port:get-lnum ptbl) (in-port:get-cnum ptbl)))))) (define (sign? c) (or (equal? c #\+) (equal? c #\-))) ;; parse-atom -> [chars] -> literal (define (parse-atom a) (cond ((or (char-numeric? (car a)) (and (> (length a) 1) (char-numeric? (cadr a)) (sign? (car a)))) (string->number (list->string a))) (else (string->symbol (list->string a))))) ;; Main lexer/parser (define cyc-read ;; TODO: should be (read), but that is breaking on csi 4.8.0.5 (lambda args (let ((fp (if (null? args) (current-input-port) (car args)))) (parse fp '() '() #f #f #f 0 (reg-port fp))))) ;; read-all -> port -> [objects] (define (read-all . args) (let ((fp (if (null? args) (current-input-port) (car args)))) (define (loop fp result) (let ((obj (cyc-read fp))) (if (eof-object? obj) (reverse result) (loop fp (cons obj result))))) (loop fp '()))) ;; TODO: for some reason this causes trouble in chicken 4.8. WTF?? ;; read -> port -> object ;(define read cyc-read) ;(define (repl) ; ;; Test code ; ;(let ((fp (open-input-file "tests/begin.scm"))) ; ;(let ((fp (open-input-file "tests/strings.scm"))) ; ;(let ((fp (open-input-file "eval.scm"))) ; ;(let ((fp (open-input-file "dev.scm"))) ; ; (write (read-all fp))) ; (let ((fp (current-input-port))) ; (write (cyc-read fp))) ; (repl)) ;;(repl)