mirror of
https://github.com/justinethier/cyclone.git
synced 2025-05-19 13:49:16 +02:00
382 lines
12 KiB
Scheme
382 lines
12 KiB
Scheme
;; Cyclone Scheme
|
|
;; Copyright (c) 2014, Justin Ethier
|
|
;; All rights reserved.
|
|
;;
|
|
;; This module contains the s-expression parser and supporting functions.
|
|
;;
|
|
;; The code in this module is used both by the compiler and at runtime, so
|
|
;; when bootstrapping from a Scheme, keep in mind the code in this module
|
|
;; cannot use features that are not also provided by Cyclone.
|
|
;;
|
|
;; FUTURE: if this was a module/library, would probably only want to export
|
|
;; read and read-all
|
|
;;
|
|
|
|
;; Extended information for each input port
|
|
(define *in-port-table* '())
|
|
(define (reg-port fp)
|
|
(let ((r (assoc fp *in-port-table*)))
|
|
(cond
|
|
((not r)
|
|
;(write `(ADDED NEW ENTRY TO in port table!!))
|
|
(set! r
|
|
(list fp
|
|
#f ; Buffered char, if any
|
|
1 ; Line number
|
|
0)) ; Char number
|
|
(set! *in-port-table* (cons r *in-port-table*))
|
|
r)
|
|
(else r))))
|
|
;; TODO: unreg-port - delete fp entry from *in-port-table*
|
|
;; would want to do this when port is closed
|
|
|
|
(define (in-port:read-buf! ptbl)
|
|
(let ((result (cadr ptbl)))
|
|
(in-port:set-buf! ptbl #f)
|
|
result))
|
|
(define (in-port:get-buf ptbl) (cadr ptbl))
|
|
(define (in-port:set-buf! ptbl buf) (set-car! (cdr ptbl) buf))
|
|
(define (in-port:get-lnum ptbl) (caddr ptbl))
|
|
(define (in-port:set-lnum! ptbl lnum) (set-car! (cddr ptbl) lnum))
|
|
(define (in-port:get-cnum ptbl) (cadddr ptbl))
|
|
(define (in-port:set-cnum! ptbl cnum) (set-car! (cdddr ptbl) cnum))
|
|
;; END input port table
|
|
|
|
;; Helper functions
|
|
(define (add-tok tok toks quotes)
|
|
(define (loop i)
|
|
(if (= quotes i)
|
|
tok
|
|
(cons 'quote (cons (loop (+ i 1)) '()))))
|
|
(if quotes
|
|
(cons
|
|
(loop 0)
|
|
toks)
|
|
(cons tok toks)))
|
|
|
|
;; Get completed list of tokens
|
|
(define (get-toks tok toks quotes)
|
|
(if (null? tok)
|
|
toks
|
|
(add-tok (->tok tok) toks quotes)))
|
|
|
|
;; Add a token to the list, quoting it if necessary
|
|
(define (->tok lst)
|
|
(parse-atom (reverse lst)))
|
|
|
|
;; Did we read a dotted list
|
|
(define (dotted? lst)
|
|
(and (> (length lst) 2)
|
|
(equal? (cadr (reverse lst)) (string->symbol "."))))
|
|
|
|
;; Convert a list read by the reader into an improper list
|
|
(define (->dotted-list lst)
|
|
(cond
|
|
((null? lst) '())
|
|
((equal? (car lst) (string->symbol "."))
|
|
(cadr lst))
|
|
(else
|
|
(cons (car lst) (->dotted-list (cdr lst))))))
|
|
|
|
(define (parse-error msg lnum cnum)
|
|
(error
|
|
(string-append
|
|
"Error (line "
|
|
(number->string lnum)
|
|
", char "
|
|
(number->string cnum)
|
|
"): "
|
|
msg)))
|
|
|
|
;; Add finished token, if there is one, and continue parsing
|
|
(define (parse/tok fp tok toks all? comment? quotes parens ptbl curr-char)
|
|
(cond
|
|
((null? tok)
|
|
(parse fp '() toks all? comment? quotes parens ptbl))
|
|
(all?
|
|
(parse fp '()
|
|
(add-tok (->tok tok) toks quotes)
|
|
all?
|
|
comment?
|
|
#f ; read tok, no more quote
|
|
parens
|
|
ptbl))
|
|
(else
|
|
;; Reached a terminating char, return current token and
|
|
;; save term char for the next (read).
|
|
;; Note: never call set-buf! if in "all?" mode, since
|
|
;; that mode builds a list of tokens
|
|
(in-port:set-buf! ptbl curr-char)
|
|
;(write `(DEBUG ,tok ,ptbl))
|
|
;(write "\n")
|
|
(car (add-tok (->tok tok) toks quotes)))))
|
|
|
|
;; Parse input from stream
|
|
;;
|
|
;; Input:
|
|
;; - Port object
|
|
;; - Current token
|
|
;; - List of tokens read (if applicable)
|
|
;; - Bool - Read-all mode, or just read the next object?
|
|
;; - Bool - Are we inside a comment?
|
|
;; - Quote level
|
|
;; - Level of nested parentheses
|
|
;; - Entry in the in-port table for this port
|
|
;;
|
|
;; Output: next object, or list of objects (if read-all mode)
|
|
;;
|
|
(define (parse fp tok toks all? comment? quotes parens ptbl)
|
|
(in-port:set-cnum! ptbl
|
|
(+ 1 (in-port:get-cnum ptbl)))
|
|
|
|
(let ((c (if (in-port:get-buf ptbl)
|
|
(in-port:read-buf! ptbl) ;; Already buffered
|
|
(read-char fp))))
|
|
;; DEBUGGING
|
|
;(write `(DEBUG read ,tok ,c))
|
|
;(write (newline))
|
|
;; END DEBUG
|
|
(cond
|
|
((eof-object? c)
|
|
(if (> parens 0)
|
|
(parse-error "missing closing parenthesis"
|
|
(in-port:get-lnum ptbl)
|
|
(in-port:get-cnum ptbl)))
|
|
(if all?
|
|
(reverse (get-toks tok toks quotes))
|
|
(let ((last (get-toks tok toks quotes)))
|
|
(if (> (length last) 0)
|
|
(car last)
|
|
c)))) ;; EOF
|
|
(comment?
|
|
(if (eq? c #\newline)
|
|
(begin
|
|
(in-port:set-lnum! ptbl
|
|
(+ 1 (in-port:get-lnum ptbl)))
|
|
(in-port:set-cnum! ptbl 0)
|
|
(parse fp '() toks all? #f quotes parens ptbl))
|
|
(parse fp '() toks all? #t quotes parens ptbl)))
|
|
((char-whitespace? c)
|
|
(if (equal? c #\newline)
|
|
(in-port:set-lnum! ptbl
|
|
(+ 1 (in-port:get-lnum ptbl))))
|
|
(if (equal? c #\newline)
|
|
(in-port:set-cnum! ptbl 0))
|
|
(parse/tok fp tok toks all? #f quotes parens ptbl c))
|
|
((eq? c #\;)
|
|
(parse/tok fp tok toks all? #t quotes parens ptbl c))
|
|
((eq? c #\')
|
|
(cond
|
|
((and (not all?) (not quotes) (not (null? tok)))
|
|
;; Reached a terminal char, read out previous token
|
|
;; TODO: would also need to do this if previous char was
|
|
;; not a quote!
|
|
;; EG: 'a'b ==> (quote a) (quote b), NOT (quote (quote b))
|
|
(in-port:set-buf! ptbl c)
|
|
(car (add-tok (->tok tok) toks quotes)))
|
|
(else
|
|
(let ((quote-level (if quotes
|
|
(+ quotes 1)
|
|
1)))
|
|
(cond
|
|
((null? tok)
|
|
(parse fp '() toks all? comment? quote-level parens ptbl))
|
|
(else
|
|
(parse fp '() (add-tok (->tok tok) toks quotes)
|
|
all? comment? quote-level parens ptbl)))))))
|
|
((eq? c #\()
|
|
;(write `(DEBUG read open paren ,tok))
|
|
(cond
|
|
((and (not all?) (not (null? tok)))
|
|
;; Reached a terminal char, read out previous token
|
|
(in-port:set-buf! ptbl c)
|
|
(car (add-tok (->tok tok) toks quotes)))
|
|
(else
|
|
(let ((sub ;(_cyc-read-all fp (+ parens 1)))
|
|
(parse fp '() '() #t #f #f (+ parens 1) ptbl))
|
|
(toks* (get-toks tok toks quotes)))
|
|
(define new-toks (add-tok
|
|
(if (and (pair? sub) (dotted? sub))
|
|
(->dotted-list sub)
|
|
sub)
|
|
toks*
|
|
quotes))
|
|
;(write `(DEBUG incrementing paren level ,parens ,sub))
|
|
(if all?
|
|
(parse fp '() new-toks all? #f #f parens ptbl)
|
|
(car new-toks))))))
|
|
((eq? c #\))
|
|
;(write `(DEBUG decrementing paren level ,parens))
|
|
(if (= parens 0)
|
|
(parse-error "unexpected closing parenthesis"
|
|
(in-port:get-lnum ptbl)
|
|
(in-port:get-cnum ptbl)))
|
|
(reverse (get-toks tok toks quotes)))
|
|
((eq? c #\")
|
|
(cond
|
|
((and (not all?) (not (null? tok)))
|
|
;; Reached a terminal char, read out previous token
|
|
(in-port:set-buf! ptbl c)
|
|
(car (add-tok (->tok tok) toks quotes)))
|
|
(else
|
|
(let ((str (read-str fp '() ptbl))
|
|
(toks* (get-toks tok toks quotes)))
|
|
(define new-toks (add-tok str toks* quotes))
|
|
(if all?
|
|
(parse fp '() new-toks all? #f #f parens ptbl)
|
|
(car new-toks))))))
|
|
((eq? c #\#)
|
|
(if (null? tok)
|
|
;; # reader
|
|
(let ((next-c (read-char fp)))
|
|
(in-port:set-cnum! ptbl
|
|
(+ 1 (in-port:get-cnum ptbl)))
|
|
(cond
|
|
;; Do not use add-tok below, no need to quote a bool
|
|
((eq? #\t next-c) (parse fp '() (cons #t toks) all? #f #f parens ptbl))
|
|
((eq? #\f next-c) (parse fp '() (cons #f toks) all? #f #f parens ptbl))
|
|
((eq? #\\ next-c)
|
|
(let ((new-toks (cons (read-pound fp ptbl) toks)))
|
|
(if all?
|
|
(parse fp '() new-toks all? #f #f parens ptbl)
|
|
(car new-toks))))
|
|
(else
|
|
(parse-error "Unhandled input sequence"
|
|
(in-port:get-lnum ptbl)
|
|
(in-port:get-cnum ptbl)))))
|
|
;; just another char...
|
|
(parse fp (cons c tok) toks all? #f quotes parens ptbl)))
|
|
(else
|
|
(parse fp (cons c tok) toks all? #f quotes parens ptbl)))))
|
|
|
|
;; Read chars past a leading #\
|
|
(define (read-pound fp ptbl)
|
|
(define (done raw-buf)
|
|
(let ((buf (reverse raw-buf)))
|
|
(cond
|
|
((= 0 (length buf))
|
|
(parse-error "missing character"
|
|
(in-port:get-lnum ptbl)
|
|
(in-port:get-cnum ptbl)))
|
|
((= 1 (length buf))
|
|
(car buf))
|
|
((equal? buf '(#\a #\l #\a #\r #\m))
|
|
(integer->char 7))
|
|
((equal? buf '(#\b #\a #\c #\k #\s #\p #\a #\c #\e))
|
|
(integer->char 8))
|
|
((equal? buf '(#\d #\e #\l #\e #\t #\e))
|
|
(integer->char 127))
|
|
((equal? buf '(#\e #\s #\c #\a #\p #\e))
|
|
(integer->char 27))
|
|
((equal? buf '(#\n #\e #\w #\l #\i #\n #\e))
|
|
(integer->char 10))
|
|
((equal? buf '(#\n #\u #\l #\l))
|
|
(integer->char 0))
|
|
((equal? buf '(#\r #\e #\t #\u #\r #\n))
|
|
(integer->char 13))
|
|
((equal? buf '(#\s #\p #\a #\c #\e))
|
|
(integer->char 32))
|
|
((equal? buf '(#\t #\a #\b))
|
|
(integer->char 9))
|
|
(else
|
|
(parse-error (string-append
|
|
"unable to parse character: "
|
|
(list->string buf))
|
|
(in-port:get-lnum ptbl)
|
|
(in-port:get-cnum ptbl))))))
|
|
(define (loop buf)
|
|
(let ((c (peek-char fp)))
|
|
(if (or (eof-object? c)
|
|
(char-whitespace? c)
|
|
(and (> (length buf) 0)
|
|
(equal? c #\))))
|
|
(done buf)
|
|
(loop (cons (read-char fp) buf)))))
|
|
(loop '()))
|
|
|
|
(define (read-str fp buf ptbl)
|
|
(let ((c (read-char fp)))
|
|
(cond
|
|
((eof-object? c)
|
|
(parse-error "missing closing double-quote"
|
|
(in-port:get-lnum ptbl)
|
|
(in-port:get-cnum ptbl)))
|
|
((equal? #\\ c)
|
|
(read-str fp (read-str-esc fp buf ptbl) ptbl))
|
|
((equal? #\" c)
|
|
(list->string (reverse buf)))
|
|
(else
|
|
(read-str fp (cons c buf) ptbl)))))
|
|
|
|
;; Read an escaped character within a string
|
|
;; The escape '\' has already been read at this point
|
|
(define (read-str-esc fp buf ptbl)
|
|
(let ((c (read-char fp)))
|
|
(cond
|
|
((eof-object? c)
|
|
(parse-error "missing escaped character within string"
|
|
(in-port:get-lnum ptbl)
|
|
(in-port:get-cnum ptbl)))
|
|
((or (equal? #\" c)
|
|
(equal? #\\ c))
|
|
(cons c buf))
|
|
((equal? #\n c)
|
|
(cons #\newline buf))
|
|
(else
|
|
(parse-error "invalid escape character in string"
|
|
(in-port:get-lnum ptbl)
|
|
(in-port:get-cnum ptbl))))))
|
|
|
|
(define (sign? c)
|
|
(or
|
|
(equal? c #\+)
|
|
(equal? c #\-)))
|
|
|
|
;; parse-atom -> [chars] -> literal
|
|
(define (parse-atom a)
|
|
(cond
|
|
((or (char-numeric? (car a))
|
|
(and (> (length a) 1)
|
|
(char-numeric? (cadr a))
|
|
(sign? (car a))))
|
|
(string->number (list->string a)))
|
|
(else
|
|
(string->symbol (list->string a)))))
|
|
|
|
;; Main lexer/parser
|
|
(define cyc-read ;; TODO: should be (read), but that is breaking on csi 4.8.0.5
|
|
(lambda args
|
|
(let ((fp (if (null? args)
|
|
(current-input-port)
|
|
(car args))))
|
|
(parse fp '() '() #f #f #f 0 (reg-port fp)))))
|
|
|
|
;; read-all -> port -> [objects]
|
|
(define (read-all . args)
|
|
(let ((fp (if (null? args)
|
|
(current-input-port)
|
|
(car args))))
|
|
(define (loop fp result)
|
|
(let ((obj (cyc-read fp)))
|
|
(if (eof-object? obj)
|
|
(reverse result)
|
|
(loop fp (cons obj result)))))
|
|
(loop fp '())))
|
|
|
|
;; TODO: for some reason this causes trouble in chicken 4.8. WTF??
|
|
;; read -> port -> object
|
|
;(define read cyc-read)
|
|
|
|
;(define (repl)
|
|
; ;; Test code
|
|
; ;(let ((fp (open-input-file "tests/begin.scm")))
|
|
; ;(let ((fp (open-input-file "tests/strings.scm")))
|
|
; ;(let ((fp (open-input-file "eval.scm")))
|
|
; ;(let ((fp (open-input-file "dev.scm")))
|
|
; ; (write (read-all fp)))
|
|
; (let ((fp (current-input-port)))
|
|
; (write (cyc-read fp)))
|
|
; (repl))
|
|
;;(repl)
|
|
|
|
|