commit cdec643680f0f3f4d42dac64974880bd21d0f01e Author: Justin Ethier Date: Sat Feb 21 22:15:18 2015 -0500 Initial file diff --git a/FEATURES.md b/FEATURES.md new file mode 100644 index 00000000..2bb16a8b --- /dev/null +++ b/FEATURES.md @@ -0,0 +1,38 @@ +TODO: list of features, table of RxRS features (??), etc + +R7RS Compliance + +Section | Status | Comments +------- | ------ | --------- +2.2 Whitespace and comments | | +2.3 Other notations | | +2.4 Datum labels | | +3.1 Variables, syntactic keywords, and regions | | +3.2 Disjointness of types | | +3.3 External representations | | +3.4 Storage model | | +3.5 Proper tail recursion | | +4.1 Primitive expression types | | +4.2 Derived expression types | | +4.3 Macros | | +5.1 Programs | | +5.2 Import declarations | | +5.3 Variable definitions | | +5.4 Syntax definitions | | +5.5 Record-type definitions | | +5.6 Libraries | | +5.7 The REPL | | +6.1 Equivalence predicates | | +6.2 Numbers | | +6.3 Booleans | | +6.4 Pairs and lists | | +6.5 Symbols | | +6.6 Characters | | +6.7 Strings | | +6.8 Vectors | | +6.9 Bytevectors | | +6.10 Control features | | +6.11 Exceptions | | +6.12 Environments and evaluation | | +6.13 Input and output | | +6.14 System interface | | diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..a90da68d --- /dev/null +++ b/Makefile @@ -0,0 +1,32 @@ +TESTSCM = unit-tests +TESTFILES = $(addprefix tests/, $(addsuffix .scm, $(TESTSCM))) + +all: cyclone + +trans.so: trans.scm + csc -s trans.scm + +cgen.so: cgen.scm + csc -s cgen.scm + +parser.so: parser.scm + csc -s parser.scm + +cyclone: cyclone.scm trans.so cgen.so parser.so + csc cyclone.scm + +.PHONY: test +test: $(TESTFILES) cyclone + $(foreach f,$(TESTSCM), echo tests/$(f) ; ./cyclone tests/$(f).scm && tests/$(f) && rm -rf tests/$(f);) + +repl: cyclone repl.scm eval.scm parser.scm + ./cyclone repl.scm + +.PHONY: tags +tags: + ctags -R * + +.PHONY: clean +clean: + rm -rf a.out *.o *.so *.c *.out tags cyclone repl + $(foreach f,$(TESTSCM), rm -rf $(f) $(f).c tests/$(f).c;) diff --git a/README.md b/README.md new file mode 100644 index 00000000..1eab6cc8 --- /dev/null +++ b/README.md @@ -0,0 +1,57 @@ +[cyclone-scheme](http://justinethier.github.com/nugget/cyclone) + +Cyclone is an experimental Scheme-to-C compiler that uses the [Cheney on the MTA](http://www.pipeline.com/~hbaker1/CheneyMTA.html) technique to implement full tail recursion, continuations, and generational garbage collection. + +Building +------------ + +Prerequisites: + +- make +- gcc +- CHICKEN Scheme + +CHICKEN is required to bootstrap the Scheme parts of Cyclone. A long-term goal is for the compiler to be self-hosting. + +From the source directory, to build and run the compiler: + + $ make + ... + $ ./cyclone + +To build the interpreter: + + $ make repl + ... + $ ./repl + +Installation +------------ +At the moment there is no support for a separate installation. Just run `cyclone` from the build directory. + +Documentation +------------- +Run the `cyclone` command to compile a single Scheme file, and the `repl` command to start the interactive interpreter. + +List of [features](FEATURES.md). + +TODO: "how it works section", or a link to a document that provides a brief overview. Much of this would also involve tying together references + +References +---------- + +- [CONS Should Not CONS Its Arguments, Part II: Cheney on the M.T.A.](http://www.pipeline.com/~hbaker1/CheneyMTA.html), by Henry Baker +- [CHICKEN Scheme](http://www.call-cc.org/) +- [Chibi Scheme](http://code.google.com/p/chibi-scheme/) +- [Compiling Scheme to C with closure conversion](http://matt.might.net/articles/compiling-scheme-to-c/) +- [Lisp in Small Pieces](http://pagesperso-systeme.lip6.fr/Christian.Queinnec/WWW/LiSP.html), by Christian Queinnec +- [R5RS Scheme Specification](http://www.schemers.org/Documents/Standards/R5RS/HTML/) +- [R7RS Scheme Specification](http://trac.sacrideo.us/wg/wiki) +- [Structure and Interpretation of Computer Programs](https://mitpress.mit.edu/sicp/full-text/book/book.html), by Harold Abelson and Gerald Jay Sussman with Julie Sussman +- [The 90 minute Scheme to C compiler](http://churchturing.org/y/90-min-scc.pdf), by Marc Feeley + +License +------- +Copyright (C) 2014 [Justin Ethier](http://github.com/justinethier) + +License terms TBD diff --git a/TODO b/TODO new file mode 100644 index 00000000..a47895c8 --- /dev/null +++ b/TODO @@ -0,0 +1,129 @@ +Working TODO list: + + - Issues with detecting cycles: + - (equal?) loops forever when comparing two different circular lists + - printing *global-environment* in the repl still loops forever + + - compiled/interpreted code integration + * call a compiled function from eval + * call an interpreted function from compiled code (or is eval good enough? maybe that is the answer?) + + - Use a lib.scm for libs, similar to eval/parser modules? + + - cyclone should return an error code (IE: 1) when compilation fails and a 0 when it is successful. that way it can be scripted via bash (for example) + or, does it already do this? + + - along the same lines, output should be controlled in a better way. or at minimum should print errors using stderr (possible with standard scheme, though??) + + - Globals - issue list + - call/cc may be broken for global define's, or at least is not optimal + in that the code for call/cc will be added separately to each define. + maybe this is OK, at least functionally + + - String support + issue is how to support strings themselves in memory. can add them directly to the string_type, but then apply won't work + because it could return an unknown number of bytes. on the other hand could use a separate data heap that is mirrored during GC. + may need some extra buffer for that because technically it could overflow any time a new string is allocated, not just during + function calls. but this would work for apply as well as everything else, I believe. obviously it makes GC a bit harder because + there is another pair of heaps to deal with. but all that would be done is that strings from heap A would be copied to B during GC. + GC would need to keep track of a pointer to each one. Sounds straightforward, but have to be careful of complications. + Initial plan: + - Add two "data" heap sections, and vars for each (head ptr, pos ptr [active only?], size) + - Allocate string on active data heap via make_string + - Initiate GC when stack exceeded or data heap under certain threshold + - Need adequate extra space in data heap (100K? make config), since we only check it upon function call + - Need to update GC to copy strings to other heap + - Wait, this is broken if anything is pointing to one of these strings, since memory location changes upon GC! + Is that a fatal issue? How to handle? could write string operations such that any operate on copies of + strings rather than pointing to another string. not nearly as efficient but avoids this problem. could revisit + other solutions down the road. + - Anything else? Probably want to branch for this development, just in case there are complications + + COMPLICATION - only need to memcpy strings on data heap during a major collection. during a minor collection the strings are already where they need to be + need to fully-implement this in the runtime by passing minor/major flag to transport + + TODO: trigger GC if data heap too low + TODO: once this works but before moving all, consolidate all this in docs/strings.txt or such. would be useful to keep these notes + + +- Error handling + need to perform much more error handling of input code. one of the biggest is to report if a function is passed the wrong number of arguments, as this will result in segfauls, bad transport errors, etc downstream if it is allowed. + +- Unit test improvements + - concatenate all into one file when compiling / running + - add assert functions, and actually test for equality + otherwise it is too easy to miss failing test cases, unless they + blow up the runtime + - This has already been done, just need to incorporate other existing tests. + +- Parser + ;'a'c ;; TODO: this is still an issue, try it + +- in regard to native apply support for primitives + - do we need to create a table of primitives, like husk? + might allow for more efficient comparisons than the stupid string cmp's + that are required if we use symbols to denote primitives (which also + breaks lexical scoping) + +- Improved symbol storage + as part of above, probably will need a more dynamic and accurate way to store symbols. + for example, how to store the + symbol, how to differentiate #t and 't etc. + perhaps could use a malloc'd table for this? do want the lookups to be fast - IE, integer (pointer) comparisons and NOT string comparisons (those are unacceptable for symbols) +- Improvements to symbol handling, including printing of symbols with invalid C chars, like 'hello-world +- Consoldate list of primitives in (prim?) and (c-compile-prim?). should also include other information such as number of args (or variable args), for error handling + +- Notes on implementing variables + + * could maintain env's in the interpreted code, and perform operations there to lookup vars. If a lookup fails though, would have to then fall back to looking in the compiled code. The compiler would have to (only if eval is used) set aside data to allow a reference back to vars in this case. Also, this becomes tricky because a var may not even be used, so it might not be added to any closures. There may have to be special analysis done if eval is used. + * chicken does this, obviously. could map var ==> rename (how to know?) and then look it up in a C-based symbol table + * lexical addressing (see chapter 5 of SICP) can be used to find a variable in recursive env's, so you can access it directly instead of having to recursively traverse environments. + +- Add eval support. + - moving forward with meta-circular evaluator from SICP. Let's try to add more cases and code, and see how far it can be pushed. + - also want to try integrating it with trans somehow. IE, if eval is a free var, then add eval code to the compiled program. + +Original notes: Will probably also require apply, read, etc. will probably be necessary to write interpreter in CPS style - see notes. One idea - parse and compile input to scheme expressions, then call apply. could that work? should also see chapter 6 of lisp in small pieces. + +- Integrate debug script into cyclone, so that by passing a specific command line arg, the compiler will output results of closure-conversion, prepended with the debug contents. That way the SCM code can be debugged independently of the compiled executable. + +- What happens when a continuation is captured? assigned to a variable? applied? + Should look into this a bit using cyclone and call/cc + +- Implement ER-macros using eval, in scheme. with this in place, could implement basic macros using ER and replace "desugar". presumably syntax rules could be implemented this way as well, but that can wait for a later time +- Pass port along with emit procedures, to allow the scheme code to write to an output file (IE, c file)?? Or is with-output-file good enough? unfortunately that is not in husk yet so it makes boostrapping harder +- Add more numeric support, and doubles +- WRT set! support, and mutable variables: + - set aggressive GC, and see if there are any problems with data being lost + need to do this with a more complicated example, though +- Could add other scheme library functions to the compiled prog just + like call/cc. alternatively could compile them into a library somewhere + for inclusion. +- define - can this with with mutable variable elimination, or does it require C globals (per cboyer)? Are there special cases for top-level? If cells can be used for vars, do we need to keep track of the roots to prevent invalid GC? lots of questions here +- Question about closures and continuations: + Presumably every function will recieve a closure. Do we have to differentiate between continuation (which every + function must have) and closure (which can be empty if no fv)? right now the MTA runtime combines the two by + having an fn argument to each closure. Is that OK? + + FWIW, chicken passes the following to generated C funcs: + - number of args + - env (the closure from caller) + - continuation (function to call into) + - actual args + +- may be necessary to specify arity of functions in call to apply +- GC - notes from: http://www.more-magic.net/posts/internals-gc.html + + JAE - Good notes here about mutations (use write barrier to keep track of changes, EG: vector-set!). remember changes so they can be handled properly during next GC: + + Another major oversight is the assumption that objects can only point from the stack into the heap. If Scheme was a purely functional language, this would be entirely accurate: new objects can refer to old objects, but there is no way that a preexisting object can be made to refer to a newly created object. For that, you need to support mutation. + But Scheme does support mutation! So what happens when you use vector-set! to store a newly created, stack-allocated value in an old, heap-allocated vector? If we used the above algorithm, the newly created element would either be part of the live set and get copied, but the vector's pointer would not be updated, or it wouldn't be part of the live set and the object would be lost in the stack reset. + The answer to this problem is also pretty simple: we add a so-called write barrier. Whenever a value is written to an object, it is remembered. Then, when performing a GC, these remembered values are considered to be part of the live set, just like the addresses in the saved call. This is also the reason CHICKEN always shows the number of mutations when you're asking for GC statistics: mutation may slow down a program because GCs might take longer. + + JAE - Important point, that the heap must be reallocated during a major GC if there is too much data in the stack / old heap. Considered this but not sure if cyclone's GC does that right now: + +The smart reader might have noticed a small problem here: what if the amount of garbage cleaned up is less than the data on the stack? Then, the stack data can't be copied to the new heap because it simply is too small. Well, this is when a third GC mode is triggered: a reallocating GC. This causes a new heap to be allocated, twice as big as the current heap. This is also split in from- and tospace. Then, Cheney's algorithm is performed on the old heap's fromspace, using one half of the new heap as tospace. When it's finished, the new tospace is called fromspace, and the other half of the new heap is called tospace. Then, the old heap is de-allocated. + +- farther off but along the same lines, how to support compilation of + multiple scheme files into multiple C modules? + +- Just a thought: if this ever became self-hosting, could distribute compiled C files diff --git a/cgen.scm b/cgen.scm new file mode 100644 index 00000000..d3a2683e --- /dev/null +++ b/cgen.scm @@ -0,0 +1,1079 @@ +;; +;; Cyclone Scheme +;; Copyright (c) 2014, Justin Ethier +;; All rights reserved. +;; +;; This module compiles scheme code to a Cheney-on-the-MTA C runtime. +;; + +(define (emit line) + (display line) + (newline)) + +(define (emits str) + (display str)) + +(define (emit-newline) + (newline)) + +(define (string-join lst delim) + (cond + ((null? lst) + "") + ((= (length lst) 1) + (car lst)) + (else + (string-append + (car lst) + delim + (string-join (cdr lst) delim))))) + +;; Escape chars in a C-string, so it can be safely written to a C file +(define (cstr:escape-chars str) + (letrec ((next (lambda (head tail) + (cond + ((null? head) (list->string (reverse tail))) + ((equal? (car head) #\") + (next (cdr head) (cons #\" (cons #\\ tail)))) + ((equal? (car head) #\\) + (next (cdr head) (cons #\\ (cons #\\ tail)))) + ((equal? (car head) #\newline) + (next (cdr head) + (cons #\n (cons #\\ tail)))) + (else + (next (cdr head) (cons (car head) tail))))))) + (next (string->list str) '()))) + +;; Name-mangling. + +;; We have to "mangle" Scheme identifiers into +;; C-compatible identifiers, because names like +;; foo-bar/baz are not identifiers in C. + +; mangle : symbol -> string +(define (mangle symbol) + (letrec + ((m (lambda (chars) + (if (null? chars) + '() + (if (or (and (char-alphabetic? (car chars)) (not (char=? (car chars) #\_))) + (char-numeric? (car chars))) + (cons (car chars) (m (cdr chars))) + (cons #\_ (append (integer->char-list (char->natural (car chars))) + (m (cdr chars)))))))) + (ident (list->string (m (string->list (symbol->string symbol)))))) + (if (member (string->symbol ident) *c-keywords*) + (string-append "_" ident) + ident))) + +(define (mangle-global symbol) + (string-append "__glo_" (mangle symbol))) + +(define *c-keywords* + '(auto _Bool break case char _Complex const continue default do double else + enum extern float for goto if _Imaginary inline int long register restrict + return short signed sizeof static struct switch typedef union unsigned + void volatile while + list ;; Not a keyword but reserved type + )) + +(define *c-main-function* +"main(int argc,char **argv) +{long stack_size = long_arg(argc,argv,\"-s\",STACK_SIZE); + long heap_size = long_arg(argc,argv,\"-h\",HEAP_SIZE); + global_stack_size = stack_size; + global_heap_size = heap_size; + main_main(stack_size,heap_size,(char *) &stack_size); + return 0;}") + +;;; Auto-generation of C macros +(define *c-call-arity* 0) + +(define (set-c-call-arity! arity) + (cond + ((not (number? arity)) + (error `(Non-numeric number of arguments received ,arity))) + (else + (if (> arity *c-call-arity*) + (set! *c-call-arity* arity))))) + +(define (emit-c-macros) + (c-macro-declare-globals) + (c-macro-GC-globals) + (emit (c-macro-after-longjmp)) + (emit-c-arity-macros 0)) + +(define (emit-c-arity-macros arity) + (when (<= arity *c-call-arity*) + (emit (c-macro-funcall arity)) + (emit (c-macro-return-funcall arity)) + (emit (c-macro-return-check arity)) + (emit-c-arity-macros (+ arity 1)))) + +(define (c-macro-after-longjmp) + (letrec ( + (append-args + (lambda (n) + (if (> n 0) + (string-append + (append-args (- n 1)) + ",gc_ans[" (number->string (- n 1)) "]") + ""))) + (append-next-clause + (lambda (i) + (cond + ((= i 0) + (string-append + " if (gc_num_ans == 0) { \\\n" + " funcall0((closure) gc_cont); \\\n" + (append-next-clause (+ i 1)))) + ((<= i *c-call-arity*) + (let ((this-clause + (string-append + " } else if (gc_num_ans == " (number->string i)") { \\\n" + " funcall" (number->string i) "((closure) gc_cont" (append-args i) "); \\\n"))) + (string-append + this-clause + (append-next-clause (+ i 1))))) + (else + " } else { \\\n" + " printf(\"Unsupported number of args from GC %d\\n\", gc_num_ans); \\\n" + " } \n"))))) + (string-append + "#define AFTER_LONGJMP \\\n" + (append-next-clause 0)))) + +(define (c-macro-return-funcall num-args) + (let ((args (c-macro-n-prefix num-args ",a")) + (n (number->string num-args)) + (arry-assign (c-macro-array-assign num-args "buf" "a"))) + (string-append + "/* Return to continuation after checking for stack overflow. */\n" + "#define return_funcall" n "(cfn" args ") \\\n" + "{char stack; \\\n" + " if (DEBUG_ALWAYS_GC || check_overflow(&stack,stack_limit1)) { \\\n" + " object buf[" n "]; " arry-assign "\\\n" + " GC(cfn,buf," n "); return; \\\n" + " } else {funcall" n "((closure) (cfn)" args "); return;}}\n"))) + +(define (c-macro-return-check num-args) + (let ((args (c-macro-n-prefix num-args ",a")) + (n (number->string num-args)) + (arry-assign (c-macro-array-assign num-args "buf" "a"))) + (string-append + "/* Evaluate an expression after checking for stack overflow. */\n" + "#define return_check" n "(_fn" args ") { \\\n" + " char stack; \\\n" + " if (DEBUG_ALWAYS_GC || check_overflow(&stack,stack_limit1)) { \\\n" + " object buf[" n "]; " arry-assign " \\\n" + " mclosure0(c1, _fn); \\\n" + " GC(&c1, buf, " n "); return; \\\n" + " } else { (_fn)(" n ",(closure)_fn" args "); }}\n"))) + +(define (c-macro-funcall num-args) + (let ((args (c-macro-n-prefix num-args ",a")) + (n (number->string num-args)) + (n-1 (number->string (if (> num-args 0) (- num-args 1) 0))) + (wrap (lambda (s) (if (> num-args 0) s "")))) + (string-append + "#define funcall" n "(cfn" args ") " + (wrap (string-append "if (prim(cfn)) { Cyc_apply(" n-1 ", (closure)a1, cfn" (if (> num-args 1) (substring args 3 (string-length args)) "") "); }")) + (wrap " else { ") + "((cfn)->fn)(" n ",cfn" args ")" + (wrap ";}") + ))) + +(define (c-macro-n-prefix n prefix) + (if (> n 0) + (string-append + (c-macro-n-prefix (- n 1) prefix) + (string-append prefix (number->string n))) + "")) + +(define (c-macro-array-assign n prefix assign) + (if (> n 0) + (string-append + (c-macro-array-assign (- n 1) prefix assign) + prefix "[" (number->string (- n 1)) "] = " + assign (number->string n) ";") + "")) + +(define (c-macro-GC-globals) + ; emit directly to be more efficient + ; TODO: convert all c-macro functions to direct emit??? + (emit "#define GC_GLOBALS \\") + (emits "{") + (for-each + (lambda (global) + (emits " \\\n transp(") + (emits (mangle-global (car global))) + (emits ");")) + *globals*) + (emit "}") + (emit "")) + +(define (c-macro-declare-globals) + (emits "#define DECLARE_GLOBALS ") + (for-each + (lambda (global) + (emit " \\") + (emits " static volatile object ") + (emits (mangle-global (car global))) + (emits " = nil;")) + *globals*) + (emit "") + (emit "")) + +;;; Compilation routines. + +;; Return generated code that also requests allocation of C variables on stack +(define (c-code/vars str cvars) + (list str + cvars)) + +;; Return generated code with no C variables allocated on the stack +(define (c-code str) (c-code/vars str (list))) + +;; Append arg count to a C code pair +(define (c:tuple/args cp num-args) + (append cp (list num-args))) + +;; Functions to work with data structures that contain C code: +;; +;; body - The actual body of C code +;; allocs - Allocations made by C code, eg "int c" +;; num-args - Number of function arguments combined in the tuple (optional) +;; +(define (c:body c-pair) (car c-pair)) +(define (c:allocs c-pair) (cadr c-pair)) +(define (c:num-args c-tuple) (caddr c-tuple)) + +(define (c:allocs->str c-allocs . prefix) + (apply + string-append + (map + (lambda (c) + (string-append + (if (null? prefix) + "" + (car prefix)) + c + "\n")) + c-allocs))) + +(define (c:allocs->str2 c-allocs prefix suffix) + (apply + string-append + (map + (lambda (c) + (string-append prefix c suffix)) + c-allocs))) + +(define (c:append cp1 cp2) + (c-code/vars + (string-append (c:body cp1) (c:body cp2)) + (append (c:allocs cp1) (c:allocs cp2)))) + +(define (c:append/prefix prefix cp1 cp2) + (c-code/vars + (string-append prefix (c:body cp1) (c:body cp2)) + (append (c:allocs cp1) (c:allocs cp2)))) + +(define (c:serialize cp prefix) + (string-append + (c:allocs->str (c:allocs cp) prefix) + prefix + (c:body cp))) + +;; c-compile-program : exp -> string +(define (c-compile-program exp) + (let* ((preamble "") + (append-preamble (lambda (s) + (set! preamble (string-append preamble " " s "\n")))) + (body (c-compile-exp exp append-preamble "cont"))) + ;(write `(DEBUG ,body)) + (string-append + preamble + (c:serialize body " ") ;" ;\n" +; "int main (int argc, char* argv[]) {\n" +; " return 0;\n" +; " }\n" +))) + +;; c-compile-exp : exp (string -> void) -> string +;; +;; exp - expression to compiler +;; append-preamble - ?? +;; cont - name of the next continuation +;; this is experimental and probably needs refinement +(define (c-compile-exp exp append-preamble cont) + (cond + ; Core forms: + ((const? exp) (c-compile-const exp)) + ((prim? exp) + ;; TODO: this needs to be more refined, probably w/a lookup table + (c-code (string-append "primitive_" (mangle exp)))) + ((ref? exp) (c-compile-ref exp)) + ((quote? exp) (c-compile-quote exp)) + ((if? exp) (c-compile-if exp append-preamble cont)) + + ; IR (2): + ((tagged-list? '%closure exp) + (c-compile-closure exp append-preamble cont)) + ; Global definition + ((define? exp) + (c-compile-global exp append-preamble cont)) + ; Special case - global function w/out a closure. Create an empty closure + ((tagged-list? 'lambda exp) + (c-compile-exp + `(%closure ,exp) + append-preamble cont)) + + ; Application: + ((app? exp) (c-compile-app exp append-preamble cont)) + (else (error "unknown exp in c-compile-exp: " exp)))) + +(define (c-compile-quote qexp) + (let ((exp (cadr qexp))) + (c-compile-scalars exp))) + +(define (c-compile-scalars args) + (letrec ( + (num-args 0) + (create-cons + (lambda (cvar a b) + (c-code/vars + (string-append "make_cons(" cvar "," (c:body a) "," (c:body b) ");") + (append (c:allocs a) (c:allocs b)))) + ) + (_c-compile-scalars + (lambda (args) + (cond + ((null? args) + (c-code "nil")) + ((not (pair? args)) + (c-compile-const args)) + (else + (let* ((cvar-name (mangle (gensym 'c))) + (cell (create-cons + cvar-name + (c-compile-const (car args)) + (_c-compile-scalars (cdr args))))) + (set! num-args (+ 1 num-args)) + (c-code/vars + (string-append "&" cvar-name) + (append + (c:allocs cell) + (list (c:body cell)))))))))) + (c:tuple/args + (_c-compile-scalars args) + num-args))) + +;; c-compile-const : const-exp -> c-pair +;; +;; Typically this function is used to compile constant values such as +;; a single number, boolean, etc. However, it can be passed a quoted +;; item such as a list, to compile as a literal. +(define (c-compile-const exp) + (cond + ((null? exp) + (c-code "nil")) + ((pair? exp) + (c-compile-scalars exp)) + ((integer? exp) + (let ((cvar-name (mangle (gensym 'c)))) + (c-code/vars + (string-append "&" cvar-name) ; Code is just the variable name + (list ; Allocate integer on the C stack + (string-append + "make_int(" cvar-name ", " (number->string exp) ");"))))) + ((boolean? exp) + (c-code (string-append + (if exp "boolean_t" "boolean_f")))) + ((char? exp) + (c-code (string-append "obj_char2obj(" + (number->string (char->integer exp)) ")"))) + ((string? exp) + (let ((cvar-name (mangle (gensym 'c)))) + (c-code/vars + (string-append "&" cvar-name) ; Code is just the variable name + (list ; Allocate integer on the C stack + (string-append + "make_string(" cvar-name ", " (->cstr exp) ");"))))) +;TODO: not good enough, need to store new symbols in a table so they can +;be inserted into the C program + ((symbol? exp) + (allocate-symbol exp) + (c-code (string-append "quote_" (mangle exp)))) + (else + (error "unknown constant: " exp)))) + +;; Convert a "scheme" string to a corresponding representation in C. +;; Keep in mind scheme strings can span lines, contain chars that +;; might not be allowed in C, etc. +(define (->cstr str) + (string-append "\"" (cstr:escape-chars str) "\"")) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define (prim->c-func p) + (cond + ((eq? p 'Cyc-global-vars) "Cyc_get_global_variables") + ((eq? p 'Cyc-get-cvar) "Cyc_get_cvar") + ((eq? p 'Cyc-set-cvar!) "Cyc_set_cvar") + ((eq? p 'Cyc-cvar?) "Cyc_is_cvar") + ((eq? p 'has-cycle?) "Cyc_has_cycle") + ((eq? p '+) "__sum") + ((eq? p '-) "__sub") + ((eq? p '*) "__mul") + ((eq? p '/) "__div") + ((eq? p '=) "__num_eq") + ((eq? p '>) "__num_gt") + ((eq? p '<) "__num_lt") + ((eq? p '>=) "__num_gte") + ((eq? p '<=) "__num_lte") + ((eq? p 'apply) "apply") + ((eq? p '%halt) "__halt") + ((eq? p 'error) "Cyc_error") + ((eq? p 'current-input-port) "Cyc_io_current_input_port") + ((eq? p 'open-input-file) "Cyc_io_open_input_file") + ((eq? p 'close-input-port) "Cyc_io_close_input_port") + ((eq? p 'read-char) "Cyc_io_read_char") + ((eq? p 'peek-char) "Cyc_io_peek_char") + ((eq? p 'display) "Cyc_display") + ((eq? p 'write) "Cyc_write") + ((eq? p 'car) "car") + ((eq? p 'cdr) "cdr") + ((eq? p 'caar) "caar") + ((eq? p 'cadr) "cadr") + ((eq? p 'cdar) "cdar") + ((eq? p 'cddr) "cddr") + ((eq? p 'caaar) "caaar") + ((eq? p 'caadr) "caadr") + ((eq? p 'cadar) "cadar") + ((eq? p 'caddr) "caddr") + ((eq? p 'cdaar) "cdaar") + ((eq? p 'cdadr) "cdadr") + ((eq? p 'cddar) "cddar") + ((eq? p 'cdddr) "cdddr") + ((eq? p 'caaaar) "caaaar") + ((eq? p 'caaadr) "caaadr") + ((eq? p 'caadar) "caadar") + ((eq? p 'caaddr) "caaddr") + ((eq? p 'cadaar) "cadaar") + ((eq? p 'cadadr) "cadadr") + ((eq? p 'caddar) "caddar") + ((eq? p 'cadddr) "cadddr") + ((eq? p 'cdaaar) "cdaaar") + ((eq? p 'cdaadr) "cdaadr") + ((eq? p 'cdadar) "cdadar") + ((eq? p 'cdaddr) "cdaddr") + ((eq? p 'cddaar) "cddaar") + ((eq? p 'cddadr) "cddadr") + ((eq? p 'cdddar) "cdddar") + ((eq? p 'cddddr) "cddddr") + ((eq? p 'char->integer) "Cyc_char2integer") + ((eq? p 'integer->char) "Cyc_integer2char") + ((eq? p 'string->number)"Cyc_string2number") + ((eq? p 'list->string) "Cyc_list2string") + ((eq? p 'string->list) "string2list") + ((eq? p 'string-append) "Cyc_string_append") + ((eq? p 'string->symbol) "Cyc_string2symbol") + ((eq? p 'symbol->string) "Cyc_symbol2string") + ((eq? p 'number->string) "Cyc_number2string") + ((eq? p 'assq) "assq") + ((eq? p 'assoc) "assoc") + ((eq? p 'member) "memberp") + ((eq? p 'length) "Cyc_length") + ((eq? p 'set-car!) "Cyc_set_car") + ((eq? p 'set-cdr!) "Cyc_set_cdr") + ((eq? p 'eq?) "Cyc_eq") + ((eq? p 'eqv?) "Cyc_eq") + ((eq? p 'equal?) "equalp") + ((eq? p 'boolean?) "Cyc_is_boolean") + ((eq? p 'char?) "Cyc_is_char") + ((eq? p 'null?) "Cyc_is_null") + ((eq? p 'number?) "Cyc_is_number") + ((eq? p 'pair?) "Cyc_is_cons") + ((eq? p 'string?) "Cyc_is_string") + ((eq? p 'eof-object?) "Cyc_is_eof_object") + ((eq? p 'symbol?) "Cyc_is_symbol") + ((eq? p 'cons) "make_cons") + ((eq? p 'cell) "make_cell") + ((eq? p 'cell-get) "cell_get") + ((eq? p 'set-cell!) "cell_set") + ((eq? p 'set-global!) "global_set") + (else + (error "unhandled primitive: " p)))) + +;; c-compile-prim : prim-exp -> string -> string +(define (c-compile-prim p cont) + (let* ((c-func (prim->c-func p)) + ;; Following closure defs are only used for apply, to + ;; create a new closure for the continuation, if needed. + ;; + ;; Apply is different in that it takes a continuation so that it can + ;; allocate arbitrary data as needed using alloca, and then call into + ;; the cont so allocations can remain on stack until GC. + (closure-sym (mangle (gensym 'c))) + (closure-def + (cond + ((and (eq? p 'apply) + (> (string-length cont) (string-length "__lambda_")) + (equal? (substring cont 0 9) "__lambda_")) + (string-append + "mclosure0(" closure-sym + "," cont "); ")) + (else #f))) + ;; END apply defs + (c-var-assign + (lambda (type) + (let ((cv-name (mangle (gensym 'c)))) + (c-code/vars + (string-append (if (eq? p 'apply) "" "&") cv-name) + (list + (string-append + ;; Define closure if necessary (apply only) + (cond + (closure-def closure-def) + (else "")) + + ;; Emit C variable + type " " cv-name " = " c-func "(" + + ;; Emit closure as first arg, if necessary (apply only) + (cond + (closure-def + (string-append "&" closure-sym ", ")) + ((eq? p 'apply) + (string-append cont ", ")) + (else ""))))))))) + (cond + ((prim/c-var-assign p) + (c-var-assign (prim/c-var-assign p))) + ((prim/cvar? p) + (let ((cv-name (mangle (gensym 'c)))) + (c-code/vars + (if (prim:allocates-object? p) + cv-name ;; Already a pointer + (string-append "&" cv-name)) ;; Point to data + (list + (string-append c-func "(" cv-name))))) + (else + (c-code (string-append c-func "(")))))) + +;; Determine if primitive assigns (allocates) a C variable +;; EG: int v = prim(); +(define (prim/c-var-assign p) + (cond + ((eq? p 'current-input-port) "port_type") + ((eq? p 'open-input-file) "port_type") + ((eq? p 'length) "integer_type") + ((eq? p 'char->integer) "integer_type") + ((eq? p 'string->number) "integer_type") + ((eq? p 'list->string) "string_type") +; ((eq? p 'string->list) "object") + ((eq? p 'string-append) "string_type") + ((eq? p 'symbol->string) "string_type") + ((eq? p 'number->string) "string_type") + ((eq? p 'apply) "object") + (else #f))) + +;; Primitive creates a C variable +(define (prim/cvar? exp) + (and (prim? exp) + (member exp '( + current-input-port open-input-file + char->integer string->number string-append list->string string->list + symbol->string number->string + + - * / apply cons length cell)))) + +;; Need to pass an integer arg count as the function's first parameter +(define (prim:arg-count? exp) + (and (prim? exp) + (member exp '(error string-append)))) + +;; Primitive allocates an object +(define (prim:allocates-object? exp) + (and (prim? exp) + (member exp '(string->list)))) + +;; END primitives +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; c-compile-ref : ref-exp -> string +(define (c-compile-ref exp) + (c-code + (if (member exp *global-syms*) + (mangle-global exp) + (mangle exp)))) + +; c-compile-args : list[exp] (string -> void) -> string +(define (c-compile-args args append-preamble prefix cont) + (letrec ((num-args 0) + (_c-compile-args + (lambda (args append-preamble prefix cont) + (if (not (pair? args)) + (c-code "") + (begin + ;(trace:debug `(c-compile-args ,(car args))) + (set! num-args (+ 1 num-args)) + (c:append/prefix + prefix + (c-compile-exp (car args) + append-preamble cont) + (_c-compile-args (cdr args) + append-preamble ", " cont))))))) + (c:tuple/args + (_c-compile-args args + append-preamble prefix cont) + num-args))) + +;; c-compile-app : app-exp (string -> void) -> string +(define (c-compile-app exp append-preamble cont) + ;(trace:debug `(c-compile-app: ,exp)) + (let (($tmp (mangle (gensym 'tmp)))) + (let* ((args (app->args exp)) + (fun (app->fun exp))) + (cond + ((lambda? fun) + (let* ((lid (allocate-lambda (c-compile-lambda fun))) ;; TODO: pass in free vars? may be needed to track closures + ;; properly, wait until this comes up in an example + (this-cont (string-append "__lambda_" (number->string lid))) + (cgen + (c-compile-args + args + append-preamble + "" + this-cont)) + (num-cargs (c:num-args cgen))) + (set-c-call-arity! num-cargs) + (c-code + (string-append + (c:allocs->str (c:allocs cgen)) + "return_check" (number->string num-cargs) + "(" this-cont + (if (> num-cargs 0) "," "") ; TODO: how to propagate continuation - cont " " + (c:body cgen) ");")))) + + ((prim? fun) + (let* ((c-fun + (c-compile-prim fun cont)) + (c-args + (c-compile-args args append-preamble "" "")) + (num-args (length args)) + (num-args-str + (string-append + (number->string num-args) + (if (> num-args 0) "," ""))) + (c-args* (if (prim:arg-count? fun) + (c:append (c-code num-args-str) c-args) + c-args))) + (if (prim/cvar? fun) + ;; Args need to go with alloc function + (c-code/vars + (c:body c-fun) + (append + (c:allocs c-args*) ;; fun alloc depends upon arg allocs + (list (string-append + (car (c:allocs c-fun)) + (if (prim/c-var-assign fun) "" ",") ; Allocating C var + (c:body c-args*) ");")))) + ;; Args stay with body + (c:append + (c:append c-fun c-args*) + (c-code ")"))))) + + ((equal? '%closure-ref fun) + (c-code (apply string-append (list + "(" + ;; TODO: probably not the ideal solution, but works for now + "(closureN)" + (mangle (car args)) + ")->elts[" + (number->string (- (cadr args) 1))"]")))) + + ;; TODO: may not be good enough, closure app could be from an elt + ((tagged-list? '%closure-ref fun) + (let* ((cfun (c-compile-args (list (car args)) append-preamble " " cont)) + (this-cont (c:body cfun)) + (cargs (c-compile-args (cdr args) append-preamble " " this-cont))) + (set-c-call-arity! (c:num-args cargs)) + (c-code + (string-append + (c:allocs->str (c:allocs cfun) "\n") + (c:allocs->str (c:allocs cargs) "\n") + "return_funcall" (number->string (c:num-args cargs)) + "(" + this-cont + (if (> (c:num-args cargs) 0) "," "") + (c:body cargs) + ");")))) + + ((tagged-list? '%closure fun) + (let* ((cfun (c-compile-closure + fun append-preamble cont)) + (this-cont (string-append "(closure)" (c:body cfun))) + (cargs (c-compile-args + args append-preamble " " this-cont)) + (num-cargs (c:num-args cargs))) + (set-c-call-arity! num-cargs) + (c-code + (string-append + (c:allocs->str (c:allocs cfun) "\n") + (c:allocs->str (c:allocs cargs) "\n") + "return_funcall" (number->string num-cargs) + "(" + this-cont + (if (> num-cargs 0) "," "") + (c:body cargs) + ");")))) + + (else + (error `(Unsupported function application ,exp))))))) + +; c-compile-if : if-exp -> string +(define (c-compile-if exp append-preamble cont) + (let* ((compile (lambda (exp) + (c-compile-exp exp append-preamble cont))) + (test (compile (if->condition exp))) + (then (compile (if->then exp))) + (els (compile (if->else exp)))) + (c-code (string-append + (c:allocs->str (c:allocs test) " ") + "if( !eq(boolean_f, " + (c:body test) + ") ){ \n" + (c:serialize then " ") + "\n} else { \n" + (c:serialize els " ") + "}\n")))) + +;; Global compilation +(define *globals* '()) +(define *global-syms* '()) +(define (global-lambda? global) (cadr global)) +(define (global-not-lambda? global) (not (cadr global))) +(define (add-global var-sym lambda? code) + ;(write `(add-global ,var-sym ,code)) + (set! *globals* (cons (list var-sym lambda? code) *globals*))) +(define (c-compile-global exp append-preamble cont) + (let ((var (define->var exp)) + (body (if (equal? 4 (length exp)) ; Simple var assignment contains superfluous %closure-ref + (cadddr exp) + (car (define->exp exp))))) + (add-global + var + (lambda? body) + (c-compile-exp body append-preamble cont)) + (c-code/vars "" (list "")))) + +;; Symbol compilation + +(define *symbols* '()) + +; These are (at least for now) preallocated by the runtime +(define *reserved-symbols* '(Cyc_procedure)) + +(define (allocate-symbol sym) + (if (and (not (member sym *symbols*)) + (not (member sym *reserved-symbols*))) + (set! *symbols* (cons sym *symbols*)))) + +;; Lambda compilation. + +;; Lambdas get compiled into procedures that, +;; once given a C name, produce a C function +;; definition with that name. + +;; These procedures are stored up an eventually +;; emitted. + +; type lambda-id = natural + +; num-lambdas : natural +(define num-lambdas 0) + +; lambdas : alist[lambda-id,string -> string] +(define lambdas '()) + +; allocate-lambda : (string -> string) -> lambda-id +(define (allocate-lambda lam) + (let ((id num-lambdas)) + (set! num-lambdas (+ 1 num-lambdas)) + (set! lambdas (cons (list id lam) lambdas)) + id)) + +; get-lambda : lambda-id -> (symbol -> string) +(define (get-lambda id) + (cdr (assv id lambdas))) + +(define (lambda->env exp) + (let ((formals (lambda-formals->list exp))) + (if (pair? formals) + (car formals) + 'unused))) + +;; c-compile-closure : closure-exp (string -> void) -> string +;; +;; This function compiles closures generated earlier in the +;; compilation process. Each closure is of the form: +;; +;; (%closure lambda arg ...) +;; +;; Where: +;; - `%closure` is the identifying tag +;; - `lambda` is the function to execute +;; - Each `arg` is a free variable that must be stored within +;; the closure. The closure conversion phase tags each access +;; to one with the corresponding index so `lambda` can use them. +;; +(define (c-compile-closure exp append-preamble cont) + (let* ((lam (closure->lam exp)) + (free-vars + (map + (lambda (free-var) + (if (tagged-list? '%closure-ref free-var) + (let ((var (cadr free-var)) + (idx (number->string (- (caddr free-var) 1)))) + (string-append + "((closureN)" (mangle var) ")->elts[" idx "]")) + (mangle free-var))) + (closure->fv exp))) ; Note these are not necessarily symbols, but in cc form + (cv-name (mangle (gensym 'c))) + (lid (allocate-lambda (c-compile-lambda lam))) + (create-nclosure (lambda () + (string-append + "closureN_type " cv-name ";\n" + cv-name ".tag = closureN_tag;\n " + cv-name ".fn = __lambda_" (number->string lid) ";\n" + cv-name ".num_elt = " (number->string (length free-vars)) ";\n" + cv-name ".elts = (object *)alloca(sizeof(object) * " + (number->string (length free-vars)) ");\n" + (let loop ((i 0) + (vars free-vars)) + (if (null? vars) + "" + (string-append + cv-name ".elts[" (number->string i) "] = " + (car vars) ";\n" + (loop (+ i 1) (cdr vars)))))))) + (create-mclosure (lambda () + (string-append + "mclosure" (number->string (length free-vars)) "(" cv-name ", " + ;; NOTE: + ;; Hopefully will not cause issues with varargs when casting to + ;; generic function type below. Works fine in gcc, not sure if + ;; this is portable to other compilers though + "(function_type)__lambda_" (number->string lid) + (if (> (length free-vars) 0) "," "") + (string-join free-vars ", ") + ");")))) + (c-code/vars + (string-append "&" cv-name) + (list + (if (> (length free-vars) 0) + (create-nclosure) + (create-mclosure)))))) + +; c-compile-formals : list[symbol] -> string +(define (c-compile-formals formals type) + (if (not (pair? formals)) + "" + (string-append + "object " + (mangle (car formals)) + (cond + ((pair? (cdr formals)) + (string-append ", " (c-compile-formals (cdr formals) type))) + ((not (equal? 'args:fixed type)) + (string-append ", object " (mangle (cdr formals)) ", ...")) + (else + ""))))) + +; c-compile-lambda : lamda-exp (string -> void) -> (string -> string) +(define (c-compile-lambda exp) + (let* ((preamble "") + (append-preamble (lambda (s) + (set! preamble (string-append preamble " " s "\n"))))) + (let* ((formals (c-compile-formals + (lambda->formals exp) + (lambda-formals-type exp))) + (tmp-ident (if (> (length (lambda-formals->list exp)) 0) + (mangle (car (lambda->formals exp))) + "")) + (has-closure? + (and + (> (string-length tmp-ident) 3) + (equal? "self" (substring tmp-ident 0 4)))) + (formals* + (string-append + (if has-closure? + "" + (if (equal? "" formals) + "closure _" ;; TODO: seems wrong, will GC be too aggressive + "closure _,")) ;; due to missing refs, with ignored closure? + formals)) + (env-closure (lambda->env exp)) + (body (c-compile-exp + (car (lambda->exp exp)) ;; car ==> assume single expr in lambda body after CPS + append-preamble + (mangle env-closure)))) + (cons + (lambda (name) + (string-append "static void " name + "(int argc, " + formals* + ") {\n" + preamble + (if (lambda-varargs? exp) + ;; Load varargs from C stack into Scheme list + (string-append + ; DEBUGGING: + ;"printf(\"%d %d\\n\", argc, " + ; (number->string (length (lambda-formals->list exp))) ");" + "load_varargs(" + (mangle (lambda-varargs-var exp)) + ", argc - " (number->string + (- (length (lambda-formals->list exp)) + 1 + (if has-closure? 1 0))) + ");\n"); + "") ; No varargs, skip + (c:serialize body " ") "; \n" + "}\n")) + formals*)))) + +(define (mta:code-gen input-program globals) + (set! *global-syms* globals) + (let ((compiled-program + (apply string-append + (map c-compile-program input-program)))) + (emit-c-macros) + (emit "#include \"runtime.h\"") + + ;; Emit symbols + (for-each + (lambda (sym) + (emit + (string-append + "defsymbol(" (mangle sym) ", " (symbol->string sym) ");"))) + *symbols*) + + ;; Emit lambdas: + ; Print the prototypes: + (for-each + (lambda (l) + (emit (string-append + "static void __lambda_" + (number->string (car l)) "(int argc, " + (cdadr l) + ") ;"))) + lambdas) + + (emit "") + + ; Print the definitions: + (for-each + (lambda (l) + (emit ((caadr l) (string-append "__lambda_" (number->string (car l)))))) + lambdas) + + (emit " + static void c_entry_pt(argc, env,cont) int argc; closure env,cont; { ") + + ;; Initialize symbol table + (for-each + (lambda (sym) + (emit (string-append " add_symbol(quote_" (mangle sym) ");"))) + *symbols*) + + ;; Initialize globals + (let* ((prefix " ") + (emit-global + (lambda (global) + (emits (c:allocs->str2 (c:allocs (caddr global)) prefix " \n")) + (emits prefix) + (emits (mangle-global (car global))) + (emits " = ") + (emits (c:body (caddr global))) + (emit "; ")))) + (for-each emit-global (filter global-lambda? *globals*)) + (for-each emit-global (filter global-not-lambda? *globals*)) + (emit "")) + + ;; Initialize Cyc_global_variables + ;; TODO: only need to do this if 'eval' was also compiled + (let ((pairs '()) + (head-pair #f)) + (for-each + (lambda (g) + (let ((cvar-sym (mangle (gensym 'cvar))) + (pair-sym (mangle (gensym 'pair)))) + (emits + (string-append + " make_cvar(" cvar-sym + ", (object *)&" (mangle-global (car g)) ");")) + (emits + (string-append + "make_cons(" pair-sym ", find_or_add_symbol(\"" (symbol->string (car g)) + "\"), &" cvar-sym ");\n")) + (set! pairs (cons pair-sym pairs)) + )) + *globals*) + (let loop ((code '()) + (ps pairs) + (cs (map (lambda (_) (mangle (gensym 'c))) pairs))) + (cond + ((null? ps) + (for-each + (lambda (str) + (emits str)) + code)) + ((null? (cdr ps)) + (loop (cons (string-append "make_cons(" (car cs) ", &" (car ps) ",nil);\n") code) + (cdr ps) + (cdr cs))) + (else + (if (not head-pair) + (set! head-pair (car cs))) + (loop (cons (string-append "make_cons(" (car cs) ", &" (car ps) ", &" (cadr cs) ");\n") code) + (cdr ps) + (cdr cs))))) + (if head-pair + (emits + (string-append "Cyc_global_variables = &" head-pair ";")))) + + (emit compiled-program) + (emit "}") + (emit *c-main-function*))) + +; Unused - +;;; Echo file to stdout +;(define (emit-fp fp) +; (let ((l (read-line fp))) +; (if (eof-object? l) +; (close-port fp) +; (begin +; (display l) +; (newline) +; (emit-fp fp))))) +; +;(define (read-runtime fp) +; (letrec* +; ((break "/** SCHEME CODE ENTRY POINT **/") +; (read-fp (lambda (header footer on-header?) +; (let ((l (read-line fp))) +; (cond +; ((eof-object? l) +; (close-port fp) +; (cons (reverse header) (reverse footer))) +; (else +; (cond +; ((equal? l break) +; (read-fp header footer #f)) +; (else +; (if on-header? +; (read-fp (cons l header) footer on-header?) +; (read-fp header (cons l footer) on-header?)))))))))) +; +; (read-fp (list) (list) #t))) diff --git a/cyclone.scm b/cyclone.scm new file mode 100644 index 00000000..cc7f007c --- /dev/null +++ b/cyclone.scm @@ -0,0 +1,165 @@ +;; Cyclone Scheme +;; Copyright (c) 2014, Justin Ethier +;; All rights reserved. +;; +;; This module contains a front-end for the compiler itself. +;; + +(cond-expand + (chicken + (require-extension extras) ;; pretty-print + (require-extension chicken-syntax) ;; when + (load "parser.so") + (load "trans.so") + (load "cgen.so")) +; (husk +; (import (husk pretty-print)) +; ;; TODO: load files +; ) + (else + (load "parser.scm") + (load "trans.scm") + (load "cgen.scm"))) + +;; Code emission. + +; c-compile-and-emit : (string -> A) exp -> void +(define (c-compile-and-emit input-program) + (call/cc + (lambda (return) + (define globals '()) + (emit *c-file-header-comment*) ; Guarantee placement at top of C file + + (trace:info "---------------- input program:") + (trace:info input-program) ;pretty-print + + (set! input-program (add-libs input-program)) + + (set! input-program (expand input-program)) + (trace:info "---------------- after macro expansion:") + (trace:info input-program) ;pretty-print + + (set! input-program + (filter-unused-variables + (isolate-globals input-program))) + (trace:info "---------------- after processing globals") + (trace:info input-program) ;pretty-print + + ; Note alpha-conversion is overloaded to convert internal defines to + ; set!'s below, since all remaining phases operate on set!, not define. + ; + ; TODO: consider moving some of this alpha-conv logic below back into trans? + (set! globals (global-vars input-program)) + (set! input-program + (map + (lambda (expr) + (alpha-convert expr globals return)) + input-program)) + (trace:info "---------------- after alpha conversion:") + (trace:info input-program) ;pretty-print + + (set! input-program + (map + (lambda (expr) + (cps-convert expr)) + input-program)) + (trace:info "---------------- after CPS:") + (trace:info input-program) ;pretty-print + + (set! input-program + (map + (lambda (expr) + (clear-mutables) + (analyze-mutable-variables expr) + (wrap-mutables expr globals)) + input-program)) + (trace:info "---------------- after wrap-mutables:") + (trace:info input-program) ;pretty-print + + (set! input-program + (map + (lambda (expr) + (if (define? expr) + ;; Global + `(define ,(define->var expr) + ,@(caddr (closure-convert (define->exp expr) globals))) + (caddr ;; Strip off superfluous lambda + (closure-convert expr globals)))) + input-program)) + ; (caddr ;; Strip off superfluous lambda + ; (closure-convert input-program))) + (trace:info "---------------- after closure-convert:") + (trace:info input-program) ;pretty-print + + (if (not *do-code-gen*) + (begin + (trace:error "DEBUG, existing program") + (exit))) + + (trace:info "---------------- C code:") + (mta:code-gen input-program globals) + (return '())))) ;; No codes to return + +;; TODO: longer-term, will be used to find where cyclone's data is installed +(define (get-data-path) + ".") + +(define (get-lib filename) + (string-append (get-data-path) "/" filename)) + +(define (read-file filename) + (call-with-input-file filename + (lambda (port) + (read-all port)))) + +;; Compile and emit: +(define (run-compiler args cc?) + (let* ((in-file (car args)) + (in-prog (read-file in-file)) + (exec-file (basename in-file)) + (src-file (string-append exec-file ".c")) + (create-c-file + (lambda (program) + (with-output-to-file + src-file + (lambda () + (c-compile-and-emit program))))) + (result (create-c-file in-prog))) + ;; Load other modules if necessary + (cond + ((not (null? result)) + (let ((program + (append + (if (member 'eval result) + (read-file (get-lib "eval.scm")) + '()) + (if (member 'read result) + (append + (read-file (get-lib "parser.scm")) + '((define read cyc-read))) + '()) + in-prog))) + (create-c-file program)))) + + ;; Compile the generated C file + (if cc? + (system + ;; -I is a hack, real answer is to use 'make install' to place .h file + (string-append "gcc " src-file " -I. -g -o " exec-file))))) + +;; Handle command line arguments +(let ((args (command-line-arguments))) ;; TODO: port (command-line-arguments) to husk?? + (cond + ((< (length args) 1) + (display "cyclone: no input file") + (newline)) + ((member "-h" args) + (display "TODO: display help text") + (newline)) + ((member "-v" args) + (display *version-banner*)) + ((member "-d" args) + (run-compiler args #f)) ;; Debug, do not run GCC + (else + (run-compiler args #t)))) + diff --git a/debug/unit-test-trans.scm b/debug/unit-test-trans.scm new file mode 100644 index 00000000..be5633b8 --- /dev/null +++ b/debug/unit-test-trans.scm @@ -0,0 +1,628 @@ +;; +;; A test framework to attempt to make it easier to debug generated programs. +;; The idea is to allow execution of Scheme code that has been transformed +;; using cyclone's source-to-source transformations. If the code executes +;; OK here, then it should execute fine after being transformed into C code. +;; Unless of course there is a bug here (hopefully not) or in the Scheme->C +;; compiler. +;; + +;; Return a function that can be called directly to +;; invoke the closure, or indirectly to access closure +;; elements. +;; +;; When called directly, the first arg is the closure +;; itself (self), followed by args passed when the +;; closure was defined. +(define (%closure . clo-args) + (define clo-data (list->vector clo-args)) + (define clo + (lambda args + (cond + ((and (> (length args) 1) + (equal? 'ref (car args))) + (vector-ref clo-data (cadr args))) + (else + (apply + (car clo-args) + (cons clo args)))))) + clo) + +(define (%closure-ref clo idx) + (clo 'ref idx)) +(define (%halt x) + (exit)) + +;; Test code from matt might, may need to tweak per corresponding +;; functionality in the MTA C runtime +;; Suitable definitions for the cell functions: +(define (cell value) (lambda (get? new-value) + (if get? value (set! value new-value)))) +(define (set-cell! c v) (c #f v)) +(define (cell-get c) (c #t #t)) +;; END matt might + +(define (test-fac) +((lambda (fac) + ((lambda (fac) + ((%closure + (lambda (self$698 r$689) + ((%closure + (lambda (self$699 r$687) + ((%closure + (lambda (self$700 $_$684) + ((%closure-ref (cell-get (%closure-ref self$700 1)) 0) + (cell-get (%closure-ref self$700 1)) + (%closure + (lambda (self$701 r$688) + ((lambda (r$686) (%halt r$686)) (display r$688)))) + 10)) + (%closure-ref self$699 1)) + r$687)) + (%closure-ref self$698 1)) + (set-cell! (%closure-ref self$698 1) r$689))) + fac) + (%closure + (lambda (self$694 k$690 n$685) + ((%closure + (lambda (self$695 r$691) + (if r$691 + ((%closure-ref (%closure-ref self$695 2) 0) + (%closure-ref self$695 2) + 1) + ((%closure + (lambda (self$696 r$693) + ((%closure-ref (cell-get (%closure-ref self$696 1)) 0) + (cell-get (%closure-ref self$696 1)) + (%closure + (lambda (self$697 r$692) + ((%closure-ref (%closure-ref self$697 1) 0) + (%closure-ref self$697 1) + (* (%closure-ref self$697 2) r$692))) + (%closure-ref self$696 2) + (%closure-ref self$696 3)) + r$693)) + (%closure-ref self$695 1) + (%closure-ref self$695 2) + (%closure-ref self$695 3)) + (- (%closure-ref self$695 3) 1)))) + (%closure-ref self$694 1) + k$690 + n$685) + (= 0 n$685))) + fac))) + (cell fac))) + #f)) +;(test-fac) + +(define (test-set) + ((lambda (x$684) + ((lambda (x$684) + ((%closure + (lambda (self$687 r$686) + ((lambda (r$685) (%halt r$685)) + (display (cell-get (%closure-ref self$687 1))))) + x$684) + (set-cell! x$684 #t))) + (cell x$684))) + #f)) +;(test-set) + +(define (test-adder) + ((lambda (increment make-adder) + ((%closure + (lambda (self$696 increment) + ((%closure + (lambda (self$697 make-adder) + ((%closure + (lambda (self$700 r$693) + ((%closure + (lambda (self$701 r$689) + ((%closure + (lambda (self$702 $_$684) + ((%closure-ref + (cell-get (%closure-ref self$702 2)) + 0) + (cell-get (%closure-ref self$702 2)) + (%closure + (lambda (self$703 r$692) + ((%closure + (lambda (self$704 r$690) + ((%closure + (lambda (self$705 $_$685) + ((%closure-ref + (cell-get (%closure-ref self$705 1)) + 0) + (cell-get (%closure-ref self$705 1)) + (%closure + (lambda (self$706 r$691) + ((lambda (r$688) (%halt r$688)) (display r$691)))) + 41)) + (%closure-ref self$704 1)) + r$690)) + (%closure-ref self$703 1)) + (set-cell! (%closure-ref self$703 1) r$692))) + (%closure-ref self$702 1)) + 1)) + (%closure-ref self$701 1) + (%closure-ref self$701 2)) + r$689)) + (%closure-ref self$700 1) + (%closure-ref self$700 2)) + (set-cell! (%closure-ref self$700 2) r$693))) + (%closure-ref self$697 1) + make-adder) + (%closure + (lambda (self$698 k$694 x$686) + ((%closure-ref k$694 0) + k$694 + (%closure + (lambda (self$699 k$695 y$687) + ((%closure-ref k$695 0) + k$695 + (+ (%closure-ref self$699 1) y$687))) + x$686)))))) + increment) + (cell (%closure-ref self$696 1)))) + make-adder) + (cell increment))) + #f + #f)) +;(test-adder) + +;; Transformed scheme code from if.scm +(define (test-if) + ((lambda (k$699) (if #t (k$699 1) (k$699 2))) + (lambda (r$698) + ((lambda (r$689) + ((lambda ($_$684) + ((lambda (k$697) (if #f (k$697 1) (k$697 2))) + (lambda (r$696) + ((lambda (r$690) + ((lambda ($_$685) + ((lambda (k$694) + ((lambda (r$695) + (if r$695 (k$694 (+ 3 4)) (k$694 (* 3 4)))) + (+ 1 2))) + (lambda (r$691) + ((lambda ($_$686) + ((lambda (k$692) + ((lambda (x$687) + ((lambda (r$693) + (if r$693 (k$692 (+ 1 1)) (k$692 (* 0 0)))) + (+ x$687 1))) + 0)) + (lambda (r$688) (%halt r$688)))) + r$691)))) + r$690)) + (display r$696))))) + r$689)) + (display r$698))))) + +(define (test-eval) +((lambda (analyze$737 + analyze-quoted$738 + analyze-self-evaluating$739 + env$740 + eval$741 + exp$742 + quoted?$743 + self-evaluating?$744 + tag$745 + tagged-list?$746) + ((%closure + (lambda (self$803 analyze$737) + ((%closure + (lambda (self$804 analyze-quoted$738) + ((%closure + (lambda (self$805 analyze-self-evaluating$739) + ((%closure + (lambda (self$806 eval$741) + ((%closure + (lambda (self$807 quoted?$743) + ((%closure + (lambda (self$808 self-evaluating?$744) + ((%closure + (lambda (self$809 tagged-list?$746) + ((%closure + (lambda (self$812 r$800) + ((%closure + (lambda (self$813 r$768) + ((%closure + (lambda (self$814 $_$747) + ((%closure + (lambda (self$818 r$796) + ((%closure + (lambda (self$819 r$769) + ((%closure + (lambda (self$820 $_$748) + ((%closure + (lambda (self$823 r$793) + ((%closure + (lambda (self$824 r$770) + ((%closure + (lambda (self$825 $_$749) + ((%closure + (lambda (self$828 r$790) + ((%closure + (lambda (self$829 r$771) + ((%closure + (lambda (self$830 $_$750) + ((%closure + (lambda (self$834 r$786) + ((%closure + (lambda (self$835 r$772) + ((%closure + (lambda (self$836 $_$751) + ((%closure + (lambda (self$839 r$783) + ((%closure + (lambda (self$840 r$773) + ((%closure + (lambda (self$841 $_$752) + ((%closure + (lambda (self$846 r$779) + ((%closure + (lambda (self$847 r$774) + ((%closure + (lambda (self$848 $_$753) + ((%closure-ref + (cell-get (%closure-ref self$848 1)) + 0) + (cell-get (%closure-ref self$848 1)) + (%closure + (lambda (self$849 r$778) + ((%closure + (lambda (self$850 r$775) + ((%closure + (lambda (self$851 $_$754) + ((%closure + (lambda (self$852 r$777) + ((%closure-ref + (cell-get (%closure-ref self$852 1)) + 0) + (cell-get (%closure-ref self$852 1)) + (%closure + (lambda (self$853 r$776) + ((lambda (r$767) (%halt r$767)) (write r$776)))) + r$777 + #f)) + (%closure-ref self$851 1)) + '(1 . 2))) + (%closure-ref self$850 1)) + r$775)) + (%closure-ref self$849 1)) + (write r$778))) + (%closure-ref self$848 1)) + 2 + #f)) + (%closure-ref self$847 1)) + r$774)) + (%closure-ref self$846 2)) + (set-cell! (%closure-ref self$846 1) r$779))) + (%closure-ref self$841 1) + (%closure-ref self$841 2)) + (%closure + (lambda (self$842 k$780 exp$755) + ((%closure + (lambda (self$843 r$781) + ((%closure + (lambda (self$844 qval$756) + ((%closure-ref (%closure-ref self$844 1) 0) + (%closure-ref self$844 1) + (%closure + (lambda (self$845 k$782 env$757) + ((%closure-ref k$782 0) + k$782 + (%closure-ref self$845 1))) + qval$756))) + (%closure-ref self$843 1)) + r$781)) + k$780) + (cadr exp$755)))))) + (%closure-ref self$840 1) + (%closure-ref self$840 2)) + r$773)) + (%closure-ref self$839 1) + (%closure-ref self$839 3)) + (set-cell! (%closure-ref self$839 2) r$783))) + (%closure-ref self$836 1) + (%closure-ref self$836 2) + (%closure-ref self$836 3)) + (%closure + (lambda (self$837 k$784 exp$758) + ((%closure-ref k$784 0) + k$784 + (%closure + (lambda (self$838 k$785 env$759) + ((%closure-ref k$785 0) + k$785 + (%closure-ref self$838 1))) + exp$758)))))) + (%closure-ref self$835 1) + (%closure-ref self$835 2) + (%closure-ref self$835 3)) + r$772)) + (%closure-ref self$834 2) + (%closure-ref self$834 3) + (%closure-ref self$834 4)) + (set-cell! (%closure-ref self$834 1) r$786))) + (%closure-ref self$830 1) + (%closure-ref self$830 2) + (%closure-ref self$830 3) + (%closure-ref self$830 4)) + (%closure + (lambda (self$831 k$787 exp$760) + ((%closure-ref + (cell-get (%closure-ref self$831 4)) + 0) + (cell-get (%closure-ref self$831 4)) + (%closure + (lambda (self$832 r$788) + (if r$788 + ((%closure-ref + (cell-get (%closure-ref self$832 2)) + 0) + (cell-get (%closure-ref self$832 2)) + (%closure-ref self$832 4) + (%closure-ref self$832 3)) + ((%closure-ref + (cell-get (%closure-ref self$832 5)) + 0) + (cell-get (%closure-ref self$832 5)) + (%closure + (lambda (self$833 r$789) + (if r$789 + ((%closure-ref + (cell-get (%closure-ref self$833 1)) + 0) + (cell-get (%closure-ref self$833 1)) + (%closure-ref self$833 3) + (%closure-ref self$833 2)) + ((%closure-ref (%closure-ref self$833 3) 0) + (%closure-ref self$833 3) + #f))) + (%closure-ref self$832 1) + (%closure-ref self$832 3) + (%closure-ref self$832 4)) + (%closure-ref self$832 3)))) + (%closure-ref self$831 1) + (%closure-ref self$831 2) + exp$760 + k$787 + (%closure-ref self$831 3)) + exp$760)) + (%closure-ref self$830 2) + (%closure-ref self$830 3) + (%closure-ref self$830 5) + (%closure-ref self$830 6)))) + (%closure-ref self$829 1) + (%closure-ref self$829 2) + (%closure-ref self$829 3) + (%closure-ref self$829 4) + (%closure-ref self$829 5) + (%closure-ref self$829 6)) + r$771)) + (%closure-ref self$828 1) + (%closure-ref self$828 2) + (%closure-ref self$828 3) + (%closure-ref self$828 4) + (%closure-ref self$828 5) + (%closure-ref self$828 6)) + (set-cell! (%closure-ref self$828 5) r$790))) + (%closure-ref self$825 1) + (%closure-ref self$825 2) + (%closure-ref self$825 3) + (%closure-ref self$825 4) + (%closure-ref self$825 5) + (%closure-ref self$825 6)) + (%closure + (lambda (self$826 k$791 exp$761) + ((%closure + (lambda (self$827 r$792) + ((%closure-ref + (cell-get (%closure-ref self$827 3)) + 0) + (cell-get (%closure-ref self$827 3)) + (%closure-ref self$827 2) + (%closure-ref self$827 1) + r$792)) + exp$761 + k$791 + (%closure-ref self$826 1)) + 'quote)) + (%closure-ref self$825 7)))) + (%closure-ref self$824 1) + (%closure-ref self$824 2) + (%closure-ref self$824 3) + (%closure-ref self$824 4) + (%closure-ref self$824 5) + (%closure-ref self$824 6) + (%closure-ref self$824 7)) + r$770)) + (%closure-ref self$823 1) + (%closure-ref self$823 2) + (%closure-ref self$823 3) + (%closure-ref self$823 4) + (%closure-ref self$823 5) + (%closure-ref self$823 6) + (%closure-ref self$823 7)) + (set-cell! (%closure-ref self$823 6) r$793))) + (%closure-ref self$820 1) + (%closure-ref self$820 2) + (%closure-ref self$820 3) + (%closure-ref self$820 4) + (%closure-ref self$820 5) + (%closure-ref self$820 6) + (%closure-ref self$820 7)) + (%closure + (lambda (self$821 k$794 exp$762) + ((%closure + (lambda (self$822 r$795) + (if r$795 + ((%closure-ref (%closure-ref self$822 1) 0) + (%closure-ref self$822 1) + #t) + ((%closure-ref (%closure-ref self$822 1) 0) + (%closure-ref self$822 1) + #f))) + k$794) + (number? exp$762)))))) + (%closure-ref self$819 1) + (%closure-ref self$819 2) + (%closure-ref self$819 3) + (%closure-ref self$819 4) + (%closure-ref self$819 5) + (%closure-ref self$819 6) + (%closure-ref self$819 7)) + r$769)) + (%closure-ref self$818 1) + (%closure-ref self$818 2) + (%closure-ref self$818 3) + (%closure-ref self$818 4) + (%closure-ref self$818 5) + (%closure-ref self$818 6) + (%closure-ref self$818 7)) + (set-cell! (%closure-ref self$818 7) r$796))) + (%closure-ref self$814 1) + (%closure-ref self$814 2) + (%closure-ref self$814 3) + (%closure-ref self$814 4) + (%closure-ref self$814 5) + (%closure-ref self$814 6) + (%closure-ref self$814 7)) + (%closure + (lambda (self$815 k$797 exp$763 tag$764) + ((%closure + (lambda (self$816 r$798) + (if r$798 + ((%closure + (lambda (self$817 r$799) + ((%closure-ref (%closure-ref self$817 1) 0) + (%closure-ref self$817 1) + (equal? r$799 (%closure-ref self$817 2)))) + (%closure-ref self$816 2) + (%closure-ref self$816 3)) + (car (%closure-ref self$816 1))) + ((%closure-ref (%closure-ref self$816 2) 0) + (%closure-ref self$816 2) + #f))) + exp$763 + k$797 + tag$764) + (pair? exp$763)))))) + (%closure-ref self$813 1) + (%closure-ref self$813 2) + (%closure-ref self$813 3) + (%closure-ref self$813 4) + (%closure-ref self$813 5) + (%closure-ref self$813 6) + (%closure-ref self$813 7)) + r$768)) + (%closure-ref self$812 1) + (%closure-ref self$812 2) + (%closure-ref self$812 3) + (%closure-ref self$812 4) + (%closure-ref self$812 5) + (%closure-ref self$812 6) + (%closure-ref self$812 7)) + (set-cell! (%closure-ref self$812 4) r$800))) + (%closure-ref self$809 1) + (%closure-ref self$809 2) + (%closure-ref self$809 3) + (%closure-ref self$809 4) + (%closure-ref self$809 5) + (%closure-ref self$809 6) + tagged-list?$746) + (%closure + (lambda (self$810 k$801 exp$765 env$766) + ((%closure-ref + (cell-get (%closure-ref self$810 1)) + 0) + (cell-get (%closure-ref self$810 1)) + (%closure + (lambda (self$811 r$802) + ((%closure-ref r$802 0) + r$802 + (%closure-ref self$811 2) + (%closure-ref self$811 1))) + env$766 + k$801) + exp$765)) + (%closure-ref self$809 1)))) + (%closure-ref self$808 1) + (%closure-ref self$808 2) + (%closure-ref self$808 3) + (%closure-ref self$808 4) + (%closure-ref self$808 5) + self-evaluating?$744) + (cell (%closure-ref self$808 6)))) + (%closure-ref self$807 1) + (%closure-ref self$807 2) + (%closure-ref self$807 3) + (%closure-ref self$807 4) + quoted?$743 + (%closure-ref self$807 6)) + (cell (%closure-ref self$807 5)))) + (%closure-ref self$806 1) + (%closure-ref self$806 2) + (%closure-ref self$806 3) + eval$741 + (%closure-ref self$806 5) + (%closure-ref self$806 6)) + (cell (%closure-ref self$806 4)))) + (%closure-ref self$805 1) + (%closure-ref self$805 2) + analyze-self-evaluating$739 + (%closure-ref self$805 4) + (%closure-ref self$805 5) + (%closure-ref self$805 6)) + (cell (%closure-ref self$805 3)))) + (%closure-ref self$804 1) + analyze-quoted$738 + (%closure-ref self$804 3) + (%closure-ref self$804 4) + (%closure-ref self$804 5) + (%closure-ref self$804 6)) + (cell (%closure-ref self$804 2)))) + analyze$737 + (%closure-ref self$803 2) + (%closure-ref self$803 3) + (%closure-ref self$803 4) + (%closure-ref self$803 5) + (%closure-ref self$803 6)) + (cell (%closure-ref self$803 1)))) + analyze-quoted$738 + analyze-self-evaluating$739 + eval$741 + quoted?$743 + self-evaluating?$744 + tagged-list?$746) + (cell analyze$737))) + #f + #f + #f + #f + #f + #f + #f + #f + #f + #f)) +(test-eval) + +; l21 line 1565 - passes #f to l43, which then tries to execute it: +; according to scaffolding though, this is a problem in the transformed Scheme code +; +;static void __lambda_21(object self_73833, object r_73789) { +; if( !eq(quote_f, r_73789) ){ +; return_funcall2( cell_get(((closureN)self_73833)->elts[0]), ((closureN)self_73833)->elts[2], ((closureN)self_73833)->elts[1]); +;} else { +; return_funcall1( ((closureN)self_73833)->elts[2], quote_f);} +;; +;} +;static void __lambda_43(object self_73811, object r_73802) { +; return_funcall2( r_73802, ((closureN)self_73811)->elts[1], ((closureN)self_73811)->elts[0]);; +;} + + diff --git a/docs/compiler-int-dev.scm b/docs/compiler-int-dev.scm new file mode 100644 index 00000000..bc856785 --- /dev/null +++ b/docs/compiler-int-dev.scm @@ -0,0 +1,95 @@ +;; This is a temporary test file, move everything to a test suite and/or docs once it works! + +; +;test code: +;(let ((x (apply length '((#t #f)))) +; (y (apply length '((#t #f))))) +;(if (apply length '((#t #f))) +; 2 +; #f)) + +(write (cons (apply cons '(1 2)) (apply cons '(3 4)))) + +;; The purpose of this file is to test interactions between the interpreter +;; and compiled code. +;; +;; Some requirements and notes: +;; - The interpreter can create new variables, but those new vars are only +;; accessible by the interpreter (otherwise code would not compile) +;; - The interpreter should be able to access compiled variables, including +;; functions +;; - The interpreter should be able to call compiled functions +;; - The interpreter should be able to change variables that originate +;; in compiled code +;; - If eval is never called, compiled code can be more efficient by omitting +;; information that is only required by the interpreter. +;; +;; How to represent environments? +;; Presumably the interpreter's global environment needs to include compiled globals as well. It should also be extended to include local vars, presumably prior to each call to eval?? +;; +;; global env can be extended to include C globals and locals. their representations will be: +;; +;; - globals are just C variables. problem is, we may need to include the locations of those vars. otherwise how can the interpreter mutate them? IE, if global 'x' is a list, need the memory location of 'x' not the list, if we want to mutate the list +;; simple - add a new type for globals that includes the memory address, +;; but whenever we look one up, return the obj at that address. +;; we want to avoid 'leaking' global objs outside of the env and associated +;; code. then when a set comes in, change the var at the memory address. +;; +;; obviously we want to just load the globals once when *global-env* is +;; built, and do not want to load them ever again. +;; +;; does any of this apply to locals? +;; +;; - locals are ... ? cells? +;; presumably we need to extend global-env to include locals, then pass that +;; extended env as the parameter to eval, each time it is called. +;; this obviously would happen in the compiled code generated by cyclone. +;; +;; local can be a local C variable: +;; ((lambda (x) (display x)) 1) +;; ... +;; static void __lambda_11(int argc, closure _,object x_737) { +;; return_check1(__lambda_10,Cyc_display(x_737));; +;; } +;; but it can also be a closure: +;; ((lambda (x) +;; ((lambda () (display x)))) +;; 1) +;; ... +;; static void __lambda_12(int argc, closure _,object x_737) { +;; +;; closureN_type c_7380; +;; c_7380.tag = closureN_tag; +;; c_7380.fn = __lambda_11; +;; c_7380.num_elt = 1; +;; c_7380.elts = (object *)alloca(sizeof(object) * 1); +;; c_7380.elts[0] = x_737; +;; +;; return_funcall0((closure)&c_7380);; +;; } +;; +;; static void __lambda_11(int argc, object self_7331) { +;; return_check1(__lambda_10,Cyc_display(((closureN)self_7331)->elts[0]));; +;; } +;; +;; case #1 - pass a global variable to the interpreter +(define x 1) +(define y 2) +(define *z* 3) +;(write (eval '(Cyc-global-vars))) +(write (eval 'x)) +(write (eval '(set! x 'mutated-x))) +;(write (eval '*global-environment*)) +(write (eval '*z*)) +; TODO: need a valid example of passing a local to eval (assume that is allowed) +;((lambda (tmp) +; (write (eval 'tmp))) #f) +(write (list 'after-eval 'x x 'y y '*z* *z*)) +x ;; oh shit, need to reference x/y otherwise they get optimized out! + +;; case #2 - pass a local (IE, lambda var) +;; No, this is not allowed, see: http://stackoverflow.com/questions/3844196/how-can-i-use-external-variables-in-eval-in-scheme/3851284#3851284 + +;; case #3 - mutate global/local. or is this the same as previous? + +;; case #4 - introduce new vars in interpreter, then use them later on?? diff --git a/docs/global-opts-notes.txt b/docs/global-opts-notes.txt new file mode 100644 index 00000000..b6874821 --- /dev/null +++ b/docs/global-opts-notes.txt @@ -0,0 +1,59 @@ +These are my old notes for using globals as a performance improvement over creating dozens of nested closures. The old problem is still there if the code is restructured, so these notes may still be valuable. (1) is the only improvement that was implemented. + + - Optimizations - there are massive performance problems in compiling eval.scm!!! + +1) it might be better for (define) statements to create GC roots, rather than attempting to pass + all defines around in closures. that should minimize closure size and the amount of data each function + needs to pass around (IE: funcall80) + + Here are pure lines-of-code metrics: + Phase Last line First line LOC % LOC + input 206 13 193 0.31% + expand 601 212 389 0.63% + alpha 1253 607 646 1.05% + CPS 2586 1259 1327 2.15% + wrap-mut 4151 2676 1475 2.39% + CC 29738 4157 25581 41.49% + C 61652 29743 31909 51.76% + + should compare times in each phase. but this still avoids WHY the last two phases are taking so long. + is it just the sheer amount of code that is being generated? + + notes about globals: + would have to occur after expand, since that could introduce more defines + want to take each top-level define and create a global for it + could filter top level into two categories - (define) expressions and other expressions, to be enclosed within a (begin) + could create a single list of these, eg: (define, define, ..., begin) and then map over it for the other phases + TBD: how to store defined functions? Do they become closures? how does that work? + +a global function can reference other globals, but I do not believe it needs to be a closure, because a global function can only reference other globals. + +Issue list + - alpha conversion broken for internal define shadowing global (see below) + - call/cc probably broken for global define's + +2) it looked like there were cases where a closure had a copy created of it, and the copy was passed along. it would be nice if the closure conversion phase could detect this and just pass the original closure along instead of building a new one. that saves a lot of work downstream, and seems like it might save work in cc as well (depending upon how complex the check is, of course!) + +of course, fn is changed upon new closure, so it is not exactly the same object... + +Could make this happen by separating the continuation (which contains next fn) from the environment (closure vars). Even just doing this conceptually (in CC phase) may allow CC to detect unchanged environments. Also, can the existing code just reassign fn? I think that would work since closures are newly-allocated upon function invocation, though maybe it would break loops? + +just brainstorming this at the moment. + +3) important to see if optimizations can be made independent of adding globals, since even with globals there could be performance problems if there were to be large closures for whatever reason. + +4) does sequencing (IE, begin) have to create a new lambda for each expr? + might not make a difference, but seems like it should just create a lambda + body, and let CPS worry about extracting the lambdas. may end up with the + same result, though + + by the same token, are there ever cases where it would make sense to pass a sequence all the way to the compiler? probably not, but just a thought + + - lexical scoping issue - local does not shadow a global in this case: + + (define a 2) ; global + (define (test2) + ; TODO: need to be able to shadow globals, too + ; using an internal define + (define a 1) + a) diff --git a/docs/images/cyclone-logo-01.pdn b/docs/images/cyclone-logo-01.pdn new file mode 100644 index 00000000..7576f44c Binary files /dev/null and b/docs/images/cyclone-logo-01.pdn differ diff --git a/docs/images/cyclone-logo-01.png b/docs/images/cyclone-logo-01.png new file mode 100644 index 00000000..c200dab7 Binary files /dev/null and b/docs/images/cyclone-logo-01.png differ diff --git a/docs/images/cyclone-logo-03-header.png b/docs/images/cyclone-logo-03-header.png new file mode 100644 index 00000000..ed5c64b3 Binary files /dev/null and b/docs/images/cyclone-logo-03-header.png differ diff --git a/docs/images/cyclone-logo-03-thumb.png b/docs/images/cyclone-logo-03-thumb.png new file mode 100644 index 00000000..916b01de Binary files /dev/null and b/docs/images/cyclone-logo-03-thumb.png differ diff --git a/docs/images/cyclone-logo-03.pdn b/docs/images/cyclone-logo-03.pdn new file mode 100644 index 00000000..b93529ba Binary files /dev/null and b/docs/images/cyclone-logo-03.pdn differ diff --git a/docs/images/cyclone-logo-03.png b/docs/images/cyclone-logo-03.png new file mode 100644 index 00000000..054014b2 Binary files /dev/null and b/docs/images/cyclone-logo-03.png differ diff --git a/docs/images/cyclone-logo-small.png b/docs/images/cyclone-logo-small.png new file mode 100644 index 00000000..deb2068c Binary files /dev/null and b/docs/images/cyclone-logo-small.png differ diff --git a/eval.scm b/eval.scm new file mode 100644 index 00000000..f25e3256 --- /dev/null +++ b/eval.scm @@ -0,0 +1,397 @@ +;; The meta-circular evaluator from SICP 4.1 +;; http://mitpress.mit.edu/sicp/full-text/book/book-Z-H-26.html#%_sec_4.1 +;; + +(define (eval exp . env) + (if (null? env) + ((analyze exp) *global-environment*) + ((analyze exp) (car env)))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Expression handling helper functions +(define (tagged-list? exp tag) + (if (pair? exp) + (equal? (car exp) tag) + #f)) + +(define (self-evaluating? exp) + (cond ((number? exp) #t) + ((boolean? exp) #t) + ((string? exp) #t) + ((char? exp) #t) + ((eof-object? exp) #t) + (else #f))) + +(define (variable? exp) (symbol? exp)) + +(define (quoted? exp) + (tagged-list? exp 'quote)) + +(define (assignment? exp) + (tagged-list? exp 'set!)) +(define (assignment-variable exp) (cadr exp)) +(define (assignment-value exp) (caddr exp)) + +(define (definition? exp) + (tagged-list? exp 'define)) +(define (definition-variable exp) + (if (symbol? (cadr exp)) + (cadr exp) + (caadr exp))) +(define (definition-value exp) + (if (symbol? (cadr exp)) + (caddr exp) + (make-lambda (cdadr exp) ; formal parameters + (cddr exp)))) ; body + +(define (lambda? exp) (tagged-list? exp 'lambda)) +(define (lambda-parameters exp) (cadr exp)) +(define (lambda-body exp) (cddr exp)) + +(define (make-lambda parameters body) + (cons 'lambda (cons parameters body))) + +(define (if? exp) (tagged-list? exp 'if)) +(define (if-predicate exp) (cadr exp)) +(define (if-consequent exp) (caddr exp)) +(define (if-alternative exp) + (if (not (null? (cdddr exp))) ;; TODO: add (not) support + (cadddr exp) + #f)) +(define (make-if predicate consequent alternative) + (list 'if predicate consequent alternative)) + +(define (begin? exp) (tagged-list? exp 'begin)) +(define (begin-actions exp) (cdr exp)) +(define (last-exp? seq) (null? (cdr seq))) +(define (first-exp seq) (car seq)) +(define (rest-exps seq) (cdr seq)) + +(define (sequence->exp seq) + (cond ((null? seq) seq) + ((last-exp? seq) (first-exp seq)) + (else (make-begin seq)))) +(define (make-begin seq) (cons 'begin seq)) + +(define (application? exp) (pair? exp)) +(define (operator exp) (car exp)) +(define (operands exp) (cdr exp)) +;(define (no-operands? ops) (null? ops)) +;(define (first-operand ops) (car ops)) +;(define (rest-operands ops) (cdr ops)) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Evaluator data structures + +(define procedure-tag 'Cyc_procedure) +(define (make-procedure parameters body env) + (list procedure-tag parameters body env)) +(define (compound-procedure? p) + (tagged-list? p procedure-tag)) +(define (procedure-parameters p) (cadr p)) +(define (procedure-body p) (caddr p)) +(define (procedure-environment p) (cadddr p)) + +;; Environments +(define (enclosing-environment env) (cdr env)) +(define (first-frame env) (car env)) +(define the-empty-environment '()) + +(define (make-frame variables values) + (cons variables values)) +(define (frame-variables frame) (car frame)) +(define (frame-values frame) (cdr frame)) +(define (add-binding-to-frame! var val frame) + (set-car! frame (cons var (car frame))) + (set-cdr! frame (cons val (cdr frame)))) + +(define (extend-environment vars vals base-env) + (if (= (length vars) (length vals)) + (cons (make-frame vars vals) base-env) + (if (< (length vars) (length vals)) + (error "Too many arguments supplied" vars vals) + (error "Too few arguments supplied" vars vals)))) + +(define (lookup-variable-value var env) + (define (env-loop env) + (define (scan vars vals) + (cond ((null? vars) + (env-loop (enclosing-environment env))) + ((eq? var (car vars)) + (cond-expand + (cyclone + (Cyc-get-cvar (car vals))) + (else + (car vals)))) + (else (scan (cdr vars) (cdr vals))))) + (if (eq? env the-empty-environment) + (error "Unbound variable" var) + (let ((frame (first-frame env))) + (scan (frame-variables frame) + (frame-values frame))))) + (env-loop env)) + +(define (set-variable-value! var val env) + (define (env-loop env) + (define (scan vars vals) + (cond ((null? vars) + (env-loop (enclosing-environment env))) + ((eq? var (car vars)) + (cond-expand + (cyclone + (if (Cyc-cvar? (car vals)) + (Cyc-set-cvar! (car vals) val) + (set-car! vals val))) + (else + (set-car! vals val)))) + (else (scan (cdr vars) (cdr vals))))) + (if (eq? env the-empty-environment) + (error "Unbound variable -- SET!" var) + (let ((frame (first-frame env))) + (scan (frame-variables frame) + (frame-values frame))))) + (env-loop env)) + +(define (define-variable! var val env) + (let ((frame (first-frame env))) + (define (scan vars vals) + (cond ((null? vars) + (add-binding-to-frame! var val frame)) + ((eq? var (car vars)) + ;; TODO: update compiled var + ;; cond-expand + ;; if cvar + ;; set-cvar + (set-car! vals val)) + (else (scan (cdr vars) (cdr vals))))) + (scan (frame-variables frame) + (frame-values frame)))) + +(define (primitive-procedure? proc) + (tagged-list? proc 'primitive)) + +(define (primitive-implementation proc) (cadr proc)) + +(define primitive-procedures + (list (list 'car car) + (list 'cdr cdr) + (list 'cons cons) + (list 'eq? eq?) + (list 'equal? equal?) + (list 'set-car! set-car!) + (list 'set-cdr! set-cdr!) + (list 'null? null?) + (list 'has-cycle? has-cycle?) + (list 'Cyc-global-vars Cyc-global-vars) + (list '+ +) + ; TODO: + )) + +(define (primitive-procedure-names) + (map car + primitive-procedures)) + +(define (primitive-procedure-objects) + (map (lambda (proc) (list 'primitive (cadr proc))) + primitive-procedures)) + +(define (apply-primitive-procedure proc args) + (apply ;apply-in-underlying-scheme + (primitive-implementation proc) args)) + +;; TODO: temporary testing +;; also, it would be nice to pass around something other than +;; symbols for primitives. could the runtime inject something into the env? +;; of course that is a problem for stuff like make_cons, that is just a +;; C macro... +;; (define (primitive-procedure? proc) +;; (equal? proc 'cons)) + +(define (setup-environment) + (let ((initial-env + (extend-environment (primitive-procedure-names) + (primitive-procedure-objects) + the-empty-environment))) + (cond-expand + (cyclone + ;; Also include compiled variables + (extend-environment + (map (lambda (v) (car v)) (Cyc-global-vars)) + (map (lambda (v) (cdr v)) (Cyc-global-vars)) + initial-env)) + (else initial-env)))) +(define *global-environment* (setup-environment)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Derived expressions +;; TODO: longer-term, this would be replaced by a macro system +(define (cond? exp) (tagged-list? exp 'cond)) +(define (cond-clauses exp) (cdr exp)) +(define (cond-else-clause? clause) + (eq? (cond-predicate clause) 'else)) +(define (cond-predicate clause) (car clause)) +(define (cond-actions clause) (cdr clause)) +(define (cond->if exp) + (expand-clauses (cond-clauses exp))) + +(define (expand-clauses clauses) + (if (null? clauses) + #f ; no else clause + (let ((first (car clauses)) + (rest (cdr clauses))) + (if (cond-else-clause? first) + (if (null? rest) + (sequence->exp (cond-actions first)) + (error "ELSE clause isn't last -- COND->IF" + clauses)) + (make-if (cond-predicate first) + (sequence->exp (cond-actions first)) + (expand-clauses rest)))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Improvement from section 4.1.7 - Separate syntactic analysis from execution +;; +;; TODO: need to finish this section +;; TODO: see 4.1.6 Internal Definitions +;; +(define (analyze exp) + (cond ((self-evaluating? exp) + (analyze-self-evaluating exp)) + ((quoted? exp) (analyze-quoted exp)) + ((variable? exp) (analyze-variable exp)) + ((assignment? exp) (analyze-assignment exp)) + ((definition? exp) (analyze-definition exp)) + ((if? exp) (analyze-if exp)) + ((lambda? exp) (analyze-lambda exp)) + ;; TODO: ideally, macro system would handle these next three + ((tagged-list? exp 'let) + (let ((vars (map car (cadr exp))) ;(let->bindings exp))) + (args (map cadr (cadr exp))) ;(let->bindings exp)))) + (body (cddr exp))) + (analyze + (cons + (cons 'lambda (cons vars body)) + args)))) + ((begin? exp) (analyze-sequence (begin-actions exp))) + ((cond? exp) (analyze (cond->if exp))) + ;; END derived expression processing + ((application? exp) (analyze-application exp)) + (else + (error "Unknown expression type -- ANALYZE" exp)))) + ;(lambda () 'TODO-unknown-exp-type)))) ; JAE - this is a debug line + +(define (analyze-self-evaluating exp) + (lambda (env) exp)) + +(define (analyze-quoted exp) + (let ((qval (cadr exp))) + (lambda (env) qval))) + +(define (analyze-variable exp) + (lambda (env) (lookup-variable-value exp env))) + +(define (analyze-assignment exp) + (let ((var (assignment-variable exp)) + (vproc (analyze (assignment-value exp)))) + (lambda (env) + (set-variable-value! var (vproc env) env) + 'ok))) + +(define (analyze-definition exp) + (let ((var (definition-variable exp)) + (vproc (analyze (definition-value exp)))) + (lambda (env) + (define-variable! var (vproc env) env) + 'ok))) + +(define (analyze-if exp) + (let ((pproc (analyze (if-predicate exp))) + (cproc (analyze (if-consequent exp))) + (aproc (analyze (if-alternative exp)))) + (lambda (env) + (if (pproc env) + (cproc env) + (aproc env))))) + +(define (analyze-lambda exp) + (let ((vars (lambda-parameters exp)) + (bproc (analyze-sequence (lambda-body exp)))) + (lambda (env) (make-procedure vars bproc env)))) + +(define (analyze-sequence exps) + (define (sequentially proc1 proc2) + (lambda (env) (proc1 env) (proc2 env))) + (define (loop first-proc rest-procs) + (if (null? rest-procs) + first-proc + (loop (sequentially first-proc (car rest-procs)) + (cdr rest-procs)))) + (let ((procs (map analyze exps))) + (if (null? procs) + (error "Empty sequence -- ANALYZE")) + (loop (car procs) (cdr procs)))) + +(define (analyze-application exp) + (let ((fproc (analyze (operator exp))) + (aprocs (map analyze (operands exp)))) + (lambda (env) + (execute-application (fproc env) + (map (lambda (aproc) (aproc env)) + aprocs))))) +(define (execute-application proc args) + (cond ((primitive-procedure? proc) + (apply-primitive-procedure proc args)) + ((compound-procedure? proc) + ((procedure-body proc) + (extend-environment (procedure-parameters proc) + args + (procedure-environment proc)))) + (else + (error + "Unknown procedure type -- EXECUTE-APPLICATION" + proc)))) + +;(define (analyze-application exp) +; (let ((fproc (analyze (operator exp))) +; (aprocs (operands exp))) ; TODO: (map analyze (operands exp)))) +; (lambda (env) +; (execute-application (fproc env) +;; TODO: (map (lambda (aproc) (aproc env)) +; aprocs)))) ;; TODO: temporary testing w/constants +;; TODO: aprocs))))) +;(define (execute-application proc args) +; (cond ((primitive-procedure? proc) +; (apply proc args)) +; ;(apply-primitive-procedure proc args)) +;;; TODO: +;; ;((compound-procedure? proc) +;; ; ((procedure-body proc) +;; ; (extend-environment (procedure-parameters proc) +;; ; args +;; ; (procedure-environment proc)))) +; (else +;#f))) ;; TODO: this is a temporary debug line +;; (error +;; "Unknown procedure type -- EXECUTE-APPLICATION" +;; proc)))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; JAE - Testing, should work both with cyclone and other compilers (husk, chicken, etc) +;; although, that may not be possible with (app) and possibly other forms. +;(write (eval 2 *global-environment*)) +;(write (eval ''(1 2) *global-environment*)) +;(write (eval ''(1 . 2) *global-environment*)) +;(write (eval '(if #t 'test-ok 'test-fail) *global-environment*)) +;(write (eval '(if 1 'test-ok) *global-environment*)) +;(write (eval '(if #f 'test-fail 'test-ok) *global-environment*)) +;(write (eval '((lambda (x) (cons x 2) (cons #t x)) 1) *global-environment*)) +;;(write (eval '((lambda () (cons 1 2) (cons #t #f))) *global-environment*)) +;;(write (eval '(cons 1 2) *global-environment*)) ; TODO +;;(write (eval '(+ 1 2) *global-environment*)) ; TODO + +;(define (loop) +; (display (eval (read) *global-environment*)) +; (display (newline)) +; (loop)) +;(loop) diff --git a/examples/fac.scm b/examples/fac.scm new file mode 100644 index 00000000..b0969eb0 --- /dev/null +++ b/examples/fac.scm @@ -0,0 +1 @@ +(define (fac n) (if (= n 0) 1 (* n (fac (- n 1))))) diff --git a/examples/tail-call-optimization.scm b/examples/tail-call-optimization.scm new file mode 100644 index 00000000..52319355 --- /dev/null +++ b/examples/tail-call-optimization.scm @@ -0,0 +1,11 @@ +;; This should run forever using a constant amount of memory +;; and max CPU: + +;; Original program: +;; (define (foo) (bar)) +;; (define (bar) (foo)) +;; (foo) + +(letrec ((foo (lambda () (bar))) + (bar (lambda () (foo)))) + (foo)) diff --git a/examples/tail-call-testing.scm b/examples/tail-call-testing.scm new file mode 100644 index 00000000..c829f7c6 --- /dev/null +++ b/examples/tail-call-testing.scm @@ -0,0 +1,14 @@ +;; A program to use all available memory, and eventually crash +(letrec ((foo (lambda (x) + (write (length x)) + (bar (cons 1 x)))) + (bar (lambda (x) (foo (cons 1 x))))) + (foo '())) + +;; TODO: try rewriting it so memory is reclaimed. Does it run +;; forever now? +;(letrec ((foo (lambda (x) +; (write (length x)) +; (bar (cons 1 x)))) +; (bar (lambda (x) (foo (cons 1 x))))) +; (foo '())) diff --git a/parser.scm b/parser.scm new file mode 100644 index 00000000..b5dbd895 --- /dev/null +++ b/parser.scm @@ -0,0 +1,376 @@ +;; Cyclone Scheme +;; Copyright (c) 2014, Justin Ethier +;; All rights reserved. +;; +;; This module contains the s-expression parser and supporting functions. +;; +;; FUTURE: if this was a module/library, would probably only want to export +;; read and read-all +;; + +;; Extended information for each input port +(define *in-port-table* '()) +(define (reg-port fp) + (let ((r (assoc fp *in-port-table*))) + (cond + ((not r) +;(write `(ADDED NEW ENTRY TO in port table!!)) + (set! r + (list fp + #f ; Buffered char, if any + 1 ; Line number + 0)) ; Char number + (set! *in-port-table* (cons r *in-port-table*)) + r) + (else r)))) +;; TODO: unreg-port - delete fp entry from *in-port-table* +;; would want to do this when port is closed + +(define (in-port:read-buf! ptbl) + (let ((result (cadr ptbl))) + (in-port:set-buf! ptbl #f) + result)) +(define (in-port:get-buf ptbl) (cadr ptbl)) +(define (in-port:set-buf! ptbl buf) (set-car! (cdr ptbl) buf)) +(define (in-port:get-lnum ptbl) (caddr ptbl)) +(define (in-port:set-lnum! ptbl lnum) (set-car! (cddr ptbl) lnum)) +(define (in-port:get-cnum ptbl) (cadddr ptbl)) +(define (in-port:set-cnum! ptbl cnum) (set-car! (cdddr ptbl) cnum)) +;; END input port table + +;; Helper functions +(define (add-tok tok toks quotes) + (define (loop i) + (if (= quotes i) + tok + (cons 'quote (cons (loop (+ i 1)) '())))) + (if quotes + (cons + (loop 0) + toks) + (cons tok toks))) + +;; Get completed list of tokens +(define (get-toks tok toks quotes) + (if (null? tok) + toks + (add-tok (->tok tok) toks quotes))) + +;; Add a token to the list, quoting it if necessary +(define (->tok lst) + (parse-atom (reverse lst))) + +;; Did we read a dotted list +(define (dotted? lst) + (and (> (length lst) 2) + (equal? (cadr (reverse lst)) (string->symbol ".")))) + +;; Convert a list read by the reader into an improper list +(define (->dotted-list lst) + (cond + ((null? lst) '()) + ((equal? (car lst) (string->symbol ".")) + (cadr lst)) + (else + (cons (car lst) (->dotted-list (cdr lst)))))) + +(define (parse-error msg lnum cnum) + (error + (string-append + "Error (line " + (number->string lnum) + ", char " + (number->string cnum) + "): " + msg))) + +;; Add finished token, if there is one, and continue parsing +(define (parse/tok fp tok toks all? comment? quotes parens ptbl curr-char) + (cond + ((null? tok) + (parse fp '() toks all? comment? quotes parens ptbl)) + (all? + (parse fp '() + (add-tok (->tok tok) toks quotes) + all? + comment? + #f ; read tok, no more quote + parens + ptbl)) + (else + ;; Reached a terminating char, return current token and + ;; save term char for the next (read). + ;; Note: never call set-buf! if in "all?" mode, since + ;; that mode builds a list of tokens + (in-port:set-buf! ptbl curr-char) +;(write `(DEBUG ,tok ,ptbl)) +;(write "\n") + (car (add-tok (->tok tok) toks quotes))))) + +;; Parse input from stream +;; +;; Input: +;; - Port object +;; - Current token +;; - List of tokens read (if applicable) +;; - Bool - Read-all mode, or just read the next object? +;; - Bool - Are we inside a comment? +;; - Quote level +;; - Level of nested parentheses +;; - Entry in the in-port table for this port +;; +;; Output: next object, or list of objects (if read-all mode) +;; +(define (parse fp tok toks all? comment? quotes parens ptbl) + (in-port:set-cnum! ptbl + (+ 1 (in-port:get-cnum ptbl))) + + (let ((c (if (in-port:get-buf ptbl) + (in-port:read-buf! ptbl) ;; Already buffered + (read-char fp)))) +;; DEBUGGING +;(write `(DEBUG read ,tok ,c)) +;(write (newline)) +;; END DEBUG + (cond + ((eof-object? c) + (if (> parens 0) + (parse-error "missing closing parenthesis" + (in-port:get-lnum ptbl) + (in-port:get-cnum ptbl))) + (if all? + (reverse (get-toks tok toks quotes)) + (let ((last (get-toks tok toks quotes))) + (if (> (length last) 0) + (car last) + c)))) ;; EOF + (comment? + (if (eq? c #\newline) + (begin + (in-port:set-lnum! ptbl + (+ 1 (in-port:get-lnum ptbl))) + (in-port:set-cnum! ptbl 0) + (parse fp '() toks all? #f quotes parens ptbl)) + (parse fp '() toks all? #t quotes parens ptbl))) + ((char-whitespace? c) + (if (equal? c #\newline) + (in-port:set-lnum! ptbl + (+ 1 (in-port:get-lnum ptbl)))) + (if (equal? c #\newline) + (in-port:set-cnum! ptbl 0)) + (parse/tok fp tok toks all? #f quotes parens ptbl c)) + ((eq? c #\;) + (parse/tok fp tok toks all? #t quotes parens ptbl c)) + ((eq? c #\') + (cond + ((and (not all?) (not quotes) (not (null? tok))) + ;; Reached a terminal char, read out previous token +;; TODO: would also need to do this if previous char was +;; not a quote! +;; EG: 'a'b ==> (quote a) (quote b), NOT (quote (quote b)) + (in-port:set-buf! ptbl c) + (car (add-tok (->tok tok) toks quotes))) + (else + (let ((quote-level (if quotes + (+ quotes 1) + 1))) + (cond + ((null? tok) + (parse fp '() toks all? comment? quote-level parens ptbl)) + (else + (parse fp '() (add-tok (->tok tok) toks quotes) + all? comment? quote-level parens ptbl))))))) + ((eq? c #\() +;(write `(DEBUG read open paren ,tok)) + (cond + ((and (not all?) (not (null? tok))) + ;; Reached a terminal char, read out previous token + (in-port:set-buf! ptbl c) + (car (add-tok (->tok tok) toks quotes))) + (else + (let ((sub ;(_cyc-read-all fp (+ parens 1))) + (parse fp '() '() #t #f #f (+ parens 1) ptbl)) + (toks* (get-toks tok toks quotes))) + (define new-toks (add-tok + (if (and (pair? sub) (dotted? sub)) + (->dotted-list sub) + sub) + toks* + quotes)) +;(write `(DEBUG incrementing paren level ,parens ,sub)) + (if all? + (parse fp '() new-toks all? #f #f parens ptbl) + (car new-toks)))))) + ((eq? c #\)) +;(write `(DEBUG decrementing paren level ,parens)) + (if (= parens 0) + (parse-error "unexpected closing parenthesis" + (in-port:get-lnum ptbl) + (in-port:get-cnum ptbl))) + (reverse (get-toks tok toks quotes))) + ((eq? c #\") + (cond + ((and (not all?) (not (null? tok))) + ;; Reached a terminal char, read out previous token + (in-port:set-buf! ptbl c) + (car (add-tok (->tok tok) toks quotes))) + (else + (let ((str (read-str fp '() ptbl)) + (toks* (get-toks tok toks quotes))) + (define new-toks (add-tok str toks* quotes)) + (if all? + (parse fp '() new-toks all? #f #f parens ptbl) + (car new-toks)))))) + ((eq? c #\#) + (if (null? tok) + ;; # reader + (let ((next-c (read-char fp))) + (in-port:set-cnum! ptbl + (+ 1 (in-port:get-cnum ptbl))) + (cond + ;; Do not use add-tok below, no need to quote a bool + ((eq? #\t next-c) (parse fp '() (cons #t toks) all? #f #f parens ptbl)) + ((eq? #\f next-c) (parse fp '() (cons #f toks) all? #f #f parens ptbl)) + ((eq? #\\ next-c) + (let ((new-toks (cons (read-pound fp ptbl) toks))) + (if all? + (parse fp '() new-toks all? #f #f parens ptbl) + (car new-toks)))) + (else + (parse-error "Unhandled input sequence" + (in-port:get-lnum ptbl) + (in-port:get-cnum ptbl))))) + ;; just another char... + (parse fp (cons c tok) toks all? #f quotes parens ptbl))) + (else + (parse fp (cons c tok) toks all? #f quotes parens ptbl))))) + +;; Read chars past a leading #\ +(define (read-pound fp ptbl) + (define (done raw-buf) + (let ((buf (reverse raw-buf))) + (cond + ((= 0 (length buf)) + (parse-error "missing character" + (in-port:get-lnum ptbl) + (in-port:get-cnum ptbl))) + ((= 1 (length buf)) + (car buf)) + ((equal? buf '(#\a #\l #\a #\r #\m)) + (integer->char 7)) + ((equal? buf '(#\b #\a #\c #\k #\s #\p #\a #\c #\e)) + (integer->char 8)) + ((equal? buf '(#\d #\e #\l #\e #\t #\e)) + (integer->char 127)) + ((equal? buf '(#\e #\s #\c #\a #\p #\e)) + (integer->char 27)) + ((equal? buf '(#\n #\e #\w #\l #\i #\n #\e)) + (integer->char 10)) + ((equal? buf '(#\n #\u #\l #\l)) + (integer->char 0)) + ((equal? buf '(#\r #\e #\t #\u #\r #\n)) + (integer->char 13)) + ((equal? buf '(#\s #\p #\a #\c #\e)) + (integer->char 32)) + ((equal? buf '(#\t #\a #\b)) + (integer->char 9)) + (else + (parse-error (string-append + "unable to parse character: " + (list->string buf)) + (in-port:get-lnum ptbl) + (in-port:get-cnum ptbl)))))) + (define (loop buf) + (let ((c (peek-char fp))) + (if (or (eof-object? c) + (char-whitespace? c) + (and (> (length buf) 0) + (equal? c #\)))) + (done buf) + (loop (cons (read-char fp) buf))))) + (loop '())) + +(define (read-str fp buf ptbl) + (let ((c (read-char fp))) + (cond + ((eof-object? c) + (parse-error "missing closing double-quote" + (in-port:get-lnum ptbl) + (in-port:get-cnum ptbl))) + ((equal? #\\ c) + (read-str fp (read-str-esc fp buf ptbl) ptbl)) + ((equal? #\" c) + (list->string (reverse buf))) + (else + (read-str fp (cons c buf) ptbl))))) + +;; Read an escaped character within a string +;; The escape '\' has already been read at this point +(define (read-str-esc fp buf ptbl) + (let ((c (read-char fp))) + (cond + ((eof-object? c) + (parse-error "missing escaped character within string" + (in-port:get-lnum ptbl) + (in-port:get-cnum ptbl))) + ((or (equal? #\" c) + (equal? #\\ c)) + (cons c buf)) + (else + (parse-error "invalid escape character in string" + (in-port:get-lnum ptbl) + (in-port:get-cnum ptbl)))))) + +(define (sign? c) + (or + (equal? c #\+) + (equal? c #\-))) + +;; parse-atom -> [chars] -> literal +(define (parse-atom a) + (cond + ((or (char-numeric? (car a)) + (and (> (length a) 1) + (char-numeric? (cadr a)) + (sign? (car a)))) + (string->number (list->string a))) + (else + (string->symbol (list->string a))))) + +;; Main lexer/parser +(define cyc-read ;; TODO: should be (read), but that is breaking on csi 4.8.0.5 + (lambda args + (let ((fp (if (null? args) + (current-input-port) + (car args)))) + (parse fp '() '() #f #f #f 0 (reg-port fp))))) + +;; read-all -> port -> [objects] +(define (read-all . args) + (let ((fp (if (null? args) + (current-input-port) + (car args)))) + (define (loop fp result) + (let ((obj (cyc-read fp))) + (if (eof-object? obj) + (reverse result) + (loop fp (cons obj result))))) + (loop fp '()))) + +;; TODO: for some reason this causes trouble in chicken 4.8. WTF?? +;; read -> port -> object +;(define read cyc-read) + +;(define (repl) +; ;; Test code +; ;(let ((fp (open-input-file "tests/begin.scm"))) +; ;(let ((fp (open-input-file "tests/strings.scm"))) +; ;(let ((fp (open-input-file "eval.scm"))) +; ;(let ((fp (open-input-file "dev.scm"))) +; ; (write (read-all fp))) +; (let ((fp (current-input-port))) +; (write (cyc-read fp))) +; (repl)) +;;(repl) + + diff --git a/repl.scm b/repl.scm new file mode 100644 index 00000000..6b35a140 --- /dev/null +++ b/repl.scm @@ -0,0 +1,16 @@ +;; Cyclone Scheme +;; Copyright (c) 2014, Justin Ethier +;; All rights reserved. +;; +;; This module contains a simple Read-Eval-Print Loop +;; +(display *Cyc-version-banner*) +(define (repl) + (display "cyclone> ") + (let ((c (eval (read)))) + (cond + ((not (eof-object? c)) + (write c) + (repl)) + (else #f)))) +(repl) diff --git a/runtime.h b/runtime.h new file mode 100644 index 00000000..1385cf18 --- /dev/null +++ b/runtime.h @@ -0,0 +1,1464 @@ +/* + * Cyclone Scheme + * This file contains the C runtime used by compiled programs. + */ + +// If this is set, GC is called every function call. +// Only turn this on for debugging!!! +#define DEBUG_ALWAYS_GC 1 + +// Debug GC flag +#define DEBUG_GC 0 + +// Show diagnostic information for the GC when program terminates +#define DEBUG_SHOW_DIAG 0 + +// Maximum number of args that GC will accept +#define NUM_GC_ANS 100 + +/* STACK_GROWS_DOWNWARD is a machine-specific preprocessor switch. */ +/* It is true for the Macintosh 680X0 and the Intel 80860. */ +#define STACK_GROWS_DOWNWARD 1 + +/* STACK_SIZE is the size of the stack buffer, in bytes. */ +/* Some machines like a smallish stack--i.e., 4k-16k, while others */ +/* like a biggish stack--i.e., 100k-500k. */ +#define STACK_SIZE 100000 + +/* HEAP_SIZE is the size of the 2nd generation, in bytes. */ +/* HEAP_SIZE should be at LEAST 225000*sizeof(cons_type). */ +#define HEAP_SIZE 6000000 + +long global_stack_size; +long global_heap_size; + +/* Define size of Lisp tags. Options are "short" or "long". */ +typedef long tag_type; + +#include +#include +#include +#include +#include +#include + +#ifndef CLOCKS_PER_SEC +/* gcc doesn't define this, even though ANSI requires it in .. */ +#define CLOCKS_PER_SEC 0 +#define setjmp _setjmp +#define longjmp _longjmp +#endif + +/* The following sparc hack is courtesy of Roger Critchlow. */ +/* It speeds up the output by more than a factor of THREE. */ +/* Do 'gcc -O -S cboyer13.c'; 'perlscript >cboyer.s'; 'gcc cboyer.s'. */ +#ifdef __GNUC__ +#ifdef sparc +#define never_returns __attribute__ ((noreturn)) +#else +#define never_returns /* __attribute__ ((noreturn)) */ +#endif +#else +#define never_returns /* __attribute__ ((noreturn)) */ +#endif + +#if STACK_GROWS_DOWNWARD +#define check_overflow(x,y) ((x) < (y)) +#else +#define check_overflow(x,y) ((x) > (y)) +#endif + +/* Define tag values. (I don't trust compilers to optimize enums.) */ +#define cons_tag 0 +#define symbol_tag 1 +#define forward_tag 2 +#define closure0_tag 3 +#define closure1_tag 4 +#define closure2_tag 5 +#define closure3_tag 6 +#define closure4_tag 7 +#define closureN_tag 8 +#define integer_tag 9 +#define double_tag 10 +#define string_tag 11 +#define primitive_tag 12 +#define eof_tag 13 +#define port_tag 14 +#define boolean_tag 15 +#define cvar_tag 16 + +#define nil NULL +#define eq(x,y) (x == y) +#define nullp(x) (x == NULL) +#define or(x,y) (x || y) +#define and(x,y) (x && y) + +/* Define general object type. */ + +typedef void *object; + +#define type_of(x) (((list) x)->tag) +#define forward(x) (((list) x)->cons_car) + +/* Define value types. + Depending on the underlying architecture, compiler, etc these types + have extra least significant bits that can be used to mark them as + values instead of objects (IE, pointer to a tagged object). + On many machines, addresses are multiples of four, leaving the two + least significant bits free - according to lisp in small pieces. + + experimenting with chars below: +*/ +#define obj_is_char(x) ((unsigned long)(x) & (unsigned long)1) +#define obj_obj2char(x) (char)((long)(x)>>1) +#define obj_char2obj(c) ((void *)(((c)<<1) | 1)) + +#define is_value_type(x) obj_is_char(x) +#define is_object_type(x) (x && !is_value_type(x)) + +/* Define function type. */ + +typedef void (*function_type)(); + +/* Define C-variable integration type */ +typedef struct {tag_type tag; object *pvar;} cvar_type; +typedef cvar_type *cvar; +#define make_cvar(n,v) cvar_type n; n.tag = cvar_tag; n.pvar = v; + +/* Define boolean type. */ +typedef struct {const tag_type tag; const char *pname;} boolean_type; +typedef boolean_type *boolean; + +#define boolean_pname(x) (((boolean_type *) x)->pname) + +#define defboolean(name,pname) \ +static boolean_type name##_boolean = {boolean_tag, #pname}; \ +static const object boolean_##name = &name##_boolean + +/* Define symbol type. */ + +typedef struct {const tag_type tag; const char *pname; object plist;} symbol_type; +typedef symbol_type *symbol; + +#define symbol_pname(x) (((symbol_type *) x)->pname) +#define symbol_plist(x) (((symbol_type *) x)->plist) + +#define defsymbol(name,pname) \ +static symbol_type name##_symbol = {symbol_tag, #pname, nil}; \ +static const object quote_##name = &name##_symbol + +/* Define numeric types */ +typedef struct {tag_type tag; int value;} integer_type; +#define make_int(n,v) integer_type n; n.tag = integer_tag; n.value = v; +typedef struct {tag_type tag; double value;} double_type; +#define make_double(n,v) double_type n; n.tag = double_tag; n.value = v; + +/* Define string type */ +typedef struct {tag_type tag; char *str;} string_type; +#define make_string(cv,s) string_type cv; cv.tag = string_tag; \ +{ int len = strlen(s); cv.str = dhallocp; \ + if ((dhallocp + len + 1) >= dhbottom + global_heap_size) { \ + printf("Fatal error: data heap overflow\n"); exit(1); } \ + memcpy(dhallocp, s, len + 1); dhallocp += len + 1; } + +/* I/O types */ + +// TODO: FILE* may not be good enough +// consider http://stackoverflow.com/questions/6206893/how-to-implement-char-ready-in-c +// TODO: a simple wrapper around FILE may not be good enough long-term +// TODO: how exactly mode will be used. need to know r/w, bin/txt +typedef struct {tag_type tag; FILE *fp; int mode;} port_type; +#define make_port(p,f,m) port_type p; p.tag = port_tag; p.fp = f; p.mode = m; + +static symbol_type __EOF = {eof_tag, "", nil}; // symbol_type in lieu of custom type +static const object Cyc_EOF = &__EOF; +/* Define cons type. */ + +typedef struct {tag_type tag; object cons_car,cons_cdr;} cons_type; +typedef cons_type *list; + +#define car(x) (((list) x)->cons_car) +#define cdr(x) (((list) x)->cons_cdr) +#define caar(x) (car(car(x))) +#define cadr(x) (car(cdr(x))) +#define cdar(x) (cdr(car(x))) +#define cddr(x) (cdr(cdr(x))) +#define caaar(x) (car(car(car(x)))) +#define caadr(x) (car(car(cdr(x)))) +#define cadar(x) (car(cdr(car(x)))) +#define caddr(x) (car(cdr(cdr(x)))) +#define cdaar(x) (cdr(car(car(x)))) +#define cdadr(x) (cdr(car(cdr(x)))) +#define cddar(x) (cdr(cdr(car(x)))) +#define cdddr(x) (cdr(cdr(cdr(x)))) +#define caaaar(x) (car(car(car(car(x))))) +#define caaadr(x) (car(car(car(cdr(x))))) +#define caadar(x) (car(car(cdr(car(x))))) +#define caaddr(x) (car(car(cdr(cdr(x))))) +#define cadaar(x) (car(cdr(car(car(x))))) +#define cadadr(x) (car(cdr(car(cdr(x))))) +#define caddar(x) (car(cdr(cdr(car(x))))) +#define cadddr(x) (car(cdr(cdr(cdr(x))))) +#define cdaaar(x) (cdr(car(car(car(x))))) +#define cdaadr(x) (cdr(car(car(cdr(x))))) +#define cdadar(x) (cdr(car(cdr(car(x))))) +#define cdaddr(x) (cdr(car(cdr(cdr(x))))) +#define cddaar(x) (cdr(cdr(car(car(x))))) +#define cddadr(x) (cdr(cdr(car(cdr(x))))) +#define cdddar(x) (cdr(cdr(cdr(car(x))))) +#define cddddr(x) (cdr(cdr(cdr(cdr(x))))) + +#define make_cons(n,a,d) \ +cons_type n; n.tag = cons_tag; n.cons_car = a; n.cons_cdr = d; + +#define atom(x) ((x == NULL) || (((cons_type *) x)->tag != cons_tag)) + +/* Closure types. (I don't trust compilers to optimize vector refs.) */ + +typedef struct {tag_type tag; function_type fn;} closure0_type; +typedef struct {tag_type tag; function_type fn; object elt1;} closure1_type; +typedef struct {tag_type tag; function_type fn; object elt1,elt2;} closure2_type; +typedef struct {tag_type tag; function_type fn; object elt1,elt2,elt3;} closure3_type; +typedef struct {tag_type tag; function_type fn; object elt1,elt2,elt3,elt4;} closure4_type; +typedef struct {tag_type tag; function_type fn; int num_elt; object *elts;} closureN_type; + +typedef closure0_type *closure0; +typedef closure1_type *closure1; +typedef closure2_type *closure2; +typedef closure3_type *closure3; +typedef closure4_type *closure4; +typedef closureN_type *closureN; +typedef closure0_type *closure; + +#define mclosure0(c,f) closure0_type c; c.tag = closure0_tag; c.fn = f; +#define mclosure1(c,f,a) closure1_type c; c.tag = closure1_tag; \ + c.fn = f; c.elt1 = a; +#define mclosure2(c,f,a1,a2) closure2_type c; c.tag = closure2_tag; \ + c.fn = f; c.elt1 = a1; c.elt2 = a2; +#define mclosure3(c,f,a1,a2,a3) closure3_type c; c.tag = closure3_tag; \ + c.fn = f; c.elt1 = a1; c.elt2 = a2; c.elt3 = a3; +#define mclosure4(c,f,a1,a2,a3,a4) closure4_type c; c.tag = closure4_tag; \ + c.fn = f; c.elt1 = a1; c.elt2 = a2; c.elt3 = a3; c.elt4 = a4; +#define setq(x,e) x = e + +#define DEBUG_mclosure0(c, f) closureN_type c; c.tag = closureN_tag; c.fn = f; \ + c.num_elt = 0; c.elts = (object *)alloca(sizeof(object) * c.num_elt); +#define DEBUG_mclosure1(c, f, a1) closureN_type c; c.tag = closureN_tag; c.fn = f; \ + c.num_elt = 1; c.elts = (object *)alloca(sizeof(object) * c.num_elt); c.elts[0] = a1; +#define DEBUG_mclosure2(c, f, a1, a2) closureN_type c; c.tag = closureN_tag; c.fn = f; \ + c.num_elt = 2; c.elts = (object *)alloca(sizeof(object) * c.num_elt); c.elts[0] = a1; c.elts[1] = a2; +#define DEBUG_mclosure3(c, f, a1, a2, a3) closureN_type c; c.tag = closureN_tag; c.fn = f; \ + c.num_elt = 3; c.elts = (object *)alloca(sizeof(object) * c.num_elt); c.elts[0] = a1; c.elts[1] = a2; c.elts[2] = a3; + + +#define mlist1(e1) (mcons(e1,nil)) +#define mlist2(e2,e1) (mcons(e2,mlist1(e1))) +#define mlist3(e3,e2,e1) (mcons(e3,mlist2(e2,e1))) +#define mlist4(e4,e3,e2,e1) (mcons(e4,mlist3(e3,e2,e1))) +#define mlist5(e5,e4,e3,e2,e1) (mcons(e5,mlist4(e4,e3,e2,e1))) +#define mlist6(e6,e5,e4,e3,e2,e1) (mcons(e6,mlist5(e5,e4,e3,e2,e1))) +#define mlist7(e7,e6,e5,e4,e3,e2,e1) (mcons(e7,mlist6(e6,e5,e4,e3,e2,e1))) + +#define rule(lhs,rhs) (mlist3(quote_equal,lhs,rhs)) + +#define make_cell(n,a) make_cons(n,a,nil); +static object cell_get(object cell){ + return car(cell); +} +static object cell_set(object cell, object value){ + ((list) cell)->cons_car = value; + return cell; +} + +#define global_set(glo,value) (glo=value) + +/* Variable argument count support + + This macro is intended to be executed at the top of a function that + is passed 'var' as a variable-length argument. 'count' is the number + of varargs that were passed. EG: + - C definition: f(object a, ...) + - C call: f(1, 2, 3) + - var: a + - count: 3 + + Argument count would need to be passed by the caller of f. Presumably + our compiler will compute the difference between the number of required + args and the number of provided ones, and pass the difference as 'count' + */ +#define load_varargs(var, count) { \ + int i; \ + object tmp; \ + list args = nil; \ + va_list va; \ + if (count > 0) { \ + args = alloca(sizeof(cons_type)*count); \ + va_start(va, var); \ + for (i = 0; i < count; i++) { \ + if (i) { \ + tmp = va_arg(va, object); \ + } else { \ + tmp = var; \ + } \ + args[i].tag = cons_tag; \ + args[i].cons_car = tmp; \ + args[i].cons_cdr = (i == (count-1)) ? nil : &args[i + 1]; \ + } \ + va_end(va); \ + } \ + var = args; \ +} + +/* Prototypes for Lisp built-in functions. */ + +static object Cyc_global_variables = nil; +static object Cyc_get_global_variables(); +static object Cyc_get_cvar(object var); +static object Cyc_set_cvar(object var, object value); +static object apply(object cont, object func, object args); +static void Cyc_apply(int argc, closure cont, object prim, ...); +static list mcons(object,object); +static object terpri(void); +static object Cyc_display(object); +static int equal(object,object); +static list assq(object,list); +static object get(object,object); +static object equalp(object,object); +static object memberp(object,list); +static char *transport(char *,int); +static void GC(closure,object*,int) never_returns; + +static void main_main(long stack_size,long heap_size,char *stack_base) never_returns; +static long long_arg(int argc,char **argv,char *name,long dval); + +/* Symbol Table */ + +/* Notes for the symbol table + + string->symbol can: + - lookup symbol in the table + - if found, return that pointer + - otherwise, allocate symbol in table and return ptr to it + + For now, GC of symbols is missing. long-term it probably would be desirable +*/ + +char *_strdup (const char *s); +static object add_symbol(symbol_type *psym); +static object add_symbol_by_name(const char *name); +static object find_symbol_by_name(const char *name); +static object find_or_add_symbol(const char *name); +list symbol_table = nil; + +char *_strdup (const char *s) { + char *d = malloc (strlen (s) + 1); + if (d) { strcpy (d,s); } + return d; +} + +static object find_symbol_by_name(const char *name) { + list l = symbol_table; + for (; !nullp(l); l = cdr(l)) { + const char *str = symbol_pname(car(l)); + if (strcmp(str, name) == 0) return car(l); + } + return nil; +} + +static object add_symbol(symbol_type *psym) { + symbol_table = mcons(psym, symbol_table); + return psym; +} + +static object add_symbol_by_name(const char *name) { + symbol_type sym = {symbol_tag, _strdup(name), nil}; + symbol_type *psym = malloc(sizeof(symbol_type)); + memcpy(psym, &sym, sizeof(symbol_type)); + return add_symbol(psym); +} + +static object find_or_add_symbol(const char *name){ + object sym = find_symbol_by_name(name); + if (sym){ + return sym; + } else { + return add_symbol_by_name(name); + } +} + +/* Global variables. */ + +static clock_t start; /* Starting time. */ + +static char *stack_begin; /* Initialized by main. */ +static char *stack_limit1; /* Initialized by main. */ +static char *stack_limit2; + +static char *bottom; /* Bottom of tospace. */ +static char *allocp; /* Cheney allocate pointer. */ +static char *alloc_end; + +/* TODO: not sure this is the best strategy for strings, especially if there + are a lot of long, later gen strings because that will cause a lot of + copying to occur during GC */ +static char *dhbottom; /* Bottom of data heap */ +static char *dhallocp; /* Current place in data heap */ +static char *dhalloc_end; + +static long no_gcs = 0; /* Count the number of GC's. */ +static long no_major_gcs = 0; /* Count the number of GC's. */ + +static volatile object gc_cont; /* GC continuation closure. */ +static volatile object gc_ans[NUM_GC_ANS]; /* argument for GC continuation closure. */ +static volatile int gc_num_ans; +static jmp_buf jmp_main; /* Where to jump to. */ + +//static object test_exp1, test_exp2; /* Expressions used within test. */ + +/* Define the Lisp atoms that we need. */ + +defboolean(f,f); +defboolean(t,t); +defsymbol(Cyc_191procedure, procedure); + +//static object quote_list_f; /* Initialized by main to '(f) */ +//static object quote_list_t; /* Initialized by main to '(t) */ + +//static volatile object unify_subst = nil; /* This is a global Lisp variable. */ +DECLARE_GLOBALS + +/* These (crufty) printing functions are used for debugging. */ +static object terpri() {printf("\n"); return nil;} + +static int equal(x, y) object x, y; +{ + if (nullp(x)) return nullp(y); + if (nullp(y)) return nullp(x); + if (obj_is_char(x)) return obj_is_char(y) && x == y; + switch(type_of(x)) { + case integer_tag: + return (type_of(y) == integer_tag && + ((integer_type *) x)->value == ((integer_type *) y)->value); + case double_tag: + return (type_of(y) == double_tag && + ((double_type *) x)->value == ((double_type *) y)->value); + case string_tag: + return (type_of(y) == string_tag && + strcmp(((string_type *) x)->str, + ((string_type *) y)->str) == 0); + default: + return x == y; + } +} + +static object Cyc_get_global_variables(){ + return Cyc_global_variables; +} + +static object Cyc_get_cvar(object var) { + if (is_object_type(var) && type_of(var) == cvar_tag) { + return *(((cvar_type *)var)->pvar); + } + return var; +} + +static object Cyc_set_cvar(object var, object value) { + if (is_object_type(var) && type_of(var) == cvar_tag) { + *(((cvar_type *)var)->pvar) = value; + } + return var;} + +static object Cyc_has_cycle(object lst) { + object slow_lst, fast_lst; + int is_obj = is_object_type(lst); + int type = type_of(lst); + if (nullp(lst) || is_value_type(lst) || + (is_object_type(lst) && type_of(lst) != cons_tag)) { + return (boolean_f); + } + slow_lst = lst; + fast_lst = cdr(lst); + while(1) { + if (nullp(fast_lst)) return boolean_f; + if (nullp(cdr(fast_lst))) return boolean_f; + if (eq(car(slow_lst), car(fast_lst))) return boolean_t; + + slow_lst = cdr(slow_lst); + fast_lst = cddr(fast_lst); + } +} + +static object Cyc_display(x) object x; +{object tmp = nil; + object has_cycle = boolean_f; + int i = 0; + if (nullp(x)) {printf("()"); return x;} + if (obj_is_char(x)) {printf("%c", obj_obj2char(x)); return x;} + switch (type_of(x)) + {case closure0_tag: + case closure1_tag: + case closure2_tag: + case closure3_tag: + case closure4_tag: + case closureN_tag: + printf("<%p>",(void *)((closure) x)->fn); + break; + case eof_tag: + printf(""); + break; + case port_tag: + printf(""); + break; + case primitive_tag: + printf(""); + break; + case cvar_tag: + Cyc_display(Cyc_get_cvar(x)); + break; + case boolean_tag: + printf("#%s",((boolean_type *) x)->pname); + break; + case symbol_tag: + printf("%s",((symbol_type *) x)->pname); + break; + case integer_tag: + printf("%d", ((integer_type *) x)->value); + break; + case double_tag: + printf("%lf", ((double_type *) x)->value); + break; + case string_tag: + printf("%s", ((string_type *) x)->str); + break; + case cons_tag: + has_cycle = Cyc_has_cycle(x); + printf("("); + Cyc_display(car(x)); + + // Experimenting with displaying lambda defs in REPL + // not good enough but this is a start. would probably need + // the same code in write() + if (equal(quote_Cyc_191procedure, car(x))) { + printf(" "); + Cyc_display(cadr(x)); + printf(")"); + break; + } + + for (tmp = cdr(x); tmp && ((closure) tmp)->tag == cons_tag; tmp = cdr(tmp)) { + if (has_cycle == boolean_t) { + if (i++ > 20) break; /* arbitrary number, for now */ + } + printf(" "); + Cyc_display(car(tmp)); + } + if (has_cycle == boolean_t) { + printf(" ..."); + } else if (tmp) { + printf(" . "); + Cyc_display(tmp); + } + printf(")"); + break; + default: + printf("Cyc_display: bad tag x=%ld\n", ((closure)x)->tag); getchar(); exit(0);} + return x;} + +static object Cyc_write(x) object x; +{object tmp = nil; + if (nullp(x)) {printf("()\n"); return x;} + if (obj_is_char(x)) {printf("#\\%c\n", obj_obj2char(x)); return x;} + switch (type_of(x)) + {case string_tag: + printf("\"%s\"", ((string_type *) x)->str); + break; + default: + Cyc_display(x);} + printf("\n"); + return x;} + +/* Some of these non-consing functions have been optimized from CPS. */ + +static object memberp(x,l) object x; list l; +{for (; !nullp(l); l = cdr(l)) if (boolean_f != equalp(x,car(l))) return boolean_t; + return boolean_f;} + +static object get(x,i) object x,i; +{register object plist; register object plistd; + if (nullp(x)) return x; + if (type_of(x)!=symbol_tag) {printf("get: bad x=%ld\n",((closure)x)->tag); exit(0);} + plist = symbol_plist(x); + for (; !nullp(plist); plist = cdr(plistd)) + {plistd = cdr(plist); + if (eq(car(plist),i)) return car(plistd);} + return nil;} + +static object equalp(x,y) object x,y; +{for (; ; x = cdr(x), y = cdr(y)) + {if (equal(x,y)) return boolean_t; + if (obj_is_char(x) || obj_is_char(y) || + nullp(x) || nullp(y) || + type_of(x)!=cons_tag || type_of(y)!=cons_tag) return boolean_f; + if (boolean_f == equalp(car(x),car(y))) return boolean_f;}} + +static list assq(x,l) object x; list l; +{for (; !nullp(l); l = cdr(l)) + {register list la = car(l); if (eq(x,car(la))) return la;} + return boolean_f;} + +static list assoc(x,l) object x; list l; +{for (; !nullp(l); l = cdr(l)) + {register list la = car(l); if (boolean_f != equalp(x,car(la))) return la;} + return boolean_f;} + + +// TODO: generate these using macros??? +static object __num_eq(x, y) object x, y; +{if (x && y && ((integer_type *)x)->value == ((integer_type *)y)->value) + return boolean_t; + return boolean_f;} + +static object __num_gt(x, y) object x, y; +{//printf("DEBUG cmp %d, x %d, y %d, x tag %d, y tag %d\n", + // (((integer_type *)x)->value > ((integer_type *)y)->value), + // ((integer_type *)x)->value, ((integer_type *)y)->value, + // ((list)x)->tag, ((list)y)->tag); + //exit(1); + if (((integer_type *)x)->value > ((integer_type *)y)->value) + return boolean_t; + return boolean_f;} + +static object __num_lt(x, y) object x, y; +{if (((integer_type *)x)->value < ((integer_type *)y)->value) + return boolean_t; + return boolean_f;} + +static object __num_gte(x, y) object x, y; +{if (((integer_type *)x)->value >= ((integer_type *)y)->value) + return boolean_t; + return boolean_f;} + +static object __num_lte(x, y) object x, y; +{if (((integer_type *)x)->value <= ((integer_type *)y)->value) + return boolean_t; + return boolean_f;} + +// TODO: static object Cyc_is_eq(x, y) object x, y) +static object Cyc_is_boolean(object o){ + if (!nullp(o) && + !is_value_type(o) && + ((list)o)->tag == boolean_tag && + (eq(boolean_f, o) || eq(boolean_t, o))) + return boolean_t; + return boolean_f;} + +static object Cyc_is_cons(object o){ + if (!nullp(o) && !is_value_type(o) && ((list)o)->tag == cons_tag) + return boolean_t; + return boolean_f;} + +static object Cyc_is_null(object o){ + if (nullp(o)) + return boolean_t; + return boolean_f;} + +static object Cyc_is_number(object o){ + if (!nullp(o) && !is_value_type(o) && ((list)o)->tag == integer_tag) + return boolean_t; + return boolean_f;} + +static object Cyc_is_symbol(object o){ + if (!nullp(o) && !is_value_type(o) && ((list)o)->tag == symbol_tag) + return boolean_t; + return boolean_f;} + +static object Cyc_is_string(object o){ + if (!nullp(o) && !is_value_type(o) && ((list)o)->tag == string_tag) + return boolean_t; + return boolean_f;} + +static object Cyc_is_char(object o){ + if (obj_is_char(o)) + return boolean_t; + return boolean_f;} + +static object Cyc_is_eof_object(object o) { + if (!nullp(o) && !is_value_type(o) && type_of(o) == eof_tag) + return boolean_t; + return boolean_f;} + +static object Cyc_is_cvar(object o) { + if (!nullp(o) && !is_value_type(o) && type_of(o) == cvar_tag) + return boolean_t; + return boolean_f;} + +static object Cyc_eq(object x, object y) { + if (eq(x, y)) + return boolean_t; + return boolean_f; +} + +static object Cyc_set_car(object l, object val) { + ((list)l)->cons_car = val; + return l; +} + +static object Cyc_set_cdr(object l, object val) { + ((list)l)->cons_cdr = val; + return l; +} + +static integer_type Cyc_length(object l){ + make_int(len, 0); + while(!nullp(l)){ + if (((list)l)->tag != cons_tag){ + printf("length - invalid parameter, expected list\n"); + exit(1); + } + l = cdr(l); + len.value++; + } + return len; +} + +static string_type Cyc_number2string(object n) { + char buffer[1024]; + int num = ((integer_type *) n)->value; + + snprintf(buffer, 1024, "%d", num); + make_string(str, buffer); + return str; +} + +static string_type Cyc_symbol2string(object sym) { + make_string(str, symbol_pname(sym)); + return str; +} + +static object Cyc_string2symbol(object str) { + object sym = find_symbol_by_name(symbol_pname(str)); + if (!sym) { + sym = add_symbol_by_name(symbol_pname(str)); + } + return sym; +} + +static string_type Cyc_list2string(object lst){ + char *buf; + int i = 0; + integer_type len = Cyc_length(lst); // Inefficient, walks whole list + buf = alloca(sizeof(char) * (len.value + 1)); + + while(!nullp(lst)){ + buf[i++] = obj_obj2char(car(lst)); + lst = cdr(lst); + } + buf[i] = '\0'; + + make_string(str, buf); + return str; +} + +#define string2list(c,s) object c = nil; { \ + char *str = ((string_type *)s)->str; \ + int len = strlen(str); \ + cons_type *buf; \ + if (len > 0) { \ + buf = alloca(sizeof(cons_type) * len); \ + __string2list(str, buf, len); \ + c = (object)&(buf[0]); \ + } \ +} + +static void __string2list(const char *str, cons_type *buf, int buflen){ + int i = 0; + while (str[i]){ + buf[i].tag = cons_tag; + buf[i].cons_car = obj_char2obj(str[i]); + buf[i].cons_cdr = (i == buflen - 1) ? nil : buf + (i + 1); + i++; + } +} + +static integer_type Cyc_string2number(object str){ + make_int(n, 0); + if (type_of(str) == string_tag && + ((string_type *) str)->str){ + // TODO: not good enough long-term since it doesn't parse floats + n.value = atoi(((string_type *) str)->str); + } + return n; +} + +// TODO: +static string_type Cyc_string_append(int argc, object str1, ...) { + // TODO: one way to do this, perhaps not the most efficient: + // compute lengths of the strings, + // store lens and str ptrs + // allocate buffer, memcpy each str to buffer + // make_string using buffer + + va_list ap; + int i = 0, total_len = 1; // for null char + int *len = alloca(sizeof(int) * argc); + char *buffer, *bufferp, **str = alloca(sizeof(char *) * argc); + object tmp; + + va_start(ap, str1); + str[i] = ((string_type *)str1)->str; + len[i] = strlen(str[i]); + total_len += len[i]; + + for (i = 1; i < argc; i++) { + tmp = va_arg(ap, object); + str[i] = ((string_type *)tmp)->str; + len[i] = strlen(str[i]); + total_len += len[i]; + } + + va_end(ap); + + buffer = bufferp = alloca(sizeof(char) * total_len); + for (i = 0; i < argc; i++) { + memcpy(bufferp, str[i], len[i]); + bufferp += len[i]; + } + *bufferp = '\0'; + make_string(result, buffer); + return result; +} + +static integer_type Cyc_char2integer(object chr){ + make_int(n, obj_obj2char(chr)); + return n; +} + +static object Cyc_integer2char(object n){ + int val = 0; + + if (!nullp(n)) { + val = ((integer_type *) n)->value; + } + + return obj_char2obj(val); +} + +/*static object sum(object x, object y) {}*/ + +static void my_exit(closure) never_returns; + +static void my_exit(env) closure env; { +#if DEBUG_SHOW_DIAG + printf("my_exit: heap bytes allocated=%d time=%ld ticks no_gcs=%ld no_m_gcs=%ld\n", + allocp-bottom,clock()-start,no_gcs,no_major_gcs); + printf("my_exit: ticks/second=%ld\n",(long) CLOCKS_PER_SEC); +#endif + exit(0);} + +static object Cyc_error(int count, object obj1, ...) { + va_list ap; + object tmp; + int i; + + va_start(ap, obj1); + printf("Error: "); + Cyc_display(obj1); + printf("\n"); + + for (i = 1; i < count; i++) { + tmp = va_arg(ap, object); + Cyc_display(tmp); + printf("\n"); + } + + va_end(ap); + exit(1); + return boolean_f; +} + +static void __halt(object obj) { +#if DEBUG_SHOW_DIAG + printf("\nhalt: "); + Cyc_display(obj); + printf("\n"); +#endif + my_exit(obj); +} + +#define __sum(c,x,y) integer_type c; c.tag = integer_tag; c.value = (((integer_type *)(x))->value + ((integer_type *)(y))->value); +#define __mul(c,x,y) integer_type c; c.tag = integer_tag; c.value = (((integer_type *)(x))->value * ((integer_type *)(y))->value); +#define __sub(c,x,y) integer_type c; c.tag = integer_tag; c.value = (((integer_type *)(x))->value - ((integer_type *)(y))->value); +#define __div(c,x,y) integer_type c; c.tag = integer_tag; c.value = (((integer_type *)(x))->value / ((integer_type *)(y))->value); + +/* I/O functions */ + +static port_type Cyc_io_current_input_port() { + make_port(p, stdin, 0); + return p; +} + +static port_type Cyc_io_open_input_file(object str) { + const char *fname = ((string_type *)str)->str; + make_port(p, NULL, 0); + p.fp = fopen(fname, "r"); + return p; +} + +static object Cyc_io_close_input_port(object port) { + if (port && type_of(port) == port_tag) { + FILE *stream = ((port_type *)port)->fp; + if (stream) fclose(stream); + ((port_type *)port)->fp = NULL; + } + return port; +} + +// TODO: port arg is optional! (maybe handle that in expansion section??) +static object Cyc_io_read_char(object port) { + if (type_of(port) == port_tag) { + int c = fgetc(((port_type *) port)->fp); + if (c != EOF) { + return obj_char2obj(c); + } + } + return Cyc_EOF; +} + +static object Cyc_io_peek_char(object port) { + FILE *stream; + int c; + + if (type_of(port) == port_tag) { + stream = ((port_type *) port)->fp; + c = fgetc(stream); + ungetc(c, stream); + if (c != EOF) { + return obj_char2obj(c); + } + } + return Cyc_EOF; +} + +/* Primitive types */ +//typedef common_type (*prim_function_type)(); +//typedef void (*prim_function_type)(); +typedef struct {tag_type tag; const char *pname; /*prim_function_type fn;*/} primitive_type; +typedef primitive_type *primitive; + +#define defprimitive(name/*, fnc*/) \ +static primitive_type name##_primitive = {primitive_tag, #name /*, &fnc*/}; \ +static const object primitive_##name = &name##_primitive + +#define prim(x) (x && ((primitive)x)->tag == primitive_tag) + +// TODO: there has to be a better way: +defprimitive(cons /*, Cyc_length*/); +defprimitive(length /*, Cyc_length*/); +defprimitive(car); +defprimitive(cdr); +defprimitive(cadr); +defprimitive(set_91car_67); +defprimitive(set_91cdr_67); +defprimitive(eq_127); +defprimitive(equal_127); +defprimitive(null_127); +defprimitive(_87); // The plus symbol: + +defprimitive(Cyc_91global_91vars); +defprimitive(has_91cycle_127); + +/* All constant-size objects */ +typedef union { + cons_type cons_t; + symbol_type symbol_t; + primitive_type primitive_t; + integer_type integer_t; + double_type double_t; + string_type string_t; +} common_type; + +/* + * + * @param cont - Continuation for the function to call into + * @param func - Function to execute + * @param args - A list of arguments to the function + */ +static object apply(object cont, object func, object args){ + object result; + common_type buf; + +// TODO: consider passing an argc for just this purpose +// if (nullp(args)) { +// printf("Error: no arguments passed to apply\n"); +// exit(1); +// } + + switch(type_of(func)) { + case primitive_tag: + if (func == primitive_cons) { + make_cons(c, car(args), cadr(args)); + buf.cons_t = c; + result = &buf; + } else if (func == primitive_length) { + buf.integer_t = Cyc_length(car (args)); + result = &buf; + } else if (func == primitive_eq_127) { + result = Cyc_eq(car(args), cadr(args)); + } else if (func == primitive_equal_127) { + result = equalp(car(args), cadr(args)); + } else if (func == primitive_null_127) { + object tmp = car(args); + result = Cyc_is_null(tmp); + } else if (func == primitive__87) { + __sum(i, car(args), cadr(args)); + buf.integer_t = i; + result = &buf; + } else if (func == primitive_car) { + result = car(car(args)); + } else if (func == primitive_cdr) { + result = cdr(car(args)); + } else if (func == primitive_cadr) { + result = cadr(car(args)); + } else if (func == primitive_set_91car_67) { + result = Cyc_set_car(car(args), cadr(args)); + } else if (func == primitive_set_91cdr_67) { + result = Cyc_set_cdr(car(args), cadr(args)); + } else if (func == primitive_Cyc_91global_91vars) { + result = Cyc_global_variables; + } else if (func == primitive_has_91cycle_127) { + result = Cyc_has_cycle(car(args)); +// caar(x) (car(car(x))) +// cdar(x) (cdr(car(x))) +// cddr(x) (cdr(cdr(x))) +// caaar(x) (car(car(car(x)))) +// caadr(x) (car(car(cdr(x)))) +// cadar(x) (car(cdr(car(x)))) +// caddr(x) (car(cdr(cdr(x)))) +// cdaar(x) (cdr(car(car(x)))) +// cdadr(x) (cdr(car(cdr(x)))) +// cddar(x) (cdr(cdr(car(x)))) +// cdddr(x) (cdr(cdr(cdr(x)))) +// caaaar(x) (car(car(car(car(x))))) +// caaadr(x) (car(car(car(cdr(x))))) +// caadar(x) (car(car(cdr(car(x))))) +// caaddr(x) (car(car(cdr(cdr(x))))) +// cadaar(x) (car(cdr(car(car(x))))) +// cadadr(x) (car(cdr(car(cdr(x))))) +// caddar(x) (car(cdr(cdr(car(x))))) +// cadddr(x) (car(cdr(cdr(cdr(x))))) +// cdaaar(x) (cdr(car(car(car(x))))) +// cdaadr(x) (cdr(car(car(cdr(x))))) +// cdadar(x) (cdr(car(cdr(car(x))))) +// cdaddr(x) (cdr(car(cdr(cdr(x))))) +// cddaar(x) (cdr(cdr(car(car(x))))) +// cddadr(x) (cdr(cdr(car(cdr(x))))) +// cdddar(x) (cdr(cdr(cdr(car(x))))) +// cddddr(x) (cdr(cdr(cdr(cdr(x))))) + } else { + printf("Unrecognized primitive function: %s\n", ((symbol_type *)func)->pname); + exit(1); + } + break; + default: + printf("Invalid object type %ld\n", type_of(func)); + exit(1); + } + return_funcall1(cont, result); + return nil; // TODO: restructure to avoid this? + // would require emitting apply's such that they are not assigning a val, + // but instead they replace the final call to return_X. + // Like at the end of Cyc_apply +} + +// Version of apply meant to be called from within compiled code +static void Cyc_apply(int argc, closure cont, object prim, ...){ + va_list ap; + object tmp; + int i; + list args = alloca(sizeof(cons_type) * argc); + + va_start(ap, prim); + + for (i = 0; i < argc; i++) { + tmp = va_arg(ap, object); + args[i].tag = cons_tag; + args[i].cons_car = tmp; + args[i].cons_cdr = (i == (argc-1)) ? nil : &args[i + 1]; + } + //printf("DEBUG applying primitive to "); + //Cyc_display((object)&args[0]); + //printf("\n"); + + va_end(ap); + apply(cont, prim, (object)&args[0]); +} +// END apply + +static char *transport(x, gcgen) char *x; int gcgen; +/* Transport one object. WARNING: x cannot be nil!!! */ +{ + if (nullp(x)) return x; + if (obj_is_char(x)) return x; +#if DEBUG_GC + printf("entered transport "); + printf("transport %ld\n", type_of(x)); +#endif + switch (type_of(x)) + {case cons_tag: + {register list nx = (list) allocp; + type_of(nx) = cons_tag; car(nx) = car(x); cdr(nx) = cdr(x); + forward(x) = nx; type_of(x) = forward_tag; + allocp = ((char *) nx)+sizeof(cons_type); + return (char *) nx;} + case closure0_tag: + {register closure0 nx = (closure0) allocp; + type_of(nx) = closure0_tag; nx->fn = ((closure0) x)->fn; + forward(x) = nx; type_of(x) = forward_tag; + allocp = ((char *) nx)+sizeof(closure0_type); + return (char *) nx;} + case closure1_tag: + {register closure1 nx = (closure1) allocp; + type_of(nx) = closure1_tag; nx->fn = ((closure1) x)->fn; + nx->elt1 = ((closure1) x)->elt1; + forward(x) = nx; type_of(x) = forward_tag; + x = (char *) nx; allocp = ((char *) nx)+sizeof(closure1_type); + return (char *) nx;} + case closure2_tag: + {register closure2 nx = (closure2) allocp; + type_of(nx) = closure2_tag; nx->fn = ((closure2) x)->fn; + nx->elt1 = ((closure2) x)->elt1; + nx->elt2 = ((closure2) x)->elt2; + forward(x) = nx; type_of(x) = forward_tag; + x = (char *) nx; allocp = ((char *) nx)+sizeof(closure2_type); + return (char *) nx;} + case closure3_tag: + {register closure3 nx = (closure3) allocp; + type_of(nx) = closure3_tag; nx->fn = ((closure3) x)->fn; + nx->elt1 = ((closure3) x)->elt1; + nx->elt2 = ((closure3) x)->elt2; + nx->elt3 = ((closure3) x)->elt3; + forward(x) = nx; type_of(x) = forward_tag; + x = (char *) nx; allocp = ((char *) nx)+sizeof(closure3_type); + return (char *) nx;} + case closure4_tag: + {register closure4 nx = (closure4) allocp; + type_of(nx) = closure4_tag; nx->fn = ((closure4) x)->fn; + nx->elt1 = ((closure4) x)->elt1; + nx->elt2 = ((closure4) x)->elt2; + nx->elt3 = ((closure4) x)->elt3; + nx->elt4 = ((closure4) x)->elt4; + forward(x) = nx; type_of(x) = forward_tag; + x = (char *) nx; allocp = ((char *) nx)+sizeof(closure4_type); + return (char *) nx;} + case closureN_tag: + {register closureN nx = (closureN) allocp; + int i; + type_of(nx) = closureN_tag; nx->fn = ((closureN) x)->fn; + nx->num_elt = ((closureN) x)->num_elt; + nx->elts = (object *)(((char *)nx) + sizeof(closureN_type)); + for (i = 0; i < nx->num_elt; i++) { + nx->elts[i] = ((closureN) x)->elts[i]; + } + forward(x) = nx; type_of(x) = forward_tag; + x = (char *) nx; allocp = ((char *) nx)+sizeof(closureN_type) + sizeof(object) * nx->num_elt; + return (char *) nx;} + case string_tag: + {register string_type *nx = (string_type *) allocp; + type_of(nx) = string_tag; + if (gcgen == 0) { + // Minor, data heap is not relocated + nx->str = ((string_type *)x)->str; + } else { + // Major collection, data heap is moving + nx->str = dhallocp; + int len = strlen(((string_type *) x)->str); + memcpy(dhallocp, ((string_type *) x)->str, len + 1); + dhallocp += len + 1; + } + forward(x) = nx; type_of(x) = forward_tag; + x = (char *) nx; allocp = ((char *) nx)+sizeof(integer_type); + return (char *) nx;} + case integer_tag: + {register integer_type *nx = (integer_type *) allocp; + type_of(nx) = integer_tag; nx->value = ((integer_type *) x)->value; + forward(x) = nx; type_of(x) = forward_tag; + x = (char *) nx; allocp = ((char *) nx)+sizeof(integer_type); + return (char *) nx;} + case port_tag: + {register port_type *nx = (port_type *) allocp; + type_of(nx) = port_tag; nx->fp = ((port_type *) x)->fp; + nx->mode = ((port_type *) x)->mode; + forward(x) = nx; type_of(x) = forward_tag; + x = (char *) nx; allocp = ((char *) nx)+sizeof(port_type); + return (char *) nx;} + case cvar_tag: + {register cvar_type *nx = (cvar_type *) allocp; + type_of(nx) = cvar_tag; nx->pvar = ((cvar_type *) x)->pvar; + forward(x) = nx; type_of(x) = forward_tag; + x = (char *) nx; allocp = ((char *) nx)+sizeof(cvar_type); + return (char *) nx;} + case forward_tag: + return (char *) forward(x); + case eof_tag: break; + case primitive_tag: break; + case boolean_tag: break; + case symbol_tag: break; // JAE TODO: raise an error here? Should not be possible in real code, though (IE, without GC DEBUG flag) + default: + printf("transport: bad tag x=%p x.tag=%ld\n",(void *)x,type_of(x)); exit(0);} + return x;} + +/* Use overflow macro which already knows which way the stack goes. */ +/* Major collection, transport objects on stack or old heap */ +#define transp(p) \ +temp = (p); \ +if (DEBUG_ALWAYS_GC || \ + (check_overflow(low_limit,temp) && \ + check_overflow(temp,high_limit)) || \ + (check_overflow(old_heap_low_limit - 1, temp) && \ + check_overflow(temp,old_heap_high_limit + 1))) \ + (p) = (object) transport(temp,major); + +static void GC_loop(int major, closure cont, object *ans, int num_ans) +{char foo; + int i; + register object temp; + register object low_limit = &foo; /* Move live data above us. */ + register object high_limit = stack_begin; + register char *scanp = allocp; /* Cheney scan pointer. */ + register object old_heap_low_limit = low_limit; // Minor-GC default + register object old_heap_high_limit = high_limit; // Minor-GC default + + char *tmp_bottom = bottom; /* Bottom of tospace. */ + char *tmp_allocp = allocp; /* Cheney allocate pointer. */ + char *tmp_alloc_end = alloc_end; + char *tmp_dhbottom = dhbottom; + char *tmp_dhallocp = dhallocp; + char *tmp_dhallocp_end = dhalloc_end; + if (major) { + // Initialize new heap (TODO: make a function for this) + bottom = calloc(1,global_heap_size); + allocp = (char *) ((((long) bottom)+7) & -8); + alloc_end = allocp + global_heap_size - 8; + scanp = allocp; + old_heap_low_limit = tmp_bottom; + old_heap_high_limit = tmp_alloc_end; + + dhallocp = dhbottom = calloc(1, global_heap_size); + dhalloc_end = dhallocp + global_heap_size - 8; + } + +#if DEBUG_GC + printf("\n=== started GC type = %d === \n", major); +#endif + /* Transport GC's continuation and its argument. */ + transp(cont); + gc_cont = cont; + gc_num_ans = num_ans; +#if DEBUG_GC + printf("DEBUG done transporting cont\n"); +#endif + + /* Prevent overrunning buffer */ + if (num_ans > NUM_GC_ANS) { + printf("Fatal error - too many arguments (%d) to GC\n", num_ans); + exit(1); + } + + for (i = 0; i < num_ans; i++){ + transp(ans[i]); + gc_ans[i] = ans[i]; + } +#if DEBUG_GC + printf("DEBUG done transporting gc_ans\n"); +#endif + + /* Transport global variables. */ + //transp(unify_subst); + transp(Cyc_global_variables); // Internal global used by the runtime + GC_GLOBALS + while (scanpelt1); + scanp += sizeof(closure1_type); break; + case closure2_tag: +#if DEBUG_GC + printf("DEBUG transport closure2 \n"); +#endif + transp(((closure2) scanp)->elt1); transp(((closure2) scanp)->elt2); + scanp += sizeof(closure2_type); break; + case closure3_tag: +#if DEBUG_GC + printf("DEBUG transport closure3 \n"); +#endif + transp(((closure3) scanp)->elt1); transp(((closure3) scanp)->elt2); + transp(((closure3) scanp)->elt3); + scanp += sizeof(closure3_type); break; + case closure4_tag: +#if DEBUG_GC + printf("DEBUG transport closure4 \n"); +#endif + transp(((closure4) scanp)->elt1); transp(((closure4) scanp)->elt2); + transp(((closure4) scanp)->elt3); transp(((closure4) scanp)->elt4); + scanp += sizeof(closure4_type); break; + case closureN_tag: +#if DEBUG_GC + printf("DEBUG transport closureN \n"); +#endif + {int i; int n = ((closureN) scanp)->num_elt; + for (i = 0; i < n; i++) { + transp(((closureN) scanp)->elts[i]); + } + scanp += sizeof(closureN_type) + sizeof(object) * n; + } + break; + case string_tag: +#if DEBUG_GC + printf("DEBUG transport string \n"); +#endif + scanp += sizeof(string_type); break; + case integer_tag: +#if DEBUG_GC + printf("DEBUG transport integer \n"); +#endif + scanp += sizeof(integer_type); break; + case port_tag: +#if DEBUG_GC + printf("DEBUG transport port \n"); +#endif + scanp += sizeof(port_type); break; + case cvar_tag: +#if DEBUG_GC + printf("DEBUG transport cvar \n"); +#endif + scanp += sizeof(cvar_type); break; + case eof_tag: + case primitive_tag: + case symbol_tag: + case boolean_tag: + default: + printf("GC: bad tag scanp=%p scanp.tag=%ld\n",(void *)scanp,type_of(scanp)); + exit(0);} + + if (major) { + free(tmp_bottom); + free(tmp_dhbottom); + } +} + +static void GC(cont,ans,num_ans) closure cont; object *ans; int num_ans; +{ + /* Only room for one more minor-GC, so do a major one. + * Not sure this is the best strategy, it may be better to do major + * ones sooner, perhaps after every x minor GC's. + * + * Also may need to consider dynamically increasing heap size, but + * by how much (1.3x, 1.5x, etc) and when? I suppose when heap usage + * after a collection is above a certain percentage, then it would be + * necessary to increase heap size the next time. + */ + if (allocp >= (bottom + (global_heap_size - global_stack_size))) { + //printf("Possibly only room for one more minor GC. no_gcs = %ld\n", no_gcs); + no_major_gcs++; + GC_loop(1, cont, ans, num_ans); + } else { + no_gcs++; /* Count the number of minor GC's. */ + GC_loop(0, cont, ans, num_ans); + } + + /* You have to let it all go, Neo. Fear, doubt, and disbelief. Free your mind... */ + longjmp(jmp_main,1); /* Return globals gc_cont, gc_ans. */ +} + +/* This heap cons is used only for initialization. */ +static list mcons(a,d) object a,d; +{register cons_type *c = malloc(sizeof(cons_type)); + c->tag = cons_tag; c->cons_car = a; c->cons_cdr = d; + return c;} + +static void c_entry_pt(int,closure,closure); +static void main_main (stack_size,heap_size,stack_base) + long stack_size,heap_size; char *stack_base; +{char in_my_frame; + mclosure0(clos_exit,&my_exit); /* Create a closure for exit function. */ + gc_ans[0] = &clos_exit; /* It becomes the argument to test. */ + gc_num_ans = 1; + /* Allocate stack buffer. */ + stack_begin = stack_base; +#if STACK_GROWS_DOWNWARD + stack_limit1 = stack_begin - stack_size; + stack_limit2 = stack_limit1 - 2000; +#else + stack_limit1 = stack_begin + stack_size; + stack_limit2 = stack_limit1 + 2000; +#endif +#if DEBUG_SHOW_DIAG + printf("main: sizeof(cons_type)=%ld\n",(long) sizeof(cons_type)); +#endif + if (check_overflow(stack_base,&in_my_frame)) + {printf("main: Recompile with STACK_GROWS_DOWNWARD set to %ld\n", + (long) (1-STACK_GROWS_DOWNWARD)); exit(0);} +#if DEBUG_SHOW_DIAG + printf("main: stack_size=%ld stack_base=%p stack_limit1=%p\n", + stack_size,(void *)stack_base,(void *)stack_limit1); + printf("main: Try different stack sizes from 4 K to 1 Meg.\n"); +#endif + /* Do initializations of Lisp objects and rewrite rules. + quote_list_f = mlist1(boolean_f); quote_list_t = mlist1(boolean_t); */ + + /* Make temporary short names for certain atoms. */ + { + + /* Define the rules, but only those that are actually referenced. */ + + /* Create closure for the test function. */ + mclosure0(run_test,&c_entry_pt); + gc_cont = &run_test; + /* Initialize constant expressions for the test runs. */ + + /* Allocate heap area for second generation. */ + /* Use calloc instead of malloc to assure pages are in main memory. */ +#if DEBUG_SHOW_DIAG + printf("main: Allocating and initializing heap...\n"); +#endif + bottom = calloc(1,heap_size); + allocp = (char *) ((((long) bottom)+7) & -8); + alloc_end = allocp + heap_size - 8; + + dhallocp = dhbottom = calloc(1, heap_size); + dhalloc_end = dhallocp + heap_size - 8; +#if DEBUG_SHOW_DIAG + printf("main: heap_size=%ld allocp=%p alloc_end=%p\n", + (long) heap_size,(void *)allocp,(void *)alloc_end); + printf("main: Try a larger heap_size if program bombs.\n"); + printf("Starting...\n"); +#endif + start = clock(); /* Start the timing clock. */ + + /* Tank, load the jump program... */ + setjmp(jmp_main); + AFTER_LONGJMP + /* */ + printf("main: your setjmp and/or longjmp are broken.\n"); exit(0);}} + +static long long_arg(argc,argv,name,dval) + int argc; char **argv; char *name; long dval; +{int j; + for(j=1;(j+1)string '(#\( #\" #\a #\b #\c #\" #\)))) +(assert:equal "strings" d "(\"abc\")") +(assert:equal "strings" d "(\"abc\")") ;; Test GC +(assert:equal "strings" d "(\"abc\")") ;; Test GC +(set! a "hello 2") +(assert:equal "strings" a "hello 2") + +;; Recursion example: +(letrec ((fnc (lambda (i) + (begin + ;(display i) + (if (> i 0) (fnc (- i 1)) 0))))) + (fnc 10)) + +(assert:equal "numeric small reverse" (reverse '(1 2)) '(2 1)) +(assert:equal "small reverse" (reverse '(a b c)) '(c b a)) +(assert:equal "larger reverse" (reverse '(1 2 3 4 5 6 7 8 9 10)) '(10 9 8 7 6 5 4 3 2 1)) +;; ;TODO: improper list, this is an error: (reverse '(1 . 2)) +(assert:equal "char whitespace" (char-whitespace? #\space) #t) +(assert:equal "char whitespace" (char-whitespace? #\a) #f) +(assert:equal "char numeric" (char-numeric? #\1) #t) +(assert:equal "char numeric" (char-numeric? #\newline) #f) +(assert:equal "" (and 1 2 3) 3) +(assert:equal "" (and #t #f 'a 'b 'c) #f) +(assert:equal "" (or 1 2 3) 1) +(assert:equal "" (or #f 'a 'b 'c) 'a) +(assert:equal "" (string-append "") "") +;error - (string-append 1) +(assert:equal "" (string-append "test") "test") +(assert:equal "" (string-append "ab" "cdefgh ij" "klmno" "p" "q" "rs " "tuv" "w" " x " "yz") + "abcdefgh ijklmnopqrs tuvw x yz") +(assert:equal "" (string->number "0") 0) +(assert:equal "" (string->number "42") 42) +;(assert:equal "" (string->number "343243243232") ;; Note no bignum support +(assert:equal "" (string->number "3.14159") 3) ;; Currently no float support +(assert:equal "" (list->string (list #\A #\B #\C)) "ABC") +(assert:equal "" (list->string (list #\A)) "A") +(assert:equal "" (list->string (list)) "") +(assert:equal "" (integer->char 65) #\A) +(assert:equal "" (char->integer #\a) 97) + +(assert:equal "" (number->string (+ 1 2)) "3") +(assert:equal "" (string->list "test") '(#\t #\e #\s #\t)) +(assert:equal "" (string->symbol "a-b-c-d") 'a-b-c-d) +(assert:equal "" (symbol->string 'a/test-01) "a/test-01") +(assert:equal "" (eq? 'a-1 'a-1) #t) +(assert:equal "" (eq? (string->symbol "aa") 'aa) #t) +(assert:equal "" (equal? (string->symbol "aa") 'aa) #t) + +;; Map +(assert:equal "map 1" (map (lambda (x) (car x)) '((a . b) (1 . 2) (#\h #\w))) '(a 1 #\h)) +(assert:equal "map 2" (map car '((a . b) (1 . 2) (#\h #\w))) '(a 1 #\h)) +(assert:equal "map 3" (map cdr '((a . b) (1 . 2) (#\h #\w))) '(b 2 (#\w))) +(assert:equal "map length" + (map length '(() (1) (1 2) (1 2 3) (1 2 3 4))) + '(0 1 2 3 4)) + +;; Prove internal defines are compiled properly +;; +;; Illustrates an old problem with compiling parser. +;; how to handle the internal define p? +;; trans was trying to wrap p with a lambda, which is not going to +;; work because callers want to pass a,b,c directly. +(define (glob a b c) + (define (p d) + (list a b c d)) + (p 4)) +(assert:equal "internal defs for global funcs" + (glob 1 2 3) + '(1 2 3 4)) + +;; Global shadowing issue +;; Do not allow global define to shadow local ones +(define x 'global) +((lambda () + (define x 1) + ((lambda () + (define x 2) + (assert:equal "local define of x" x 2))) + (assert:equal "another local define of x" x 1))) +(assert:equal "global define of x" x 'global) + +; TODO: could add parser tests for these +;( +;123(list) +;1'b +;(write +; (list +; 1;2 +; )) +;1;2 +;3"four five" +;#\space +;) + +;; EVAL section +(define x 1) +(define y 2) +(define *z* 3) +;(write (eval '(Cyc-global-vars))) +(assert:equal "eval compiled - x" (eval 'x) x) +(eval '(set! x 'mutated-x)) +(assert:equal "Access var with a mangled name" (eval '*z*) *z*) +(assert:equal "Access compile var mutated by eval" x 'mutated-x) +;; END eval + +; TODO: use display, output without surrounding quotes +(write (list *num-passed* " tests passed with no errors")) +;; diff --git a/trans.scm b/trans.scm new file mode 100644 index 00000000..2804d1d3 --- /dev/null +++ b/trans.scm @@ -0,0 +1,1543 @@ +;; +;; Cyclone Scheme +;; Copyright (c) 2014, Justin Ethier +;; All rights reserved. +;; +;; This module performs Scheme-to-Scheme transformations, and also contains +;; various utility functions used by the compiler. +;; + +(define *version* "0.0.1 (Pre-release)") + +(define *version-banner* + (string-append " + :@ + @@@ + @@@@: + `@@@@@+ + .@@@+@@@ Cyclone + @@ @@ An experimental Scheme compiler + ,@ TODO: project URL + '@ + .@ + @@ #@ (c) 2014 Justin Ethier + `@@@#@@@. Version " *version* " + #@@@@@ + +@@@+ + @@# + `@. + +")) + +(define *c-file-header-comment* + (string-append "/** + ** This file was automatically generated by the Cyclone scheme compiler + ** + ** (c) 2014 Justin Ethier + ** Version " *version* " + ** + **/ +")) + +;; Features implemented by this Scheme +(define *features* '(cyclone)) + +;; Built-in functions +;; TODO: relocate these somewhere else, like a lib.scm!!! +;; TODO: Longer-term, we will want to insert all these but then have an +;; algorithm in place to remove the definitions that are not used. +;; Basically, after the expansion phase but before alpha conversion, +;; there should be enough information to figure out what is unused, and +;; discard it. Obviously this may be a no-go in certain situations, such +;; as if a (read (eval)) REPL is present. Although maybe not, since that +;; would use the interpreter. +(define *built-ins* `( + (define *Cyc-version-banner* ,*version-banner*) + ;; TODO: The whitespace characters are space, tab, line feed, form feed (not in parser yet), and carriage return. + (define (char-whitespace? c) (member c '(#\tab #\space #\return #\newline))) + (define (char-numeric? c) (member c '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))) + (define (foldl func accum lst) + (if (null? lst) + accum + (foldl func (func (car lst) accum) (cdr lst)))) + (define (foldr func end lst) + (if (null? lst) + end + (func (car lst) (foldr func end (cdr lst))))) + (define (list . objs) objs) + (define (map func lst) + (foldr (lambda (x y) (cons (func x) y)) '() lst)) + (define (not x) (if x #f #t)) + (define (reverse lst) (foldl cons '() lst)) +)) + +;; Built-in macros +;; TODO: just a stub, real code would read (define-syntax) +;; from a lib file or such +(define *defined-macros* + (list + (cons 'and + (lambda (expr rename compare) + (cond ((null? (cdr expr))) + ((null? (cddr expr)) (cadr expr)) + (else (list (rename 'if) (cadr expr) + (cons (rename 'and) (cddr expr)) + #f))))) + (cons 'or + (lambda (expr rename compare) + (cond ((null? (cdr expr)) #f) + ((null? (cddr expr)) (cadr expr)) + (else + (list (rename 'let) (list (list (rename 'tmp) (cadr expr))) + (list (rename 'if) (rename 'tmp) + (rename 'tmp) + (cons (rename 'or) (cddr expr)))))))) + (cons 'let (lambda (exp rename compare) (let=>lambda exp))) + (cons 'begin (lambda (exp rename compare) (begin=>let exp))) + (cons 'letrec (lambda (exp rename compare) (letrec=>lets+sets exp))) + (cons 'cond + (lambda (expr rename compare) + (if (null? (cdr expr)) + (if #f #f) + ((lambda (cl) + (if (compare (rename 'else) (car cl)) + (if (pair? (cddr expr)) + (error "non-final else in cond" expr) + (cons (rename 'begin) (cdr cl))) + (if (if (null? (cdr cl)) #t (compare (rename '=>) (cadr cl))) + (list (list (rename 'lambda) (list (rename 'tmp)) + (list (rename 'if) (rename 'tmp) + (if (null? (cdr cl)) + (rename 'tmp) + (list (car (cddr cl)) (rename 'tmp))) + (cons (rename 'cond) (cddr expr)))) + (car cl)) + (list (rename 'if) + (car cl) + (cons (rename 'begin) (cdr cl)) + (cons (rename 'cond) (cddr expr)))))) + (cadr expr))))) + (cons 'cond-expand + ;; Based on the cond-expand macro from Chibi scheme + (lambda (expr rename compare) + (define (check x) + (if (pair? x) + (case (car x) + ((and) (every check (cdr x))) + ((or) (any check (cdr x))) + ((not) (not (check (cadr x)))) + ;((library) (eval `(find-module ',(cadr x)) (%meta-env))) + (else (error "cond-expand: bad feature" x))) + (memq x *features*))) + (let expand ((ls (cdr expr))) + (cond ((null? ls)) ; (error "cond-expand: no expansions" expr) + ((not (pair? (car ls))) (error "cond-expand: bad clause" (car ls))) + ((eq? 'else (caar ls)) ;(identifier->symbol (caar ls))) + (if (pair? (cdr ls)) + (error "cond-expand: else in non-final position") + `(,(rename 'begin) ,@(cdar ls)))) + ((check (caar ls)) `(,(rename 'begin) ,@(cdar ls))) + (else (expand (cdr ls))))))) + )) + + +(define (built-in-syms) + '(call/cc define)) + +(define (add-libs ast) + (cond + ((list? ast) + (append *built-ins* ast)) + (else + (error "Unexpected input program:" ast)))) + +;; Tuning +(define *do-code-gen* #t) ; Generate C code? + +;; Trace +(define *trace-level* 4) +(define (trace level msg pp prefix) + (if (>= *trace-level* level) + (begin + (display "/* ") + (newline) + (display prefix) + (pp msg) + (display " */") + (newline)))) +(define (trace:error msg) (trace 1 msg pretty-print "")) +(define (trace:warn msg) (trace 2 msg pretty-print "")) +(define (trace:info msg) (trace 3 msg pretty-print "")) +(define (trace:debug msg) (trace 4 msg display "DEBUG: ")) + +(define (cyc:error msg) + (error msg) + (exit 1)) + +;; File Utilities + +;; Get the basename of a file, without the extension. +;; EG: "file.scm" ==> "file" +(define (basename filename) + (let ((pos (list-index #\. (reverse (string->list filename))))) + (if (= pos -1) + filename + (substring filename 0 (- (string-length filename) pos 1))))) + +;; Find the first occurence of e within the given list. +;; Returns -1 if e is not found. +(define list-index + (lambda (e lst) + (if (null? lst) + -1 + (if (eq? (car lst) e) + 0 + (if (= (list-index e (cdr lst)) -1) + -1 + (+ 1 (list-index e (cdr lst)))))))) + + +;; Utilities. + +(cond-expand + (cyclone + ; member : symbol sorted-set[symbol] -> boolean + (define (member sym S) + (if (not (pair? S)) + #f + (if (eq? sym (car S)) + #t + (member sym (cdr S))))) + + ; void : -> void + (define (void) (if #f #t))) + (else #f)) + +; tagged-list? : symbol value -> boolean +(define (tagged-list? tag l) + (and (pair? l) + (eq? tag (car l)))) + +; char->natural : char -> natural +(define (char->natural c) + (let ((i (char->integer c))) + (if (< i 0) + (* -2 i) + (+ (* 2 i) 1)))) + +; integer->char-list : integer -> string +(define (integer->char-list n) + (string->list (number->string n))) + +; gensym-count : integer +(define gensym-count 0) + +; gensym : symbol -> symbol +(define gensym (lambda params + (if (null? params) + (begin + (set! gensym-count (+ gensym-count 1)) + (string->symbol (string-append + "$" + (number->string gensym-count)))) + (begin + (set! gensym-count (+ gensym-count 1)) + (string->symbol (string-append + (if (symbol? (car params)) + (symbol->string (car params)) + (car params)) + "$" + (number->string gensym-count))))))) + +; symbol boolean +(define (symbolstring sym1) + (symbol->string sym2))) + +; insert : symbol sorted-set[symbol] -> sorted-set[symbol] +(define (insert sym S) + (if (not (pair? S)) + (list sym) + (cond + ((eq? sym (car S)) S) + ((symbol sorted-set[symbol] +(define (remove sym S) + (if (not (pair? S)) + '() + (if (eq? (car S) sym) + (cdr S) + (cons (car S) (remove sym (cdr S)))))) + +; union : sorted-set[symbol] sorted-set[symbol] -> sorted-set[symbol] +(define (union set1 set2) + ; NOTE: This should be implemented as merge for efficiency. + (if (not (pair? set1)) + set2 + (insert (car set1) (union (cdr set1) set2)))) + +; difference : sorted-set[symbol] sorted-set[symbol] -> sorted-set[symbol] +(define (difference set1 set2) + ; NOTE: This can be similarly optimized. + (if (not (pair? set2)) + set1 + (difference (remove (car set2) set1) (cdr set2)))) + +; reduce : (A A -> A) list[A] A -> A +(define (reduce f lst init) + (if (not (pair? lst)) + init + (reduce f (cdr lst) (f (car lst) init)))) + +; azip : list[A] list[B] -> alist[A,B] +(define (azip list1 list2) + (if (and (pair? list1) (pair? list2)) + (cons (list (car list1) (car list2)) + (azip (cdr list1) (cdr list2))) + '())) + +; assq-remove-key : alist[A,B] A -> alist[A,B] +(define (assq-remove-key env key) + (if (not (pair? env)) + '() + (if (eq? (car (car env)) key) + (assq-remove-key (cdr env) key) + (cons (car env) (assq-remove-key (cdr env) key))))) + +; assq-remove-keys : alist[A,B] list[A] -> alist[A,B] +(define (assq-remove-keys env keys) + (if (not (pair? keys)) + env + (assq-remove-keys (assq-remove-key env (car keys)) (cdr keys)))) + +;; Simplified version of filter from SRFI 1 +(define (filter pred lis) + (let recur ((lis lis)) + (if (null? lis) + lis + (let ((head (car lis)) + (tail (cdr lis))) + (if (pred head) + (let ((new-tail (recur tail))) + (if (eq? tail new-tail) lis + (cons head new-tail))) + (recur tail)))))) + + +;; Data type predicates and accessors. + +; const? : exp -> boolean +(define (const? exp) + (or (integer? exp) + (string? exp) + (char? exp) + (boolean? exp))) + +; ref? : exp -> boolean +(define (ref? exp) + (symbol? exp)) + +; quote? : exp -> boolean +(define (quote? exp) + (tagged-list? 'quote exp)) + +; let? : exp -> boolean +(define (let? exp) + (tagged-list? 'let exp)) + +; let->bindings : let-exp -> alist[symbol,exp] +(define (let->bindings exp) + (cadr exp)) + +; let->exp : let-exp -> exp +(define (let->exp exp) + (cddr exp)) + +; let->bound-vars : let-exp -> list[symbol] +(define (let->bound-vars exp) + (map car (cadr exp))) + +; let->args : let-exp -> list[exp] +(define (let->args exp) + (map cadr (cadr exp))) + +; letrec? : exp -> boolean +(define (letrec? exp) + (tagged-list? 'letrec exp)) + +; letrec->bindings : letrec-exp -> alist[symbol,exp] +(define (letrec->bindings exp) + (cadr exp)) + +; letrec->exp : letrec-exp -> exp +(define (letrec->exp exp) + (cddr exp)) + +; letrec->exp : letrec-exp -> list[symbol] +(define (letrec->bound-vars exp) + (map car (cadr exp))) + +; letrec->exp : letrec-exp -> list[exp] +(define (letrec->args exp) + (map cadr (cadr exp))) + +; lambda? : exp -> boolean +(define (lambda? exp) + (tagged-list? 'lambda exp)) + +(define (lambda-varargs? exp) + (and (lambda? exp) + (or (symbol? (lambda->formals exp)) + (and (pair? (lambda->formals exp)) + (not (list? (lambda->formals exp))))))) + +; lambda->formals : lambda-exp -> list[symbol] +(define (lambda->formals exp) + (cadr exp)) + +(define (lambda-varargs? exp) + (let ((type (lambda-formals-type exp))) + (or (equal? type 'args:varargs) + (equal? type 'args:fixed-with-varargs)))) + +(define (lambda-varargs-var exp) + (if (lambda-varargs? exp) + (if (equal? (lambda-formals-type exp) 'args:varargs) + (lambda-formals exp) ; take symbol directly + (car (reverse (lambda-formals->list exp)))) ; Last arg is varargs + #f)) + +(define (lambda-formals-type exp) + (let ((args (lambda->formals exp))) + (cond + ((symbol? args) 'args:varargs) + ((list? args) 'args:fixed) + ((pair? args) 'args:fixed-with-varargs) + (else + (error `(Unexpected formals list in lambda-formals-type: ,args)))))) + +(define (lambda-formals->list exp) + (if (lambda-varargs? exp) + (let ((args (lambda->formals exp))) + (if (symbol? args) + (list args) + (pair->list args))) + (lambda->formals exp))) + +;; Repack a list of args (symbols) into lambda formals, by type +;; assumes args is a proper list +(define (list->lambda-formals args type) + (cond + ((eq? type 'args:fixed) args) + ((eq? type 'args:fixed-with-varargs) (list->pair args)) + ((eq? type 'args:varargs) + (if (> (length args) 1) + (error `(Too many args for varargs ,args)) + (car args))) + (else (error `(Unexpected type ,type))))) + +;; Create a proper copy of an improper list +;; EG: (1 2 . 3) ==> (1 2 3) +(define (pair->list p) + (let loop ((lst p)) + (if (not (pair? lst)) + (cons lst '()) + (cons (car lst) (loop (cdr lst)))))) + +;; Create an improper copy of a proper list +(define (list->pair l) + (let loop ((lst l)) + (cond + ((not (pair? lst)) + lst) + ((null? (cdr lst)) + (car lst)) + (else + (cons (car lst) (loop (cdr lst))))))) + +; lambda->exp : lambda-exp -> exp +(define (lambda->exp exp) + (cddr exp)) ;; JAE - changed from caddr, so we can handle multiple expressions + +; if? : exp -> boolean +(define (if? exp) + (tagged-list? 'if exp)) + +; if->condition : if-exp -> exp +(define (if->condition exp) + (cadr exp)) + +; if->then : if-exp -> exp +(define (if->then exp) + (caddr exp)) + +;; if-else? : if-exp -> bool +;; Determines whether an if expression has an else clause +(define (if-else? exp) + (and (tagged-list? 'if exp) + (> (length exp) 3))) + +; if->else : if-exp -> exp +(define (if->else exp) + (cadddr exp)) + +; app? : exp -> boolean +(define (app? exp) + (pair? exp)) + +; app->fun : app-exp -> exp +(define (app->fun exp) + (car exp)) + +; app->args : app-exp -> list[exp] +(define (app->args exp) + (cdr exp)) + +; prim? : exp -> boolean +(define (prim? exp) + (member exp '( + Cyc-global-vars + Cyc-get-cvar + Cyc-set-cvar! + Cyc-cvar? ;; Cyclone-specific + has-cycle? + + + - + * + / + = + > + < + >= + <= + apply + %halt + error + cons + cell-get + set-global! + set-cell! + cell + eq? + eqv? + equal? + assoc + assq + member + length + set-car! + set-cdr! + car + cdr + caar cadr cdar cddr + caaar caadr cadar caddr cdaar cdadr cddar cdddr + caaaar caaadr caadar caaddr cadaar cadadr + caddar cadddr cdaaar cdaadr cdadar cdaddr cddaar cddadr cdddar cddddr + char->integer + integer->char + string->number + string-append + string->list + list->string + string->symbol + symbol->string + number->string + boolean? + char? + eof-object? + null? + number? + pair? + string? + symbol? + current-input-port + open-input-file + close-input-port + read-char + peek-char + write + display))) + +(define (prim-call? exp) + (and (list? exp) (prim? (car exp)))) + +; begin? : exp -> boolean +(define (begin? exp) + (tagged-list? 'begin exp)) + +; begin->exps : begin-exp -> list[exp] +(define (begin->exps exp) + (cdr exp)) + +; define : exp -> boolean +(define (define? exp) + (tagged-list? 'define exp)) + +(define (define-lambda? exp) + (let ((var (cadr exp))) + (or + ;; Standard function + (and (list? var) + (> (length var) 0) + (symbol? (car var))) + ;; Varargs function + (and (pair? var) + (symbol? (car var)))))) + +(define (define->lambda exp) + (cond + ((define-lambda? exp) + (let ((var (caadr exp)) + (args (cdadr exp)) + (body (cddr exp))) + `(define ,var (lambda ,args ,@body)))) + (else exp))) + +; define->var : define-exp -> var +(define (define->var exp) + (cond + ((define-lambda? exp) + (caadr exp)) + (else + (cadr exp)))) + +; define->exp : define-exp -> exp +(define (define->exp exp) + (cddr exp)) + +; set! : exp -> boolean +(define (set!? exp) + (tagged-list? 'set! exp)) + +; set!->var : set!-exp -> var +(define (set!->var exp) + (cadr exp)) + +; set!->exp : set!-exp -> exp +(define (set!->exp exp) + (caddr exp)) + +; closure? : exp -> boolean +(define (closure? exp) + (tagged-list? 'closure exp)) + +; closure->lam : closure-exp -> exp +(define (closure->lam exp) + (cadr exp)) + +; closure->env : closure-exp -> exp +(define (closure->env exp) + (caddr exp)) + +(define (closure->fv exp) + (cddr exp)) + +; env-make? : exp -> boolean +(define (env-make? exp) + (tagged-list? 'env-make exp)) + +; env-make->id : env-make-exp -> env-id +(define (env-make->id exp) + (cadr exp)) + +; env-make->fields : env-make-exp -> list[symbol] +(define (env-make->fields exp) + (map car (cddr exp))) + +; env-make->values : env-make-exp -> list[exp] +(define (env-make->values exp) + (map cadr (cddr exp))) + +; env-get? : exp -> boolen +(define (env-get? exp) + (tagged-list? 'env-get exp)) + +; env-get->id : env-get-exp -> env-id +(define (env-get->id exp) + (cadr exp)) + +; env-get->field : env-get-exp -> symbol +(define (env-get->field exp) + (caddr exp)) + +; env-get->env : env-get-exp -> exp +(define (env-get->env exp) + (cadddr exp)) + +; set-cell!? : set-cell!-exp -> boolean +(define (set-cell!? exp) + (tagged-list? 'set-cell! exp)) + +; set-cell!->cell : set-cell!-exp -> exp +(define (set-cell!->cell exp) + (cadr exp)) + +; set-cell!->value : set-cell!-exp -> exp +(define (set-cell!->value exp) + (caddr exp)) + +; cell? : exp -> boolean +(define (cell? exp) + (tagged-list? 'cell exp)) + +; cell->value : cell-exp -> exp +(define (cell->value exp) + (cadr exp)) + +; cell-get? : exp -> boolean +(define (cell-get? exp) + (tagged-list? 'cell-get exp)) + +; cell-get->cell : cell-exp -> exp +(define (cell-get->cell exp) + (cadr exp)) + + + +;; Syntax manipulation. + +;; ; substitute-var : alist[var,exp] ref-exp -> exp +;; (define (substitute-var env var) +;; (let ((sub (assq var env))) +;; (if sub +;; (cadr sub) +;; var))) +;; +;; ; substitute : alist[var,exp] exp -> exp +;; (define (substitute env exp) +;; +;; (define (substitute-with env) +;; (lambda (exp) +;; (substitute env exp))) +;; +;; (cond +;; ; Core forms: +;; ((null? env) exp) +;; ((const? exp) exp) +;; ((prim? exp) exp) +;; ((ref? exp) (substitute-var env exp)) +;; ((lambda? exp) `(lambda ,(lambda->formals exp) +;; ,@(map (lambda (body-exp) +;; ;; TODO: could be more efficient +;; (substitute +;; (assq-remove-keys env (lambda->formals exp)) +;; body-exp)) +;; (lambda->exp exp)))) +;; ((set!? exp) `(set! ,(substitute-var env (set!->var exp)) +;; ,(substitute env (set!->exp exp)))) +;; ((if? exp) `(if ,(substitute env (if->condition exp)) +;; ,(substitute env (if->then exp)) +;; ,(substitute env (if->else exp)))) +;; +;; ; Sugar: +;; ((let? exp) `(let ,(azip (let->bound-vars exp) +;; (map (substitute-with env) (let->args exp))) +;; ,(substitute (assq-remove-keys env (let->bound-vars exp)) +;; (car (let->exp exp))))) +;; ((letrec? exp) (let ((new-env (assq-remove-keys env (letrec->bound-vars exp)))) +;; `(letrec ,(azip (letrec->bound-vars exp) +;; (map (substitute-with new-env) +;; (letrec->args exp))) +;; ,(substitute new-env (car (letrec->exp exp)))))) +;; ((begin? exp) (cons 'begin (map (substitute-with env) (begin->exps exp)))) +;; +;; ; IR (1): +;; ((cell? exp) `(cell ,(substitute env (cell->value exp)))) +;; ((cell-get? exp) `(cell-get ,(substitute env (cell-get->cell exp)))) +;; ((set-cell!? exp) `(set-cell! ,(substitute env (set-cell!->cell exp)) +;; ,(substitute env (set-cell!->value exp)))) +;; +;; ; IR (2): +;; ((closure? exp) `(closure ,(substitute env (closure->lam exp)) +;; ,(substitute env (closure->env exp)))) +;; ((env-make? exp) `(env-make ,(env-make->id exp) +;; ,@(azip (env-make->fields exp) +;; (map (substitute-with env) +;; (env-make->values exp))))) +;; ((env-get? exp) `(env-get ,(env-get->id exp) +;; ,(env-get->field exp) +;; ,(substitute env (env-get->env exp)))) +;; +;; ; Application: +;; ((app? exp) (map (substitute-with env) exp)) +;; (else (error "unhandled expression type in substitution: " exp)))) +;; + +;; Macro expansion +(define (macro? exp) (assoc (car exp) *defined-macros*)) +(define (macro-expand exp) + (let ((macro (assoc (car exp) *defined-macros*))) + ;; assumes ER macro + (if macro + ((cdr macro) + exp + (lambda (sym) ;; TODO: not good enough, need to actually rename, and keep same results if + sym) ;; the same symbol is renamed more than once + (lambda (sym-a sym-b) ;; TODO: the compare function from exrename. + (eq? sym-a sym-b))) ;; this may need to be more sophisticated + exp))) ;; TODO: error instead?? + +; expand : exp -> exp +(define (expand exp) + (cond + ((const? exp) exp) + ((prim? exp) exp) + ((ref? exp) exp) + ((quote? exp) exp) + ((lambda? exp) `(lambda ,(lambda->formals exp) + ,@(map expand (lambda->exp exp)))) + ((define? exp) (if (define-lambda? exp) + (expand (define->lambda exp)) + `(define ,(expand (define->var exp)) + ,@(expand (define->exp exp))))) + ((set!? exp) `(set! ,(expand (set!->var exp)) + ,(expand (set!->exp exp)))) + ((if? exp) `(if ,(expand (if->condition exp)) + ,(expand (if->then exp)) + ,(if (if-else? exp) + (expand (if->else exp)) + ;; Insert default value for missing else clause + ;; FUTURE: append the empty (unprinted) value + ;; instead of #f + #f))) + ((app? exp) + (cond +;; TODO: could check for a define-syntax here and load into memory +;; if found. would then want to continue expanding. may need to +;; return some value such as #t or nil as a placeholder, since the +;; define-syntax form would not be carried forward in the compiled code +;; ((define-syntax? exp) ...) + ((macro? exp) + (expand ;; Could expand into another macro + (macro-expand exp))) + (else + (map expand exp)))) + (else + (error "unknown exp: " exp)))) + +; TODO: eventually, merge below functions with above *defined-macros* defs and +;; replace both with a lib of (define-syntax) constructs + +; let=>lambda : let-exp -> app-exp +(define (let=>lambda exp) + (if (let? exp) + (let ((vars (map car (let->bindings exp))) + (args (map cadr (let->bindings exp)))) + `((lambda (,@vars) ,@(let->exp exp)) ,@args)) + exp)) + +; letrec=>lets+sets : letrec-exp -> exp +(define (letrec=>lets+sets exp) + (if (letrec? exp) + (let* ((bindings (letrec->bindings exp)) + (namings (map (lambda (b) (list (car b) #f)) bindings)) + (names (letrec->bound-vars exp)) + (sets (map (lambda (binding) + (cons 'set! binding)) + bindings)) + (args (letrec->args exp))) + `(let ,namings + (begin ,@(append sets (letrec->exp exp))))))) +;; NOTE: chibi uses the following macro. turns vars into defines? +;;(define-syntax letrec +;; (er-macro-transformer +;; (lambda (expr rename compare) +;; ((lambda (defs) +;; `((,(rename 'lambda) () ,@defs ,@(cddr expr)))) +;; (map (lambda (x) (cons (rename 'define) x)) (cadr expr)))))) +;; + +; begin=>let : begin-exp -> let-exp +(define (begin=>let exp) + (define (singlet? l) + (and (list? l) + (= (length l) 1))) + + (define (dummy-bind exps) + (cond + ((singlet? exps) (car exps)) + + ; JAE - should be fine until CPS phase + ((pair? exps) + `((lambda () + ,@exps))))) + ;((pair? exps) `(let (($_ ,(car exps))) + ; ,(dummy-bind (cdr exps)))))) + (dummy-bind (begin->exps exp))) + + +;; Top-level analysis + +; Separate top-level defines (globals) from other expressions +; +; This function extracts out non-define statements, and adds them to +; a "main" after the defines. +; +(define (isolate-globals exp) + (let loop ((top-lvl exp) + (globals '()) + (exprs '())) + (cond + ((null? top-lvl) + (append + (reverse globals) + (expand + `((begin ,@(reverse exprs)))))) + (else + (cond + ((define? (car top-lvl)) + (cond + ;; Global is redefined, convert it to a (set!) at top-level + ((has-global? globals (define->var (car top-lvl))) + (loop (cdr top-lvl) + globals + (cons + `(set! ,(define->var (car top-lvl)) + ,@(define->exp (car top-lvl))) + exprs))) + ;; Form cannot be properly converted to CPS later on, so split it up + ;; into two parts - use the define to initialize it to false (CPS is fine), + ;; and place the expression into a top-level (set!), which can be + ;; handled by the existing CPS conversion. + ((and (list? (car (define->exp (car top-lvl)))) + (not (lambda? (car (define->exp (car top-lvl)))))) + (loop (cdr top-lvl) + (cons + `(define ,(define->var (car top-lvl)) #f) + globals) + (cons + `(set! ,(define->var (car top-lvl)) + ,@(define->exp (car top-lvl))) + exprs))) + ;; First time we've seen this define, add it and keep going + (else + (loop (cdr top-lvl) + (cons (car top-lvl) globals) + exprs)))) + (else + (loop (cdr top-lvl) + globals + (cons (car top-lvl) exprs)))))))) + +; Has global already been found? +; +; NOTE: +; Linear search may get expensive (n^2), but with a modest set of +; define statements hopefully it will be acceptable. If not, will need +; to use a faster data structure (EG: map or hashtable) +(define (has-global? exp var) + (call/cc + (lambda (return) + (for-each + (lambda (e) + (if (and (define? e) + (equal? (define->var e) var)) + (return #t))) + exp) + #f))) + +; Compute list of global variables based on expression in top-level form +; EG: (def, def, expr, ...) +(define (global-vars exp) + (let ((globals '())) + (for-each + (lambda (e) + (if (define? e) + (set! globals (cons (define->var e) globals)))) + exp) + globals)) + +;; Remove global variables that are not used by the rest of the program. +;; Many improvements can be made, including: +;; +;; TODO: remove unused locals +;; TODO: do not keep defines that call themselves recursively +(define (filter-unused-variables asts) + (define (do-filter code) + (let ((all-fv (apply ;; More efficient way to do this? + append ;; Could use delete-duplicates + (map + (lambda (ast) + (if (define? ast) + (free-vars (define->exp ast)) + (free-vars ast))) + code)))) + (filter + (lambda (ast) + (or (not (define? ast)) + (member (define->var ast) all-fv))) + code))) + ;; Keep filtering until no more vars are removed + (define (loop code) + (let ((new-code (do-filter code))) + (if (> (length code) (length new-code)) + (loop new-code) + new-code))) + (loop asts)) + +;; Syntactic analysis. + +; free-vars : exp -> sorted-set[var] +(define (free-vars ast . opts) + (define bound-only? + (and (not (null? opts)) + (car opts))) + + (define (search exp) + (cond + ; Core forms: + ((const? exp) '()) + ((prim? exp) '()) + ((quote? exp) '()) + ((ref? exp) (if bound-only? '() (list exp))) + ((lambda? exp) + (difference (reduce union (map search (lambda->exp exp)) '()) + (lambda-formals->list exp))) + ((if? exp) (union (search (if->condition exp)) + (union (search (if->then exp)) + (search (if->else exp))))) + ((define? exp) (union (list (define->var exp)) + (search (define->exp exp)))) + ((set!? exp) (union (list (set!->var exp)) + (search (set!->exp exp)))) + ; Application: + ((app? exp) (reduce union (map search exp) '())) + (else (error "unknown expression: " exp)))) + (search ast)) + + + + + +;; Mutable variable analysis and elimination. + +;; Mutables variables analysis and elimination happens +;; on a desugared Intermediate Language (1). + +;; Mutable variable analysis turns mutable variables +;; into heap-allocated cells: + +;; For any mutable variable mvar: + +;; (lambda (... mvar ...) body) +;; => +;; (lambda (... $v ...) +;; (let ((mvar (cell $v))) +;; body)) + +;; (set! mvar value) => (set-cell! mvar value) + +;; mvar => (cell-get mvar) + +; mutable-variables : list[symbol] +(define mutable-variables '()) + +(define (clear-mutables) + (set! mutable-variables '())) + +; mark-mutable : symbol -> void +(define (mark-mutable symbol) + (set! mutable-variables (cons symbol mutable-variables))) + +; is-mutable? : symbol -> boolean +(define (is-mutable? symbol) + (define (is-in? S) + (if (not (pair? S)) + #f + (if (eq? (car S) symbol) + #t + (is-in? (cdr S))))) + (is-in? mutable-variables)) + +; analyze-mutable-variables : exp -> void +(define (analyze-mutable-variables exp) + (cond + ; Core forms: + ((const? exp) (void)) + ((prim? exp) (void)) + ((ref? exp) (void)) + ((quote? exp) (void)) + ((lambda? exp) (begin + (map analyze-mutable-variables (lambda->exp exp)) + (void))) + ((set!? exp) (begin (mark-mutable (set!->var exp)) + (analyze-mutable-variables (set!->exp exp)))) + ((if? exp) (begin + (analyze-mutable-variables (if->condition exp)) + (analyze-mutable-variables (if->then exp)) + (analyze-mutable-variables (if->else exp)))) + + ; Sugar: + ((let? exp) (begin + (map analyze-mutable-variables (map cadr (let->bindings exp))) + (map analyze-mutable-variables (let->exp exp)) + (void))) + ((letrec? exp) (begin + (map analyze-mutable-variables (map cadr (letrec->bindings exp))) + (map analyze-mutable-variables (letrec->exp exp)) + (void))) + ((begin? exp) (begin + (map analyze-mutable-variables (begin->exps exp)) + (void))) + + ; Application: + ((app? exp) (begin + (map analyze-mutable-variables exp) + (void))) + (else (error "unknown expression type: " exp)))) + + +; wrap-mutables : exp -> exp +(define (wrap-mutables exp globals) + + (define (wrap-mutable-formals formals body-exp) + (if (not (pair? formals)) + body-exp + (if (is-mutable? (car formals)) + `((lambda (,(car formals)) + ,(wrap-mutable-formals (cdr formals) body-exp)) + (cell ,(car formals))) + (wrap-mutable-formals (cdr formals) body-exp)))) + + (cond + ; Core forms: + ((const? exp) exp) + ((ref? exp) (if (and (not (member exp globals)) + (is-mutable? exp)) + `(cell-get ,exp) + exp)) + ((prim? exp) exp) + ((quote? exp) exp) + ((lambda? exp) `(lambda ,(lambda->formals exp) + ,(wrap-mutable-formals (lambda-formals->list exp) + (wrap-mutables (car (lambda->exp exp)) globals)))) ;; Assume single expr in lambda body, since after CPS phase + ((set!? exp) `(,(if (member (set!->var exp) globals) + 'set-global! + 'set-cell!) + ,(set!->var exp) + ,(wrap-mutables (set!->exp exp) globals))) + ((if? exp) `(if ,(wrap-mutables (if->condition exp) globals) + ,(wrap-mutables (if->then exp) globals) + ,(wrap-mutables (if->else exp) globals))) + + ; Application: + ((app? exp) (map (lambda (e) (wrap-mutables e globals)) exp)) + (else (error "unknown expression type: " exp)))) + +;; Alpha conversion +;; (aka alpha renaming) +;; +;; This phase is intended to rename identifiers to preserve lexical scoping +;; +;; TODO: does not properly handle renaming builtin functions, would probably need to +;; pass that renaming information downstream +(define (alpha-convert ast globals return-unbound) + ;; Initialize top-level variables + (define (initialize-top-level-vars ast fv) + (if (> (length fv) 0) + ;; Free variables found, set initial values + `((lambda ,fv ,ast) + ,@(map (lambda (_) #f) fv)) + ast)) + + ;; Find any defined variables in the given code block + (define (find-defined-vars ast) + (filter + (lambda (expr) + (not (null? expr))) + (map + (lambda (expr) + (if (define? expr) + (define->var expr) + '())) + ast))) + + ;; Take a list of identifiers and generate a list of + ;; renamed pairs, EG: (var . renamed-var) + (define (make-a-lookup vars) + (map + (lambda (a) (cons a (gensym a))) + vars)) + + ;; Wrap any defined variables in a lambda, so they can be initialized + (define (initialize-defined-vars ast vars) + (if (> (length vars) 0) + `(((lambda ,vars ,@ast) + ,@(map (lambda (_) #f) vars))) + ast)) + + ;; Perform actual alpha conversion + (define (convert ast renamed) +;(write `(DEBUG convert ,ast)) +;(write (newline)) + (cond + ((const? ast) ast) + ((quote? ast) ast) + ((ref? ast) + (let ((renamed (assoc ast renamed))) + (cond + (renamed + (cdr renamed)) + (else ast)))) + ((define? ast) + ;; Only internal defines at this point, of form: (define ident value) + `(set! ,@(map (lambda (a) (convert a renamed)) (cdr ast)))) + ((set!? ast) + ;; Without define, we have no way of knowing if this was a + ;; define or a set prior to this phase. But no big deal, since + ;; the set will still work in either case, so no need to check + `(set! ,@(map (lambda (a) (convert a renamed)) (cdr ast)))) + ((if? ast) + ;; Add a failsafe here in case macro expansion added more + ;; incomplete if expressions. + ;; FUTURE: append the empty (unprinted) value instead of #f + (if (if-else? ast) + `(if ,@(map (lambda (a) (convert a renamed)) (cdr ast))) + (convert (append ast '(#f)) renamed))) + ((prim-call? ast) + (cons (car ast) (map + (lambda (a) (convert a renamed)) + (cdr ast)))) + ((lambda? ast) + (let* ((args (lambda-formals->list ast)) + (ltype (lambda-formals-type ast)) + (a-lookup (map (lambda (a) (cons a (gensym a))) args)) + (body (lambda->exp ast)) + (define-vars (find-defined-vars body)) + (defines-a-lookup (make-a-lookup define-vars)) + ) + `(lambda + ,(list->lambda-formals + (map (lambda (p) (cdr p)) a-lookup) + ltype) + ,@(initialize-defined-vars + (convert + body + (append a-lookup defines-a-lookup renamed)) + (map (lambda (p) (cdr p)) defines-a-lookup))))) + ((app? ast) + (map (lambda (a) (convert a renamed)) ast)) + (else + (error "unhandled expression: " ast)))) + + (let* ((fv (difference (free-vars ast) globals)) + ;; Only find set! and lambda vars + (bound-vars (union globals (free-vars ast #t))) + ;; vars never bound in prog, but could be built-in + (unbound-vars (difference fv bound-vars)) + ;; vars we know nothing about - error! + (unknown-vars (difference unbound-vars (built-in-syms))) + ) + (cond + ((> (length unknown-vars) 0) + (let ((unbound-to-return (list))) + (if (member 'eval unknown-vars) + (set! unbound-to-return (cons 'eval unbound-to-return))) + (if (or (member 'read unknown-vars) + (member 'read-all unknown-vars)) + (set! unbound-to-return (cons 'read unbound-to-return))) + (if (and (> (length unbound-to-return) 0) + (= (length unknown-vars) (length unbound-to-return))) + (return-unbound unbound-to-return) + ;; TODO: should not report above (eval read) as errors + (error "Unbound variable(s)" unknown-vars)))) + ((define? ast) + ;; Deconstruct define so underlying code can assume internal defines + (let ((body (car ;; Only one member by now + (define->exp ast)))) +;(write `(DEBUG body ,body)) + (cond + ((lambda? body) + (let* ((args (lambda-formals->list body)) + (ltype (lambda-formals-type body)) + (a-lookup (map (lambda (a) (cons a (gensym a))) args)) + (define-vars (find-defined-vars (lambda->exp body))) + (defines-a-lookup (make-a-lookup define-vars)) + ) + ;; Any internal defines need to be initialized within the lambda, + ;; so the lambda formals are preserved. So we need to deconstruct + ;; the defined lambda and then reconstruct it, with #f placeholders + ;; for any internal definitions. + ;; + ;; Also, initialize-top-level-vars cannot be used directly due to + ;; the required splicing. + `(define + ,(define->var ast) + (lambda + ,(list->lambda-formals + (map (lambda (p) (cdr p)) a-lookup) + ltype) + ,@(convert (let ((fv* (union + define-vars + (difference fv (built-in-syms)))) + (ast* (lambda->exp body))) + (if (> (length fv*) 0) + `(((lambda ,fv* ,@ast*) + ,@(map (lambda (_) #f) fv*))) + ast*)) + (append a-lookup defines-a-lookup)))))) + (else + `(define + ,(define->var ast) + ,@(convert (initialize-top-level-vars + (define->exp ast) + (difference fv (built-in-syms))) + (list))))))) + (else + (convert (initialize-top-level-vars + ast + (difference fv (built-in-syms))) + (list)))))) + +;; CPS conversion +;; +;; This is a port of code from the 90-minute Scheme->C Compiler by Marc Feeley +;; +;; Convert intermediate code to continuation-passing style, to allow for +;; first-class continuations and call/cc +;; + +(define (cps-convert ast) + + (define (cps ast cont-ast) + (cond + ((const? ast) + (list cont-ast ast)) + + ((ref? ast) + (list cont-ast ast)) + + ((quote? ast) + (list cont-ast ast)) + + ((set!? ast) + (cps-list (cddr ast) ;; expr passed to set + (lambda (val) + (list cont-ast + `(set! ,(cadr ast) ,@val))))) ;; cadr => variable + + ((if? ast) + (let ((xform + (lambda (cont-ast) + (cps-list (list (cadr ast)) + (lambda (test) + (list 'if + (car test) + (cps (caddr ast) + cont-ast) + (cps (cadddr ast) + cont-ast))))))) + (if (ref? cont-ast) ; prevent combinatorial explosion + (xform cont-ast) + (let ((k (gensym 'k))) + (list (list 'lambda + (list k) + (xform k)) + cont-ast))))) + + ((prim-call? ast) + (cps-list (cdr ast) ; args to primitive function + (lambda (args) + (list cont-ast + `(,(car ast) ; op + ,@args))))) + + ((lambda? ast) + (let ((k (gensym 'k)) + (ltype (lambda-formals-type ast))) + (list cont-ast + `(lambda + ,(list->lambda-formals + (cons k (cadr ast)) ; lam params + (if (equal? ltype 'args:varargs) + 'args:fixed-with-varargs ;; OK? promote due to k + ltype)) + ,(cps-seq (cddr ast) k))))) + +; +; TODO: begin is expanded already by desugar code... better to do it here? +; ((seq? ast) +; (cps-seq (ast-subx ast) cont-ast)) + + ((app? ast) + (let ((fn (app->fun ast))) + (if (lambda? fn) + (cps-list (app->args ast) + (lambda (vals) + (cons (list + 'lambda + (lambda->formals fn) + (cps-seq (cddr fn) ;(ast-subx fn) + cont-ast)) + vals))) + (cps-list ast ;(ast-subx ast) + (lambda (args) + (cons (car args) + (cons cont-ast + (cdr args)))))))) + + (else + (error "unknown ast" ast)))) + + (define (cps-list asts inner) + (define (body x) + (cps-list (cdr asts) + (lambda (new-asts) + (inner (cons x new-asts))))) + + (cond ((null? asts) + (inner '())) + ((or (const? (car asts)) + (ref? (car asts))) + (body (car asts))) + (else + (let ((r (gensym 'r))) ;(new-var 'r))) + (cps (car asts) + `(lambda (,r) ,(body r))))))) + + (define (cps-seq asts cont-ast) + (cond ((null? asts) + (list cont-ast #f)) + ((null? (cdr asts)) + (cps (car asts) cont-ast)) + (else + (let ((r (gensym 'r))) + (cps (car asts) + `(lambda + (,r) + ,(cps-seq (cdr asts) cont-ast))))))) + + ;; Remove dummy symbol inserted into define forms converted to CPS + (define (remove-unused ast) + (list (car ast) (cadr ast) (cadddr ast))) + + (let* ((global-def? (define? ast)) ;; No internal defines by this phase + (ast-cps + (if global-def? + (remove-unused + `(define ,(define->var ast) + ,@(let ((k (gensym 'k)) + (r (gensym 'r))) + (cps (car (define->exp ast)) 'unused)))) + (cps ast + (let ((r (gensym 'r))) + `(lambda (,r) (%halt ,r))))))) +;; TODO: this is very broken if call/cc is used by a global function!!! +;; TODO: if needed, should call/cc be added as a global? +;; may need a separate scanning phase to detect call/cc and add the def + (if (member 'call/cc (free-vars ast)) + ; add this definition for call/cc if call/cc is needed + (list + (list + 'lambda + (list 'call/cc) + ast-cps) + '(lambda (k f) + (f k (lambda (_ result) (k result))))) + ast-cps) + )) + + +;; Closure-conversion. +;; +;; Closure conversion eliminates all of the free variables from every +;; lambda term. +;; +;; The code below is based on a fusion of a port of the 90-min-scc code by +;; Marc Feeley and the closure conversion code in Matt Might's scheme->c +;; compiler. + +(define (pos-in-list x lst) + (let loop ((lst lst) (i 0)) + (cond ((not (pair? lst)) #f) + ((eq? (car lst) x) i) + (else + (loop (cdr lst) (+ i 1)))))) + +(define (closure-convert exp globals) + (define (convert exp self-var free-var-lst) + (define (cc exp) + (cond + ((const? exp) exp) + ((quote? exp) exp) + ((ref? exp) + (let ((i (pos-in-list exp free-var-lst))) + (if i + `(%closure-ref + ,self-var + ,(+ i 1)) + exp))) + ((or + (tagged-list? '%closure-ref exp) + (tagged-list? '%closure exp) + (prim-call? exp)) + `(,(car exp) + ,@(map cc (cdr exp)))) ;; TODO: need to splice? + ((set!? exp) `(set! ,(set!->var exp) + ,(cc (set!->exp exp)))) + ((lambda? exp) + (let* ((new-self-var (gensym 'self)) + (body (lambda->exp exp)) + (new-free-vars + (difference + (difference (free-vars body) (lambda-formals->list exp)) + globals))) + `(%closure + (lambda + ,(list->lambda-formals + (cons new-self-var (lambda-formals->list exp)) + (lambda-formals-type exp)) + ,(convert (car body) new-self-var new-free-vars)) ;; TODO: should this be a map??? was a list in 90-min-scc. + ,@(map (lambda (v) ;; TODO: splice here? + (cc v)) + new-free-vars)))) + ((if? exp) `(if ,@(map cc (cdr exp)))) + ((cell? exp) `(cell ,(cc (cell->value exp)))) + ((cell-get? exp) `(cell-get ,(cc (cell-get->cell exp)))) + ((set-cell!? exp) `(set-cell! ,(cc (set-cell!->cell exp)) + ,(cc (set-cell!->value exp)))) + ((app? exp) + (let ((fn (car exp)) + (args (map cc (cdr exp)))) + (if (lambda? fn) + (let* ((body (lambda->exp fn)) + (new-free-vars + (difference + (difference (free-vars body) (lambda-formals->list fn)) + globals)) + (new-free-vars? (> (length new-free-vars) 0))) + (if new-free-vars? + ; Free vars, create a closure for them + (let* ((new-self-var (gensym 'self))) + `((%closure + (lambda + ,(list->lambda-formals + (cons new-self-var (lambda-formals->list fn)) + (lambda-formals-type fn)) + ,(convert (car body) new-self-var new-free-vars)) + ,@(map (lambda (v) (cc v)) + new-free-vars)) + ,@args)) + ; No free vars, just create simple lambda + `((lambda ,(lambda->formals fn) + ,@(map cc body)) + ,@args))) + (let ((f (cc fn))) + `((%closure-ref ,f 0) + ,f + ,@args))))) + (else + (error "unhandled exp: " exp)))) + (cc exp)) + + `(lambda () + ,(convert exp #f '()))) + +; Suitable definitions for the cell functions: +;(define (cell value) (lambda (get? new-value) +; (if get? value (set! value new-value)))) +;(define (set-cell! c v) (c #f v)) +;(define (cell-get c) (c #t #t)) +