;; ast.scm -- ast utilities ;; Copyright (c) 2010-2012 Alex Shinn. All rights reserved. ;; BSD-style license: http://synthcode.com/license.txt ;;> Abstract Syntax Tree. Interface to the types used by ;;> the compiler, and other core types less commonly ;;> needed in user code, plus related utilities. ;;> \section{Analysis and Expansion} ;;> \procedure{(analyze x [env])} ;;> Expands and analyzes the expression \var{x} and returns the ;;> resulting AST. ;;> \procedure{(optimize ast)} ;;> Runs an optimization pass on \var{ast} and returns the ;;> resulting simplified expression. (define (ast-renames ast) (define i 0) (define renames '()) (define (rename-symbol id) (set! i (+ i 1)) (string->symbol (string-append (symbol->string (identifier->symbol id)) "." (number->string i)))) (define (rename-lambda lam) (or (assq lam renames) (let ((res (list lam))) (set! renames (cons res renames)) res))) (define (rename! id lam) (let ((cell (rename-lambda lam))) (set-cdr! cell (cons (cons id (rename-symbol id)) (cdr cell))))) (define (check-ref id lam env) (let ((sym (identifier->symbol id))) (let lp1 ((ls env)) (cond ((pair? ls) (let lp2 ((ls2 (car ls)) (found? #f)) (cond ((null? ls2) (if (not found?) (lp1 (cdr ls)))) ((and (eq? id (caar ls2)) (eq? lam (cdar ls2))) (lp2 (cdr ls2) #t)) ((eq? sym (identifier->symbol (caar ls2))) (rename! (caar ls2) (cdar ls2)) (lp2 (cdr ls2) found?)) (else (lp2 (cdr ls2) found?))))))))) (define (extend-env lam env) (cons (map (lambda (x) (cons x lam)) (flatten-dot (lambda-params lam))) env)) (let lp ((x ast) (env '())) (cond ((lambda? x) (lp (lambda-body x) (extend-env x env))) ((ref? x) (check-ref (ref-name x) (cdr (ref-cell x)) env)) ((cnd? x) (lp (cnd-test x) env) (lp (cnd-pass x) env) (lp (cnd-fail x) env)) ((set? x) (lp (set-var x) env) (lp (set-value x) env)) ((seq? x) (for-each (lambda (x) (lp x env)) (seq-ls x))) ((pair? x) (for-each (lambda (x) (lp x env)) x)))) renames) (define (flatten-dot x) (cond ((pair? x) (cons (car x) (flatten-dot (cdr x)))) ((null? x) x) (else (list x)))) (define (get-rename id lam renames) (let ((ls (assq lam renames))) (if (not ls) (identifier->symbol id) (cond ((assq id (cdr ls)) => cdr) (else (identifier->symbol id)))))) (define (map* f ls) (cond ((pair? ls) (cons (f (car ls)) (map* f (cdr ls)))) ((null? ls) '()) (else (f ls)))) ;;> Performs a full syntax expansion of the form \var{x} and ;;> returns the resulting s-expression. (define (macroexpand x) (ast->sexp (analyze x))) ;;> Convert \var{ast} to a s-expression, renaming variables if ;;> necessary. (define (ast->sexp ast) (let ((renames (ast-renames ast))) (let a2s ((x ast)) (cond ((lambda? x) `(lambda ,(map* (lambda (id) (get-rename id x renames)) (lambda-params x)) ,@(map (lambda (d) `(define ,(identifier->symbol (caar d)) #f)) (lambda-defs x)) ,@(if (seq? (lambda-body x)) (map a2s (seq-ls (lambda-body x))) (list (a2s (lambda-body x)))))) ((cnd? x) `(if ,(a2s (cnd-test x)) ,(a2s (cnd-pass x)) ,(a2s (cnd-fail x)))) ((set? x) `(set! ,(a2s (set-var x)) ,(a2s (set-value x)))) ((ref? x) (get-rename (ref-name x) (cdr (ref-cell x)) renames)) ((seq? x) `(begin ,@(map a2s (seq-ls x)))) ((lit? x) (let ((v (lit-value x))) (if (or (pair? v) (null? v) (symbol? v)) `',v v))) ((pair? x) (cons (a2s (car x)) (a2s (cdr x)))) ((opcode? x) (cond ((opcode-name x) => string->symbol) (else x))) (else x))))) ;;> \section{Types} ;;> All objects have an associated type, and types may have parent ;;> types. When using ;;> \hyperlink["http://srfi.schemers.org/srfi-9/srfi-9/html"]{SRFI-9} ;;> \scheme{define-record-type}, the name is bound to a first class ;;> type object. ;;> The following core types are also available by name, and may be ;;> used in the \scheme{match} \scheme{($ ...)} syntax. ;;> \itemlist[ ;;> \item{\scheme{Object} - the parent of all types} ;;> \item{\scheme{Number} - abstract numeric type} ;;> \item{\scheme{Bignum} - arbitrary precision exact integers} ;;> \item{\scheme{Flonum} - inexact real numbers} ;;> \item{\scheme{Integer} - abstract integer type} ;;> \item{\scheme{Symbol} - symbols} ;;> \item{\scheme{Char} - character} ;;> \item{\scheme{Boolean} - \scheme{#t} or \scheme{#f}} ;;> \item{\scheme{String} - strings of characters} ;;> \item{\scheme{Byte-Vector} - uniform vector of octets} ;;> \item{\scheme{Pair} - a \var{car} and \var{cdr}, the basis for lists} ;;> \item{\scheme{Vector} - vectors} ;;> \item{\scheme{Opcode} - a primitive opcode or C function} ;;> \item{\scheme{Procedure} - a closure} ;;> \item{\scheme{Bytecode} - the compiled code for a closure} ;;> \item{\scheme{Env} - an environment structure} ;;> \item{\scheme{Macro} - a macro object, usually not first-class} ;;> \item{\scheme{Lam} - a lambda AST type} ;;> \item{\scheme{Cnd} - an conditional AST type (i.e. \scheme{if})} ;;> \item{\scheme{Ref} - a reference AST type} ;;> \item{\scheme{Set} - a mutation AST type (i.e. \scheme{set!})} ;;> \item{\scheme{Seq} - a sequence AST type} ;;> \item{\scheme{Lit} - a literal AST type} ;;> \item{\scheme{Sc} - a syntactic closure} ;;> \item{\scheme{Context} - a context object (including threads)} ;;> \item{\scheme{Exception} - an exception object} ;;> ] ;;> The following extended type predicates may also be used to test ;;> individual objects for their type: ;;> \itemlist[ ;;> \item{\scheme{environment?}} ;;> \item{\scheme{bytecode?}} ;;> \item{\scheme{macro?}} ;;> \item{\scheme{syntactic-closure?}} ;;> \item{\scheme{lambda?}} ;;> \item{\scheme{cnd?}} ;;> \item{\scheme{ref?}} ;;> \item{\scheme{set?}} ;;> \item{\scheme{seq?}} ;;> \item{\scheme{lit?}} ;;> \item{\scheme{opcode?}} ;;> \item{\scheme{type?}} ;;> \item{\scheme{context?}} ;;> \item{\scheme{exception?}} ;;> ] ;;> \procedure{(type-of x)} ;;> Returns the type of any object \var{x}. ;;> \procedure{(type-name type)} ;;> Returns the name of type \var{type}. ;;> \procedure{(type-parent type)} ;;> Returns the immediate parent of type \var{type}, ;;> or \scheme{#f} for a type with no parent. (define (type-parent type) (let ((v (type-cpl type))) (and (vector? v) (> (vector-length v) 1) (vector-ref v (- (vector-length v) 2))))) ;;> \procedure{(type-cpl type)} ;;> Returns the class precedence list of type \var{type} as a ;;> vector, or \scheme{#f} for a type with no parent. ;;> \procedure{(type-slots type)} ;;> Returns the slot list of type \var{type}. ;;> \section{Accessors} ;;> This section describes additional accessors on AST and other core ;;> types. ;;> \subsection{Procedures} ;;> \itemlist[ ;;> \item{\scheme{(procedure-code f)} - the compiled bytecode object} ;;> \item{\scheme{(procedure-vars f)} - the variables closed over by \var{f}} ;;> \item{\scheme{(procedure-name f)} - the name of \var{f} if known, else \scheme{#f}} ;;> ] (define (procedure-name x) (bytecode-name (procedure-code x))) (define (procedure-name-set! x name) (bytecode-name-set! (procedure-code x) name)) ;;> \subsection{Macros} ;;> \itemlist[ ;;> \item{\scheme{(macro-procedure f)} - the macro procedure} ;;> \item{\scheme{(macro-env f)} - the environment the macro was defined in} ;;> \item{\scheme{(macro-source f)} - the source location the macro was defined in} ;;> \item{\scheme{(macro-aux f)} - custom auxiliary data stored with the macro} ;;> \item{\scheme{(macro-aux-set! f x)}} ;;> ] ;;> \subsection{Bytecode Objects} ;;> \itemlist[ ;;> \item{\scheme{(bytecode-name bc)} - the macro procedure} ;;> \item{\scheme{(bytecode-literals bc)} - literals the bytecode references} ;;> \item{\scheme{(bytecode-source bc)} - the source location the procedure was defined in} ;;> ] ;;> \subsection{Syntactic Closures} ;;> \itemlist[ ;;> \item{\scheme{(syntactic-closure-env sc)}} ;;> \item{\scheme{(syntactic-closure-vars sc)}} ;;> \item{\scheme{(syntactic-closure-expr sc)}} ;;> ] ;;> Return the environment, free variables, and expression ;;> associated with \var{sc} respectively. ;;> \subsection{Exceptions} ;;> \itemlist[ ;;> \item{\scheme{(exception-kind exn)}} ;;> \item{\scheme{(exception-message exn)}} ;;> \item{\scheme{(exception-irritants exn)}} ;;> ] ;;> Return the kind, message, and irritants ;;> associated with \var{exn} respectively. ;;> \subsection{Lambdas} ;;> \itemlist[ ;;> \item{\scheme{(lambda-name lam)} - the name of the lambda, if known} ;;> \item{\scheme{(lambda-name-set! lam x)}} ;;> \item{\scheme{(lambda-params lam)} - the lambda parameter list} ;;> \item{\scheme{(lambda-params-set! lam x)}} ;;> \item{\scheme{(lambda-body lam)} - the body of the lambda} ;;> \item{\scheme{(lambda-body-set! lam x)}} ;;> \item{\scheme{(lambda-defs lam)} - internal definitions of the lambda} ;;> \item{\scheme{(lambda-defs-set! lam x)}} ;;> \item{\scheme{(lambda-locals lam)} - local variables as a list of identifiers} ;;> \item{\scheme{(lambda-locals-set! lam x)}} ;;> \item{\scheme{(lambda-flags lam)} - various flags describing the lambda} ;;> \item{\scheme{(lambda-flags-set! lam x)}} ;;> \item{\scheme{(lambda-free-vars lam)} - free variables the lambda will need to close over} ;;> \item{\scheme{(lambda-free-vars-set! lam x)}} ;;> \item{\scheme{(lambda-set-vars lam)} - variables the lambda mutates} ;;> \item{\scheme{(lambda-set-vars-set! lam x)}} ;;> \item{\scheme{(lambda-return-type lam)} - the return type of the lambda} ;;> \item{\scheme{(lambda-return-type-set! lam x)}} ;;> \item{\scheme{(lambda-param-types lam)} - the types of the input parameters} ;;> \item{\scheme{(lambda-param-types-set! lam x)}} ;;> \item{\scheme{(lambda-source lam)} - the source code of the lambda} ;;> \item{\scheme{(lambda-source-set! lam x)}} ;;> ] ;;> \subsection{Conditionals} ;;> \itemlist[ ;;> \item{\scheme{(cnd-test cnd)} - the test for the conditional} ;;> \item{\scheme{(cnd-test-set! cnd x)}} ;;> \item{\scheme{(cnd-pass cnd)} - the success branch} ;;> \item{\scheme{(cnd-pass-set! cnd x)}} ;;> \item{\scheme{(cnd-fail cnd)} - the failure branch} ;;> \item{\scheme{(cnd-fail-set! cnd x)}} ;;> ] ;;> \subsection{Sequences} ;;> \itemlist[ ;;> \item{\scheme{(seq-ls seq)} - the list of sequence expressions} ;;> \item{\scheme{(seq-ls-set! seq x)}} ;;> ] ;;> \subsection{References} ;;> \itemlist[ ;;> \item{\scheme{(ref-name ref)} - the name of the referenced variable} ;;> \item{\scheme{(ref-name-set! ref x)}} ;;> \item{\scheme{(ref-cell ref)} - the environment cell the reference resolves to} ;;> \item{\scheme{(ref-cell-set! ref x)}} ;;> ] ;;> \subsection{Mutations} ;;> \itemlist[ ;;> \item{\scheme{(set-var set)} - a reference to the mutated variable} ;;> \item{\scheme{(set-var-set! set x)}} ;;> \item{\scheme{(set-value set)} - the value to set the variable to} ;;> \item{\scheme{(set-value-set! set x)}} ;;> ] ;;> \subsection{Literals} ;;> \itemlist[ ;;> \item{\scheme{(lit-value lit)} - the literal value} ;;> \item{\scheme{(lit-value-set! lit x)}} ;;> ] ;;> \subsection{Pairs} ;;> \itemlist[ ;;> \item{\scheme{(pair-source x)}} ;;> \item{\scheme{(pair-source-set! x source)}} ;;> ] ;;> Set or return the source code info associated with a pair x. ;;> Source info is represented as another pair whose \var{car} is ;;> the source file name and whose \var{cdr} is the line number. ;;> \section{Miscellaneous Utilities} ;;> \procedure{(gc)} ;;> Force a garbage collection. ;;> \procedure{(object-size x)} ;;> Returns the heap space directly used by \var{x}, not ;;> counting any elements of \var{x}. ;;> \procedure{(integer->immediate n)} ;;> Returns the interpretation of the integer \var{n} as ;;> an immediate object, useful for debugging. ;;> \procedure{(string-contains str pat [start])} ;;> Returns the first string cursor of \var{pat} in \var{str}, ;;> of \scheme{#f} if it's not found. (cond-expand (safe-string-cursors (define orig-string-contains string-contains) (set! string-contains (lambda (str pat . o) (let ((res (if (pair? o) (orig-string-contains str pat (string-cursor-where (car o))) (orig-string-contains str pat)))) (and res (make-string-cursor str res (string-size str))))))) (else )) ;;> \procedure{(string-cursor-copy! dst src from start end)} ;;> Copies the characters from \var{src}[\var{start}..\var{end}] ;;> to \var{dst} starting at \var{from}. ;;> \procedure{(safe-setenv name value)} ;;> Equivalent to \scheme{setenv} but does nothing and returns ;;> \scheme{#f} if \var{value} is a function definition. Used to ;;> circumvent the vulnerability of the shellshock bug. (define (safe-setenv name value) (define (function-def? str) (and (> (string-size value) 5) (equal? "() {" (substring value 0 4)))) (and (not (function-def? value)) (setenv name value))) ;;> \procedure{(atomically expr)} ;;> Run \var{expr} atomically, disabling yields. Ideally should only be ;;> used for brief, deterministic expressions. If used incorrectly (e.g. ;;> running an infinite loop) can render the system unusable. ;;> Never expose to a sandbox. (cond-expand (threads (define-syntax atomically (syntax-rules () ((atomically . body) (let* ((atomic? (%set-atomic! #t)) (res (begin . body))) (%set-atomic! atomic?) res))))) (else (define-syntax atomically (syntax-rules () ((atomically . body) (begin . body))))))