diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..d352a6d5 --- /dev/null +++ b/Makefile @@ -0,0 +1,26 @@ + +.PHONY: all doc dist clean cleaner test install uninstall + +all: chibi-scheme + +GC_OBJ=./gc/gc.a + +$(GC_OBJ): ./gc/alloc.c # same file that chibi-scheme lists as a prerequisite + cd gc && $(MAKE) test + +sexp.o: sexp.c sexp.h + gcc -c -g -Os -o $@ $< + +eval.o: eval.c eval.h sexp.h + gcc -c -g -Os -o $@ $< + +chibi-scheme: sexp.o eval.o $(GC_OBJ) + gcc -g -Os -o $@ $^ + +clean: + rm -f *.o + +cleaner: clean + rm -f chibi-scheme + rm -rf *.dSYM + diff --git a/config.h b/config.h new file mode 100644 index 00000000..bb962e65 --- /dev/null +++ b/config.h @@ -0,0 +1,8 @@ +/* config.h -- general configuration */ +/* Copyright (c) 2009 Alex Shinn. All rights reserved. */ +/* BSD-style license: http://synthcode.com/license.txt */ + +#define USE_BOEHM 1 +#define USE_HUFF_SYMS 1 +#define USE_DEBUG 1 + diff --git a/debug.c b/debug.c new file mode 100644 index 00000000..cded6223 --- /dev/null +++ b/debug.c @@ -0,0 +1,82 @@ +/* debug.c -- optional debugging utilities */ +/* Copyright (c) 2009 Alex Shinn. All rights reserved.
*/ +/* BSD-style license: http://synthcode.com/license.txt */ + +static const char* reverse_opcode_names[] = /* printable names, indexed by opcode byte in disasm */ + {"NOOP", "STACK_REF", "STACK_SET", "GLOBAL_REF", "GLOBAL_SET", "CLOSURE_REF", + "CLOSURE_SET", "VECTOR_REF", "VECTOR_SET", "MAKE_PROCEDURE", "MAKE_VECTOR", + "PUSH", "DUP", "DROP", "SWAP", "CAR", "CDR", "SET_CAR", "SET_CDR", "CONS", + "ADD", "SUB", "MUL", "DIV", "MOD", "NEG", "INV", "LT", "CALL", + "JUMP_UNLESS", "JUMP", "RET", "DONE" + }; + +void disasm (bytecode bc) { /* write a one-instruction-per-line listing of bc->data to stderr */ + unsigned char *ip=bc->data, opcode; + loop: + opcode = *ip++; + if (opcode*sizeof(char*) < sizeof(reverse_opcode_names)) { /* opcode falls inside the name table */ + fprintf(stderr, " %s ", reverse_opcode_names[opcode]); + } else { + fprintf(stderr, " %d ", opcode); + } + switch (opcode) { + case OP_STACK_REF: + case OP_STACK_SET: + case OP_CLOSURE_REF: + case OP_CLOSURE_SET: + fprintf(stderr, "%ld", (long) ((sexp*)ip)[0]); /* operand word shown as an integer; %ld matches the long cast */ + ip += sizeof(sexp); + break; + case OP_GLOBAL_REF: + case OP_GLOBAL_SET: + case OP_CALL: + case OP_PUSH: + write_sexp(stderr, ((sexp*)ip)[0]); /* operand word is printed as a sexp */ + ip += sizeof(sexp); + break; + case OP_JUMP: + case OP_JUMP_UNLESS: + fprintf(stderr, "%d", ip[0]); /* one-byte jump operand */ + ip++; + break; + } + fprintf(stderr, "\n"); + if ((!
(opcode == OP_RET) && ! (opcode == OP_DONE)) /* stop after either terminator, RET or DONE */ + && (ip - bc->data < bc->len)) + goto loop; +} + +void print_bytecode (bytecode bc) { /* hex dump of bc->data to stderr, 16 bytes per row */ + int i; + fprintf(stderr, "bytecode @ %p, data @ %p, length = %d\n", + bc, bc->data, bc->len); + for (i=0; i+16 < bc->len; i+=8) { + fprintf(stderr, "%02x: %02x %02x %02x %02x %02x %02x %02x %02x ", i, + bc->data[i], bc->data[i+1], bc->data[i+2], bc->data[i+3], + bc->data[i+4], bc->data[i+5], bc->data[i+6], bc->data[i+7]); + i += 8; /* second half of the row; the loop step supplies the other 8 */ + fprintf(stderr, "%02x %02x %02x %02x %02x %02x %02x %02x\n", + bc->data[i], bc->data[i+1], bc->data[i+2], bc->data[i+3], + bc->data[i+4], bc->data[i+5], bc->data[i+6], bc->data[i+7]); + } + if (i != bc->len) { /* remaining bytes that did not fill a whole row */ + fprintf(stderr, "%02x:", i); + for ( ; i < bc->len; i++) { + if ((i % 8) == 0 && (i % 16) != 0) + fprintf(stderr, " "); + fprintf(stderr, " %02x", bc->data[i]); + } + fprintf(stderr, "\n"); + } +} + +void print_stack (sexp *stack, int top) { + int i; + for (i=0; ibindings; SEXP_PAIRP(ls); ls=SEXP_CDR(ls)) { + if (SEXP_CAAR(ls) == key) { + res = SEXP_CAR(ls); + break; + } + } + e = e->parent; + } while (e && !
res); + + return res; +} + +int env_global_p (env e, sexp id) { /* 1 unless id is bound in a frame that has a parent */ + while (e->parent) { + if (assq(id, e->bindings) != SEXP_FALSE) + return 0; + else + e = e->parent; + } + return 1; +} + +void env_define(env e, sexp key, sexp value) { /* overwrite the cell env_cell finds, else add (key . value) to e */ + sexp cell = env_cell(e, key); + if (cell) { + SEXP_CDR(cell) = value; + } else { + e->bindings = cons(cons(key, value), e->bindings); + } +} + +env extend_env_closure (env e, sexp fv) { /* new child frame of e binding each element of fv to its list position */ + int i; + env e2 = (env) malloc(sizeof(struct env)); + e2->tag = SEXP_ENV; + e2->parent = e; + e2->bindings = SEXP_NULL; + for (i=0; SEXP_PAIRP(fv); fv = SEXP_CDR(fv), i++) { + e2->bindings = cons(cons(SEXP_CAR(fv), make_integer(i)), e2->bindings); + } + return e2; +} + +env make_standard_env() { /* parentless frame holding every core_forms and opcodes entry under its interned name */ + int i; + env e = (env) malloc(sizeof(struct env)); + e->tag = SEXP_ENV; + e->parent = NULL; + e->bindings = SEXP_NULL; + for (i=0; i<(sizeof(core_forms)/sizeof(struct core_form)); i++) { + env_define(e, intern(core_forms[i].name), (sexp)(&core_forms[i])); + } + for (i=0; i<(sizeof(opcodes)/sizeof(struct opcode)); i++) { + env_define(e, intern(opcodes[i].name), (sexp)(&opcodes[i])); + } + return e; +} + +/************************* bytecode utilities ***************************/ + +void shrink_bcode(bytecode *bc, unsigned int i) { /* replace *bc with a copy holding exactly i data bytes */ + bytecode tmp; + if ((*bc)->len != i) { + fprintf(stderr, "shrinking to %d\n", i); + tmp = (bytecode) malloc(sizeof(struct bytecode) + i); + tmp->tag = SEXP_BYTECODE; + tmp->len = i; + memcpy(tmp->data, (*bc)->data, i); + SEXP_FREE(*bc); + *bc = tmp; + } +} + +void emit(bytecode *bc, unsigned int *i, char c) { /* append byte c at index *i, doubling the buffer when it is full */ + bytecode tmp; + if ((*bc)->len < (*i)+1) { + fprintf(stderr, "expanding (%d < %d)\n", (*bc)->len, (*i)+1); + tmp = (bytecode) malloc(sizeof(struct bytecode) + (*bc)->len*2); /* header size must be the full struct, as in shrink_bcode */ + tmp->tag = SEXP_BYTECODE; tmp->len = (*bc)->len*2; + memcpy(tmp->data, (*bc)->data, (*bc)->len); + SEXP_FREE(*bc); + *bc = tmp; + } + (*bc)->data[(*i)++] = c; +} + +void emit_word(bytecode *bc, unsigned int *i, unsigned long val) { /* append val as one unsigned long at index *i */ + bytecode tmp; + if ((*bc)->len < (*i)+sizeof(unsigned long)) { /* need room for the whole word that is stored */ +
tmp = (bytecode) malloc(sizeof(struct bytecode) + (*bc)->len*2); /* header size must be the full struct, not just the len field */ + tmp->tag = SEXP_BYTECODE; tmp->len = (*bc)->len*2; + memcpy(tmp->data, (*bc)->data, (*bc)->len); + SEXP_FREE(*bc); + *bc = tmp; + } + *((unsigned long*)(&((*bc)->data[*i]))) = val; + *i += sizeof(unsigned long); +} + +sexp make_procedure(sexp bc, sexp vars) { /* procedure object: data1 = bc, data2 = vars; SEXP_ERROR if allocation fails */ + sexp proc = SEXP_NEW(); + if (! proc) return SEXP_ERROR; + proc->tag = SEXP_PROCEDURE; + proc->data1 = (void*) bc; + proc->data2 = (void*) vars; + return proc; +} + +/************************* the compiler ***************************/ + +void analyze(sexp obj, bytecode *bc, unsigned int *i, env e, /* emit code for expression obj into *bc at *i; *d is adjusted as pushes are emitted */ + sexp params, sexp fv, sexp sv, unsigned int *d) { + int tmp1, tmp2; + env e2 = e; + sexp o1, o2, cell; + + if (SEXP_PAIRP(obj)) { + /* fprintf(stderr, ":: pair\n"); */ + if (SEXP_SYMBOLP(SEXP_CAR(obj))) { + fprintf(stderr, ":: symbol application\n"); + o1 = env_cell(e, SEXP_CAR(obj)); + /* fprintf(stderr, ":: => %p\n", o1); */ + if (! o1) { /* the operator is a sexp, not a C string, so print it with write_sexp */ + fprintf(stderr, "unknown operator: "); write_sexp(stderr, SEXP_CAR(obj)); fprintf(stderr, "\n"); errx(1, "unknown operator"); } + o1 = SEXP_CDR(o1); + /* fprintf(stderr, ":: => %p\n", o1); */ + if (SEXP_COREP(o1)) { + /* core form */ + fprintf(stderr, ":: core form\n"); + switch (((core_form)o1)->code) { + case CORE_LAMBDA: + fprintf(stderr, ":: lambda\n"); + analyze_lambda(SEXP_FALSE, SEXP_CADR(obj), SEXP_CDDR(obj), + bc, i, e, params, fv, sv, d); + break; + case CORE_DEFINE: + fprintf(stderr, "compiling global set: %p\n", SEXP_CADR(obj)); + if ((((core_form)o1)->code == CORE_DEFINE) + && SEXP_PAIRP(SEXP_CADR(obj))) { + analyze_lambda(SEXP_CAR(SEXP_CADR(obj)), + SEXP_CDR(SEXP_CADR(obj)), + SEXP_CDDR(obj), + bc, i, e, params, fv, sv, d); + } else { + analyze(SEXP_CADDR(obj), bc, i, e, params, fv, sv, d); + } + emit(bc, i, OP_GLOBAL_SET); + emit_word(bc, i, (unsigned long) (SEXP_PAIRP(SEXP_CADR(obj)) + ?
SEXP_CAR(SEXP_CADR(obj)) + : SEXP_CADR(obj))); + emit(bc, i, OP_PUSH); + (*d)++; + emit_word(bc, i, (unsigned long) SEXP_UNDEF); + break; + case CORE_SET: + fprintf(stderr, "set!: "); write_sexp(stderr, SEXP_CADR(obj)); + fprintf(stderr, " sv: "); write_sexp(stderr, sv); + fprintf(stderr, "\n"); + analyze(SEXP_CADDR(obj), bc, i, e, params, fv, sv, d); + analyze_var_ref(SEXP_CADR(obj), bc, i, e, params, fv, SEXP_NULL, d); + emit(bc, i, OP_SET_CAR); + break; + case CORE_BEGIN: + for (o2 = SEXP_CDR(obj); SEXP_PAIRP(o2); o2 = SEXP_CDR(o2)) { + analyze(SEXP_CAR(o2), bc, i, e, params, fv, sv, d); + if (SEXP_PAIRP(SEXP_CDR(o2))) emit(bc, i, OP_DROP); + } + break; + case CORE_IF: + fprintf(stderr, "test clause: %d\n", *i); + analyze(SEXP_CADR(obj), bc, i, e, params, fv, sv, d); + emit(bc, i, OP_JUMP_UNLESS); /* jumps if test fails */ + tmp1 = *i; + emit(bc, i, 0); + fprintf(stderr, "pass clause: %d\n", *i); + analyze(SEXP_CADDR(obj), bc, i, e, params, fv, sv, d); + emit(bc, i, OP_JUMP); + tmp2 = *i; + emit(bc, i, 0); + ((signed char*) (*bc)->data)[tmp1] = (*i)-tmp1; /* patch: the vm adds the offset to the address of this operand byte */ + fprintf(stderr, "fail clause: %d\n", *i); + if (SEXP_PAIRP(SEXP_CDDDR(obj))) { + analyze(SEXP_CADDDR(obj), bc, i, e, params, fv, sv, d); + } else { + emit(bc, i, OP_PUSH); + (*d)++; + emit_word(bc, i, (unsigned long) SEXP_UNDEF); + } + ((signed char*) (*bc)->data)[tmp2] = (*i)-tmp2; /* patch: same operand-relative offset as above */ + break; + case CORE_QUOTE: + emit(bc, i, OP_PUSH); + (*d)++; + emit_word(bc, i, (unsigned long)SEXP_CADR(obj)); + break; + default: + errx(1, "unknown core form: %d", ((core_form)o1)->code); /* code is a char, so print it as a number */ + } + } else if (SEXP_OPCODEP(o1)) { + fprintf(stderr, ":: opcode\n"); + /* direct opcode */ + /* verify arity */ + switch (((opcode)o1)->op_class) { + case OPC_TYPE_PREDICATE: + case OPC_PREDICATE: + case OPC_ARITHMETIC: + case OPC_ARITHMETIC_INV: + case OPC_ARITHMETIC_CMP: + if (SEXP_NULLP(SEXP_CDR(obj))) { + errx(1, "unknown opcode class: %d", ((opcode)o1)->op_class); + } else if (SEXP_NULLP(SEXP_CDDR(obj))) { +
if (((opcode)o1)->op_class == OPC_ARITHMETIC_INV) { + analyze(SEXP_CADR(obj), bc, i, e, params, fv, sv, d); + emit(bc, i, ((opcode)o1)->op_inverse); + } else { + analyze(SEXP_CADR(obj), bc, i, e, params, fv, sv, d); + } + } else { + /* fprintf(stderr, ":: class: %d\n", ((opcode)o1)->op_class); */ + for (o2 = reverse(SEXP_CDR(obj)); SEXP_PAIRP(o2); o2 = SEXP_CDR(o2)) { + /* fprintf(stderr, ":: arg: %d\n", SEXP_CAR(o2)); */ + analyze(SEXP_CAR(o2), bc, i, e, params, fv, sv, d); + } + fprintf(stderr, ":: name: %d\n", ((opcode)o1)->op_name); + emit(bc, i, ((opcode)o1)->op_name); + (*d) -= length(SEXP_CDDR(obj)); + } + break; + default: + errx(1, "unknown opcode class: %d", ((opcode)o1)->op_class); + } + } else { + /* function call */ + analyze_app(obj, bc, i, e, params, fv, sv, d); + } + } else if (SEXP_PAIRP(SEXP_CAR(obj))) { + o2 = env_cell(e, SEXP_CAAR(obj)); +/* if (o2 */ +/* && SEXP_COREP(SEXP_CDR(o2)) */ +/* && (((core_form)SEXP_CDR(o2))->code == CORE_LAMBDA)) { */ +/* /\* let *\/ */ +/* } else { */ + /* computed application */ + analyze_app(obj, bc, i, e, params, fv, sv, d); +/* } */ + } else { + fprintf(stderr, "invalid operator: "); write_sexp(stderr, SEXP_CAR(obj)); fprintf(stderr, "\n"); errx(1, "invalid operator"); /* the operator is a sexp, not a C string */ + } + } else if (SEXP_SYMBOLP(obj)) { + analyze_var_ref(obj, bc, i, e, params, fv, sv, d); + } else { + fprintf(stderr, "push: %lu\n", (unsigned long)obj); /* any other object is pushed as a literal word */ + emit(bc, i, OP_PUSH); + emit_word(bc, i, (unsigned long)obj); + (*d)++; + } +} + +void analyze_var_ref (sexp obj, bytecode *bc, unsigned int *i, env e, /* emit a stack, closure or global reference for symbol obj */ + sexp params, sexp fv, sexp sv, unsigned int *d) { + int tmp; + fprintf(stderr, "symbol lookup, param length: %d sv: ", length(params)); + write_sexp(stderr, sv); + fprintf(stderr, "\n"); + if ((tmp = list_index(params, obj)) >= 0) { + fprintf(stderr, "compiling local ref: %p => %d (d = %d)\n", obj, tmp, *d); + emit(bc, i, OP_STACK_REF); + emit_word(bc, i, tmp + *d + 4); + (*d)++; + } else if ((tmp = list_index(fv, obj)) >= 0) { + fprintf(stderr, "compiling closure ref: %p => %d\n", obj, tmp); + emit(bc, i,
OP_CLOSURE_REF); + emit_word(bc, i, tmp); /* operand is the raw position of obj in fv */ + (*d)++; + } else { + fprintf(stderr, "compiling global ref: %p\n", obj); + emit(bc, i, OP_GLOBAL_REF); + emit_word(bc, i, (unsigned long) obj); /* operand is the symbol itself */ + (*d)++; + } + if (list_index(sv, obj) >= 0) { /* obj is listed in sv: follow the reference with OP_CAR */ + fprintf(stderr, "mutable variables, fetching CAR\n"); + emit(bc, i, OP_CAR); + } +} + +void analyze_app (sexp obj, bytecode *bc, unsigned int *i, /* emit code for a procedure call: arguments in reverse, operator, then OP_CALL */ + env e, sexp params, sexp fv, sexp sv, unsigned int *d) { + sexp o1; + unsigned long len = length(SEXP_CDR(obj)); + + /* push the arguments onto the stack */ + for (o1 = reverse(SEXP_CDR(obj)); SEXP_PAIRP(o1); o1 = SEXP_CDR(o1)) { + analyze(SEXP_CAR(o1), bc, i, e, params, fv, sv, d); + } + + /* push the operator onto the stack */ + analyze(SEXP_CAR(obj), bc, i, e, params, fv, sv, d); + + /* make the call */ + emit(bc, i, OP_CALL); + emit_word(bc, i, (unsigned long) make_integer(len)); /* operand is the boxed argument count */ +} + +sexp free_vars (env e, sexp formals, sexp obj, sexp fv) { /* returns fv extended with symbols in obj that are not global, not in formals and not already in fv */ + sexp o1; + if (SEXP_SYMBOLP(obj)) { + if (env_global_p(e, obj) + || (list_index(formals, obj) >= 0) + || (list_index(fv, obj) >= 0)) + return fv; + else + return cons(obj, fv); + } else if (SEXP_PAIRP(obj)) { + if (SEXP_SYMBOLP(SEXP_CAR(obj))) { + if ((o1 = env_cell(e, SEXP_CAR(obj))) + && SEXP_COREP(o1) /* NOTE(review): o1 is the cell from env_cell; set_vars tests SEXP_COREP(SEXP_CDR(tmp)) instead - confirm which is intended */ + && (((core_form)SEXP_CDR(o1))->code == CORE_LAMBDA)) { + return free_vars(e, SEXP_CADR(obj), SEXP_CADDR(obj), fv); + } + } + while (SEXP_PAIRP(obj)) { /* otherwise visit every element of the list */ + fv = free_vars(e, formals, SEXP_CAR(obj), fv); + obj = SEXP_CDR(obj); + } + return fv; + } else { + return fv; + } +} + +sexp set_vars (env e, sexp formals, sexp obj, sexp sv) { /* returns sv extended with the members of formals that obj assigns through the CORE_SET form */ + sexp tmp; + if (SEXP_NULLP(formals)) + return sv; + if (SEXP_PAIRP(obj)) { + if (SEXP_SYMBOLP(SEXP_CAR(obj))) { + if ((tmp = env_cell(e, SEXP_CAR(obj))) && SEXP_COREP(SEXP_CDR(tmp))) { + if (((core_form)SEXP_CDR(tmp))->code == CORE_LAMBDA) { + formals = lset_diff(formals, SEXP_CADR(obj)); /* drop names rebound by the inner parameter list */ + return set_vars(e, formals, SEXP_CADDR(obj), sv); + } else if (((core_form)SEXP_CDR(tmp))->code == CORE_SET) { + if
((list_index(formals, SEXP_CADR(obj)) >= 0) + && ! (list_index(sv, SEXP_CADR(obj)) >= 0)) { /* target is one of formals and not yet recorded in sv */ + fprintf(stderr, "found set! "); write_sexp(stderr, SEXP_CADR(obj)); + fprintf(stderr, "\n"); + sv = cons(SEXP_CADR(obj), sv); + return set_vars(e, formals, SEXP_CADDR(obj), sv); + } + } + } + } + while (SEXP_PAIRP(obj)) { /* otherwise visit every element of the list */ + sv = set_vars(e, formals, SEXP_CAR(obj), sv); + obj = SEXP_CDR(obj); + } + } + return sv; +} + +void analyze_lambda (sexp name, sexp formals, sexp body, /* compile body separately, then emit code that builds a vector of its free variables and makes the procedure */ + bytecode *bc, unsigned int *i, env e, + sexp params, sexp fv, sexp sv, unsigned int *d) { + sexp obj; + sexp fv2 = free_vars(e, formals, body, SEXP_NULL), ls; + env e2 = extend_env_closure(e, formals); + int k; + fprintf(stderr, "%d free-vars\n", length(fv2)); + write_sexp(stderr, fv2); + fprintf(stderr, "\n"); + obj = (sexp) compile(formals, body, e2, fv2, sv, 0); + emit(bc, i, OP_PUSH); + emit_word(bc, i, (unsigned long) SEXP_UNDEF); /* second argument of make_vector in the vm */ + emit(bc, i, OP_PUSH); + emit_word(bc, i, (unsigned long) make_integer(length(fv2))); /* boxed vector length */ + emit(bc, i, OP_MAKE_VECTOR); + (*d)++; + for (ls=fv2, k=0; SEXP_PAIRP(ls); ls=SEXP_CDR(ls), k++) { /* store each free variable's current value in slot k */ + analyze_var_ref(SEXP_CAR(ls), bc, i, e, params, fv, SEXP_NULL, d); + emit(bc, i, OP_PUSH); + emit_word(bc, i, (unsigned long) make_integer(k)); + emit(bc, i, OP_STACK_REF); + emit_word(bc, i, 3); /* re-push the vector, which sits below the value and the index */ + emit(bc, i, OP_VECTOR_SET); + emit(bc, i, OP_DROP); + (*d)--; + } + emit(bc, i, OP_PUSH); + emit_word(bc, i, (unsigned long) obj); /* the compiled body */ + emit(bc, i, OP_MAKE_PROCEDURE); +} + +bytecode compile(sexp params, sexp obj, env e, sexp fv, sexp sv, int done_p) { /* compile the expression list obj into a new bytecode object ending in OP_DONE when done_p is set, else OP_RET */ + unsigned int i = 0, j, d = 0; + bytecode bc = (bytecode) malloc(sizeof(struct bytecode)+INIT_BCODE_SIZE); + sexp sv2 = set_vars(e, params, obj, SEXP_NULL), ls; + fprintf(stderr, "set-vars: "); write_sexp(stderr, sv2); fprintf(stderr, "\n"); + bc->tag = SEXP_BYTECODE; + bc->len = INIT_BCODE_SIZE; + fprintf(stderr, "analyzing\n"); + for (ls=params; SEXP_PAIRP(ls); ls=SEXP_CDR(ls)) { + if ((j = list_index(sv2, SEXP_CAR(ls)) >= 0)) { /* NOTE(review): >= binds tighter than =, so j receives the 0/1 comparison result rather than the index - confirm intent */ +
fprintf(stderr, "consing mutable var\n"); + emit(&bc, &i, OP_PUSH); + emit_word(&bc, &i, (unsigned long) SEXP_NULL); + emit(&bc, &i, OP_STACK_REF); + emit_word(&bc, &i, j+3); + emit(&bc, &i, OP_CONS); + emit(&bc, &i, OP_STACK_SET); + emit_word(&bc, &i, j+4); + emit(&bc, &i, OP_DROP); + } + } + sv = append(sv2, sv); + for ( ; SEXP_PAIRP(obj); obj=SEXP_CDR(obj)) { + fprintf(stderr, "loop: "); write_sexp(stderr, obj); fprintf(stderr, "\n"); + analyze(SEXP_CAR(obj), &bc, &i, e, params, fv, sv, &d); + if (SEXP_PAIRP(SEXP_CDR(obj))) emit(&bc, &i, OP_DROP); + } + emit(&bc, &i, done_p ? OP_DONE : OP_RET); + /* fprintf(stderr, "shrinking\n"); */ + shrink_bcode(&bc, i); + fprintf(stderr, "done compiling:\n"); + print_bytecode(bc); + disasm(bc); + return bc; +} + +/*********************** the virtual machine **************************/ + +sexp vm(bytecode bc, env e, sexp* stack, unsigned int top) { + unsigned char *ip=bc->data; + sexp cp, tmp; + int i; + + loop: + /* fprintf(stderr, "opcode: %d, ip: %d\n", *ip, ip); */ + /* print_bytecode(bc); */ + switch (*ip++) { + case OP_NOOP: + fprintf(stderr, "noop\n"); + break; + case OP_GLOBAL_REF: + fprintf(stderr, "global ref: ip: %p => %p: ", ip, ((sexp*)ip)[0]); + fflush(stderr); + write_sexp(stderr, ((sexp*)ip)[0]); + fprintf(stderr, "\n"); + tmp = env_cell(e, ((sexp*)ip)[0]); + stack[top++]=SEXP_CDR(tmp); + ip += sizeof(sexp); + break; + case OP_GLOBAL_SET: + fprintf(stderr, "global set: %p: ", ((sexp*)ip)[0]); + fflush(stderr); + write_sexp(stderr, ((sexp*)ip)[0]); + fprintf(stderr, "\n"); + env_define(e, ((sexp*)ip)[0], stack[--top]); + ip += sizeof(sexp); + break; + case OP_STACK_REF: + fprintf(stderr, "stack ref: ip=%p, %d - %d => ", + ip, top, (unsigned long) ((sexp*)ip)[0]); + fflush(stderr); + write_sexp(stderr, stack[top - (unsigned int) ((sexp*)ip)[0]]); + fprintf(stderr, "\n"); + stack[top] = stack[top - (unsigned int) ((sexp*)ip)[0]]; + ip += sizeof(sexp); + top++; + break; + case OP_STACK_SET: + stack[top - (unsigned 
int) ((sexp*)ip)[0]] = stack[top-1]; + stack[top-1] = SEXP_UNDEF; + ip += sizeof(sexp); + break; + case OP_CLOSURE_REF: + fprintf(stderr, "closure-ref %ld => ", (long) ((sexp*)ip)[0]); + fflush(stderr); + write_sexp(stderr, vector_data(cp)[(unsigned long) ((sexp*)ip)[0]]); /* operand is emitted as a raw index, so it must not go through vector_ref */ + fprintf(stderr, "\n"); + stack[top++]=vector_data(cp)[(unsigned long) ((sexp*)ip)[0]]; + ip += sizeof(sexp); + break; + case OP_VECTOR_REF: + stack[top-2]=vector_ref(stack[top-1], stack[top-2]); + top--; + break; + case OP_VECTOR_SET: + fprintf(stderr, "vector-set! %p %d => ", stack[top-1], unbox_integer(stack[top-2])); + write_sexp(stderr, stack[top-3]); + fprintf(stderr, "\n"); + vector_set(stack[top-1], stack[top-2], stack[top-3]); /* vector, index, value from the top down */ + stack[top-3]=SEXP_UNDEF; + top-=2; + break; + case OP_MAKE_PROCEDURE: + stack[top-2]=make_procedure(stack[top-1], stack[top-2]); + top--; + break; + case OP_MAKE_VECTOR: + stack[top-2]=make_vector(unbox_integer(stack[top-1]), stack[top-2]); + top--; + break; + case OP_PUSH: + /* fprintf(stderr, " (push)\n"); */ + stack[top++]=((sexp*)ip)[0]; + ip += sizeof(sexp); + break; + case OP_DUP: + stack[top]=stack[top-1]; + top++; + break; + case OP_DROP: + top--; + break; + case OP_SWAP: + tmp = stack[top-2]; + stack[top-2]=stack[top-1]; + stack[top-1]=tmp; + break; + case OP_CAR: + stack[top-1]=car(stack[top-1]); + break; + case OP_CDR: + stack[top-1]=cdr(stack[top-1]); + break; + case OP_SET_CAR: + set_car(stack[top-1], stack[top-2]); + stack[top-2]=SEXP_UNDEF; + top--; + break; + case OP_SET_CDR: + set_cdr(stack[top-1], stack[top-2]); + stack[top-2]=SEXP_UNDEF; + top--; + break; + case OP_CONS: + stack[top-2]=cons(stack[top-1], stack[top-2]); + top--; + break; + case OP_ADD: + fprintf(stderr, "OP_ADD %ld %ld\n", (long) stack[top-1], (long) stack[top-2]); /* operands shown as raw words */ + stack[top-2]=sexp_add(stack[top-1],stack[top-2]); + top--; + break; + case OP_SUB: + stack[top-2]=sexp_sub(stack[top-1],stack[top-2]); + top--; + break; + case OP_MUL: + stack[top-2]=sexp_mul(stack[top-2],stack[top-1]); + top--; + break; + case OP_DIV: +
stack[top-2]=sexp_div(stack[top-1],stack[top-2]); /* first operand is on top, as in OP_SUB */ + top--; + break; + case OP_MOD: + stack[top-2]=sexp_mod(stack[top-1],stack[top-2]); /* first operand is on top, as in OP_SUB */ + top--; + break; + case OP_LT: + stack[top-2]=((stack[top-1] < stack[top-2]) ? SEXP_TRUE : SEXP_FALSE); /* first operand is on top, as in OP_SUB */ + top--; + break; + case OP_CALL: + fprintf(stderr, "CALL\n"); + i = (unsigned long) ((sexp*)ip)[0]; + tmp = stack[top-1]; + if (! SEXP_PROCEDUREP(tmp)) + errx(2, "non-procedure application: %p", tmp); + stack[top-1] = (sexp) i; /* the operand word replaces the operator slot */ + stack[top] = (sexp) (ip+sizeof(sexp)); /* return address: just past the one-word operand */ + stack[top+1] = cp; + top+=2; + bc = procedure_code(tmp); + print_bytecode(bc); + ip = bc->data; + cp = procedure_vars(tmp); + fprintf(stderr, "... calling procedure at %p\ncp: ", ip); + write_sexp(stderr, cp); + fprintf(stderr, "\n"); + /* print_stack(stack, top); */ + break; + case OP_JUMP_UNLESS: + fprintf(stderr, "JUMP UNLESS, stack top is %p\n", (void*) stack[top-1]); + if (stack[--top] == SEXP_FALSE) { + fprintf(stderr, "test passed, jumping to + %d => %p\n", ((signed char*)ip)[0], (void*) (ip + ((signed char*)ip)[0])); + ip += ((signed char*)ip)[0]; /* offset is relative to the operand byte */ + } else { + fprintf(stderr, "test failed, not jumping\n"); + ip++; + } + break; + case OP_JUMP: + fprintf(stderr, "jumping to + %d => %p\n", ((signed char*)ip)[0], (void*) (ip + ((signed char*)ip)[0])); + ip += ((signed char*)ip)[0]; + break; + case OP_RET: + fprintf(stderr, "returning @ %d: ", top-1); + fflush(stderr); + write_sexp(stderr, stack[top-1]); + fprintf(stderr, "...\n"); + print_stack(stack, top); + /* top-1 */ + /* stack: args ... n ip result */ + cp = stack[top-2]; + fprintf(stderr, "1\n"); + ip = (unsigned char*) stack[top-3]; + fprintf(stderr, "2\n"); + i = unbox_integer(stack[top-4]); + fprintf(stderr, "3 (i=%d)\n", i); + stack[top-i-4] = stack[top-1]; + fprintf(stderr, "4\n"); + top = top-i-3; + fprintf(stderr, "...
done returning\n"); + break; + case OP_DONE: + fprintf(stderr, "finally returning @ %d: ", top-1); + fflush(stderr); + write_sexp(stderr, stack[top-1]); + fprintf(stderr, "\n"); + goto end_loop; + default: + fprintf(stderr, "unknown opcode: %d\n", *(ip-1)); + stack[top] = SEXP_ERROR; + goto end_loop; + } + fprintf(stderr, "looping\n"); + goto loop; + + end_loop: + return stack[top-1]; +} + +/************************** eval interface ****************************/ + +sexp eval_in_stack(sexp obj, env e, sexp* stack, unsigned int top) { + bytecode bc = compile(SEXP_NULL, cons(obj, SEXP_NULL), e, SEXP_NULL, SEXP_NULL, 1); + fprintf(stderr, "evaling\n"); + return vm(bc, e, stack, top); +} + +sexp eval(sexp obj, env e) { + sexp* stack = (sexp*) malloc(sizeof(sexp) * INIT_STACK_SIZE); + sexp res = eval_in_stack(obj, e, stack, 0); + free(stack); + return res; +} + +int main (int argc, char **argv) { + sexp obj, res, *stack; + env e; + + sexp_init(); + e = make_standard_env(); + stack = (sexp*) malloc(sizeof(sexp) * INIT_STACK_SIZE); + + /* repl */ + fprintf(stdout, "> "); + fflush(stdout); + while ((obj = read_sexp(stdin)) != SEXP_EOF) { + write_sexp(stdout, obj); + fprintf(stdout, "\n => "); + res = eval_in_stack(obj, e, stack, 0); + write_sexp(stdout, res); + fprintf(stdout, "\n> "); + fflush(stdout); + } + return 0; +} + diff --git a/eval.h b/eval.h new file mode 100644 index 00000000..f8c806e6 --- /dev/null +++ b/eval.h @@ -0,0 +1,119 @@ +/* eval.h -- headers for eval library */ +/* Copyright (c) 2009 Alex Shinn. All rights reserved. 
*/ +/* BSD-style license: http://synthcode.com/license.txt */ + +#ifndef SCM_EVAL_H +#define SCM_EVAL_H + +#include "sexp.h" + +/************************* additional types ***************************/ + +#define INIT_BCODE_SIZE 128 /* data bytes allocated for a new bytecode object in compile */ +#define INIT_STACK_SIZE 1024 /* number of sexp slots allocated for an evaluation stack */ + +typedef struct bytecode { /* compiled code object; note the typedef names a pointer to the struct */ + char tag; /* set to SEXP_BYTECODE by the code that allocates these */ + unsigned int len; /* size of data in bytes */ + unsigned char data[]; /* opcode bytes with their operands inline */ +} *bytecode; + +/* env binding: #(id chain offset flags) */ +/* chain is the index into the closure parent list (0 for current lambda) */ +/* macros/constants have a value instead of chain */ +typedef struct env { /* one environment frame; the typedef names a pointer to the struct */ + char tag; /* set to SEXP_ENV */ + struct env *parent; /* enclosing frame; make_standard_env sets this to NULL */ + sexp bindings; /* list of (key . value) pairs, newest first */ +} *env; + +typedef struct opcode { /* one entry of the opcodes table that make_standard_env binds by name */ + char tag; + char op_class; /* one of enum opcode_classes */ + char op_name; /* the byte emitted for this operation */ + char num_args; + char var_args_p; + char arg1_type; + char arg2_type; + char* name; /* name the entry is interned under */ + char op_inverse; /* byte emitted for the one-argument OPC_ARITHMETIC_INV case */ + sexp proc; +} *opcode; + +typedef struct core_form { /* one entry of the core_forms table that make_standard_env binds by name */ + char tag; + char* name; /* name the entry is interned under */ + char code; /* one of enum core_form_names */ +} *core_form; + +enum core_form_names { + CORE_DEFINE, + CORE_SET, + CORE_LAMBDA, + CORE_IF, + CORE_BEGIN, + CORE_QUOTE, + CORE_DEFINE_SYNTAX, + CORE_LET_SYNTAX, + CORE_LETREC_SYNTAX, +}; + +enum opcode_classes { + OPC_GENERIC, + OPC_TYPE_PREDICATE, + OPC_PREDICATE, + OPC_ARITHMETIC, + OPC_ARITHMETIC_INV, + OPC_ARITHMETIC_CMP, + OPC_CONSTRUCTOR, +}; + +enum opcode_names { /* the trailing comments give the value in hexadecimal */ + OP_NOOP, /* 0 */ + OP_STACK_REF, /* 1 */ + OP_STACK_SET, /* 2 */ + OP_GLOBAL_REF, /* 3 */ + OP_GLOBAL_SET, /* 4 */ + OP_CLOSURE_REF, /* 5 */ + OP_CLOSURE_SET, /* 6 */ + OP_VECTOR_REF, /* 7 */ + OP_VECTOR_SET, /* 8 */ + OP_MAKE_PROCEDURE, + OP_MAKE_VECTOR, + OP_PUSH, + OP_DUP, /* C */ + OP_DROP, + OP_SWAP, + OP_CAR, + OP_CDR, /* 10 */ + OP_SET_CAR, /* 11 */ + OP_SET_CDR, /* 12 */ + OP_CONS, + OP_ADD, /* 14 */ + OP_SUB, + OP_MUL, /* 16 */ + OP_DIV, + OP_MOD, /* 18 */ + OP_NEG, + OP_INV, /* 1A */ + OP_LT, + OP_CALL, /* 1C */ + OP_JUMP_UNLESS, + OP_JUMP, /* 1E */ + OP_RET, + OP_DONE, +}; + +/**************************** prototypes ******************************/ + +bytecode compile(sexp params, sexp obj, env e, sexp fv,
sexp sv, int done_p); +void analyze_app (sexp obj, bytecode *bc, unsigned int *i, + env e, sexp params, sexp fv, sexp sv, unsigned int *d); +void analyze_lambda (sexp name, sexp formals, sexp body, + bytecode *bc, unsigned int *i, env e, + sexp params, sexp fv, sexp sv, unsigned int *d); +void analyze_var_ref (sexp name, bytecode *bc, unsigned int *i, env e, + sexp params, sexp fv, sexp sv, unsigned int *d); + +#endif /* ! SCM_EVAL_H */ + diff --git a/sexp.c b/sexp.c index 6a480dcb..1d09e158 100644 --- a/sexp.c +++ b/sexp.c @@ -1,58 +1,24 @@ +/* sexp.c -- sexp library implementation */ +/* Copyright (c) 2009 Alex Shinn. All rights reserved. */ +/* BSD-style license: http://synthcode.com/license.txt */ -#include -#include -#include -#include +#include "sexp.h" -/* simple tagging - * ends in 00: pointer - * 01: fixnum - * 011: symbol - * 111: immediate symbol - * 0110: char - * 1110: other immediate object (NULL, TRUE, FALSE) - */ - -#define SEXP_FIXNUM_BITS 2 -#define SEXP_IMMEDIATE_BITS 3 -#define SEXP_EXTENDED_BITS 4 - -#define SEXP_FIXNUM_MASK 3 -#define SEXP_IMMEDIATE_MASK 7 -#define SEXP_EXTENDED_MASK 15 - -#define SEXP_POINTER_TAG 0 -#define SEXP_FIXNUM_TAG 1 -#define SEXP_LSYMBOL_TAG 3 -#define SEXP_ISYMBOL_TAG 7 -#define SEXP_CHAR_TAG 6 - -enum sexp_types { - SEXP_FIXNUM, - SEXP_CHAR, - SEXP_BOOLEAN, - SEXP_PAIR, - SEXP_SYMBOL, - SEXP_STRING, - SEXP_VECTOR, - SEXP_PROCEDURE, - SEXP_ENV, - SEXP_BYTECODE, - SEXP_CORE, - SEXP_OPCODE, +/* optional huffman-compressed immediate symbols */ +#ifdef USE_HUFF_SYMS +struct huff_entry { + unsigned char len; + unsigned short bits; }; - -typedef struct sexp_struct { - char tag; - void *data1; - void *data2; -} *sexp; - #include "sexp-hufftabs.c" +static struct huff_entry huff_table[] = { +#include "sexp-huff.c" +}; +#endif static int initialized_p = 0; -/* static sexp the_dot_symbol; */ +static sexp the_dot_symbol; static sexp the_quote_symbol; static sexp the_quasiquote_symbol; static sexp the_unquote_symbol; @@ -63,116 
+29,6 @@ static sexp the_define_symbol; static sexp the_set_x_symbol; static sexp the_if_symbol; -#define MAKE_IMMEDIATE(n) ((sexp) ((n<<4) + 14)) -#define SEXP_NULL MAKE_IMMEDIATE(0) -#define SEXP_FALSE MAKE_IMMEDIATE(1) -#define SEXP_TRUE MAKE_IMMEDIATE(2) -#define SEXP_EOF MAKE_IMMEDIATE(3) -#define SEXP_UNDEF MAKE_IMMEDIATE(4) -#define SEXP_ERROR MAKE_IMMEDIATE(5) -#define SEXP_CLOSE MAKE_IMMEDIATE(6) /* internal use */ -#define SEXP_RAWDOT MAKE_IMMEDIATE(7) /* internal use */ - -#define SEXP_NULLP(x) ((x) == SEXP_NULL) -#define SEXP_POINTERP(x) (((unsigned long)(x) & SEXP_FIXNUM_MASK) == SEXP_POINTER_TAG) -#define SEXP_INTEGERP(x) (((unsigned long)(x) & SEXP_FIXNUM_MASK) == SEXP_FIXNUM_TAG) -#define SEXP_ISYMBOLP(x) (((unsigned long)(x) & SEXP_IMMEDIATE_MASK) == SEXP_ISYMBOL_TAG) -#define SEXP_CHARP(x) (((unsigned long)(x) & SEXP_EXTENDED_MASK) == SEXP_CHAR_TAG) -#define SEXP_BOOLEANP(x) (((x) == SEXP_TRUE) || ((x) == SEXP_FALSE)) - -#define SEXP_PAIRP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_PAIR) -#define SEXP_STRINGP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_STRING) -#define SEXP_LSYMBOLP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_SYMBOL) -#define SEXP_VECTORP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_VECTOR) -#define SEXP_PROCEDUREP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_PROCEDURE) -#define SEXP_ENVP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_ENV) -#define SEXP_BYTECODEP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag ==SEXP_BYTECODE) -#define SEXP_COREP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_CORE) -#define SEXP_OPCODEP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_OPCODE) - -#define SEXP_SYMBOLP(x) (SEXP_ISYMBOLP(x) || SEXP_LSYMBOLP(x)) - -/* #define SEXP_DOTP(x) (SEXP_SYMBOLP(x) && (strncmp(string_data(x), ".", 2) == 0)) */ -/* #define SEXP_DOTP(x) (x==the_dot_symbol) */ -#define SEXP_DOTP(x) (((unsigned long)(x))==((0x5D00<>SEXP_FIXNUM_BITS) -#define make_character(n) ((sexp) (((long) 
n<>SEXP_EXTENDED_BITS) - -#define vector_length(x) ((unsigned long) x->data1) -#define vector_data(x) ((sexp*) x->data2) - -#define vector_ref(x, i) (vector_data(x)[unbox_integer(i)]) -#define vector_set(x, i, v) (vector_data(x)[unbox_integer(i)] = (v)) - -#define procedure_code(x) ((bytecode) ((sexp)x)->data1) -#define procedure_vars(x) ((sexp) ((sexp)x)->data2) - -#define string_length(x) ((unsigned long) x->data1) -#define string_data(x) ((char*) x->data2) - -#define symbol_pointer(x) ((sexp) (((unsigned long)x)-SEXP_LSYMBOL_TAG)) -#define symbol_length(x) ((unsigned long) (symbol_pointer(x)->data1)) -#define symbol_data(x) ((char*) (symbol_pointer(x)->data2)) - -#define sexp_add(a, b) ((sexp)(((unsigned long)a)+((unsigned long)b)-SEXP_FIXNUM_TAG)) -#define sexp_sub(a, b) ((sexp)(((unsigned long)a)-((unsigned long)b)+SEXP_FIXNUM_TAG)) -#define sexp_mul(a, b) ((sexp)((((((unsigned long)a)-SEXP_FIXNUM_TAG)*(((unsigned long)b)>>SEXP_FIXNUM_BITS))+SEXP_FIXNUM_TAG))) -#define sexp_div(a, b) ((sexp)(((((unsigned long)a)>>SEXP_FIXNUM_BITS)/(((unsigned long)b)>>SEXP_FIXNUM_BITS))<>SEXP_FIXNUM_BITS)%(((unsigned long)b)>>SEXP_FIXNUM_BITS))<tag = SEXP_PAIR; - pair->data1 = (void*) head; - pair->data2 = (void*) tail; - return pair; -} - -#define list2(a, b) cons(a, cons(b, SEXP_NULL)) -#define list3(a, b, c) cons(a, cons(b, cons(c, SEXP_NULL))) -#define list4(a, b, c, d) cons(a, cons(b, cons(c, cons(d, SEXP_NULL)))) - -#define SEXP_CAR(x) (((sexp)x)->data1) -#define SEXP_CDR(x) (((sexp)x)->data2) - -#define SEXP_CAAR(x) (SEXP_CAR(SEXP_CAR(x))) -#define SEXP_CADR(x) (SEXP_CAR(SEXP_CDR(x))) -#define SEXP_CDAR(x) (SEXP_CDR(SEXP_CAR(x))) -#define SEXP_CDDR(x) (SEXP_CDR(SEXP_CDR(x))) - -#define SEXP_CADDR(x) (SEXP_CAR(SEXP_CDDR(x))) -#define SEXP_CDDDR(x) (SEXP_CDR(SEXP_CDDR(x))) -#define SEXP_CADDDR(x) (SEXP_CADR(SEXP_CDDR(x))) -#define SEXP_CDDDDR(x) (SEXP_CDDR(SEXP_CDDR(x))) - -sexp read_sexp (FILE *in); - -/* separators: space, tab, newline, ; () [] , ' " */ -/* 9 10 11 12 
13 32 34 39 40 41 44 59 91 93 */ -/* 0 1 2 3 4 23 25 30 31 32 35 50 82 84 */ -/* 0000000 */ -/* 0000001 */ -/* 0000010 */ -/* 0000011 */ -/* 0000100 */ -/* 0010111 */ -/* 0011001 */ -/* 0011110 */ -/* 0011111 */ -/* 0100000 */ -/* 0100011 */ -/* 0110010 */ -/* 1010010 */ -/* 1010100 */ - static char separators[] = { /* 1 2 3 4 5 6 7 8 9 a b c d e f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, /* x0_ */ @@ -185,11 +41,56 @@ static char separators[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* x7_ */ }; -static int is_separator (int c) { +static int is_separator(int c) { /* return (!((c-9)&(~3))) | (~(c^4)); */ return 0tag) { + case SEXP_PAIR: + free_sexp(car(obj)); + free_sexp(cdr(obj)); + break; + case SEXP_VECTOR: + len = vector_length(obj); + elts = vector_data(obj); + for (i=0; itag = SEXP_PAIR; + pair->data1 = (void*) head; + pair->data2 = (void*) tail; + return pair; +} + sexp car(sexp obj) { return (SEXP_PAIRP(obj)) ? SEXP_CAR(obj) : SEXP_ERROR; } @@ -199,19 +100,17 @@ sexp cdr(sexp obj) { } sexp set_car(sexp obj, sexp val) { - if (SEXP_PAIRP(obj)) { + if (SEXP_PAIRP(obj)) return SEXP_CAR(obj) = val; - } else { + else return SEXP_ERROR; - } } sexp set_cdr(sexp obj, sexp val) { - if (SEXP_PAIRP(obj)) { + if (SEXP_PAIRP(obj)) return SEXP_CDR(obj) = val; - } else { + else return SEXP_ERROR; - } } int listp (sexp obj) { @@ -231,6 +130,24 @@ int list_index (sexp ls, sexp elt) { return -1; } +sexp memq (sexp x, sexp ls) { + while (SEXP_PAIRP(ls)) + if (x == SEXP_CAR(ls)) + return ls; + else + ls = SEXP_CDR(ls); + return SEXP_FALSE; +} + +sexp assq (sexp x, sexp ls) { + while (SEXP_PAIRP(ls)) + if (x == SEXP_CAAR(ls)) + return ls; + else + ls = SEXP_CDR(ls); + return SEXP_FALSE; +} + sexp lset_diff(sexp a, sexp b) { sexp res = SEXP_NULL; for ( ; SEXP_PAIRP(a); a=SEXP_CDR(a)) @@ -247,23 +164,18 @@ sexp reverse(sexp ls) { } sexp nreverse(sexp ls) { - sexp a; - sexp b; - sexp tmp; - + sexp a, b, tmp; if (ls == SEXP_NULL) { return ls; } else if (! 
SEXP_PAIRP(ls)) { return SEXP_ERROR; } else { - b = ls; + b=ls; a=cdr(ls); set_cdr(b, SEXP_NULL); - for ( ; SEXP_PAIRP(a); ) { - tmp = cdr(a); + for ( ; SEXP_PAIRP(a); b=a, a=tmp) { + tmp=cdr(a); set_cdr(a, b); - b = a; - a = tmp; } return b; } @@ -279,35 +191,13 @@ sexp list(int count, ...) { sexp res = SEXP_NULL; int i; va_list ap; - va_start(ap, count); - for (i=0; i d*4) { - fprintf(stderr, "resizing symbol table\n"); - newtable = malloc(symbol_table_primes[symbol_table_prime_index++] - * sizeof(sexp)); - free(symbol_table); + newtable = SEXP_ALLOC(symbol_table_primes[symbol_table_prime_index++] + * sizeof(sexp)); + SEXP_FREE(symbol_table); symbol_table = newtable; } @@ -435,9 +308,8 @@ sexp list_to_vector(sexp ls) { sexp x; sexp *elts = vector_data(vec); int i; - for (i=0, x=ls; SEXP_PAIRP(x); i++, x=cdr(x)) { + for (i=0, x=ls; SEXP_PAIRP(x); i++, x=cdr(x)) elts[i] = car(x); - } return vec; } @@ -449,23 +321,21 @@ sexp vector(int count, ...) { int i; va_start(ap, count); - for (i=0; i"); - } else if (SEXP_POINTERP(obj)) { - switch (obj->tag) { case SEXP_PAIR: fprintf(out, "("); @@ -482,7 +352,7 @@ void write_sexp (FILE *out, sexp obj) { break; case SEXP_VECTOR: len = vector_length(obj); - sexp *elts = vector_data(obj); + elts = vector_data(obj); if (len == 0) { fprintf(out, "#()"); } else { @@ -509,41 +379,32 @@ void write_sexp (FILE *out, sexp obj) { /* FALLTHROUGH */ case SEXP_SYMBOL: fprintf(out, "%s", string_data(obj)); - if (obj->tag == SEXP_STRING) { + if (obj->tag == SEXP_STRING) fprintf(out, "\""); - } break; } - } else if (SEXP_INTEGERP(obj)) { - fprintf(out, "%d", unbox_integer(obj)); - } else if (SEXP_CHARP(obj)) { - if (33 <= unbox_character(obj) < 127) { fprintf(out, "#\\%c", unbox_character(obj)); } else { fprintf(out, "#\\x%02d", unbox_character(obj)); } - } else if (SEXP_SYMBOLP(obj)) { +#ifdef USE_HUFF_SYMS if (((unsigned long)obj&7)==7) { - c = ((unsigned long)obj)>>3; - while (c) { #include "sexp-unhuff.c" putc(res, out); } + } else +#endif 
- } else { fprintf(out, "%s", symbol_data(obj)); - } - } else { - switch ((unsigned long) obj) { case (int) SEXP_NULL: fprintf(out, "()"); @@ -566,34 +427,6 @@ void write_sexp (FILE *out, sexp obj) { } } -void* free_sexp (sexp obj) { - int len, i; - sexp *elts; - - if (SEXP_POINTERP(obj)) { - switch (obj->tag) { - case SEXP_PAIR: - free_sexp(car(obj)); - free_sexp(cdr(obj)); - break; - case SEXP_VECTOR: - len = vector_length(obj); - elts = vector_data(obj); - for (i=0; i -#else -#define errx(code, msg, ...) (fprintf(stderr,msg"\n",__VA_ARGS__), exit(code)) -#endif - -#define INIT_BCODE_SIZE 128 -#define INIT_STACK_SIZE 1024 - -typedef struct bytecode { - char tag; - unsigned int len; - unsigned char data[]; -} *bytecode; - -/* env binding: #(id chain offset flags) */ -/* chain is the index into the closure parent list (0 for current lambda) */ -/* macros/constants have a value instead of chain */ -typedef struct env { - char tag; - struct env *parent; - sexp bindings; -} *env; - -enum core_form_names { - CORE_DEFINE, - CORE_SET, - CORE_LAMBDA, - CORE_IF, - CORE_BEGIN, - CORE_QUOTE, - CORE_DEFINE_SYNTAX, - CORE_LET_SYNTAX, - CORE_LETREC_SYNTAX, -}; - -typedef struct core_form { - char tag; - char* name; - char code; -} *core_form; - -static struct core_form core_forms[] = { - {SEXP_CORE, "define", CORE_DEFINE}, - {SEXP_CORE, "set!", CORE_SET}, - {SEXP_CORE, "lambda", CORE_LAMBDA}, - {SEXP_CORE, "if", CORE_IF}, - {SEXP_CORE, "begin", CORE_BEGIN}, - {SEXP_CORE, "quote", CORE_QUOTE}, - {SEXP_CORE, "define-syntax", CORE_DEFINE_SYNTAX}, - {SEXP_CORE, "let-syntax", CORE_LET_SYNTAX}, - {SEXP_CORE, "letrec-syntax", CORE_LETREC_SYNTAX}, -}; - -enum opcode_classes { - OPC_GENERIC, - OPC_TYPE_PREDICATE, - OPC_PREDICATE, - OPC_ARITHMETIC, - OPC_ARITHMETIC_INV, - OPC_ARITHMETIC_CMP, - OPC_CONSTRUCTOR, -}; - -/* #define OP_UNSAFE(op) ((op)+128) */ - -enum opcode_names { - OP_NOOP, /* 0 */ - OP_STACK_REF, /* 1 */ - OP_STACK_SET, /* 2 */ - OP_GLOBAL_REF, /* 3 */ - OP_GLOBAL_SET, /* 
4 */ - OP_CLOSURE_REF, /* 5 */ - OP_CLOSURE_SET, /* 6 */ - OP_VECTOR_REF, /* 7 */ - OP_VECTOR_SET, /* 8 */ - OP_MAKE_PROCEDURE, - OP_MAKE_VECTOR, - OP_PUSH, - OP_DUP, /* C */ - OP_DROP, - OP_SWAP, - OP_CAR, - OP_CDR, /* 10 */ - OP_SET_CAR, /* 11 */ - OP_SET_CDR, /* 12 */ - OP_CONS, - OP_ADD, /* 14 */ - OP_SUB, - OP_MUL, /* 16 */ - OP_DIV, - OP_MOD, /* 18 */ - OP_NEG, - OP_INV, /* 1A */ - OP_LT, - OP_CALL, /* 1C */ - OP_JUMP_UNLESS, - OP_JUMP, /* 1E */ - OP_RET, - OP_DONE, -}; - -static const char* reverse_opcode_names[] = - {"NOOP", "STACK_REF", "STACK_SET", "GLOBAL_REF", "GLOBAL_SET", "CLOSURE_REF", - "CLOSURE_SET", "VECTOR_REF", "VECTOR_SET", "MAKE_PROCEDURE", "MAKE_VECTOR", - "PUSH", "DUP", "DROP", "SWAP", "CAR", "CDR", "SET_CAR", "SET_CDR", "CONS", - "ADD", "SUB", "MUL", "DIV", "MOD", "NEG", "INV", "LT", "CALL", - "JUMP_UNLESS", "JUMP", "RET", "DONE" - }; - -typedef struct opcode { - char tag; - char op_class; - char op_name; - char num_args; - char var_args_p; - char arg1_type; - char arg2_type; - char* name; - char op_inverse; - sexp proc; -} *opcode; - -static struct opcode opcodes[] = { -{SEXP_OPCODE, OPC_TYPE_PREDICATE, OP_CAR, 1, 0, SEXP_PAIR, 0, "car", 0, NULL}, -{SEXP_OPCODE, OPC_TYPE_PREDICATE, OP_CDR, 1, 0, SEXP_PAIR, 0, "cdr", 0, NULL}, -{SEXP_OPCODE, OPC_ARITHMETIC, OP_ADD, 0, 1, SEXP_FIXNUM, 0, "+", 0, NULL}, -{SEXP_OPCODE, OPC_ARITHMETIC_INV, OP_SUB, 0, 1, SEXP_FIXNUM, 0, "-", OP_NEG, NULL}, -{SEXP_OPCODE, OPC_ARITHMETIC, OP_MUL, 0, 1, SEXP_FIXNUM, 0, "*", 0, NULL}, -{SEXP_OPCODE, OPC_ARITHMETIC_INV, OP_DIV, 0, 1, SEXP_FIXNUM, 0, "/", OP_INV, 0}, -{SEXP_OPCODE, OPC_ARITHMETIC, OP_MOD, 2, 0, SEXP_FIXNUM, SEXP_FIXNUM, "%", 0, NULL}, -{SEXP_OPCODE, OPC_ARITHMETIC_CMP, OP_LT, 0, 1, SEXP_FIXNUM, 0, "<", 0, NULL}, -{SEXP_OPCODE, OPC_CONSTRUCTOR, OP_CONS, 2, 0, 0, 0, "cons", 0, NULL}, -{SEXP_OPCODE, OPC_CONSTRUCTOR, OP_MAKE_VECTOR, 2, 0, SEXP_FIXNUM, 0, "make-vector", 0, NULL}, -{SEXP_OPCODE, OPC_CONSTRUCTOR, OP_MAKE_PROCEDURE, 2, 0, 0, 0, 
"make-procedure", 0, NULL}, -}; - -void disasm (bytecode bc) { - unsigned char *ip=bc->data, opcode; - loop: - opcode = *ip++; - if (opcode*sizeof(char*) < sizeof(reverse_opcode_names)) { - fprintf(stderr, " %s ", reverse_opcode_names[opcode]); - } else { - fprintf(stderr, " %d ", opcode); - } - switch (opcode) { - case OP_STACK_REF: - case OP_STACK_SET: - case OP_CLOSURE_REF: - case OP_CLOSURE_SET: - fprintf(stderr, "%d", (long) ((sexp*)ip)[0]); - ip += sizeof(sexp); - break; - case OP_GLOBAL_REF: - case OP_GLOBAL_SET: - case OP_CALL: - case OP_PUSH: - write_sexp(stderr, ((sexp*)ip)[0]); - ip += sizeof(sexp); - break; - case OP_JUMP: - case OP_JUMP_UNLESS: - fprintf(stderr, "%d", ip[0]); - ip++; - break; - } - fprintf(stderr, "\n"); - if ((! (opcode == OP_RET) || (opcode == OP_DONE)) - && (ip - bc->data < bc->len)) - goto loop; -} - -sexp env_cell(env e, sexp key) { - sexp ls, res=NULL; - - do { - for (ls=e->bindings; SEXP_PAIRP(ls); ls=SEXP_CDR(ls)) { - if (SEXP_CAAR(ls) == key) { - res = SEXP_CAR(ls); - break; - } - } - e = e->parent; - } while (e && ! res); - - return res; -} - -sexp make_procedure(sexp bc, sexp vars) { - sexp proc = SEXP_NEW(); - if (! 
proc) return SEXP_ERROR; - proc->tag = SEXP_PROCEDURE; - proc->data1 = (void*) bc; - proc->data2 = (void*) vars; - return proc; -} - -int env_global_p (env e, sexp id) { - while (e->parent) { - if (assq(id, e->bindings) != SEXP_FALSE) - return 0; - else - e = e->parent; - } - return 1; -} - -void env_define(env e, sexp key, sexp value) { - sexp cell = env_cell(e, key); - if (cell) { - SEXP_CDR(cell) = value; - } else { - e->bindings = cons(cons(key, value), e->bindings); } } -env extend_env_closure (env e, sexp fv) { - int i; - env e2 = (env) malloc(sizeof(struct env)); - e2->tag = SEXP_ENV; - e2->parent = e; - e2->bindings = SEXP_NULL; - for (i=0; SEXP_PAIRP(fv); fv = SEXP_CDR(fv), i++) { - e2->bindings = cons(cons(SEXP_CAR(fv), make_integer(i)), e2->bindings); - } - return e2; -} - -env make_standard_env() { - int i; - env e = (env) malloc(sizeof(struct env)); - e->tag = SEXP_ENV; - e->parent = NULL; - e->bindings = SEXP_NULL; - for (i=0; i<(sizeof(core_forms)/sizeof(struct core_form)); i++) { - env_define(e, intern(core_forms[i].name), (sexp)(&core_forms[i])); - } - for (i=0; i<(sizeof(opcodes)/sizeof(struct opcode)); i++) { - env_define(e, intern(opcodes[i].name), (sexp)(&opcodes[i])); - } - return e; -} - -/* ******************************************************************** */ - -/* char *buffncpy(char *buf, unsigned int n, unsigned int len) { */ -/* char *res; */ -/* if (n==len) { */ -/* res = buf; */ -/* } else { */ -/* res = (char*) malloc(n); */ -/* strncpy(res, buf, n); */ -/* free(buf); */ -/* } */ -/* return res; */ -/* } */ - -/* char *buffngrow(char *buf, unsigned int newlen) { */ -/* char *tmp = (char*) malloc(newlen); */ -/* strncpy(tmp, buf, newlen/2); */ -/* free(buf); */ -/* return tmp; */ -/* } */ - -void print_bytecode (bytecode bc) { - int i; - fprintf(stderr, "bytecode @ %p, data @ %p, length = %d\n", bc, bc->data, bc->len); - for (i=0; i+16 < bc->len; i+=8) { - fprintf(stderr, "%02x: %02x %02x %02x %02x %02x %02x %02x %02x ", i, - 
bc->data[i], bc->data[i+1], bc->data[i+2], bc->data[i+3], - bc->data[i+4], bc->data[i+5], bc->data[i+6], bc->data[i+7]); - i += 8; - fprintf(stderr, "%02x %02x %02x %02x %02x %02x %02x %02x\n", - bc->data[i], bc->data[i+1], bc->data[i+2], bc->data[i+3], - bc->data[i+4], bc->data[i+5], bc->data[i+6], bc->data[i+7]); - } - if (i != bc->len) { - fprintf(stderr, "%02x:", i); - for ( ; i < bc->len; i++) { - if ((i % 8) == 0 && (i % 16) != 0) - fprintf(stderr, " "); - fprintf(stderr, " %02x", bc->data[i]); - } - fprintf(stderr, "\n"); - } -} - -void print_stack (sexp *stack, int top) { - int i; - for (i=0; ilen != i) { - fprintf(stderr, "shrinking to %d\n", i); - tmp = (bytecode) malloc(sizeof(struct bytecode) + i); - tmp->tag = SEXP_BYTECODE; - tmp->len = i; - memcpy(tmp->data, (*bc)->data, i); - SEXP_FREE(*bc); - *bc = tmp; - } -} - -void emit(bytecode *bc, unsigned int *i, char c) { - bytecode tmp; - if ((*bc)->len < (*i)+1) { - fprintf(stderr, "expanding (%d < %d)\n", (*bc)->len, (*i)+1); - tmp = (bytecode) malloc(sizeof(unsigned int) + (*bc)->len*2); - tmp->len = (*bc)->len*2; - memcpy(tmp->data, (*bc)->data, (*bc)->len); - SEXP_FREE(*bc); - *bc = tmp; - } - (*bc)->data[(*i)++] = c; -} - -void emit_word(bytecode *bc, unsigned int *i, unsigned long val) { - bytecode tmp; - if ((*bc)->len < (*i)+4) { - tmp = (bytecode) malloc(sizeof(unsigned int) + (*bc)->len*2); - tmp->len = (*bc)->len*2; - memcpy(tmp->data, (*bc)->data, (*bc)->len); - SEXP_FREE(*bc); - *bc = tmp; - } - *((unsigned long*)(&((*bc)->data[*i]))) = val; - *i += sizeof(unsigned long); -} - -bytecode compile(sexp params, sexp obj, env e, sexp fv, sexp sv, int done_p); -void analyze_app (sexp obj, bytecode *bc, unsigned int *i, - env e, sexp params, sexp fv, sexp sv, unsigned int *d); -void analyze_lambda (sexp name, sexp formals, sexp body, - bytecode *bc, unsigned int *i, env e, - sexp params, sexp fv, sexp sv, unsigned int *d); -void analyze_var_ref (sexp name, bytecode *bc, unsigned int *i, env e, - 
sexp params, sexp fv, sexp sv, unsigned int *d); - -void analyze(sexp obj, bytecode *bc, unsigned int *i, env e, - sexp params, sexp fv, sexp sv, unsigned int *d) { - int tmp1, tmp2; - env e2 = e; - sexp o1, o2, cell; - - if (SEXP_PAIRP(obj)) { - /* fprintf(stderr, ":: pair\n"); */ - if (SEXP_SYMBOLP(SEXP_CAR(obj))) { - fprintf(stderr, ":: symbol application\n"); - o1 = env_cell(e, SEXP_CAR(obj)); - /* fprintf(stderr, ":: => %p\n", o1); */ - if (! o1) - errx(1, "unknown operator: %s", SEXP_CAR(obj)); - o1 = SEXP_CDR(o1); - /* fprintf(stderr, ":: => %p\n", o1); */ - if (SEXP_COREP(o1)) { - /* core form */ - fprintf(stderr, ":: core form\n"); - switch (((core_form)o1)->code) { - case CORE_LAMBDA: - fprintf(stderr, ":: lambda\n"); - analyze_lambda(SEXP_FALSE, SEXP_CADR(obj), SEXP_CDDR(obj), - bc, i, e, params, fv, sv, d); - break; - case CORE_DEFINE: - fprintf(stderr, "compiling global set: %p\n", SEXP_CADR(obj)); - if ((((core_form)o1)->code == CORE_DEFINE) - && SEXP_PAIRP(SEXP_CADR(obj))) { - analyze_lambda(SEXP_CAR(SEXP_CADR(obj)), - SEXP_CDR(SEXP_CADR(obj)), - SEXP_CDDR(obj), - bc, i, e, params, fv, sv, d); - } else { - analyze(SEXP_CADDR(obj), bc, i, e, params, fv, sv, d); - } - emit(bc, i, OP_GLOBAL_SET); - emit_word(bc, i, (unsigned long) (SEXP_PAIRP(SEXP_CADR(obj)) - ? 
SEXP_CAR(SEXP_CADR(obj)) - : SEXP_CADR(obj))); - emit(bc, i, OP_PUSH); - (*d)++; - emit_word(bc, i, (unsigned long) SEXP_UNDEF); - break; - case CORE_SET: - fprintf(stderr, "set!: "); write_sexp(stderr, SEXP_CADR(obj)); - fprintf(stderr, " sv: "); write_sexp(stderr, sv); - fprintf(stderr, "\n"); - analyze(SEXP_CADDR(obj), bc, i, e, params, fv, sv, d); - analyze_var_ref(SEXP_CADR(obj), bc, i, e, params, fv, SEXP_NULL, d); - emit(bc, i, OP_SET_CAR); - break; - case CORE_BEGIN: - for (o2 = SEXP_CDR(obj); SEXP_PAIRP(o2); o2 = SEXP_CDR(o2)) { - analyze(SEXP_CAR(o2), bc, i, e, params, fv, sv, d); - if (SEXP_PAIRP(SEXP_CDR(o2))) emit(bc, i, OP_DROP); - } - break; - case CORE_IF: - fprintf(stderr, "test clause: %d\n", *i); - analyze(SEXP_CADR(obj), bc, i, e, params, fv, sv, d); - emit(bc, i, OP_JUMP_UNLESS); /* jumps if test fails */ - tmp1 = *i; - emit(bc, i, 0); - fprintf(stderr, "pass clause: %d\n", *i); - analyze(SEXP_CADDR(obj), bc, i, e, params, fv, sv, d); - emit(bc, i, OP_JUMP); - tmp2 = *i; - emit(bc, i, 0); - ((signed char*) (*bc)->data)[tmp1] = (*i)-tmp1-1; /* patch */ - fprintf(stderr, "fail clause: %d\n", *i); - if (SEXP_PAIRP(SEXP_CDDDR(obj))) { - analyze(SEXP_CADDDR(obj), bc, i, e, params, fv, sv, d); - } else { - emit(bc, i, OP_PUSH); - (*d)++; - emit_word(bc, i, (unsigned long) SEXP_UNDEF); - } - ((signed char*) (*bc)->data)[tmp2] = (*i)-tmp2-1; /* patch */ - break; - case CORE_QUOTE: - emit(bc, i, OP_PUSH); - (*d)++; - emit_word(bc, i, (unsigned long)SEXP_CADR(obj)); - break; - default: - errx(1, "unknown core form: %s", ((core_form)o1)->code); - } - } else if (SEXP_OPCODEP(o1)) { - fprintf(stderr, ":: opcode\n"); - /* direct opcode */ - /* verify arity */ - switch (((opcode)o1)->op_class) { - case OPC_TYPE_PREDICATE: - case OPC_PREDICATE: - case OPC_ARITHMETIC: - case OPC_ARITHMETIC_INV: - case OPC_ARITHMETIC_CMP: - if (SEXP_NULLP(SEXP_CDR(obj))) { - errx(1, "unknown opcode class: %d", ((opcode)o1)->op_class); - } else if (SEXP_NULLP(SEXP_CDDR(obj))) { - 
if (((opcode)o1)->op_class == OPC_ARITHMETIC_INV) { - analyze(SEXP_CADR(obj), bc, i, e, params, fv, sv, d); - emit(bc, i, ((opcode)o1)->op_inverse); - } else { - analyze(SEXP_CADR(obj), bc, i, e, params, fv, sv, d); - } - } else { - /* fprintf(stderr, ":: class: %d\n", ((opcode)o1)->op_class); */ - for (o2 = reverse(SEXP_CDR(obj)); SEXP_PAIRP(o2); o2 = SEXP_CDR(o2)) { - /* fprintf(stderr, ":: arg: %d\n", SEXP_CAR(o2)); */ - analyze(SEXP_CAR(o2), bc, i, e, params, fv, sv, d); - } - fprintf(stderr, ":: name: %d\n", ((opcode)o1)->op_name); - emit(bc, i, ((opcode)o1)->op_name); - (*d) -= length(SEXP_CDDR(obj)); - } - break; - default: - errx(1, "unknown opcode class: %d", ((opcode)o1)->op_class); - } - } else { - /* function call */ - analyze_app(obj, bc, i, e, params, fv, sv, d); - } - } else if (SEXP_PAIRP(SEXP_CAR(obj))) { - o2 = env_cell(e, SEXP_CAAR(obj)); -/* if (o2 */ -/* && SEXP_COREP(SEXP_CDR(o2)) */ -/* && (((core_form)SEXP_CDR(o2))->code == CORE_LAMBDA)) { */ -/* /\* let *\/ */ -/* } else { */ - /* computed application */ - analyze_app(obj, bc, i, e, params, fv, sv, d); -/* } */ - } else { - errx(1, "invalid operator: %s", SEXP_CAR(obj)); - } - } else if (SEXP_SYMBOLP(obj)) { - analyze_var_ref(obj, bc, i, e, params, fv, sv, d); - } else { - fprintf(stderr, "push: %d\n", (unsigned long)obj); - emit(bc, i, OP_PUSH); - emit_word(bc, i, (unsigned long)obj); - (*d)++; - } -} - -void analyze_var_ref (sexp obj, bytecode *bc, unsigned int *i, env e, - sexp params, sexp fv, sexp sv, unsigned int *d) { - int tmp; - /* variable reference */ - /* cell = env_cell(e, obj); */ - fprintf(stderr, "symbol lookup, param length: %d sv: ", length(params)); - write_sexp(stderr, sv); - fprintf(stderr, "\n"); - if ((tmp = list_index(params, obj)) >= 0) { - fprintf(stderr, "compiling local ref: %p => %d (d = %d)\n", obj, tmp, *d); - emit(bc, i, OP_STACK_REF); - emit_word(bc, i, tmp + *d + 4); - (*d)++; - } else if ((tmp = list_index(fv, obj)) >= 0) { - fprintf(stderr, "compiling 
closure ref: %p => %d\n", obj, tmp); - emit(bc, i, OP_CLOSURE_REF); - emit_word(bc, i, tmp); - (*d)++; - } else { - fprintf(stderr, "compiling global ref: %p\n", obj); - emit(bc, i, OP_GLOBAL_REF); - emit_word(bc, i, (unsigned long) obj); - (*d)++; - } - if (list_index(sv, obj) >= 0) { - fprintf(stderr, "mutable variables, fetching CAR\n"); - emit(bc, i, OP_CAR); - } -} - -void analyze_app (sexp obj, bytecode *bc, unsigned int *i, - env e, sexp params, sexp fv, sexp sv, unsigned int *d) { - sexp o1; - unsigned long len = length(SEXP_CDR(obj)); - - /* push the arguments onto the stack */ - for (o1 = reverse(SEXP_CDR(obj)); SEXP_PAIRP(o1); o1 = SEXP_CDR(o1)) { - analyze(SEXP_CAR(o1), bc, i, e, params, fv, sv, d); - } - - /* push the operator onto the stack */ - analyze(SEXP_CAR(obj), bc, i, e, params, fv, sv, d); - - /* make the call */ - emit(bc, i, OP_CALL); - emit_word(bc, i, (unsigned long) make_integer(len)); -} - -sexp free_vars (env e, sexp formals, sexp obj, sexp fv) { - sexp o1; - if (SEXP_SYMBOLP(obj)) { - if (env_global_p(e, obj) - || (list_index(formals, obj) >= 0) - || (list_index(fv, obj) >= 0)) - return fv; - else - return cons(obj, fv); - } else if (SEXP_PAIRP(obj)) { - if (SEXP_SYMBOLP(SEXP_CAR(obj))) { - if ((o1 = env_cell(e, SEXP_CAR(obj))) - && SEXP_COREP(o1) - && (((core_form)SEXP_CDR(o1))->code == CORE_LAMBDA)) { - return free_vars(e, SEXP_CADR(obj), SEXP_CADDR(obj), fv); - } - } - while (SEXP_PAIRP(obj)) { - fv = free_vars(e, formals, SEXP_CAR(obj), fv); - obj = SEXP_CDR(obj); - } - return fv; - } else { - return fv; - } -} - -sexp set_vars (env e, sexp formals, sexp obj, sexp sv) { - sexp tmp; - if (SEXP_NULLP(formals)) - return sv; - if (SEXP_PAIRP(obj)) { - if (SEXP_SYMBOLP(SEXP_CAR(obj))) { - if ((tmp = env_cell(e, SEXP_CAR(obj))) && SEXP_COREP(SEXP_CDR(tmp))) { - if (((core_form)SEXP_CDR(tmp))->code == CORE_LAMBDA) { - formals = lset_diff(formals, SEXP_CADR(obj)); - return set_vars(e, formals, SEXP_CADDR(obj), sv); - } else if 
(((core_form)SEXP_CDR(tmp))->code == CORE_SET) { - if ((list_index(formals, SEXP_CADR(obj)) >= 0) - && ! (list_index(sv, SEXP_CADR(obj)) >= 0)) { - fprintf(stderr, "found set! "); write_sexp(stderr, SEXP_CADR(obj)); - fprintf(stderr, "\n"); - sv = cons(SEXP_CADR(obj), sv); - return set_vars(e, formals, SEXP_CADDR(obj), sv); - } - } - } - } - while (SEXP_PAIRP(obj)) { - sv = set_vars(e, formals, SEXP_CAR(obj), sv); - obj = SEXP_CDR(obj); - } - } - return sv; -} - -void analyze_lambda (sexp name, sexp formals, sexp body, - bytecode *bc, unsigned int *i, env e, - sexp params, sexp fv, sexp sv, unsigned int *d) { - sexp obj; - sexp fv2 = free_vars(e, formals, body, SEXP_NULL), ls; - env e2 = extend_env_closure(e, formals); - int k; - fprintf(stderr, "%d free-vars\n", length(fv2)); - write_sexp(stderr, fv2); - fprintf(stderr, "\n"); - obj = (sexp) compile(formals, body, e2, fv2, sv, 0); - emit(bc, i, OP_PUSH); - emit_word(bc, i, (unsigned long) SEXP_UNDEF); - emit(bc, i, OP_PUSH); - emit_word(bc, i, (unsigned long) make_integer(length(fv2))); - emit(bc, i, OP_MAKE_VECTOR); - (*d)++; - for (ls=fv2, k=0; SEXP_PAIRP(ls); ls=SEXP_CDR(ls), k++) { - analyze_var_ref(SEXP_CAR(ls), bc, i, e, params, fv, SEXP_NULL, d); - emit(bc, i, OP_PUSH); - emit_word(bc, i, (unsigned long) make_integer(k)); - emit(bc, i, OP_STACK_REF); - emit_word(bc, i, 3); - emit(bc, i, OP_VECTOR_SET); - emit(bc, i, OP_DROP); - (*d)--; - } - emit(bc, i, OP_PUSH); - emit_word(bc, i, (unsigned long) obj); - emit(bc, i, OP_MAKE_PROCEDURE); -} - -sexp vm(bytecode bc, env e, sexp* stack, unsigned int top) { - unsigned char *ip=bc->data; - sexp cp, tmp; - int i; - - loop: - /* fprintf(stderr, "opcode: %d, ip: %d\n", *ip, ip); */ - /* print_bytecode(bc); */ - switch (*ip++) { - case OP_NOOP: - fprintf(stderr, "noop\n"); - break; - case OP_GLOBAL_REF: - fprintf(stderr, "global ref: ip: %p => %p: ", ip, ((sexp*)ip)[0]); - fflush(stderr); - write_sexp(stderr, ((sexp*)ip)[0]); - fprintf(stderr, "\n"); - tmp = 
env_cell(e, ((sexp*)ip)[0]); - stack[top++]=SEXP_CDR(tmp); - ip += sizeof(sexp); - break; - case OP_GLOBAL_SET: - fprintf(stderr, "global set: %p: ", ((sexp*)ip)[0]); - fflush(stderr); - write_sexp(stderr, ((sexp*)ip)[0]); - fprintf(stderr, "\n"); - env_define(e, ((sexp*)ip)[0], stack[--top]); - ip += sizeof(sexp); - break; - case OP_STACK_REF: - fprintf(stderr, "stack ref: ip=%p, %d - %d => ", - ip, top, (unsigned long) ((sexp*)ip)[0]); - fflush(stderr); - write_sexp(stderr, stack[top - (unsigned int) ((sexp*)ip)[0]]); - fprintf(stderr, "\n"); - stack[top] = stack[top - (unsigned int) ((sexp*)ip)[0]]; - ip += sizeof(sexp); - top++; - break; - case OP_STACK_SET: - stack[top - (unsigned int) ((sexp*)ip)[0]] = stack[top-1]; - stack[top-1] = SEXP_UNDEF; - ip += sizeof(sexp); - break; - case OP_CLOSURE_REF: - fprintf(stderr, "closure-ref %d => ", ((sexp*)ip)[0]); - fflush(stderr); - write_sexp(stderr, vector_ref(cp,((sexp*)ip)[0])); - fprintf(stderr, "\n"); - stack[top++]=vector_ref(cp,((sexp*)ip)[0]); - ip += sizeof(sexp); - break; -/* case OP_CLOSURE_SET: */ -/* cp[*ip++]=stack[--top]; */ -/* break; */ - case OP_VECTOR_REF: - stack[top-2]=vector_ref(stack[top-1], stack[top-2]); - top--; - break; - case OP_VECTOR_SET: - fprintf(stderr, "vector-set! 
%p %d => ", stack[top-1], unbox_integer(stack[top-2])); - write_sexp(stderr, stack[top-3]); - fprintf(stderr, "\n"); - vector_set(stack[top-1], stack[top-2], stack[top-3]); - stack[top-3]=SEXP_UNDEF; - top-=2; - break; - case OP_MAKE_PROCEDURE: - stack[top-2]=make_procedure(stack[top-1], stack[top-2]); - top--; - break; - case OP_MAKE_VECTOR: - stack[top-2]=make_vector(unbox_integer(stack[top-1]), stack[top-2]); - top--; - break; - case OP_PUSH: - /* fprintf(stderr, " (push)\n"); */ - stack[top++]=((sexp*)ip)[0]; - ip += sizeof(sexp); - break; - case OP_DUP: - stack[top]=stack[top-1]; - top++; - break; - case OP_DROP: - top--; - break; - case OP_SWAP: - tmp = stack[top-2]; - stack[top-2]=stack[top-1]; - stack[top-1]=tmp; - break; - case OP_CAR: - stack[top-1]=car(stack[top-1]); - break; - case OP_CDR: - stack[top-1]=cdr(stack[top-1]); - break; - case OP_SET_CAR: - set_car(stack[top-1], stack[top-2]); - stack[top-2]=SEXP_UNDEF; - top--; - break; - case OP_SET_CDR: - set_cdr(stack[top-1], stack[top-2]); - stack[top-2]=SEXP_UNDEF; - top--; - break; - case OP_CONS: - stack[top-2]=cons(stack[top-1], stack[top-2]); - top--; - break; - case OP_ADD: - fprintf(stderr, "OP_ADD %d %d\n", stack[top-1], stack[top-2]); - stack[top-2]=sexp_add(stack[top-1],stack[top-2]); - top--; - break; - case OP_SUB: - stack[top-2]=sexp_sub(stack[top-1],stack[top-2]); - top--; - break; - case OP_MUL: - stack[top-2]=sexp_mul(stack[top-2],stack[top-1]); - top--; - break; - case OP_DIV: - stack[top-2]=sexp_div(stack[top-2],stack[top-1]); - top--; - break; - case OP_MOD: - stack[top-2]=sexp_mod(stack[top-2],stack[top-1]); - top--; - break; - case OP_LT: - stack[top-2]=((stack[top-2] < stack[top-1]) ? SEXP_TRUE : SEXP_FALSE); - top--; - break; - case OP_CALL: - fprintf(stderr, "CALL\n"); - i = (unsigned long) ((sexp*)ip)[0]; - tmp = stack[top-1]; - if (! 
SEXP_PROCEDUREP(tmp)) - errx(2, "non-procedure application: %p", tmp); - stack[top-1] = (sexp) i; - stack[top] = (sexp) (ip+4); - stack[top+1] = cp; - top+=2; - bc = procedure_code(tmp); - print_bytecode(bc); - ip = bc->data; - cp = procedure_vars(tmp); - fprintf(stderr, "... calling procedure at %p\ncp: ", ip); - write_sexp(stderr, cp); - fprintf(stderr, "\n"); - /* print_stack(stack, top); */ - break; - case OP_JUMP_UNLESS: - fprintf(stderr, "JUMP UNLESS, stack top is %d\n", stack[top-1]); - if (stack[--top] == SEXP_FALSE) { - fprintf(stderr, "test passed, jumping to + %d => %d\n", ((signed char*)ip)[0], ip + ((signed char*)ip)[0]); - ip += ((signed char*)ip)[0]; - } else { - fprintf(stderr, "test failed, not jumping\n"); - ip++; - } - break; - case OP_JUMP: - fprintf(stderr, "jumping to + %d => %d\n", ((signed char*)ip)[0], ip + ((signed char*)ip)[0]); - ip += ((signed char*)ip)[0]; - break; - case OP_RET: - fprintf(stderr, "returning @ %d: ", top-1); - fflush(stderr); - write_sexp(stderr, stack[top-1]); - fprintf(stderr, "...\n"); - print_stack(stack, top); - /* top-1 */ - /* stack: args ... n ip result */ - cp = stack[top-2]; - fprintf(stderr, "1\n"); - ip = (unsigned char*) stack[top-3]; - fprintf(stderr, "2\n"); - i = unbox_integer(stack[top-4]); - fprintf(stderr, "3 (i=%d)\n", i); - stack[top-i-4] = stack[top-1]; - fprintf(stderr, "4\n"); - top = top-i-3; - fprintf(stderr, "... 
done returning\n"); - break; - case OP_DONE: - fprintf(stderr, "finally returning @ %d: ", top-1); - fflush(stderr); - write_sexp(stderr, stack[top-1]); - fprintf(stderr, "\n"); - goto end_loop; - default: - fprintf(stderr, "unknown opcode: %d\n", *(ip-1)); - stack[top] = SEXP_ERROR; - goto end_loop; - } - fprintf(stderr, "looping\n"); - goto loop; - - end_loop: - return stack[top-1]; -} - -bytecode compile(sexp params, sexp obj, env e, sexp fv, sexp sv, int done_p) { - unsigned int i = 0, j, d = 0; - bytecode bc = (bytecode) malloc(sizeof(struct bytecode)+INIT_BCODE_SIZE); - sexp sv2 = set_vars(e, params, obj, SEXP_NULL), ls; - fprintf(stderr, "set-vars: "); write_sexp(stderr, sv2); fprintf(stderr, "\n"); - bc->tag = SEXP_BYTECODE; - bc->len = INIT_BCODE_SIZE; - fprintf(stderr, "analyzing\n"); - for (ls=params; SEXP_PAIRP(ls); ls=SEXP_CDR(ls)) { - if ((j = list_index(sv2, SEXP_CAR(ls)) >= 0)) { - fprintf(stderr, "consing mutable var\n"); - emit(&bc, &i, OP_PUSH); - emit_word(&bc, &i, (unsigned long) SEXP_NULL); - emit(&bc, &i, OP_STACK_REF); - emit_word(&bc, &i, j+3); - emit(&bc, &i, OP_CONS); - emit(&bc, &i, OP_STACK_SET); - emit_word(&bc, &i, j+4); - emit(&bc, &i, OP_DROP); - } - } - sv = append(sv2, sv); - for ( ; SEXP_PAIRP(obj); obj=SEXP_CDR(obj)) { - fprintf(stderr, "loop: "); write_sexp(stderr, obj); fprintf(stderr, "\n"); - analyze(SEXP_CAR(obj), &bc, &i, e, params, fv, sv, &d); - if (SEXP_PAIRP(SEXP_CDR(obj))) emit(&bc, &i, OP_DROP); - } - emit(&bc, &i, done_p ? 
OP_DONE : OP_RET); - /* fprintf(stderr, "shrinking\n"); */ - shrink_bcode(&bc, i); - fprintf(stderr, "done compiling:\n"); - print_bytecode(bc); - disasm(bc); - return bc; -} - -sexp eval_in_stack(sexp obj, env e, sexp* stack, unsigned int top) { - bytecode bc = compile(SEXP_NULL, cons(obj, SEXP_NULL), e, SEXP_NULL, SEXP_NULL, 1); - fprintf(stderr, "evaling\n"); - return vm(bc, e, stack, top); -} - -sexp eval(sexp obj, env e) { - sexp* stack = (sexp*) malloc(sizeof(sexp) * INIT_STACK_SIZE); - sexp res = eval_in_stack(obj, e, stack, 0); - free(stack); - return res; -} - -int main (int argc, char **argv) { - sexp obj, res, *stack; - env e; - - sexp_init(); - e = make_standard_env(); - stack = (sexp*) malloc(sizeof(sexp) * INIT_STACK_SIZE); - - /* repl */ - fprintf(stdout, "> "); - fflush(stdout); - while ((obj = read_sexp(stdin)) != SEXP_EOF) { - write_sexp(stdout, obj); - fprintf(stdout, "\n => "); - res = eval_in_stack(obj, e, stack, 0); - /* fprintf(stderr, " (=> %d)\n", res); */ - write_sexp(stdout, res); - fprintf(stdout, "\n> "); - fflush(stdout); - } - fprintf(stdout, "\n"); - return 0; -} - diff --git a/sexp.h b/sexp.h new file mode 100644 index 00000000..abd26c7a --- /dev/null +++ b/sexp.h @@ -0,0 +1,195 @@ +/* sexp.h -- header for sexp library */ +/* Copyright (c) 2009 Alex Shinn. All rights reserved. */ +/* BSD-style license: http://synthcode.com/license.txt */ + +#ifndef SEXP_H +#define SEXP_H + +#include +#include +#include +#include + +#include "config.h" + +#ifdef HAVE_ERR_H +#include +#else +/* requires that msg be a string literal */ +#define errx(code, msg, ...) (fprintf(stderr,msg"\n",__VA_ARGS__), exit(code)) +#endif + +#define sexp_debug(msg, obj, ...) 
(fprintf(stderr,msg,__VA_ARGS__), fflush(stderr), write_sexp(stderr, obj), fprintf(stderr,"\n")) + +#ifdef USE_BOEHM +#include "gc/include/gc.h" +#define SEXP_ALLOC GC_malloc +#define SEXP_ALLOC_ATOMIC GC_malloc_atomic +#define SEXP_REALLOC GC_realloc +#define SEXP_FREE GC_free +#else +#define SEXP_ALLOC malloc +#define SEXP_ALLOC_ATOMIC SEXP_ALLOC +#define SEXP_REALLOC realloc +#define SEXP_FREE free +#endif + +#define SEXP_NEW() ((sexp) SEXP_ALLOC(sizeof(struct sexp_struct))) + +/* tagging system + * bits end in 00: pointer + * 01: fixnum + * 011: symbol + * 111: immediate symbol + * 0110: char + * 1110: other immediate object (NULL, TRUE, FALSE) + */ + +#define SEXP_FIXNUM_BITS 2 +#define SEXP_IMMEDIATE_BITS 3 +#define SEXP_EXTENDED_BITS 4 + +#define SEXP_FIXNUM_MASK 3 +#define SEXP_IMMEDIATE_MASK 7 +#define SEXP_EXTENDED_MASK 15 + +#define SEXP_POINTER_TAG 0 +#define SEXP_FIXNUM_TAG 1 +#define SEXP_LSYMBOL_TAG 3 +#define SEXP_ISYMBOL_TAG 7 +#define SEXP_CHAR_TAG 6 + +enum sexp_types { + SEXP_FIXNUM, + SEXP_CHAR, + SEXP_BOOLEAN, + SEXP_PAIR, + SEXP_SYMBOL, + SEXP_STRING, + SEXP_VECTOR, + /* the following are used only by the evaluator */ + SEXP_PROCEDURE, + SEXP_ENV, + SEXP_BYTECODE, + SEXP_CORE, + SEXP_OPCODE, +}; + +typedef struct sexp_struct { + char tag; + void *data1; + void *data2; +} *sexp; + +#define MAKE_IMMEDIATE(n) ((sexp) ((n<<4) + 14)) +#define SEXP_NULL MAKE_IMMEDIATE(0) +#define SEXP_FALSE MAKE_IMMEDIATE(1) +#define SEXP_TRUE MAKE_IMMEDIATE(2) +#define SEXP_EOF MAKE_IMMEDIATE(3) +#define SEXP_UNDEF MAKE_IMMEDIATE(4) +#define SEXP_ERROR MAKE_IMMEDIATE(5) +#define SEXP_CLOSE MAKE_IMMEDIATE(6) /* internal use */ +#define SEXP_RAWDOT MAKE_IMMEDIATE(7) /* internal use */ + +#define SEXP_NULLP(x) ((x) == SEXP_NULL) +#define SEXP_POINTERP(x) (((unsigned long)(x) & SEXP_FIXNUM_MASK) == SEXP_POINTER_TAG) +#define SEXP_INTEGERP(x) (((unsigned long)(x) & SEXP_FIXNUM_MASK) == SEXP_FIXNUM_TAG) +#define SEXP_ISYMBOLP(x) (((unsigned long)(x) & 
SEXP_IMMEDIATE_MASK) == SEXP_ISYMBOL_TAG) +#define SEXP_CHARP(x) (((unsigned long)(x) & SEXP_EXTENDED_MASK) == SEXP_CHAR_TAG) +#define SEXP_BOOLEANP(x) (((x) == SEXP_TRUE) || ((x) == SEXP_FALSE)) + +#define SEXP_PAIRP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_PAIR) +#define SEXP_STRINGP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_STRING) +#define SEXP_LSYMBOLP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_SYMBOL) +#define SEXP_VECTORP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_VECTOR) +#define SEXP_PROCEDUREP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_PROCEDURE) +#define SEXP_ENVP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_ENV) +#define SEXP_BYTECODEP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag ==SEXP_BYTECODE) +#define SEXP_COREP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_CORE) +#define SEXP_OPCODEP(x) (SEXP_POINTERP(x) && ((sexp)(x))->tag == SEXP_OPCODE) + +#define SEXP_SYMBOLP(x) (SEXP_ISYMBOLP(x) || SEXP_LSYMBOLP(x)) + +#ifdef USE_HUFF_SYMS +#define SEXP_DOTP(x) (((unsigned long)(x))==((0x5D00<>SEXP_FIXNUM_BITS) +#define make_character(n) ((sexp) (((long) n<>SEXP_EXTENDED_BITS) + +#define vector_length(x) ((unsigned long) x->data1) +#define vector_data(x) ((sexp*) x->data2) + +#define vector_ref(x, i) (vector_data(x)[unbox_integer(i)]) +#define vector_set(x, i, v) (vector_data(x)[unbox_integer(i)] = (v)) + +#define procedure_code(x) ((bytecode) ((sexp)x)->data1) +#define procedure_vars(x) ((sexp) ((sexp)x)->data2) + +#define string_length(x) ((unsigned long) x->data1) +#define string_data(x) ((char*) x->data2) + +#define symbol_pointer(x) ((sexp) (((unsigned long)x)-SEXP_LSYMBOL_TAG)) +#define symbol_length(x) ((unsigned long) (symbol_pointer(x)->data1)) +#define symbol_data(x) ((char*) (symbol_pointer(x)->data2)) + +#define sexp_add(a, b) ((sexp)(((unsigned long)a)+((unsigned long)b)-SEXP_FIXNUM_TAG)) +#define sexp_sub(a, b) ((sexp)(((unsigned long)a)-((unsigned long)b)+SEXP_FIXNUM_TAG)) +#define sexp_mul(a, b) 
((sexp)((((((unsigned long)a)-SEXP_FIXNUM_TAG)*(((unsigned long)b)>>SEXP_FIXNUM_BITS))+SEXP_FIXNUM_TAG))) +#define sexp_div(a, b) ((sexp)(((((unsigned long)a)>>SEXP_FIXNUM_BITS)/(((unsigned long)b)>>SEXP_FIXNUM_BITS))<>SEXP_FIXNUM_BITS)%(((unsigned long)b)>>SEXP_FIXNUM_BITS))<data1) +#define SEXP_CDR(x) (((sexp)x)->data2) + +#define SEXP_CAAR(x) (SEXP_CAR(SEXP_CAR(x))) +#define SEXP_CADR(x) (SEXP_CAR(SEXP_CDR(x))) +#define SEXP_CDAR(x) (SEXP_CDR(SEXP_CAR(x))) +#define SEXP_CDDR(x) (SEXP_CDR(SEXP_CDR(x))) + +#define SEXP_CADDR(x) (SEXP_CAR(SEXP_CDDR(x))) +#define SEXP_CDDDR(x) (SEXP_CDR(SEXP_CDDR(x))) +#define SEXP_CADDDR(x) (SEXP_CADR(SEXP_CDDR(x))) +#define SEXP_CDDDDR(x) (SEXP_CDDR(SEXP_CDDR(x))) + +sexp cons(sexp head, sexp tail); +sexp car(sexp obj); +sexp cdr(sexp obj); +sexp set_car(sexp obj, sexp val); +sexp set_cdr(sexp obj, sexp val); + +int listp(sexp obj); +int list_index(sexp ls, sexp elt); +sexp lset_diff(sexp a, sexp b); +sexp reverse(sexp ls); +sexp nreverse(sexp ls); +sexp append(sexp a, sexp b); +sexp list(int count, ...); +sexp memq(sexp x, sexp ls); +sexp assq (sexp x, sexp ls); +unsigned long length(sexp ls); +sexp make_string(char *str); +int string_hash(char *str, int acc); +sexp intern(char *str); +sexp make_vector(unsigned long len, sexp dflt); +sexp list_to_vector(sexp ls); +sexp vector(int count, ...); +void write_sexp(FILE *out, sexp obj); +void free_sexp(sexp obj); +char* read_string(FILE *in); +char* read_symbol(FILE *in, int init); +int read_number(FILE *in); +sexp read_sexp_raw(FILE *in); +sexp read_sexp(FILE *in); +void sexp_init(); + +#endif /* ! SEXP_H */ +