initial unicode support

This commit is contained in:
Alex Shinn 2010-07-12 00:00:31 +09:00
parent 1ecc2bb55c
commit dbb4db1728
10 changed files with 349 additions and 41 deletions

View file

@ -5,18 +5,23 @@ The `dynamic-wind' implementation is adapted from the implementation
in the appendix to the Scheme48 reference manual, reportedly first
written by Chris Hanson and John Lamping.
Thanks to the following people for patches:
Thanks to the following people for patches and bug reports:
* Alexander Shendi
* Andreas Rottman
* Bruno Deferrari
* Derrick Eddington
* Eduardo Cavazos
* Felix Winkelmann
* Gregor Klinke
* Jeremy Wolff
* Jeronimo Pellegrini
* John Cowan
* John Samsa
* Lars J Aas
* Lorenzo Campedelli
* Michal Kowalski (sladegen)
* Taylor Venable
If you would prefer not to be listed, or are one of the users listed
without a full name, please contact me. If you've made a contribution

21
TODO
View file

@ -8,7 +8,12 @@
** DONE exceptions
- State "DONE" [2009-04-09 Thu 14:45]
** TODO native x86 backend
API redesign in preparation complete, initial
tests on native factorial and closures working.
** TODO fasl/image files
sexp_copy_context() can form the basis for images,
FASL for arbitrary modules will need additional
help with resolving external references.
** DONE shared stack on EVAL
- State "DONE" [2009-12-26 Sat 08:22]
@ -58,8 +63,17 @@
* runtime
** DONE bignums
- State "DONE" [2009-07-07 Tue 14:42]
** TODO unicode
** TODO threads
** DONE unicode
- State "DONE" from "TODO" [2010-07-11 Sun 23:58]
Supported with UTF-8 strings, string-ref is O(n) and
string-set! may need to reallocate the whole string.
string-cursor-ref can be used for O(1) string access.
** DONE threads
- State "DONE" from "TODO" [2010-07-11 Sun 15:31]
VM now supports an optional hook for green threads,
and a SRFI-18 interface is provided as a separate module.
I/O operations will currently block all threads though,
this needs to be addressed.
** DONE virtual ports
- State "DONE" [2010-01-02 Sat 20:12]
** DONE dynamic-wind
@ -107,7 +121,8 @@
** DONE loop library
- State "DONE" [2009-12-08 Tue 14:54]
** TODO network interface
** TODO posix interface
** DONE posix interface
- State "DONE" from "TODO" [2010-07-11 Sun 15:36]
Splitting this into several parts.
*** DONE filesystem interface
- State "DONE" [2009-12-26 Sat 01:50]

157
eval.c
View file

@ -43,7 +43,7 @@ void sexp_warn_undefs (sexp ctx, sexp from, sexp to) {
sexp x;
for (x=from; sexp_pairp(x) && x!=to; x=sexp_env_next_cell(x))
if (sexp_cdr(x) == SEXP_UNDEF)
sexp_warn(ctx, "reference to undefined variable", sexp_car(x));
sexp_warn(ctx, "reference to undefined variable: ", sexp_car(x));
}
@ -299,14 +299,6 @@ static sexp sexp_make_lit (sexp ctx, sexp value) {
return res;
}
/************************* backend ***************************/
#if SEXP_USE_NATIVE_X86
#include "opt/x86.c"
#else
#include "vm.c"
#endif
/****************************** contexts ******************************/
#define SEXP_STACK_SIZE (sexp_sizeof(stack)+sizeof(sexp)*SEXP_INIT_STACK_SIZE)
@ -1123,6 +1115,142 @@ static sexp sexp_string_cmp_op (sexp ctx sexp_api_params(self, n), sexp str1, se
return sexp_make_fixnum(diff);
}
#if SEXP_USE_UTF8_STRINGS
static int sexp_utf8_initial_byte_count(int c) {
if (c < 0xC0) return 1;
if (c < 0xE0) return 2;
return ((c>>4)&1)+3;
}
static int sexp_utf8_char_byte_count(int c) {
if (c < 0x80) return 1;
if (c < 0x800) return 2;
if (c < 0x10000) return 3;
return 4;
}
static int sexp_string_utf8_length (unsigned char *p, int len) {
unsigned char *q = p+len;
int i;
for (i=0; p<q; i++)
p += sexp_utf8_initial_byte_count(*p);
return i;
}
sexp sexp_string_index_to_offset (sexp ctx sexp_api_params(self, n), sexp str, sexp index) {
sexp_sint_t i, j, limit;
unsigned char *p;
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, index);
p = (unsigned char*)sexp_string_data(str);
limit = sexp_string_length(str);
for (j=0, i=sexp_unbox_fixnum(index); i>0 && j<limit; i--)
j += sexp_utf8_initial_byte_count(p[j]);
if (i>0)
return sexp_user_exception(ctx, self, "string-index->offset: index out of range", index);
return sexp_make_fixnum(j);
}
sexp sexp_string_utf8_ref (sexp ctx, sexp str, sexp i) {
unsigned char *p=(unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(i);
if (*p < 0x80)
return sexp_make_character(*p);
else if ((*p < 0xC0) || (*p > 0xF7))
return sexp_user_exception(ctx, NULL, "string-ref: invalid utf8 byte", i);
else if (*p < 0xE0)
return sexp_make_character(((p[0]&0x3F)<<6) + (p[1]&0x3F));
else if (*p < 0xF0)
return sexp_make_character(((p[0]&0x1F)<<12) + ((p[1]&0x3F)<<6) + (p[2]&0x3F));
else
return sexp_make_character(((p[0]&0x0F)<<16) + ((p[1]&0x3F)<<6) + ((p[2]&0x3F)<<6) + (p[2]&0x3F));
}
sexp sexp_string_utf8_index_ref (sexp ctx sexp_api_params(self, n), sexp str, sexp i) {
sexp off;
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, i);
off = sexp_string_index_to_offset(ctx sexp_api_pass(self, n), str, i);
if (sexp_exceptionp(off)) return off;
return sexp_string_utf8_ref(ctx, str, off);
}
void sexp_utf8_encode_char (unsigned char* p, int len, int c) {
switch (len) {
case 4: *p++ = (0xF0 + ((c)>>18)); *p++ = (0x80 + ((c>>12)&0x3F));
*p++ = (0x80 + ((c>>6)&0x3F)); *p = (0x80 + (c&0x3F)); break;
case 3: *p++ = (0xE0 + ((c)>>12)); *p++ = (0x80 + ((c>>6)&0x3F));
*p = (0x80 + (c&0x3F)); break;
case 2: *p++ = (0xC0 + ((c)>>6)); *p = (0x80 + (c&0x3F)); break;
default: *p = c; break;
}
}
void sexp_write_utf8_char (sexp ctx, int c, sexp out) {
unsigned char buf[8];
int len = sexp_utf8_char_byte_count(c);
sexp_utf8_encode_char(buf, len, c);
buf[len+1] = 0;
sexp_write_string(ctx, (char*)buf, out);
}
sexp sexp_read_utf8_char (sexp ctx, sexp port, int i) {
if (i >= 0x80) {
if ((i < 0xC0) || (i > 0xF7)) {
return sexp_user_exception(ctx, NULL, "read-char: invalid utf8 byte", sexp_make_fixnum(i));
} else if (i < 0xE0) {
i = ((i&0x3F)<<6) + (sexp_read_char(ctx, port)&0x3F);
} else if (i < 0xF0) {
i = ((i&0x1F)<<12) + ((sexp_read_char(ctx, port)&0x3F)<<6);
i += sexp_read_char(ctx, port)&0x3F;
} else {
i = ((i&0x0F)<<16) + ((sexp_read_char(ctx, port)&0x3F)<<6);
i += (sexp_read_char(ctx, port)&0x3F)<<6;
i += sexp_read_char(ctx, port)&0x3F;
}
}
return sexp_make_character(i);
}
#if SEXP_USE_MUTABLE_STRINGS
void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) {
sexp b;
unsigned char *p, *q;
int i = sexp_unbox_fixnum(index), c = sexp_unbox_character(ch),
old_len, new_len, len;
p = (unsigned char*)sexp_string_data(str) + i;
old_len = sexp_utf8_initial_byte_count(*p);
new_len = sexp_utf8_char_byte_count(c);
if (old_len != new_len) { /* resize bytes if needed */
len = sexp_string_length(str)+(new_len-old_len);
b = sexp_make_bytes(ctx, sexp_make_fixnum(len), SEXP_VOID);
if (! sexp_exceptionp(b)) {
q = (unsigned char*)sexp_bytes_data(b);
memcpy(q, sexp_string_data(str), i);
memcpy(q+i+new_len, p+old_len, len-i-new_len+1);
sexp_string_bytes(str) = b;
p = q + i;
}
sexp_string_length(str) += new_len - old_len;
}
sexp_utf8_encode_char(p, new_len, c);
}
sexp sexp_string_utf8_index_set (sexp ctx sexp_api_params(self, n), sexp str, sexp i, sexp ch) {
sexp off;
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, i);
sexp_assert_type(ctx, sexp_charp, SEXP_CHAR, ch);
off = sexp_string_index_to_offset(ctx sexp_api_pass(self, n), str, i);
if (sexp_exceptionp(off)) return off;
sexp_string_utf8_set(ctx, str, off, ch);
return SEXP_VOID;
}
#endif
#endif
#ifdef PLAN9
#include "opt/plan9.c"
#endif
@ -1438,6 +1566,9 @@ sexp sexp_load_standard_env (sexp ctx, sexp e, sexp version) {
#endif
#if SEXP_USE_BOEHM
sexp_push(ctx, tmp, sym=sexp_intern(ctx, "boehm-gc", -1));
#endif
#if SEXP_USE_UTF8_STRINGS
sexp_push(ctx, tmp, sym=sexp_intern(ctx, "boehm-gc", -1));
#endif
sexp_push(ctx, tmp, sym=sexp_intern(ctx, "chibi", -1));
sexp_env_define(ctx, e, sexp_intern(ctx, "*features*", -1), tmp);
@ -1519,6 +1650,14 @@ sexp sexp_env_copy_op (sexp ctx sexp_api_params(self, n), sexp to, sexp from, se
return SEXP_VOID;
}
/************************* backend ***************************/
#if SEXP_USE_NATIVE_X86
#include "opt/x86.c"
#else
#include "vm.c"
#endif
/************************** eval interface ****************************/
sexp sexp_compile (sexp ctx, sexp x) {

View file

@ -76,6 +76,9 @@ enum sexp_opcode_names {
SEXP_OP_VECTOR_REF,
SEXP_OP_VECTOR_SET,
SEXP_OP_VECTOR_LENGTH,
SEXP_OP_BYTES_REF,
SEXP_OP_BYTES_SET,
SEXP_OP_BYTES_LENGTH,
SEXP_OP_STRING_REF,
SEXP_OP_STRING_SET,
SEXP_OP_STRING_LENGTH,
@ -132,6 +135,7 @@ SEXP_API sexp sexp_compile_error (sexp ctx, const char *message, sexp obj);
SEXP_API sexp sexp_analyze (sexp context, sexp x);
SEXP_API void sexp_stack_trace (sexp ctx, sexp out);
SEXP_API sexp sexp_apply (sexp context, sexp proc, sexp args);
SEXP_API sexp sexp_apply1 (sexp ctx, sexp f, sexp x);
SEXP_API sexp sexp_free_vars (sexp context, sexp x, sexp fv);
SEXP_API int sexp_param_index (sexp lambda, sexp name);
SEXP_API sexp sexp_eval_op (sexp context sexp_api_params(self, n), sexp obj, sexp env);

View file

@ -145,6 +145,16 @@
/* non-immediate symbols in a single list. */
/* #define SEXP_USE_HASH_SYMS 0 */
/* uncomment this to disable UTF-8 string support */
/* The default settings store strings in memory as UTF-8, */
/* and assumes strings passed to/from the C FFI are UTF-8. */
/* #define SEXP_USE_UTF8_STRINGS 0 */
/* uncomment this to disable the string-set! opcode */
/* By default (non-literal) strings are mutable. */
/* Making them immutable allows for packed UTF-8 strings. */
/* #define SEXP_USE_MUTABLE_STRINGS 0 */
/* uncomment this to disable string ports */
/* If disabled some basic functionality such as number->string */
/* will not be available by default. */
@ -201,7 +211,7 @@
/* the default number of opcodes to run each thread for */
#ifndef SEXP_DEFAULT_QUANTUM
#define SEXP_DEFAULT_QUANTUM 1000
#define SEXP_DEFAULT_QUANTUM 500
#endif
/************************************************************************/
@ -230,7 +240,7 @@
#endif
#ifndef SEXP_USE_GREEN_THREADS
#define SEXP_USE_GREEN_THREADS 1
#define SEXP_USE_GREEN_THREADS ! SEXP_USE_NO_FEATURES
#endif
#ifndef SEXP_USE_NATIVE_X86
@ -314,7 +324,7 @@
#endif
#ifndef SEXP_USE_EXTENDED_FCALL
#define SEXP_USE_EXTENDED_FCALL 1
#define SEXP_USE_EXTENDED_FCALL ! SEXP_USE_NO_FEATURES
#endif
#ifndef SEXP_USE_FLONUMS
@ -361,6 +371,21 @@
#define SEXP_USE_DEBUG_VM 0
#endif
#ifndef SEXP_USE_UTF8_STRINGS
#define SEXP_USE_UTF8_STRINGS ! SEXP_USE_NO_FEATURES
#endif
#ifndef SEXP_USE_MUTABLE_STRINGS
#define SEXP_USE_MUTABLE_STRINGS 1
#endif
#if (SEXP_USE_UTF8_STRINGS && SEXP_USE_MUTABLE_STRINGS)
#define SEXP_USE_PACKED_STRINGS 0
#endif
#ifndef SEXP_USE_PACKED_STRINGS
#define SEXP_USE_PACKED_STRINGS 1
#endif
#ifndef SEXP_USE_STRING_STREAMS
#define SEXP_USE_STRING_STREAMS ! SEXP_USE_NO_FEATURES
#endif

View file

@ -83,6 +83,7 @@ enum sexp_types {
SEXP_BOOLEAN,
SEXP_PAIR,
SEXP_SYMBOL,
SEXP_BYTES,
SEXP_STRING,
SEXP_VECTOR,
SEXP_FLONUM,
@ -210,6 +211,15 @@ struct sexp_struct {
struct {
sexp_uint_t length;
char data[];
} bytes;
struct {
#if SEXP_USE_PACKED_STRINGS
sexp_uint_t length;
char data[];
#else
sexp_uint_t offset, length;
sexp bytes;
#endif
} string;
struct {
sexp_uint_t length;
@ -578,12 +588,26 @@ SEXP_API sexp sexp_make_unsigned_integer(sexp ctx, sexp_luint_t x);
#define sexp_procedure_code(x) ((x)->value.procedure.bc)
#define sexp_procedure_vars(x) ((x)->value.procedure.vars)
#define sexp_bytes_length(x) ((x)->value.bytes.length)
#define sexp_bytes_data(x) ((x)->value.bytes.data)
#define sexp_string_length(x) ((x)->value.string.length)
#if SEXP_USE_PACKED_STRINGS
#define sexp_string_data(x) ((x)->value.string.data)
#else
#define sexp_string_bytes(x) ((x)->value.string.bytes)
#define sexp_string_offset(x) ((x)->value.string.offset)
#define sexp_string_data(x) (sexp_bytes_data(sexp_string_bytes(x))+sexp_string_offset(x))
#endif
#define sexp_bytes_ref(x, i) (sexp_make_fixnum((unsigned char)sexp_bytes_data(x)[sexp_unbox_fixnum(i)]))
#define sexp_bytes_set(x, i, v) (sexp_bytes_data(x)[sexp_unbox_fixnum(i)] = sexp_unbox_fixnum(v))
#define sexp_string_ref(x, i) (sexp_make_character((unsigned char)sexp_string_data(x)[sexp_unbox_fixnum(i)]))
#define sexp_string_set(x, i, v) (sexp_string_data(x)[sexp_unbox_fixnum(i)] = sexp_unbox_character(v))
#define sexp_symbol_data(x) ((x)->value.symbol.data)
#define sexp_symbol_length(x) ((x)->value.symbol.length)
#define sexp_symbol_string(x) (x)
#define sexp_port_stream(p) ((p)->value.port.stream)
@ -902,6 +926,7 @@ SEXP_API sexp sexp_memq_op(sexp ctx sexp_api_params(self, n), sexp x, sexp ls);
SEXP_API sexp sexp_assq_op(sexp ctx sexp_api_params(self, n), sexp x, sexp ls);
SEXP_API sexp sexp_length_op(sexp ctx sexp_api_params(self, n), sexp ls);
SEXP_API sexp sexp_c_string(sexp ctx, const char *str, sexp_sint_t slen);
SEXP_API sexp sexp_make_bytes_op(sexp ctx sexp_api_params(self, n), sexp len, sexp i);
SEXP_API sexp sexp_make_string_op(sexp ctx sexp_api_params(self, n), sexp len, sexp ch);
SEXP_API sexp sexp_substring_op (sexp ctx sexp_api_params(self, n), sexp str, sexp start, sexp end);
SEXP_API sexp sexp_string_concatenate_op (sexp ctx sexp_api_params(self, n), sexp str_ls, sexp sep);
@ -980,6 +1005,7 @@ SEXP_API sexp sexp_finalize_c_type (sexp ctx sexp_api_params(self, n), sexp obj)
#define sexp_list_to_vector(ctx, x) sexp_list_to_vector_op(ctx sexp_api_pass(NULL, 1), x)
#define sexp_exception_type(ctx, x) sexp_exception_type_op(ctx sexp_api_pass(NULL, 1), x)
#define sexp_string_to_number(ctx, s, b) sexp_make_string_op(ctx sexp_api_pass(NULL, 2), s, b)
#define sexp_make_bytes(ctx, l, i) sexp_make_bytes_op(ctx sexp_api_pass(NULL, 2), l, i)
#define sexp_make_string(ctx, l, c) sexp_make_string_op(ctx sexp_api_pass(NULL, 2), l, c)
#define sexp_string_cmp(ctx, a, b, c) sexp_string_cmp_op(ctx sexp_api_pass(NULL, 3), a, b, c)
#define sexp_substring(ctx, a, b, c) sexp_substring_op(ctx sexp_api_pass(NULL, 3), a, b, c)

View file

@ -24,8 +24,21 @@ _OP(SEXP_OPC_SETTER, SEXP_OP_SET_CDR, 2, 0, SEXP_PAIR, 0, 0, "set-cdr!", 0, NULL
_OP(SEXP_OPC_GETTER, SEXP_OP_VECTOR_REF,2,0, SEXP_VECTOR, SEXP_FIXNUM, 0,"vector-ref", 0, NULL),
_OP(SEXP_OPC_SETTER, SEXP_OP_VECTOR_SET,3,0, SEXP_VECTOR, SEXP_FIXNUM, 0,"vector-set!", 0, NULL),
_OP(SEXP_OPC_GETTER, SEXP_OP_VECTOR_LENGTH,1,0, SEXP_VECTOR, 0, 0,"vector-length", 0, NULL),
_OP(SEXP_OPC_GETTER, SEXP_OP_BYTES_REF,2,0, SEXP_BYTES, SEXP_FIXNUM, 0,"byte-vector-ref", 0, NULL),
_OP(SEXP_OPC_SETTER, SEXP_OP_BYTES_SET,3,0, SEXP_BYTES, SEXP_FIXNUM, 0,"byte-vector-set!", 0, NULL),
_OP(SEXP_OPC_GETTER, SEXP_OP_BYTES_LENGTH,1,0, SEXP_BYTES, 0, 0,"byte-vector-length", 0, NULL),
#if SEXP_USE_UTF8_STRINGS
_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_REF,2,0, SEXP_STRING, SEXP_FIXNUM, 0,"string-cursor-ref", 0, NULL),
#else
_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_REF,2,0, SEXP_STRING, SEXP_FIXNUM, 0,"string-ref", 0, NULL),
#endif
#if SEXP_USE_MUTABLE_STRINGS
#if SEXP_USE_UTF8_STRINGS
_OP(SEXP_OPC_SETTER, SEXP_OP_STRING_SET,3,0, SEXP_STRING, SEXP_FIXNUM, 0,"string-cursor-set!", 0, NULL),
#else
_OP(SEXP_OPC_SETTER, SEXP_OP_STRING_SET,3,0, SEXP_STRING, SEXP_FIXNUM, 0,"string-set!", 0, NULL),
#endif
#endif
_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_LENGTH,1,0, SEXP_STRING, 0, 0,"string-length", 0, NULL),
_OP(SEXP_OPC_GENERIC, SEXP_OP_FIX2FLO, 1, 0, 0, 0, 0, "exact->inexact", 0, NULL),
_OP(SEXP_OPC_GENERIC, SEXP_OP_FLO2FIX, 1, 0, 0, 0, 0, "inexact->exact", 0, NULL),
@ -101,6 +114,7 @@ _FN4(SEXP_ENV, SEXP_ENV, "%env-copy!", 0, sexp_env_copy_op),
_FN2(SEXP_EXCEPTION, SEXP_OPORT, "print-exception", 0, sexp_print_exception_op),
_FN1(SEXP_EXCEPTION, "exception-type", 0, sexp_exception_type_op),
_FN2OPT(SEXP_FIXNUM, SEXP_CHAR, "make-string", sexp_make_character(' '), sexp_make_string_op),
_FN2OPT(SEXP_FIXNUM, SEXP_FIXNUM, "make-byte-vector", SEXP_ZERO, sexp_make_bytes_op),
_FN2OPT(SEXP_FIXNUM, SEXP_FIXNUM, "string->number", SEXP_TEN, sexp_string_to_number_op),
_FN3(SEXP_STRING, SEXP_STRING, "string-cmp", 0, sexp_string_cmp_op),
_FN3(SEXP_STRING, SEXP_FIXNUM, "substring", 0, sexp_substring_op),
@ -134,6 +148,11 @@ _FN1(0, "floor", 0, sexp_floor),
_FN1(0, "ceiling", 0, sexp_ceiling),
#endif
_FN2(0, 0, "expt", 0, sexp_expt_op),
#if SEXP_USE_UTF8_STRINGS
_FN2(SEXP_STRING, SEXP_FIXNUM, "string-index->offset", 0, sexp_string_index_to_offset),
_FN2(SEXP_STRING, SEXP_FIXNUM, "string-ref", 0, sexp_string_utf8_index_ref),
_FN3(SEXP_STRING, SEXP_FIXNUM, "string-set!", 0, sexp_string_utf8_index_set),
#endif
#if SEXP_USE_TYPE_DEFS
_FN2(SEXP_STRING, SEXP_FIXNUM, "register-simple-type", 0, sexp_register_simple_type_op),
_FN2(SEXP_STRING, SEXP_FIXNUM, "make-type-predicate", 0, sexp_make_type_predicate_op),

View file

@ -1,16 +1,21 @@
static const char* reverse_opcode_names[] =
{"NOOP", "RAISE", "RESUMECC", "CALLCC", "APPLY1", "TAIL-CALL", "CALL",
"FCALL0", "FCALL1", "FCALL2", "FCALL3", "FCALL4", "FCALL5", "FCALL6", "FCALLN",
"JUMP-UNLESS", "JUMP", "PUSH", "DROP", "GLOBAL-REF", "GLOBAL-KNOWN-REF",
"STACK-REF", "LOCAL-REF", "LOCAL-SET",
"CLOSURE-REF", "VECTOR-REF", "VECTOR-SET", "VECTOR-LENGTH", "STRING-REF",
"STRING-SET", "STRING-LENGTH", "MAKE-PROCEDURE", "MAKE-VECTOR",
{"NOOP", "RAISE", "RESUMECC", "CALLCC", "APPLY1",
"TAIL-CALL", "CALL", "FCALL0", "FCALL1", "FCALL2",
"FCALL3", "FCALL4", "FCALL5", "FCALL6", "FCALLN",
"JUMP-UNLESS", "JUMP", "PUSH", "DROP",
"GLOBAL-REF", "GLOBAL-KNOWN-REF", "STACK-REF",
"LOCAL-REF", "LOCAL-SET", "CLOSURE-REF",
"VECTOR-REF", "VECTOR-SET", "VECTOR-LENGTH",
"BYTES-REF", "BYTES-SET", "BYTES-LENGTH",
"STRING-REF", "STRING-SET", "STRING-LENGTH",
"MAKE-PROCEDURE", "MAKE-VECTOR",
"MAKE-EXCEPTION", "AND", "NULL?", "FIXNUM?", "SYMBOL?", "CHAR?",
"EOF?", "TYPEP", "MAKE", "SLOT-REF", "SLOT-SET", "CAR", "CDR",
"SET-CAR", "SET-CDR", "CONS", "ADD", "SUB",
"MUL", "DIV", "QUOTIENT", "REMAINDER",
"LT", "LE", "EQN", "EQ", "EXACT->INEXACT", "INEXACT->EXACT",
"CHAR->INTEGER", "INTEGER->CHAR", "CHAR-UPCASE", "CHAR-DOWNCASE",
"WRITE-CHAR", "NEWLINE", "READ-CHAR", "PEEK-CHAR", "YIELD", "RET", "DONE",
"WRITE-CHAR", "NEWLINE", "READ-CHAR", "PEEK-CHAR",
"YIELD", "RET", "DONE",
};

63
sexp.c
View file

@ -88,7 +88,12 @@ static struct sexp_struct _sexp_type_specs[] = {
_DEF_TYPE(SEXP_BOOLEAN, 0, 0, 0, 0, 0, 0, 0, 0, "boolean", NULL),
_DEF_TYPE(SEXP_PAIR, sexp_offsetof(pair, car), 2, 3, 0, 0, sexp_sizeof(pair), 0, 0, "pair", NULL),
_DEF_TYPE(SEXP_SYMBOL, 0, 0, 0, 0, 0, sexp_sizeof(symbol)+1, sexp_offsetof(symbol, length), 1, "symbol", NULL),
_DEF_TYPE(SEXP_BYTES, 0, 0, 0, 0, 0, sexp_sizeof(bytes)+1, sexp_offsetof(bytes, length), 1, "byte-vector", NULL),
#if SEXP_USE_PACKED_STRINGS
_DEF_TYPE(SEXP_STRING, 0, 0, 0, 0, 0, sexp_sizeof(string)+1, sexp_offsetof(string, length), 1, "string", NULL),
#else
_DEF_TYPE(SEXP_STRING, sexp_offsetof(string, bytes), 1, 1, 0, 0, sexp_sizeof(string), 0, 0, "string", NULL),
#endif
_DEF_TYPE(SEXP_VECTOR, sexp_offsetof(vector, data), 0, 0, sexp_offsetof(vector, length), 1, sexp_sizeof(vector), sexp_offsetof(vector, length), sizeof(sexp), "vector", NULL),
_DEF_TYPE(SEXP_FLONUM, 0, 0, 0, 0, 0, sexp_sizeof(flonum), 0, 0, "real", NULL),
_DEF_TYPE(SEXP_BIGNUM, 0, 0, 0, 0, 0, sexp_sizeof(bignum), sexp_offsetof(bignum, length), sizeof(sexp_uint_t), "bignum", NULL),
@ -666,24 +671,44 @@ sexp sexp_make_flonum (sexp ctx, float f) {
#endif
#endif
sexp sexp_make_string_op (sexp ctx sexp_api_params(self, n), sexp len, sexp ch) {
sexp sexp_make_bytes_op (sexp ctx sexp_api_params(self, n), sexp len, sexp i) {
sexp_sint_t clen = sexp_unbox_fixnum(len);
sexp s;
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, len);
if (clen < 0) return sexp_xtype_exception(ctx, self, "negative length", len);
s = sexp_alloc_atomic(ctx, sexp_sizeof(string)+clen+1);
s = sexp_alloc_atomic(ctx, sexp_sizeof(bytes)+clen+1);
if (sexp_exceptionp(s)) return s;
sexp_pointer_tag(s) = SEXP_STRING;
sexp_pointer_tag(s) = SEXP_BYTES;
#if SEXP_USE_HEADER_MAGIC
sexp_pointer_magic(s) = SEXP_POINTER_MAGIC;
#endif
sexp_string_length(s) = clen;
if (sexp_charp(ch))
memset(sexp_string_data(s), sexp_unbox_character(ch), clen);
sexp_string_data(s)[clen] = '\0';
sexp_bytes_length(s) = clen;
if (sexp_fixnump(i))
memset(sexp_bytes_data(s), sexp_unbox_fixnum(i), clen);
sexp_bytes_data(s)[clen] = '\0';
return s;
}
sexp sexp_make_string_op (sexp ctx sexp_api_params(self, n), sexp len, sexp ch)
{
sexp i = (sexp_charp(ch) ? sexp_make_fixnum(sexp_unbox_character(ch)) : ch);
sexp_gc_var2(b, s);
b = sexp_make_bytes_op(ctx sexp_api_pass(self, n), len, i);
if (sexp_exceptionp(b)) return b;
#if SEXP_USE_PACKED_STRINGS
sexp_pointer_tag(b) = SEXP_STRING;
return b;
#else
sexp_gc_preserve2(ctx, b, s);
s = sexp_alloc_type(ctx, string, SEXP_STRING);
sexp_string_bytes(s) = b;
sexp_string_offset(s) = 0;
sexp_string_length(s) = sexp_unbox_fixnum(len);
sexp_gc_release2(ctx);
return s;
#endif
}
sexp sexp_c_string (sexp ctx, const char *str, sexp_sint_t slen) {
sexp_sint_t len = ((slen >= 0) ? slen : strlen(str));
sexp s = sexp_make_string(ctx, sexp_make_fixnum(len), SEXP_VOID);
@ -790,14 +815,17 @@ sexp sexp_intern(sexp ctx, const char *str, sexp_sint_t len) {
bucket = 0;
#endif
for (ls=sexp_context_symbols(ctx)[bucket]; sexp_pairp(ls); ls=sexp_cdr(ls))
if ((sexp_string_length(tmp=sexp_symbol_string(sexp_car(ls))) == len)
&& ! strncmp(str, sexp_string_data(tmp), len))
if ((sexp_symbol_length(tmp=sexp_car(ls)) == len)
&& ! strncmp(str, sexp_symbol_data(tmp), len))
return sexp_car(ls);
/* not found, make a new symbol */
sexp_gc_preserve1(ctx, sym);
sym = sexp_c_string(ctx, str, len);
if (sexp_exceptionp(sym)) return sym;
#if ! SEXP_USE_PACKED_STRINGS
sym = sexp_string_bytes(sym);
#endif
sexp_pointer_tag(sym) = SEXP_SYMBOL;
sexp_push(ctx, sexp_context_symbols(ctx)[bucket], sym);
sexp_gc_release1(ctx);
@ -1190,8 +1218,8 @@ sexp sexp_write_one (sexp ctx, sexp obj, sexp out) {
sexp_write_char(ctx, '"', out);
break;
case SEXP_SYMBOL:
i = sexp_string_length(sexp_symbol_string(obj));
str = sexp_string_data(sexp_symbol_string(obj));
i = sexp_symbol_length(obj);
str = sexp_symbol_data(obj);
for ( ; i>0; str++, i--) {
if ((str[0] == '\\') || is_separator(str[0]))
sexp_write_char(ctx, '\\', out);
@ -1253,8 +1281,17 @@ sexp sexp_write_one (sexp ctx, sexp obj, sexp out) {
sexp_write_char(ctx, sexp_unbox_character(obj), out);
} else {
sexp_write_string(ctx, "#\\x", out);
sexp_write_char(ctx, hex_digit(sexp_unbox_character(obj)>>4), out);
sexp_write_char(ctx, hex_digit(sexp_unbox_character(obj)&0xF), out);
c = sexp_unbox_character(obj);
if (c >= 0x100) {
if (c >= 0x10000) {
sexp_write_char(ctx, hex_digit((c>>20)&0x0F), out);
sexp_write_char(ctx, hex_digit((c>>16)&0x0F), out);
}
sexp_write_char(ctx, hex_digit((c>>12)&0x0F), out);
sexp_write_char(ctx, hex_digit((c>>8)&0x0F), out);
}
sexp_write_char(ctx, hex_digit((c>>4)&0x0F), out);
sexp_write_char(ctx, hex_digit(c&0x0F), out);
}
} else if (sexp_symbolp(obj)) {

43
vm.c
View file

@ -2,8 +2,6 @@
/* Copyright (c) 2009-2010 Alex Shinn. All rights reserved. */
/* BSD-style license: http://synthcode.com/license.txt */
static sexp sexp_apply1 (sexp ctx, sexp f, sexp x);
#if SEXP_USE_DEBUG_VM > 1
static void sexp_print_stack (sexp ctx, sexp *stack, int top, int fp, sexp out) {
int i;
@ -834,6 +832,7 @@ sexp sexp_vm (sexp ctx, sexp proc) {
sexp_raise("vector-length: not a vector", sexp_list1(ctx, _ARG1));
_ARG1 = sexp_make_fixnum(sexp_vector_length(_ARG1));
break;
case SEXP_OP_BYTES_REF:
case SEXP_OP_STRING_REF:
if (! sexp_stringp(_ARG1))
sexp_raise("string-ref: not a string", sexp_list1(ctx, _ARG1));
@ -842,9 +841,17 @@ sexp sexp_vm (sexp ctx, sexp proc) {
i = sexp_unbox_fixnum(_ARG2);
if ((i < 0) || (i >= sexp_string_length(_ARG1)))
sexp_raise("string-ref: index out of range", sexp_list2(ctx, _ARG1, _ARG2));
_ARG2 = sexp_string_ref(_ARG1, _ARG2);
if (ip[-1] == SEXP_OP_BYTES_REF)
_ARG2 = sexp_bytes_ref(_ARG1, _ARG2);
else
#if SEXP_USE_UTF8_STRINGS
_ARG2 = sexp_string_utf8_ref(ctx, _ARG1, _ARG2);
#else
_ARG2 = sexp_string_ref(_ARG1, _ARG2);
#endif
top--;
break;
case SEXP_OP_BYTES_SET:
case SEXP_OP_STRING_SET:
if (! sexp_stringp(_ARG1))
sexp_raise("string-set!: not a string", sexp_list1(ctx, _ARG1));
@ -857,14 +864,30 @@ sexp sexp_vm (sexp ctx, sexp proc) {
i = sexp_unbox_fixnum(_ARG2);
if ((i < 0) || (i >= sexp_string_length(_ARG1)))
sexp_raise("string-set!: index out of range", sexp_list2(ctx, _ARG1, _ARG2));
sexp_string_set(_ARG1, _ARG2, _ARG3);
if (ip[-1] == SEXP_OP_BYTES_SET)
sexp_bytes_set(_ARG1, _ARG2, _ARG3);
else
#if SEXP_USE_UTF8_STRINGS
sexp_string_utf8_set(ctx, _ARG1, _ARG2, _ARG3);
#else
sexp_string_set(_ARG1, _ARG2, _ARG3);
#endif
_ARG3 = SEXP_VOID;
top-=2;
break;
case SEXP_OP_BYTES_LENGTH:
if (! sexp_stringp(_ARG1))
sexp_raise("bytes-length: not a byte-vector", sexp_list1(ctx, _ARG1));
_ARG1 = sexp_make_fixnum(sexp_bytes_length(_ARG1));
break;
case SEXP_OP_STRING_LENGTH:
if (! sexp_stringp(_ARG1))
sexp_raise("string-length: not a string", sexp_list1(ctx, _ARG1));
#if SEXP_USE_UTF8_STRINGS
_ARG1 = sexp_make_fixnum(sexp_string_utf8_length((unsigned char*)sexp_string_data(_ARG1), sexp_string_length(_ARG1)));
#else
_ARG1 = sexp_make_fixnum(sexp_string_length(_ARG1));
#endif
break;
case SEXP_OP_MAKE_PROCEDURE:
sexp_context_top(ctx) = top;
@ -1244,6 +1267,11 @@ sexp sexp_vm (sexp ctx, sexp proc) {
sexp_raise("write-char: not a character", sexp_list1(ctx, _ARG1));
if (! sexp_oportp(_ARG2))
sexp_raise("write-char: not an output-port", sexp_list1(ctx, _ARG2));
#if SEXP_USE_UTF8_STRINGS
if (sexp_unbox_character(_ARG1) >= 0x80)
sexp_write_utf8_char(ctx, sexp_unbox_character(_ARG1), _ARG2);
else
#endif
sexp_write_char(ctx, sexp_unbox_character(_ARG1), _ARG2);
_ARG2 = SEXP_VOID;
top--;
@ -1258,6 +1286,11 @@ sexp sexp_vm (sexp ctx, sexp proc) {
if (! sexp_iportp(_ARG1))
sexp_raise("read-char: not an input-port", sexp_list1(ctx, _ARG1));
i = sexp_read_char(ctx, _ARG1);
#if SEXP_USE_UTF8_STRINGS
if (i >= 0x80)
_ARG1 = sexp_read_utf8_char(ctx, _ARG1, i);
else
#endif
_ARG1 = (i == EOF) ? SEXP_EOF : sexp_make_character(i);
break;
case SEXP_OP_PEEK_CHAR:
@ -1302,7 +1335,7 @@ sexp sexp_vm (sexp ctx, sexp proc) {
/******************************* apply ********************************/
static sexp sexp_apply1 (sexp ctx, sexp f, sexp x) {
sexp sexp_apply1 (sexp ctx, sexp f, sexp x) {
sexp res;
sexp_gc_var1(args);
if (sexp_opcodep(f)) {