mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-20 22:29:16 +02:00
initial unicode support
This commit is contained in:
parent
1ecc2bb55c
commit
dbb4db1728
10 changed files with 349 additions and 41 deletions
7
AUTHORS
7
AUTHORS
|
@ -5,18 +5,23 @@ The `dynamic-wind' implementation is adapted from the implementation
|
|||
in the appendix to the Scheme48 reference manual, reportedly first
|
||||
written by Chris Hanson and John Lamping.
|
||||
|
||||
Thanks to the following people for patches:
|
||||
Thanks to the following people for patches and bug reports:
|
||||
|
||||
* Alexander Shendi
|
||||
* Andreas Rottman
|
||||
* Bruno Deferrari
|
||||
* Derrick Eddington
|
||||
* Eduardo Cavazos
|
||||
* Felix Winkelmann
|
||||
* Gregor Klinke
|
||||
* Jeremy Wolff
|
||||
* Jeronimo Pellegrini
|
||||
* John Cowan
|
||||
* John Samsa
|
||||
* Lars J Aas
|
||||
* Lorenzo Campedelli
|
||||
* Michal Kowalski (sladegen)
|
||||
* Taylor Venable
|
||||
|
||||
If you would prefer not to be listed, or are one of the users listed
|
||||
without a full name, please contact me. If you've made a contribution
|
||||
|
|
21
TODO
21
TODO
|
@ -8,7 +8,12 @@
|
|||
** DONE exceptions
|
||||
- State "DONE" [2009-04-09 Thu 14:45]
|
||||
** TODO native x86 backend
|
||||
API redesign in preparation complete, initial
|
||||
tests on native factorial and closures working.
|
||||
** TODO fasl/image files
|
||||
sexp_copy_context() can form the basis for images,
|
||||
FASL for arbitrary modules will need additional
|
||||
help with resolving external references.
|
||||
** DONE shared stack on EVAL
|
||||
- State "DONE" [2009-12-26 Sat 08:22]
|
||||
|
||||
|
@ -58,8 +63,17 @@
|
|||
* runtime
|
||||
** DONE bignums
|
||||
- State "DONE" [2009-07-07 Tue 14:42]
|
||||
** TODO unicode
|
||||
** TODO threads
|
||||
** DONE unicode
|
||||
- State "DONE" from "TODO" [2010-07-11 Sun 23:58]
|
||||
Supported with UTF-8 strings, string-ref is O(n) and
|
||||
string-set! may need to reallocate the whole string.
|
||||
string-cursor-ref can be used for O(1) string access.
|
||||
** DONE threads
|
||||
- State "DONE" from "TODO" [2010-07-11 Sun 15:31]
|
||||
VM now supports an optional hook for green threads,
|
||||
and a SRFI-18 interface is provided as a separate module.
|
||||
I/O operations will currently block all threads though,
|
||||
this needs to be addressed.
|
||||
** DONE virtual ports
|
||||
- State "DONE" [2010-01-02 Sat 20:12]
|
||||
** DONE dynamic-wind
|
||||
|
@ -107,7 +121,8 @@
|
|||
** DONE loop library
|
||||
- State "DONE" [2009-12-08 Tue 14:54]
|
||||
** TODO network interface
|
||||
** TODO posix interface
|
||||
** DONE posix interface
|
||||
- State "DONE" from "TODO" [2010-07-11 Sun 15:36]
|
||||
Splitting this into several parts.
|
||||
*** DONE filesystem interface
|
||||
- State "DONE" [2009-12-26 Sat 01:50]
|
||||
|
|
157
eval.c
157
eval.c
|
@ -43,7 +43,7 @@ void sexp_warn_undefs (sexp ctx, sexp from, sexp to) {
|
|||
sexp x;
|
||||
for (x=from; sexp_pairp(x) && x!=to; x=sexp_env_next_cell(x))
|
||||
if (sexp_cdr(x) == SEXP_UNDEF)
|
||||
sexp_warn(ctx, "reference to undefined variable", sexp_car(x));
|
||||
sexp_warn(ctx, "reference to undefined variable: ", sexp_car(x));
|
||||
}
|
||||
|
||||
|
||||
|
@ -299,14 +299,6 @@ static sexp sexp_make_lit (sexp ctx, sexp value) {
|
|||
return res;
|
||||
}
|
||||
|
||||
/************************* backend ***************************/
|
||||
|
||||
#if SEXP_USE_NATIVE_X86
|
||||
#include "opt/x86.c"
|
||||
#else
|
||||
#include "vm.c"
|
||||
#endif
|
||||
|
||||
/****************************** contexts ******************************/
|
||||
|
||||
#define SEXP_STACK_SIZE (sexp_sizeof(stack)+sizeof(sexp)*SEXP_INIT_STACK_SIZE)
|
||||
|
@ -1123,6 +1115,142 @@ static sexp sexp_string_cmp_op (sexp ctx sexp_api_params(self, n), sexp str1, se
|
|||
return sexp_make_fixnum(diff);
|
||||
}
|
||||
|
||||
#if SEXP_USE_UTF8_STRINGS
|
||||
|
||||
static int sexp_utf8_initial_byte_count(int c) {
|
||||
if (c < 0xC0) return 1;
|
||||
if (c < 0xE0) return 2;
|
||||
return ((c>>4)&1)+3;
|
||||
}
|
||||
|
||||
static int sexp_utf8_char_byte_count(int c) {
|
||||
if (c < 0x80) return 1;
|
||||
if (c < 0x800) return 2;
|
||||
if (c < 0x10000) return 3;
|
||||
return 4;
|
||||
}
|
||||
|
||||
static int sexp_string_utf8_length (unsigned char *p, int len) {
|
||||
unsigned char *q = p+len;
|
||||
int i;
|
||||
for (i=0; p<q; i++)
|
||||
p += sexp_utf8_initial_byte_count(*p);
|
||||
return i;
|
||||
}
|
||||
|
||||
sexp sexp_string_index_to_offset (sexp ctx sexp_api_params(self, n), sexp str, sexp index) {
|
||||
sexp_sint_t i, j, limit;
|
||||
unsigned char *p;
|
||||
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
||||
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, index);
|
||||
p = (unsigned char*)sexp_string_data(str);
|
||||
limit = sexp_string_length(str);
|
||||
for (j=0, i=sexp_unbox_fixnum(index); i>0 && j<limit; i--)
|
||||
j += sexp_utf8_initial_byte_count(p[j]);
|
||||
if (i>0)
|
||||
return sexp_user_exception(ctx, self, "string-index->offset: index out of range", index);
|
||||
return sexp_make_fixnum(j);
|
||||
}
|
||||
|
||||
sexp sexp_string_utf8_ref (sexp ctx, sexp str, sexp i) {
|
||||
unsigned char *p=(unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(i);
|
||||
if (*p < 0x80)
|
||||
return sexp_make_character(*p);
|
||||
else if ((*p < 0xC0) || (*p > 0xF7))
|
||||
return sexp_user_exception(ctx, NULL, "string-ref: invalid utf8 byte", i);
|
||||
else if (*p < 0xE0)
|
||||
return sexp_make_character(((p[0]&0x3F)<<6) + (p[1]&0x3F));
|
||||
else if (*p < 0xF0)
|
||||
return sexp_make_character(((p[0]&0x1F)<<12) + ((p[1]&0x3F)<<6) + (p[2]&0x3F));
|
||||
else
|
||||
return sexp_make_character(((p[0]&0x0F)<<16) + ((p[1]&0x3F)<<6) + ((p[2]&0x3F)<<6) + (p[2]&0x3F));
|
||||
}
|
||||
|
||||
sexp sexp_string_utf8_index_ref (sexp ctx sexp_api_params(self, n), sexp str, sexp i) {
|
||||
sexp off;
|
||||
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
||||
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, i);
|
||||
off = sexp_string_index_to_offset(ctx sexp_api_pass(self, n), str, i);
|
||||
if (sexp_exceptionp(off)) return off;
|
||||
return sexp_string_utf8_ref(ctx, str, off);
|
||||
}
|
||||
|
||||
void sexp_utf8_encode_char (unsigned char* p, int len, int c) {
|
||||
switch (len) {
|
||||
case 4: *p++ = (0xF0 + ((c)>>18)); *p++ = (0x80 + ((c>>12)&0x3F));
|
||||
*p++ = (0x80 + ((c>>6)&0x3F)); *p = (0x80 + (c&0x3F)); break;
|
||||
case 3: *p++ = (0xE0 + ((c)>>12)); *p++ = (0x80 + ((c>>6)&0x3F));
|
||||
*p = (0x80 + (c&0x3F)); break;
|
||||
case 2: *p++ = (0xC0 + ((c)>>6)); *p = (0x80 + (c&0x3F)); break;
|
||||
default: *p = c; break;
|
||||
}
|
||||
}
|
||||
|
||||
void sexp_write_utf8_char (sexp ctx, int c, sexp out) {
|
||||
unsigned char buf[8];
|
||||
int len = sexp_utf8_char_byte_count(c);
|
||||
sexp_utf8_encode_char(buf, len, c);
|
||||
buf[len+1] = 0;
|
||||
sexp_write_string(ctx, (char*)buf, out);
|
||||
}
|
||||
|
||||
sexp sexp_read_utf8_char (sexp ctx, sexp port, int i) {
|
||||
if (i >= 0x80) {
|
||||
if ((i < 0xC0) || (i > 0xF7)) {
|
||||
return sexp_user_exception(ctx, NULL, "read-char: invalid utf8 byte", sexp_make_fixnum(i));
|
||||
} else if (i < 0xE0) {
|
||||
i = ((i&0x3F)<<6) + (sexp_read_char(ctx, port)&0x3F);
|
||||
} else if (i < 0xF0) {
|
||||
i = ((i&0x1F)<<12) + ((sexp_read_char(ctx, port)&0x3F)<<6);
|
||||
i += sexp_read_char(ctx, port)&0x3F;
|
||||
} else {
|
||||
i = ((i&0x0F)<<16) + ((sexp_read_char(ctx, port)&0x3F)<<6);
|
||||
i += (sexp_read_char(ctx, port)&0x3F)<<6;
|
||||
i += sexp_read_char(ctx, port)&0x3F;
|
||||
}
|
||||
}
|
||||
return sexp_make_character(i);
|
||||
}
|
||||
|
||||
#if SEXP_USE_MUTABLE_STRINGS
|
||||
|
||||
void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) {
|
||||
sexp b;
|
||||
unsigned char *p, *q;
|
||||
int i = sexp_unbox_fixnum(index), c = sexp_unbox_character(ch),
|
||||
old_len, new_len, len;
|
||||
p = (unsigned char*)sexp_string_data(str) + i;
|
||||
old_len = sexp_utf8_initial_byte_count(*p);
|
||||
new_len = sexp_utf8_char_byte_count(c);
|
||||
if (old_len != new_len) { /* resize bytes if needed */
|
||||
len = sexp_string_length(str)+(new_len-old_len);
|
||||
b = sexp_make_bytes(ctx, sexp_make_fixnum(len), SEXP_VOID);
|
||||
if (! sexp_exceptionp(b)) {
|
||||
q = (unsigned char*)sexp_bytes_data(b);
|
||||
memcpy(q, sexp_string_data(str), i);
|
||||
memcpy(q+i+new_len, p+old_len, len-i-new_len+1);
|
||||
sexp_string_bytes(str) = b;
|
||||
p = q + i;
|
||||
}
|
||||
sexp_string_length(str) += new_len - old_len;
|
||||
}
|
||||
sexp_utf8_encode_char(p, new_len, c);
|
||||
}
|
||||
|
||||
sexp sexp_string_utf8_index_set (sexp ctx sexp_api_params(self, n), sexp str, sexp i, sexp ch) {
|
||||
sexp off;
|
||||
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
|
||||
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, i);
|
||||
sexp_assert_type(ctx, sexp_charp, SEXP_CHAR, ch);
|
||||
off = sexp_string_index_to_offset(ctx sexp_api_pass(self, n), str, i);
|
||||
if (sexp_exceptionp(off)) return off;
|
||||
sexp_string_utf8_set(ctx, str, off, ch);
|
||||
return SEXP_VOID;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PLAN9
|
||||
#include "opt/plan9.c"
|
||||
#endif
|
||||
|
@ -1438,6 +1566,9 @@ sexp sexp_load_standard_env (sexp ctx, sexp e, sexp version) {
|
|||
#endif
|
||||
#if SEXP_USE_BOEHM
|
||||
sexp_push(ctx, tmp, sym=sexp_intern(ctx, "boehm-gc", -1));
|
||||
#endif
|
||||
#if SEXP_USE_UTF8_STRINGS
|
||||
sexp_push(ctx, tmp, sym=sexp_intern(ctx, "boehm-gc", -1));
|
||||
#endif
|
||||
sexp_push(ctx, tmp, sym=sexp_intern(ctx, "chibi", -1));
|
||||
sexp_env_define(ctx, e, sexp_intern(ctx, "*features*", -1), tmp);
|
||||
|
@ -1519,6 +1650,14 @@ sexp sexp_env_copy_op (sexp ctx sexp_api_params(self, n), sexp to, sexp from, se
|
|||
return SEXP_VOID;
|
||||
}
|
||||
|
||||
/************************* backend ***************************/
|
||||
|
||||
#if SEXP_USE_NATIVE_X86
|
||||
#include "opt/x86.c"
|
||||
#else
|
||||
#include "vm.c"
|
||||
#endif
|
||||
|
||||
/************************** eval interface ****************************/
|
||||
|
||||
sexp sexp_compile (sexp ctx, sexp x) {
|
||||
|
|
|
@ -76,6 +76,9 @@ enum sexp_opcode_names {
|
|||
SEXP_OP_VECTOR_REF,
|
||||
SEXP_OP_VECTOR_SET,
|
||||
SEXP_OP_VECTOR_LENGTH,
|
||||
SEXP_OP_BYTES_REF,
|
||||
SEXP_OP_BYTES_SET,
|
||||
SEXP_OP_BYTES_LENGTH,
|
||||
SEXP_OP_STRING_REF,
|
||||
SEXP_OP_STRING_SET,
|
||||
SEXP_OP_STRING_LENGTH,
|
||||
|
@ -132,6 +135,7 @@ SEXP_API sexp sexp_compile_error (sexp ctx, const char *message, sexp obj);
|
|||
SEXP_API sexp sexp_analyze (sexp context, sexp x);
|
||||
SEXP_API void sexp_stack_trace (sexp ctx, sexp out);
|
||||
SEXP_API sexp sexp_apply (sexp context, sexp proc, sexp args);
|
||||
SEXP_API sexp sexp_apply1 (sexp ctx, sexp f, sexp x);
|
||||
SEXP_API sexp sexp_free_vars (sexp context, sexp x, sexp fv);
|
||||
SEXP_API int sexp_param_index (sexp lambda, sexp name);
|
||||
SEXP_API sexp sexp_eval_op (sexp context sexp_api_params(self, n), sexp obj, sexp env);
|
||||
|
|
|
@ -145,6 +145,16 @@
|
|||
/* non-immediate symbols in a single list. */
|
||||
/* #define SEXP_USE_HASH_SYMS 0 */
|
||||
|
||||
/* uncomment this to disable UTF-8 string support */
|
||||
/* The default settings store strings in memory as UTF-8, */
|
||||
/* and assumes strings passed to/from the C FFI are UTF-8. */
|
||||
/* #define SEXP_USE_UTF8_STRINGS 0 */
|
||||
|
||||
/* uncomment this to disable the string-set! opcode */
|
||||
/* By default (non-literal) strings are mutable. */
|
||||
/* Making them immutable allows for packed UTF-8 strings. */
|
||||
/* #define SEXP_USE_MUTABLE_STRINGS 0 */
|
||||
|
||||
/* uncomment this to disable string ports */
|
||||
/* If disabled some basic functionality such as number->string */
|
||||
/* will not be available by default. */
|
||||
|
@ -201,7 +211,7 @@
|
|||
|
||||
/* the default number of opcodes to run each thread for */
|
||||
#ifndef SEXP_DEFAULT_QUANTUM
|
||||
#define SEXP_DEFAULT_QUANTUM 1000
|
||||
#define SEXP_DEFAULT_QUANTUM 500
|
||||
#endif
|
||||
|
||||
/************************************************************************/
|
||||
|
@ -230,7 +240,7 @@
|
|||
#endif
|
||||
|
||||
#ifndef SEXP_USE_GREEN_THREADS
|
||||
#define SEXP_USE_GREEN_THREADS 1
|
||||
#define SEXP_USE_GREEN_THREADS ! SEXP_USE_NO_FEATURES
|
||||
#endif
|
||||
|
||||
#ifndef SEXP_USE_NATIVE_X86
|
||||
|
@ -314,7 +324,7 @@
|
|||
#endif
|
||||
|
||||
#ifndef SEXP_USE_EXTENDED_FCALL
|
||||
#define SEXP_USE_EXTENDED_FCALL 1
|
||||
#define SEXP_USE_EXTENDED_FCALL ! SEXP_USE_NO_FEATURES
|
||||
#endif
|
||||
|
||||
#ifndef SEXP_USE_FLONUMS
|
||||
|
@ -361,6 +371,21 @@
|
|||
#define SEXP_USE_DEBUG_VM 0
|
||||
#endif
|
||||
|
||||
#ifndef SEXP_USE_UTF8_STRINGS
|
||||
#define SEXP_USE_UTF8_STRINGS ! SEXP_USE_NO_FEATURES
|
||||
#endif
|
||||
|
||||
#ifndef SEXP_USE_MUTABLE_STRINGS
|
||||
#define SEXP_USE_MUTABLE_STRINGS 1
|
||||
#endif
|
||||
|
||||
#if (SEXP_USE_UTF8_STRINGS && SEXP_USE_MUTABLE_STRINGS)
|
||||
#define SEXP_USE_PACKED_STRINGS 0
|
||||
#endif
|
||||
#ifndef SEXP_USE_PACKED_STRINGS
|
||||
#define SEXP_USE_PACKED_STRINGS 1
|
||||
#endif
|
||||
|
||||
#ifndef SEXP_USE_STRING_STREAMS
|
||||
#define SEXP_USE_STRING_STREAMS ! SEXP_USE_NO_FEATURES
|
||||
#endif
|
||||
|
|
|
@ -83,6 +83,7 @@ enum sexp_types {
|
|||
SEXP_BOOLEAN,
|
||||
SEXP_PAIR,
|
||||
SEXP_SYMBOL,
|
||||
SEXP_BYTES,
|
||||
SEXP_STRING,
|
||||
SEXP_VECTOR,
|
||||
SEXP_FLONUM,
|
||||
|
@ -210,6 +211,15 @@ struct sexp_struct {
|
|||
struct {
|
||||
sexp_uint_t length;
|
||||
char data[];
|
||||
} bytes;
|
||||
struct {
|
||||
#if SEXP_USE_PACKED_STRINGS
|
||||
sexp_uint_t length;
|
||||
char data[];
|
||||
#else
|
||||
sexp_uint_t offset, length;
|
||||
sexp bytes;
|
||||
#endif
|
||||
} string;
|
||||
struct {
|
||||
sexp_uint_t length;
|
||||
|
@ -578,12 +588,26 @@ SEXP_API sexp sexp_make_unsigned_integer(sexp ctx, sexp_luint_t x);
|
|||
#define sexp_procedure_code(x) ((x)->value.procedure.bc)
|
||||
#define sexp_procedure_vars(x) ((x)->value.procedure.vars)
|
||||
|
||||
#define sexp_bytes_length(x) ((x)->value.bytes.length)
|
||||
#define sexp_bytes_data(x) ((x)->value.bytes.data)
|
||||
|
||||
#define sexp_string_length(x) ((x)->value.string.length)
|
||||
#if SEXP_USE_PACKED_STRINGS
|
||||
#define sexp_string_data(x) ((x)->value.string.data)
|
||||
#else
|
||||
#define sexp_string_bytes(x) ((x)->value.string.bytes)
|
||||
#define sexp_string_offset(x) ((x)->value.string.offset)
|
||||
#define sexp_string_data(x) (sexp_bytes_data(sexp_string_bytes(x))+sexp_string_offset(x))
|
||||
#endif
|
||||
|
||||
#define sexp_bytes_ref(x, i) (sexp_make_fixnum((unsigned char)sexp_bytes_data(x)[sexp_unbox_fixnum(i)]))
|
||||
#define sexp_bytes_set(x, i, v) (sexp_bytes_data(x)[sexp_unbox_fixnum(i)] = sexp_unbox_fixnum(v))
|
||||
|
||||
#define sexp_string_ref(x, i) (sexp_make_character((unsigned char)sexp_string_data(x)[sexp_unbox_fixnum(i)]))
|
||||
#define sexp_string_set(x, i, v) (sexp_string_data(x)[sexp_unbox_fixnum(i)] = sexp_unbox_character(v))
|
||||
|
||||
#define sexp_symbol_data(x) ((x)->value.symbol.data)
|
||||
#define sexp_symbol_length(x) ((x)->value.symbol.length)
|
||||
#define sexp_symbol_string(x) (x)
|
||||
|
||||
#define sexp_port_stream(p) ((p)->value.port.stream)
|
||||
|
@ -902,6 +926,7 @@ SEXP_API sexp sexp_memq_op(sexp ctx sexp_api_params(self, n), sexp x, sexp ls);
|
|||
SEXP_API sexp sexp_assq_op(sexp ctx sexp_api_params(self, n), sexp x, sexp ls);
|
||||
SEXP_API sexp sexp_length_op(sexp ctx sexp_api_params(self, n), sexp ls);
|
||||
SEXP_API sexp sexp_c_string(sexp ctx, const char *str, sexp_sint_t slen);
|
||||
SEXP_API sexp sexp_make_bytes_op(sexp ctx sexp_api_params(self, n), sexp len, sexp i);
|
||||
SEXP_API sexp sexp_make_string_op(sexp ctx sexp_api_params(self, n), sexp len, sexp ch);
|
||||
SEXP_API sexp sexp_substring_op (sexp ctx sexp_api_params(self, n), sexp str, sexp start, sexp end);
|
||||
SEXP_API sexp sexp_string_concatenate_op (sexp ctx sexp_api_params(self, n), sexp str_ls, sexp sep);
|
||||
|
@ -980,6 +1005,7 @@ SEXP_API sexp sexp_finalize_c_type (sexp ctx sexp_api_params(self, n), sexp obj)
|
|||
#define sexp_list_to_vector(ctx, x) sexp_list_to_vector_op(ctx sexp_api_pass(NULL, 1), x)
|
||||
#define sexp_exception_type(ctx, x) sexp_exception_type_op(ctx sexp_api_pass(NULL, 1), x)
|
||||
#define sexp_string_to_number(ctx, s, b) sexp_make_string_op(ctx sexp_api_pass(NULL, 2), s, b)
|
||||
#define sexp_make_bytes(ctx, l, i) sexp_make_bytes_op(ctx sexp_api_pass(NULL, 2), l, i)
|
||||
#define sexp_make_string(ctx, l, c) sexp_make_string_op(ctx sexp_api_pass(NULL, 2), l, c)
|
||||
#define sexp_string_cmp(ctx, a, b, c) sexp_string_cmp_op(ctx sexp_api_pass(NULL, 3), a, b, c)
|
||||
#define sexp_substring(ctx, a, b, c) sexp_substring_op(ctx sexp_api_pass(NULL, 3), a, b, c)
|
||||
|
|
19
opcodes.c
19
opcodes.c
|
@ -24,8 +24,21 @@ _OP(SEXP_OPC_SETTER, SEXP_OP_SET_CDR, 2, 0, SEXP_PAIR, 0, 0, "set-cdr!", 0, NULL
|
|||
_OP(SEXP_OPC_GETTER, SEXP_OP_VECTOR_REF,2,0, SEXP_VECTOR, SEXP_FIXNUM, 0,"vector-ref", 0, NULL),
|
||||
_OP(SEXP_OPC_SETTER, SEXP_OP_VECTOR_SET,3,0, SEXP_VECTOR, SEXP_FIXNUM, 0,"vector-set!", 0, NULL),
|
||||
_OP(SEXP_OPC_GETTER, SEXP_OP_VECTOR_LENGTH,1,0, SEXP_VECTOR, 0, 0,"vector-length", 0, NULL),
|
||||
_OP(SEXP_OPC_GETTER, SEXP_OP_BYTES_REF,2,0, SEXP_BYTES, SEXP_FIXNUM, 0,"byte-vector-ref", 0, NULL),
|
||||
_OP(SEXP_OPC_SETTER, SEXP_OP_BYTES_SET,3,0, SEXP_BYTES, SEXP_FIXNUM, 0,"byte-vector-set!", 0, NULL),
|
||||
_OP(SEXP_OPC_GETTER, SEXP_OP_BYTES_LENGTH,1,0, SEXP_BYTES, 0, 0,"byte-vector-length", 0, NULL),
|
||||
#if SEXP_USE_UTF8_STRINGS
|
||||
_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_REF,2,0, SEXP_STRING, SEXP_FIXNUM, 0,"string-cursor-ref", 0, NULL),
|
||||
#else
|
||||
_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_REF,2,0, SEXP_STRING, SEXP_FIXNUM, 0,"string-ref", 0, NULL),
|
||||
#endif
|
||||
#if SEXP_USE_MUTABLE_STRINGS
|
||||
#if SEXP_USE_UTF8_STRINGS
|
||||
_OP(SEXP_OPC_SETTER, SEXP_OP_STRING_SET,3,0, SEXP_STRING, SEXP_FIXNUM, 0,"string-cursor-set!", 0, NULL),
|
||||
#else
|
||||
_OP(SEXP_OPC_SETTER, SEXP_OP_STRING_SET,3,0, SEXP_STRING, SEXP_FIXNUM, 0,"string-set!", 0, NULL),
|
||||
#endif
|
||||
#endif
|
||||
_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_LENGTH,1,0, SEXP_STRING, 0, 0,"string-length", 0, NULL),
|
||||
_OP(SEXP_OPC_GENERIC, SEXP_OP_FIX2FLO, 1, 0, 0, 0, 0, "exact->inexact", 0, NULL),
|
||||
_OP(SEXP_OPC_GENERIC, SEXP_OP_FLO2FIX, 1, 0, 0, 0, 0, "inexact->exact", 0, NULL),
|
||||
|
@ -101,6 +114,7 @@ _FN4(SEXP_ENV, SEXP_ENV, "%env-copy!", 0, sexp_env_copy_op),
|
|||
_FN2(SEXP_EXCEPTION, SEXP_OPORT, "print-exception", 0, sexp_print_exception_op),
|
||||
_FN1(SEXP_EXCEPTION, "exception-type", 0, sexp_exception_type_op),
|
||||
_FN2OPT(SEXP_FIXNUM, SEXP_CHAR, "make-string", sexp_make_character(' '), sexp_make_string_op),
|
||||
_FN2OPT(SEXP_FIXNUM, SEXP_FIXNUM, "make-byte-vector", SEXP_ZERO, sexp_make_bytes_op),
|
||||
_FN2OPT(SEXP_FIXNUM, SEXP_FIXNUM, "string->number", SEXP_TEN, sexp_string_to_number_op),
|
||||
_FN3(SEXP_STRING, SEXP_STRING, "string-cmp", 0, sexp_string_cmp_op),
|
||||
_FN3(SEXP_STRING, SEXP_FIXNUM, "substring", 0, sexp_substring_op),
|
||||
|
@ -134,6 +148,11 @@ _FN1(0, "floor", 0, sexp_floor),
|
|||
_FN1(0, "ceiling", 0, sexp_ceiling),
|
||||
#endif
|
||||
_FN2(0, 0, "expt", 0, sexp_expt_op),
|
||||
#if SEXP_USE_UTF8_STRINGS
|
||||
_FN2(SEXP_STRING, SEXP_FIXNUM, "string-index->offset", 0, sexp_string_index_to_offset),
|
||||
_FN2(SEXP_STRING, SEXP_FIXNUM, "string-ref", 0, sexp_string_utf8_index_ref),
|
||||
_FN3(SEXP_STRING, SEXP_FIXNUM, "string-set!", 0, sexp_string_utf8_index_set),
|
||||
#endif
|
||||
#if SEXP_USE_TYPE_DEFS
|
||||
_FN2(SEXP_STRING, SEXP_FIXNUM, "register-simple-type", 0, sexp_register_simple_type_op),
|
||||
_FN2(SEXP_STRING, SEXP_FIXNUM, "make-type-predicate", 0, sexp_make_type_predicate_op),
|
||||
|
|
|
@ -1,16 +1,21 @@
|
|||
|
||||
static const char* reverse_opcode_names[] =
|
||||
{"NOOP", "RAISE", "RESUMECC", "CALLCC", "APPLY1", "TAIL-CALL", "CALL",
|
||||
"FCALL0", "FCALL1", "FCALL2", "FCALL3", "FCALL4", "FCALL5", "FCALL6", "FCALLN",
|
||||
"JUMP-UNLESS", "JUMP", "PUSH", "DROP", "GLOBAL-REF", "GLOBAL-KNOWN-REF",
|
||||
"STACK-REF", "LOCAL-REF", "LOCAL-SET",
|
||||
"CLOSURE-REF", "VECTOR-REF", "VECTOR-SET", "VECTOR-LENGTH", "STRING-REF",
|
||||
"STRING-SET", "STRING-LENGTH", "MAKE-PROCEDURE", "MAKE-VECTOR",
|
||||
{"NOOP", "RAISE", "RESUMECC", "CALLCC", "APPLY1",
|
||||
"TAIL-CALL", "CALL", "FCALL0", "FCALL1", "FCALL2",
|
||||
"FCALL3", "FCALL4", "FCALL5", "FCALL6", "FCALLN",
|
||||
"JUMP-UNLESS", "JUMP", "PUSH", "DROP",
|
||||
"GLOBAL-REF", "GLOBAL-KNOWN-REF", "STACK-REF",
|
||||
"LOCAL-REF", "LOCAL-SET", "CLOSURE-REF",
|
||||
"VECTOR-REF", "VECTOR-SET", "VECTOR-LENGTH",
|
||||
"BYTES-REF", "BYTES-SET", "BYTES-LENGTH",
|
||||
"STRING-REF", "STRING-SET", "STRING-LENGTH",
|
||||
"MAKE-PROCEDURE", "MAKE-VECTOR",
|
||||
"MAKE-EXCEPTION", "AND", "NULL?", "FIXNUM?", "SYMBOL?", "CHAR?",
|
||||
"EOF?", "TYPEP", "MAKE", "SLOT-REF", "SLOT-SET", "CAR", "CDR",
|
||||
"SET-CAR", "SET-CDR", "CONS", "ADD", "SUB",
|
||||
"MUL", "DIV", "QUOTIENT", "REMAINDER",
|
||||
"LT", "LE", "EQN", "EQ", "EXACT->INEXACT", "INEXACT->EXACT",
|
||||
"CHAR->INTEGER", "INTEGER->CHAR", "CHAR-UPCASE", "CHAR-DOWNCASE",
|
||||
"WRITE-CHAR", "NEWLINE", "READ-CHAR", "PEEK-CHAR", "YIELD", "RET", "DONE",
|
||||
"WRITE-CHAR", "NEWLINE", "READ-CHAR", "PEEK-CHAR",
|
||||
"YIELD", "RET", "DONE",
|
||||
};
|
||||
|
|
63
sexp.c
63
sexp.c
|
@ -88,7 +88,12 @@ static struct sexp_struct _sexp_type_specs[] = {
|
|||
_DEF_TYPE(SEXP_BOOLEAN, 0, 0, 0, 0, 0, 0, 0, 0, "boolean", NULL),
|
||||
_DEF_TYPE(SEXP_PAIR, sexp_offsetof(pair, car), 2, 3, 0, 0, sexp_sizeof(pair), 0, 0, "pair", NULL),
|
||||
_DEF_TYPE(SEXP_SYMBOL, 0, 0, 0, 0, 0, sexp_sizeof(symbol)+1, sexp_offsetof(symbol, length), 1, "symbol", NULL),
|
||||
_DEF_TYPE(SEXP_BYTES, 0, 0, 0, 0, 0, sexp_sizeof(bytes)+1, sexp_offsetof(bytes, length), 1, "byte-vector", NULL),
|
||||
#if SEXP_USE_PACKED_STRINGS
|
||||
_DEF_TYPE(SEXP_STRING, 0, 0, 0, 0, 0, sexp_sizeof(string)+1, sexp_offsetof(string, length), 1, "string", NULL),
|
||||
#else
|
||||
_DEF_TYPE(SEXP_STRING, sexp_offsetof(string, bytes), 1, 1, 0, 0, sexp_sizeof(string), 0, 0, "string", NULL),
|
||||
#endif
|
||||
_DEF_TYPE(SEXP_VECTOR, sexp_offsetof(vector, data), 0, 0, sexp_offsetof(vector, length), 1, sexp_sizeof(vector), sexp_offsetof(vector, length), sizeof(sexp), "vector", NULL),
|
||||
_DEF_TYPE(SEXP_FLONUM, 0, 0, 0, 0, 0, sexp_sizeof(flonum), 0, 0, "real", NULL),
|
||||
_DEF_TYPE(SEXP_BIGNUM, 0, 0, 0, 0, 0, sexp_sizeof(bignum), sexp_offsetof(bignum, length), sizeof(sexp_uint_t), "bignum", NULL),
|
||||
|
@ -666,24 +671,44 @@ sexp sexp_make_flonum (sexp ctx, float f) {
|
|||
#endif
|
||||
#endif
|
||||
|
||||
sexp sexp_make_string_op (sexp ctx sexp_api_params(self, n), sexp len, sexp ch) {
|
||||
sexp sexp_make_bytes_op (sexp ctx sexp_api_params(self, n), sexp len, sexp i) {
|
||||
sexp_sint_t clen = sexp_unbox_fixnum(len);
|
||||
sexp s;
|
||||
sexp_assert_type(ctx, sexp_fixnump, SEXP_FIXNUM, len);
|
||||
if (clen < 0) return sexp_xtype_exception(ctx, self, "negative length", len);
|
||||
s = sexp_alloc_atomic(ctx, sexp_sizeof(string)+clen+1);
|
||||
s = sexp_alloc_atomic(ctx, sexp_sizeof(bytes)+clen+1);
|
||||
if (sexp_exceptionp(s)) return s;
|
||||
sexp_pointer_tag(s) = SEXP_STRING;
|
||||
sexp_pointer_tag(s) = SEXP_BYTES;
|
||||
#if SEXP_USE_HEADER_MAGIC
|
||||
sexp_pointer_magic(s) = SEXP_POINTER_MAGIC;
|
||||
#endif
|
||||
sexp_string_length(s) = clen;
|
||||
if (sexp_charp(ch))
|
||||
memset(sexp_string_data(s), sexp_unbox_character(ch), clen);
|
||||
sexp_string_data(s)[clen] = '\0';
|
||||
sexp_bytes_length(s) = clen;
|
||||
if (sexp_fixnump(i))
|
||||
memset(sexp_bytes_data(s), sexp_unbox_fixnum(i), clen);
|
||||
sexp_bytes_data(s)[clen] = '\0';
|
||||
return s;
|
||||
}
|
||||
|
||||
sexp sexp_make_string_op (sexp ctx sexp_api_params(self, n), sexp len, sexp ch)
|
||||
{
|
||||
sexp i = (sexp_charp(ch) ? sexp_make_fixnum(sexp_unbox_character(ch)) : ch);
|
||||
sexp_gc_var2(b, s);
|
||||
b = sexp_make_bytes_op(ctx sexp_api_pass(self, n), len, i);
|
||||
if (sexp_exceptionp(b)) return b;
|
||||
#if SEXP_USE_PACKED_STRINGS
|
||||
sexp_pointer_tag(b) = SEXP_STRING;
|
||||
return b;
|
||||
#else
|
||||
sexp_gc_preserve2(ctx, b, s);
|
||||
s = sexp_alloc_type(ctx, string, SEXP_STRING);
|
||||
sexp_string_bytes(s) = b;
|
||||
sexp_string_offset(s) = 0;
|
||||
sexp_string_length(s) = sexp_unbox_fixnum(len);
|
||||
sexp_gc_release2(ctx);
|
||||
return s;
|
||||
#endif
|
||||
}
|
||||
|
||||
sexp sexp_c_string (sexp ctx, const char *str, sexp_sint_t slen) {
|
||||
sexp_sint_t len = ((slen >= 0) ? slen : strlen(str));
|
||||
sexp s = sexp_make_string(ctx, sexp_make_fixnum(len), SEXP_VOID);
|
||||
|
@ -790,14 +815,17 @@ sexp sexp_intern(sexp ctx, const char *str, sexp_sint_t len) {
|
|||
bucket = 0;
|
||||
#endif
|
||||
for (ls=sexp_context_symbols(ctx)[bucket]; sexp_pairp(ls); ls=sexp_cdr(ls))
|
||||
if ((sexp_string_length(tmp=sexp_symbol_string(sexp_car(ls))) == len)
|
||||
&& ! strncmp(str, sexp_string_data(tmp), len))
|
||||
if ((sexp_symbol_length(tmp=sexp_car(ls)) == len)
|
||||
&& ! strncmp(str, sexp_symbol_data(tmp), len))
|
||||
return sexp_car(ls);
|
||||
|
||||
/* not found, make a new symbol */
|
||||
sexp_gc_preserve1(ctx, sym);
|
||||
sym = sexp_c_string(ctx, str, len);
|
||||
if (sexp_exceptionp(sym)) return sym;
|
||||
#if ! SEXP_USE_PACKED_STRINGS
|
||||
sym = sexp_string_bytes(sym);
|
||||
#endif
|
||||
sexp_pointer_tag(sym) = SEXP_SYMBOL;
|
||||
sexp_push(ctx, sexp_context_symbols(ctx)[bucket], sym);
|
||||
sexp_gc_release1(ctx);
|
||||
|
@ -1190,8 +1218,8 @@ sexp sexp_write_one (sexp ctx, sexp obj, sexp out) {
|
|||
sexp_write_char(ctx, '"', out);
|
||||
break;
|
||||
case SEXP_SYMBOL:
|
||||
i = sexp_string_length(sexp_symbol_string(obj));
|
||||
str = sexp_string_data(sexp_symbol_string(obj));
|
||||
i = sexp_symbol_length(obj);
|
||||
str = sexp_symbol_data(obj);
|
||||
for ( ; i>0; str++, i--) {
|
||||
if ((str[0] == '\\') || is_separator(str[0]))
|
||||
sexp_write_char(ctx, '\\', out);
|
||||
|
@ -1253,8 +1281,17 @@ sexp sexp_write_one (sexp ctx, sexp obj, sexp out) {
|
|||
sexp_write_char(ctx, sexp_unbox_character(obj), out);
|
||||
} else {
|
||||
sexp_write_string(ctx, "#\\x", out);
|
||||
sexp_write_char(ctx, hex_digit(sexp_unbox_character(obj)>>4), out);
|
||||
sexp_write_char(ctx, hex_digit(sexp_unbox_character(obj)&0xF), out);
|
||||
c = sexp_unbox_character(obj);
|
||||
if (c >= 0x100) {
|
||||
if (c >= 0x10000) {
|
||||
sexp_write_char(ctx, hex_digit((c>>20)&0x0F), out);
|
||||
sexp_write_char(ctx, hex_digit((c>>16)&0x0F), out);
|
||||
}
|
||||
sexp_write_char(ctx, hex_digit((c>>12)&0x0F), out);
|
||||
sexp_write_char(ctx, hex_digit((c>>8)&0x0F), out);
|
||||
}
|
||||
sexp_write_char(ctx, hex_digit((c>>4)&0x0F), out);
|
||||
sexp_write_char(ctx, hex_digit(c&0x0F), out);
|
||||
}
|
||||
} else if (sexp_symbolp(obj)) {
|
||||
|
||||
|
|
43
vm.c
43
vm.c
|
@ -2,8 +2,6 @@
|
|||
/* Copyright (c) 2009-2010 Alex Shinn. All rights reserved. */
|
||||
/* BSD-style license: http://synthcode.com/license.txt */
|
||||
|
||||
static sexp sexp_apply1 (sexp ctx, sexp f, sexp x);
|
||||
|
||||
#if SEXP_USE_DEBUG_VM > 1
|
||||
static void sexp_print_stack (sexp ctx, sexp *stack, int top, int fp, sexp out) {
|
||||
int i;
|
||||
|
@ -834,6 +832,7 @@ sexp sexp_vm (sexp ctx, sexp proc) {
|
|||
sexp_raise("vector-length: not a vector", sexp_list1(ctx, _ARG1));
|
||||
_ARG1 = sexp_make_fixnum(sexp_vector_length(_ARG1));
|
||||
break;
|
||||
case SEXP_OP_BYTES_REF:
|
||||
case SEXP_OP_STRING_REF:
|
||||
if (! sexp_stringp(_ARG1))
|
||||
sexp_raise("string-ref: not a string", sexp_list1(ctx, _ARG1));
|
||||
|
@ -842,9 +841,17 @@ sexp sexp_vm (sexp ctx, sexp proc) {
|
|||
i = sexp_unbox_fixnum(_ARG2);
|
||||
if ((i < 0) || (i >= sexp_string_length(_ARG1)))
|
||||
sexp_raise("string-ref: index out of range", sexp_list2(ctx, _ARG1, _ARG2));
|
||||
_ARG2 = sexp_string_ref(_ARG1, _ARG2);
|
||||
if (ip[-1] == SEXP_OP_BYTES_REF)
|
||||
_ARG2 = sexp_bytes_ref(_ARG1, _ARG2);
|
||||
else
|
||||
#if SEXP_USE_UTF8_STRINGS
|
||||
_ARG2 = sexp_string_utf8_ref(ctx, _ARG1, _ARG2);
|
||||
#else
|
||||
_ARG2 = sexp_string_ref(_ARG1, _ARG2);
|
||||
#endif
|
||||
top--;
|
||||
break;
|
||||
case SEXP_OP_BYTES_SET:
|
||||
case SEXP_OP_STRING_SET:
|
||||
if (! sexp_stringp(_ARG1))
|
||||
sexp_raise("string-set!: not a string", sexp_list1(ctx, _ARG1));
|
||||
|
@ -857,14 +864,30 @@ sexp sexp_vm (sexp ctx, sexp proc) {
|
|||
i = sexp_unbox_fixnum(_ARG2);
|
||||
if ((i < 0) || (i >= sexp_string_length(_ARG1)))
|
||||
sexp_raise("string-set!: index out of range", sexp_list2(ctx, _ARG1, _ARG2));
|
||||
sexp_string_set(_ARG1, _ARG2, _ARG3);
|
||||
if (ip[-1] == SEXP_OP_BYTES_SET)
|
||||
sexp_bytes_set(_ARG1, _ARG2, _ARG3);
|
||||
else
|
||||
#if SEXP_USE_UTF8_STRINGS
|
||||
sexp_string_utf8_set(ctx, _ARG1, _ARG2, _ARG3);
|
||||
#else
|
||||
sexp_string_set(_ARG1, _ARG2, _ARG3);
|
||||
#endif
|
||||
_ARG3 = SEXP_VOID;
|
||||
top-=2;
|
||||
break;
|
||||
case SEXP_OP_BYTES_LENGTH:
|
||||
if (! sexp_stringp(_ARG1))
|
||||
sexp_raise("bytes-length: not a byte-vector", sexp_list1(ctx, _ARG1));
|
||||
_ARG1 = sexp_make_fixnum(sexp_bytes_length(_ARG1));
|
||||
break;
|
||||
case SEXP_OP_STRING_LENGTH:
|
||||
if (! sexp_stringp(_ARG1))
|
||||
sexp_raise("string-length: not a string", sexp_list1(ctx, _ARG1));
|
||||
#if SEXP_USE_UTF8_STRINGS
|
||||
_ARG1 = sexp_make_fixnum(sexp_string_utf8_length((unsigned char*)sexp_string_data(_ARG1), sexp_string_length(_ARG1)));
|
||||
#else
|
||||
_ARG1 = sexp_make_fixnum(sexp_string_length(_ARG1));
|
||||
#endif
|
||||
break;
|
||||
case SEXP_OP_MAKE_PROCEDURE:
|
||||
sexp_context_top(ctx) = top;
|
||||
|
@ -1244,6 +1267,11 @@ sexp sexp_vm (sexp ctx, sexp proc) {
|
|||
sexp_raise("write-char: not a character", sexp_list1(ctx, _ARG1));
|
||||
if (! sexp_oportp(_ARG2))
|
||||
sexp_raise("write-char: not an output-port", sexp_list1(ctx, _ARG2));
|
||||
#if SEXP_USE_UTF8_STRINGS
|
||||
if (sexp_unbox_character(_ARG1) >= 0x80)
|
||||
sexp_write_utf8_char(ctx, sexp_unbox_character(_ARG1), _ARG2);
|
||||
else
|
||||
#endif
|
||||
sexp_write_char(ctx, sexp_unbox_character(_ARG1), _ARG2);
|
||||
_ARG2 = SEXP_VOID;
|
||||
top--;
|
||||
|
@ -1258,6 +1286,11 @@ sexp sexp_vm (sexp ctx, sexp proc) {
|
|||
if (! sexp_iportp(_ARG1))
|
||||
sexp_raise("read-char: not an input-port", sexp_list1(ctx, _ARG1));
|
||||
i = sexp_read_char(ctx, _ARG1);
|
||||
#if SEXP_USE_UTF8_STRINGS
|
||||
if (i >= 0x80)
|
||||
_ARG1 = sexp_read_utf8_char(ctx, _ARG1, i);
|
||||
else
|
||||
#endif
|
||||
_ARG1 = (i == EOF) ? SEXP_EOF : sexp_make_character(i);
|
||||
break;
|
||||
case SEXP_OP_PEEK_CHAR:
|
||||
|
@ -1302,7 +1335,7 @@ sexp sexp_vm (sexp ctx, sexp proc) {
|
|||
|
||||
/******************************* apply ********************************/
|
||||
|
||||
static sexp sexp_apply1 (sexp ctx, sexp f, sexp x) {
|
||||
sexp sexp_apply1 (sexp ctx, sexp f, sexp x) {
|
||||
sexp res;
|
||||
sexp_gc_var1(args);
|
||||
if (sexp_opcodep(f)) {
|
||||
|
|
Loading…
Add table
Reference in a new issue