From 2e55517108b7bb15aa0a0d9eda4d97d9151d5201 Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Thu, 2 Apr 2009 18:35:08 +0900 Subject: [PATCH] string utilities --- debug.c | 3 +- eval.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++-------- eval.h | 8 ++-- init.scm | 40 +++++++++++++++--- opcodes.c | 13 ++++-- sexp.c | 29 ++++++++----- sexp.h | 8 ++-- 7 files changed, 175 insertions(+), 47 deletions(-) diff --git a/debug.c b/debug.c index 6bdc8b01..89923926 100644 --- a/debug.c +++ b/debug.c @@ -10,7 +10,8 @@ static const char* reverse_opcode_names[] = "STRING-SET", "STRING-LENGTH", "MAKE-PROCEDURE", "MAKE-VECTOR", "AND", "NULL?", "FIXNUM?", "SYMBOL?", "CHAR?", "EOF?", "TYPEP", "CAR", "CDR", "SET-CAR", "SET-CDR", "CONS", "ADD", "SUB", - "MUL", "DIV", "QUOT", "MOD", "NEG", "INV", "LT", "LE", "EQN", "EQ", + "MUL", "DIV", "QUOTIENT", "REMAINDER", "NEGATIVE", "INVERSE", + "LT", "LE", "EQN", "EQ", "EXACT->INEXACT", "INEXACT->EXACT", "CHAR->INTEGER", "INTEGER->CHAR", "CHAR-UPCASE", "CHAR-DOWNCASE", "DISPLAY", "WRITE", "WRITE-CHAR", diff --git a/eval.c b/eval.c index dd242cd6..b0fae803 100644 --- a/eval.c +++ b/eval.c @@ -285,7 +285,7 @@ static sexp sexp_identifier_eq (sexp e1, sexp id1, sexp e2, sexp id2) { static sexp sexp_compile_error(char *message, sexp irritants) { return sexp_make_exception(the_compile_error_symbol, - sexp_make_string(message), + sexp_c_string(message), irritants, SEXP_FALSE, SEXP_FALSE); } @@ -1280,21 +1280,26 @@ sexp vm(sexp bc, sexp cp, sexp context, sexp* stack, sexp_sint_t top) { else sexp_raise("/: not a number", sexp_list2(_ARG1, _ARG2)); top--; break; - case OP_QUOT: + case OP_QUOTIENT: if (sexp_integerp(_ARG1) && sexp_integerp(_ARG2)) { + if (_ARG1 == sexp_make_integer(0)) + sexp_raise("divide by zero", SEXP_NULL); _ARG2 = sexp_fx_div(_ARG1, _ARG2); top--; } - else sexp_raise("quotient: not a number", sexp_list2(_ARG1, _ARG2)); + else sexp_raise("quotient: not an integer", sexp_list2(_ARG1, _ARG2)); break; - case OP_MOD: + case OP_REMAINDER: if (sexp_integerp(_ARG1) && sexp_integerp(_ARG2)) { - _ARG2 = sexp_fx_mod(_ARG1, _ARG2); + if (_ARG1 == sexp_make_integer(0)) + sexp_raise("divide by zero", SEXP_NULL); + tmp1 = sexp_fx_rem(_ARG1, _ARG2); top--; + _ARG1 = tmp1; } - else sexp_raise("modulo: not a number", sexp_list2(_ARG1, _ARG2)); + else sexp_raise("remainder: not an integer", sexp_list2(_ARG1, _ARG2)); break; - case OP_NEG: + case OP_NEGATIVE: if (sexp_integerp(_ARG1)) _ARG1 = sexp_make_integer(-sexp_unbox_integer(_ARG1)); #if USE_FLONUMS @@ -1303,7 +1308,7 @@ sexp vm(sexp bc, sexp cp, sexp context, sexp* stack, sexp_sint_t top) { #endif else sexp_raise("-: not a number", sexp_list1(_ARG1)); break; - case OP_INV: + case OP_INVERSE: if (sexp_integerp(_ARG1)) _ARG1 = sexp_make_flonum(1/(double)sexp_unbox_integer(_ARG1)); #if USE_FLONUMS @@ -1483,14 +1488,21 @@ sexp sexp_load (sexp source, sexp env) { return res; } -#if USE_MATH - -static sexp sexp_math_exception (char *message, sexp obj) { +static sexp sexp_type_exception (char *message, sexp obj) { return sexp_make_exception(sexp_intern("type-error"), - sexp_make_string(message), + sexp_c_string(message), sexp_list1(obj), SEXP_FALSE, SEXP_FALSE); } +static sexp sexp_range_exception (sexp obj, sexp start, sexp end) { + return sexp_make_exception(sexp_intern("range-error"), + sexp_c_string("bad index range"), + sexp_list3(obj, start, end), + SEXP_FALSE, SEXP_FALSE); +} + +#if USE_MATH + #define define_math_op(name, cname) \ static sexp name (sexp z) { \ double d; \ @@ -1499,7 +1511,7 @@ static sexp sexp_math_exception (char *message, sexp obj) { else if (sexp_integerp(z)) \ d = (double)sexp_unbox_integer(z); \ else \ - return sexp_math_exception("not a number", z); \ + return sexp_type_exception("not a number", z); \ return sexp_make_flonum(cname(d)); \ } @@ -1528,7 +1540,7 @@ static sexp sexp_expt (sexp x, sexp e) { x1 = sexp_flonum_value(x); #endif else - return sexp_math_exception("not a number", x); + return sexp_type_exception("not a number", x); if (sexp_integerp(e)) e1 = (double)sexp_unbox_integer(e); #if USE_FLONUMS @@ -1536,7 +1548,7 @@ static sexp sexp_expt (sexp x, sexp e) { e1 = sexp_flonum_value(e); #endif else - return sexp_math_exception("not a number", e); + return sexp_type_exception("not a number", e); res = pow(x1, e1); #if USE_FLONUMS if ((res > SEXP_MAX_INT) || sexp_flonump(x) || sexp_flonump(e)) @@ -1545,6 +1557,79 @@ static sexp sexp_expt (sexp x, sexp e) { return sexp_make_integer((sexp_sint_t)round(res)); } +static sexp sexp_substring (sexp str, sexp start, sexp end) { + sexp res; + if (! sexp_stringp(str)) + return sexp_type_exception("not a string", str); + if (! sexp_integerp(start)) + return sexp_type_exception("not a number", start); + if (end == SEXP_FALSE) + end = sexp_make_integer(sexp_string_length(str)); + if (! sexp_integerp(end)) + return sexp_type_exception("not a number", end); + if ((sexp_unbox_integer(start) < 0) + || (sexp_unbox_integer(start) > sexp_string_length(str)) + || (sexp_unbox_integer(end) < 0) + || (sexp_unbox_integer(end) > sexp_string_length(str)) + || (end < start)) + return sexp_range_exception(str, start, end); + res = sexp_make_string(sexp_fx_sub(end, start), + SEXP_UNDEF); + memcpy(sexp_string_data(res), + sexp_string_data(str)+sexp_unbox_integer(start), + sexp_string_length(res)); + return res; +} + +static sexp sexp_string_concatenate (sexp str_ls) { + sexp res, ls; + sexp_uint_t len=0; + char *p; + for (ls=str_ls; sexp_pairp(ls); ls=sexp_cdr(ls)) + if (! sexp_stringp(sexp_car(ls))) + return sexp_type_exception("not a string", sexp_car(ls)); + else + len += sexp_string_length(sexp_car(ls)); + res = sexp_make_string(sexp_make_integer(len), SEXP_UNDEF); + p = sexp_string_data(res); + for (ls=str_ls; sexp_pairp(ls); ls=sexp_cdr(ls)) { + len = sexp_string_length(sexp_car(ls)); + memcpy(p, sexp_string_data(sexp_car(ls)), len); + p += len; + } + return res; +} + +static sexp sexp_string_cmp (sexp str1, sexp str2) { + sexp_sint_t len1, len2, len, diff; + if (! sexp_stringp(str1)) + return sexp_type_exception("not a string", str1); + if (! sexp_stringp(str2)) + return sexp_type_exception("not a string", str2); + len1 = sexp_string_length(str1); + len2 = sexp_string_length(str2); + len = ((len1string string->number ;; symbol->string string->symbol -;; make-string -;; string=? string-ci=? string? -;; string<=? string>=? string-ci? string-ci<=? string-ci>=? -;; substring string-append string-copy -;; values call-with-values dynamic-wind ;; call-with-input-file call-with-output-file ;; with-input-from-file with-output-to-file @@ -341,6 +335,20 @@ (if (>= i 0) (begin (string-set! str i ch) (lp (- i 1)))))) (define (string . args) (list->string args)) +(define (string-append . args) (string-concatenate args)) +(define (string-copy s) (substring s 0 (string-length s))) + +(define (string=? s1 s2) (eq? (string-cmp s1 s2) 0)) +(define (string? s1 s2) (> (string-cmp s1 s2) 0)) +(define (string>=? s1 s2) (>= (string-cmp s1 s2) 0)) + +(define (string-ci=? s1 s2) (eq? (string-cmp-ci s1 s2) 0)) +(define (string-ci? s1 s2) (> (string-cmp-ci s1 s2) 0)) +(define (string-ci>=? s1 s2) (>= (string-cmp-ci s1 s2) 0)) ;; math utils @@ -360,6 +368,12 @@ (define (abs x) (if (< x 0) (- x) x)) +(define (modulo a b) + (let ((res (remainder a b))) + (if (< b 0) + (if (< res 0) res (- res b)) + (if (> res 0) res (+ res b))))) + (define (gcd a b) (if (= b 0) a @@ -413,3 +427,17 @@ (define (load file) (%load file (interaction-environment))) +;; values + +(define *values-tag* (list 'values)) + +(define (values . ls) + (if (and (pair? ls) (null? (cdr ls))) + (car ls) + (cons *values-tag* ls))) + +(define (call-with-values producer consumer) + (let ((res (producer))) + (if (and (pair? res) (eq? *values-tag* (car res))) + (apply consumer (cdr res)) + (consumer res)))) diff --git a/opcodes.c b/opcodes.c index c8fb6d66..e9a3ca74 100644 --- a/opcodes.c +++ b/opcodes.c @@ -27,10 +27,10 @@ _OP(OPC_GENERIC, OP_CHAR_UPCASE, 1, 0, SEXP_CHAR, 0, 0, "char-upcase", NULL, NUL _OP(OPC_GENERIC, OP_CHAR_DOWNCASE, 1, 0, SEXP_CHAR, 0, 0, "char-downcase", NULL, NULL), _OP(OPC_ARITHMETIC, OP_ADD, 0, 1, SEXP_FIXNUM, 0, 0, "+", NULL, NULL), _OP(OPC_ARITHMETIC, OP_MUL, 0, 1, SEXP_FIXNUM, 0, 0, "*", NULL, NULL), -_OP(OPC_ARITHMETIC_INV, OP_SUB, 0, 1, SEXP_FIXNUM, 0, OP_NEG, "-", NULL, NULL), -_OP(OPC_ARITHMETIC_INV, OP_DIV, 0, 1, SEXP_FIXNUM, 0, OP_INV, "/", NULL, NULL), -_OP(OPC_ARITHMETIC, OP_QUOT, 2, 0, SEXP_FIXNUM, SEXP_FIXNUM, 0, "quotient", NULL, NULL), -_OP(OPC_ARITHMETIC, OP_MOD, 2, 0, SEXP_FIXNUM, SEXP_FIXNUM, 0, "modulo", NULL, NULL), +_OP(OPC_ARITHMETIC_INV, OP_SUB, 0, 1, SEXP_FIXNUM, 0, OP_NEGATIVE, "-", NULL, NULL), +_OP(OPC_ARITHMETIC_INV, OP_DIV, 0, 1, SEXP_FIXNUM, 0, OP_INVERSE, "/", NULL, NULL), +_OP(OPC_ARITHMETIC, OP_QUOTIENT, 2, 0, SEXP_FIXNUM, SEXP_FIXNUM, 0, "quotient", NULL, NULL), +_OP(OPC_ARITHMETIC, OP_REMAINDER, 2, 0, SEXP_FIXNUM, SEXP_FIXNUM, 0, "remainder", NULL, NULL), _OP(OPC_ARITHMETIC_CMP, OP_LT, 0, 1, SEXP_FIXNUM, 0, 0, "<", NULL, NULL), _OP(OPC_ARITHMETIC_CMP, OP_LE, 0, 1, SEXP_FIXNUM, 0, 0, "<=", NULL, NULL), _OP(OPC_ARITHMETIC_CMP, OP_LT, 0, 1, SEXP_FIXNUM, 0, 1, ">", NULL, NULL), @@ -78,6 +78,11 @@ _FN1(SEXP_OPORT, "close-output-port", sexp_close_port), _FN1(SEXP_FIXNUM, "null-environment", sexp_make_null_env), _FN1(SEXP_FIXNUM, "scheme-report-environment", sexp_make_standard_env), _FN2(0, SEXP_ENV, "%load", sexp_load), +_FN2(SEXP_FIXNUM, SEXP_CHAR, "make-string", sexp_make_string), +_FN2(SEXP_STRING, SEXP_STRING, "string-cmp", sexp_string_cmp), +_FN2(SEXP_STRING, SEXP_STRING, "string-cmp-ci", sexp_string_cmp_ci), +_FN3(SEXP_STRING, SEXP_FIXNUM, "substring", sexp_substring), +_FN1(SEXP_PAIR, "string-concatenate", sexp_string_concatenate), #if USE_MATH _FN1(0, "exp", sexp_exp), _FN1(0, "log", sexp_log), diff --git a/sexp.c b/sexp.c index 702afcc4..8cb49a33 100644 --- a/sexp.c +++ b/sexp.c @@ -139,9 +139,9 @@ sexp sexp_print_exception (sexp exn, sexp out) { static sexp sexp_read_error (char *message, sexp irritants, sexp port) { sexp name = (sexp_port_name(port) - ? sexp_make_string(sexp_port_name(port)) : SEXP_FALSE); + ? sexp_c_string(sexp_port_name(port)) : SEXP_FALSE); return sexp_make_exception(the_read_error_symbol, - sexp_make_string(message), + sexp_c_string(message), irritants, name, sexp_make_integer(sexp_port_line(port))); @@ -269,13 +269,22 @@ sexp sexp_make_flonum(double f) { return x; } -sexp sexp_make_string(char *str) { +sexp sexp_make_string(sexp len, sexp ch) { sexp s = sexp_alloc_type(string, SEXP_STRING); + sexp_uint_t clen = sexp_unbox_integer(len); + char *cstr = sexp_alloc(clen+1); + if (sexp_charp(ch)) + memset(cstr, sexp_unbox_character(ch), clen); + cstr[clen] = '\0'; + sexp_string_length(s) = clen; + sexp_string_data(s) = cstr; + return s; +} + +sexp sexp_c_string(char *str) { sexp_uint_t len = strlen(str); - char *mystr = sexp_alloc(len+1); - memcpy(mystr, str, len+1); - sexp_string_length(s) = len; - sexp_string_data(s) = mystr; + sexp s = sexp_make_string(sexp_make_integer(len), SEXP_UNDEF); + memcpy(sexp_string_data(s), str, len); return s; } @@ -754,7 +763,7 @@ sexp sexp_read_raw (sexp in) { break; case '"': str = sexp_read_string(in); - res = sexp_make_string(str); + res = sexp_c_string(str); sexp_free(str); break; case '(': @@ -847,7 +856,7 @@ sexp sexp_read_raw (sexp in) { res = sexp_make_character('\t'); else { res = sexp_read_error("unknown character name", - sexp_list1(sexp_make_string(str)), + sexp_list1(sexp_c_string(str)), in); } } @@ -928,7 +937,7 @@ sexp sexp_read (sexp in) { } sexp sexp_read_from_string(char *str) { - sexp s = sexp_make_string(str); + sexp s = sexp_c_string(str); sexp in = sexp_make_input_string_port(s); sexp res = sexp_read(in); sexp_deep_free(s); diff --git a/sexp.h b/sexp.h index 4d46b3c6..7db6b806 100644 --- a/sexp.h +++ b/sexp.h @@ -344,7 +344,8 @@ struct sexp_struct { #define sexp_fx_sub(a, b) ((sexp)(((sexp_sint_t)a)-((sexp_sint_t)b)+SEXP_FIXNUM_TAG)) #define sexp_fx_mul(a, b) ((sexp)((((((sexp_sint_t)a)-SEXP_FIXNUM_TAG)*(((sexp_sint_t)b)>>SEXP_FIXNUM_BITS))+SEXP_FIXNUM_TAG))) #define sexp_fx_div(a, b) (sexp_make_integer(sexp_unbox_integer(a) / sexp_unbox_integer(b))) -#define sexp_fx_mod(a, b) (sexp_make_integer(sexp_unbox_integer(a) % sexp_unbox_integer(b))) +#define sexp_fx_rem(a, b) (sexp_make_integer(sexp_unbox_integer(a) % sexp_unbox_integer(b))) +#define sexp_fx_sign(a) (-((sexp_sint_t)(a) < 0)) /* -1 or 0 */ #define sexp_fp_add(a, b) (sexp_make_flonum(sexp_flonum_value(a) + sexp_flonum_value(b))) #define sexp_fp_sub(a, b) (sexp_make_flonum(sexp_flonum_value(a) - sexp_flonum_value(b))) @@ -368,7 +369,6 @@ struct sexp_struct { #define sexp_cadr(x) (sexp_car(sexp_cdr(x))) #define sexp_cdar(x) (sexp_cdr(sexp_car(x))) #define sexp_cddr(x) (sexp_cdr(sexp_cdr(x))) - #define sexp_caaar(x) (sexp_car(sexp_caar(x))) #define sexp_caadr(x) (sexp_car(sexp_cadr(x))) #define sexp_cadar(x) (sexp_car(sexp_cdar(x))) @@ -377,7 +377,6 @@ struct sexp_struct { #define sexp_cdadr(x) (sexp_cdr(sexp_cadr(x))) #define sexp_cddar(x) (sexp_cdr(sexp_cdar(x))) #define sexp_cdddr(x) (sexp_cdr(sexp_cddr(x))) - #define sexp_cadddr(x) (sexp_cadr(sexp_cddr(x))) #define sexp_cddddr(x) (sexp_cddr(sexp_cddr(x))) @@ -417,7 +416,8 @@ sexp sexp_append(sexp a, sexp b); sexp sexp_memq(sexp x, sexp ls); sexp sexp_assq(sexp x, sexp ls); sexp sexp_length(sexp ls); -sexp sexp_make_string(char *str); +sexp sexp_c_string(char *str); +sexp sexp_make_string(sexp len, sexp ch); sexp sexp_make_flonum(double f); int sexp_string_hash(char *str, int acc); sexp sexp_intern(char *str);