From 017548cc468c8afc2ea274f74eb8b77b776e9645 Mon Sep 17 00:00:00 2001 From: Alex Shinn Date: Mon, 21 Mar 2011 14:57:52 +0900 Subject: [PATCH] adding string cursors, optimizing in-string(-reverse) loops --- eval.c | 6 ++++++ include/chibi/eval.h | 3 +++ lib/chibi/loop/loop.scm | 48 +++++++++++++++++++++++++---------------- lib/init.scm | 19 ++++++++++++++++ opcodes.c | 3 +++ tests/loop-tests.scm | 10 ++++----- tests/unicode-tests.scm | 10 +++++++++ vm.c | 30 ++++++++++++++++++++++++++ 8 files changed, 106 insertions(+), 23 deletions(-) diff --git a/eval.c b/eval.c index 16865faa..08a7e0a7 100644 --- a/eval.c +++ b/eval.c @@ -1192,6 +1192,12 @@ static int sexp_string_utf8_length (unsigned char *p, int len) { return i; } +static char* sexp_string_utf8_prev (unsigned char *p) { + while ((*--p)>>6 == 2) + ; + return (char*)p; +} + sexp sexp_string_utf8_ref (sexp ctx, sexp str, sexp i) { unsigned char *p=(unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(i); if (*p < 0x80) diff --git a/include/chibi/eval.h b/include/chibi/eval.h index b54df205..5c5f01d6 100644 --- a/include/chibi/eval.h +++ b/include/chibi/eval.h @@ -78,6 +78,9 @@ enum sexp_opcode_names { SEXP_OP_STRING_REF, SEXP_OP_STRING_SET, SEXP_OP_STRING_LENGTH, + SEXP_OP_STRING_CURSOR_NEXT, + SEXP_OP_STRING_CURSOR_PREV, + SEXP_OP_STRING_SIZE, SEXP_OP_MAKE_PROCEDURE, SEXP_OP_MAKE_VECTOR, SEXP_OP_MAKE_EXCEPTION, diff --git a/lib/chibi/loop/loop.scm b/lib/chibi/loop/loop.scm index 09e12856..8a15fe3b 100644 --- a/lib/chibi/loop/loop.scm +++ b/lib/chibi/loop/loop.scm @@ -1,6 +1,6 @@ ;;;; loop.scm - the chibi loop (aka foof-loop) ;; -;; Copyright (c) 2009 Alex Shinn. All rights reserved. +;; Copyright (c) 2009-2011 Alex Shinn. All rights reserved. ;; BSD-style license: http://synthcode.com/license.txt ;; The loop API is compatible with Taylor Campbell's foof-loop, but @@ -194,34 +194,46 @@ (begin (define-syntax in-type (syntax-rules () - ((in-type ls next . rest) - (%in-idx >= + 0 (length tmp) ref tmp ls next . rest)))) + ((in-type seq next . rest) + (%in-idx >= (lambda (x i) (+ i 1)) (lambda (x) 0) length ref tmp seq next . rest)))) (define-syntax in-type-reverse (syntax-rules () - ((in-type-reverse ls next . rest) - (%in-idx < - (- (length tmp) 1) 0 ref tmp ls next . rest)))) + ((in-type-reverse seq next . rest) + (%in-idx < (lambda (x i) (- i 1)) (lambda (x) (- (length x) 1)) (lambda (x) 0) ref tmp seq next . rest)))) )))) -(define-in-indexed in-string in-string-reverse string-length string-ref) (define-in-indexed in-vector in-vector-reverse vector-length vector-ref) +(define-syntax in-string + (syntax-rules () + ((in-string s next . rest) + (%in-idx string-cursor>=? string-cursor-next + string-cursor-start string-cursor-end string-cursor-ref + tmp s next . rest)))) + +(define-syntax in-string-reverse + (syntax-rules () + ((in-string-reverse s next . rest) + (%in-idx string-cursor? >) +(define string-cursor>=? >=) +(define string-cursor=? =) + +(define (string-cursor-start s) 0) + +(cond-expand + (utf-8 + (define string-cursor-end string-size)) + (else + (define string-cursor-end string-length) + (define (string-cursor-next s i) (+ i 1)) + (define (string-cursor-prev s i) (- i 1)))) diff --git a/opcodes.c b/opcodes.c index 755d3dba..d085fb01 100644 --- a/opcodes.c +++ b/opcodes.c @@ -32,6 +32,9 @@ _OP(SEXP_OPC_SETTER, SEXP_OP_BYTES_SET, 3, 0, SEXP_VOID, _I(SEXP_BYTES), _I(SEXP _OP(SEXP_OPC_GETTER, SEXP_OP_BYTES_LENGTH, 1, 0, _I(SEXP_FIXNUM), _I(SEXP_BYTES), SEXP_FALSE, SEXP_FALSE, 0,"byte-vector-length", 0, NULL), #if SEXP_USE_UTF8_STRINGS _OP(SEXP_OPC_GETTER, SEXP_OP_STRING_REF, 2, 0, _I(SEXP_CHAR), _I(SEXP_STRING), _I(SEXP_FIXNUM), SEXP_FALSE, 0,"string-cursor-ref", 0, NULL), +_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_CURSOR_NEXT, 2, 0, _I(SEXP_FIXNUM), _I(SEXP_STRING), _I(SEXP_FIXNUM), SEXP_FALSE, 0,"string-cursor-next", 0, NULL), +_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_CURSOR_PREV, 2, 0, _I(SEXP_FIXNUM), _I(SEXP_STRING), _I(SEXP_FIXNUM), SEXP_FALSE, 0,"string-cursor-prev", 0, NULL), +_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_SIZE, 1, 0, _I(SEXP_FIXNUM), _I(SEXP_STRING), SEXP_FALSE, SEXP_FALSE, 0,"string-size", 0, NULL), #else _OP(SEXP_OPC_GETTER, SEXP_OP_STRING_REF, 2, 0, _I(SEXP_CHAR), _I(SEXP_STRING), _I(SEXP_FIXNUM), SEXP_FALSE, 0,"string-ref", 0, NULL), #endif diff --git a/tests/loop-tests.scm b/tests/loop-tests.scm index f259245c..d1ff3143 100644 --- a/tests/loop-tests.scm +++ b/tests/loop-tests.scm @@ -80,11 +80,6 @@ '(#\h #\e #\l #\l) (loop ((for c (in-string "hello" 0 4)) (for res (listing c))) => res)) -(test - "in-string with start, end and step" - '(#\e #\l) - (loop ((for c (in-string "hello" 1 4 2)) (for res (listing c))) => res)) - (test "in-string-reverse" '(#\o #\l #\l #\e #\h) @@ -95,6 +90,11 @@ '(1 2 3) (loop ((for x (in-vector '#(1 2 3))) (for res (listing x))) => res)) +(test + "in-vector-reverse" + '(3 2 1) + (loop ((for x (in-vector-reverse '#(1 2 3))) (for res (listing x))) => res)) + (test "up-from" '(5 6 7) (loop ((for i (up-from 5 (to 8))) (for res (listing i))) diff --git a/tests/unicode-tests.scm b/tests/unicode-tests.scm index 95d81049..f3705094 100644 --- a/tests/unicode-tests.scm +++ b/tests/unicode-tests.scm @@ -40,4 +40,14 @@ (string-fill! s #\字) s)) +(import (chibi loop)) + +(test "in-string" + '(#\日 #\本 #\語) + (loop ((for c (in-string "日本語")) (for res (listing c))) => res)) + +(test "in-string-reverse" + '(#\語 #\本 #\日) + (loop ((for c (in-string-reverse "日本語")) (for res (listing c))) => res)) + (test-end) diff --git a/vm.c b/vm.c index 32388d9e..fbf3a521 100644 --- a/vm.c +++ b/vm.c @@ -964,6 +964,9 @@ sexp sexp_vm (sexp ctx, sexp proc) { _ARG2 = sexp_string_ref(_ARG1, _ARG2); #endif top--; +#if SEXP_USE_UTF8_STRINGS + sexp_check_exception(); +#endif break; case SEXP_OP_BYTES_SET: case SEXP_OP_STRING_SET: @@ -992,6 +995,33 @@ sexp sexp_vm (sexp ctx, sexp proc) { _ARG3 = SEXP_VOID; top-=2; break; +#if SEXP_USE_UTF8_STRINGS + case SEXP_OP_STRING_CURSOR_NEXT: + if (! sexp_stringp(_ARG1)) + sexp_raise("string-cursor-next: not a string", sexp_list1(ctx, _ARG1)); + else if (! sexp_fixnump(_ARG2)) + sexp_raise("string-cursor-next: not an integer", sexp_list1(ctx, _ARG2)); + i = sexp_unbox_fixnum(_ARG2); + _ARG2 = sexp_make_fixnum(i + sexp_utf8_initial_byte_count(((unsigned char*)sexp_string_data(_ARG1))[i])); + top--; + sexp_check_exception(); + break; + case SEXP_OP_STRING_CURSOR_PREV: + if (! sexp_stringp(_ARG1)) + sexp_raise("string-cursor-prev: not a string", sexp_list1(ctx, _ARG1)); + else if (! sexp_fixnump(_ARG2)) + sexp_raise("string-cursor-prev: not an integer", sexp_list1(ctx, _ARG2)); + i = sexp_unbox_fixnum(_ARG2); + _ARG2 = sexp_make_fixnum(sexp_string_utf8_prev((unsigned char*)sexp_string_data(_ARG1)+i) - sexp_string_data(_ARG1)); + top--; + sexp_check_exception(); + break; + case SEXP_OP_STRING_SIZE: + if (! sexp_stringp(_ARG1)) + sexp_raise("string-size: not a string", sexp_list1(ctx, _ARG1)); + _ARG1 = sexp_make_fixnum(sexp_string_length(_ARG1)); + break; +#endif case SEXP_OP_BYTES_LENGTH: if (! sexp_stringp(_ARG1)) sexp_raise("bytes-length: not a byte-vector", sexp_list1(ctx, _ARG1));