adding string cursors, optimizing in-string(-reverse) loops

This commit is contained in:
Alex Shinn 2011-03-21 14:57:52 +09:00
parent 89f5d9ea65
commit 017548cc46
8 changed files with 106 additions and 23 deletions

6
eval.c
View file

@ -1192,6 +1192,12 @@ static int sexp_string_utf8_length (unsigned char *p, int len) {
return i;
}
static char* sexp_string_utf8_prev (unsigned char *p) {
while ((*--p)>>6 == 2)
;
return (char*)p;
}
sexp sexp_string_utf8_ref (sexp ctx, sexp str, sexp i) {
unsigned char *p=(unsigned char*)sexp_string_data(str) + sexp_unbox_fixnum(i);
if (*p < 0x80)

View file

@ -78,6 +78,9 @@ enum sexp_opcode_names {
SEXP_OP_STRING_REF,
SEXP_OP_STRING_SET,
SEXP_OP_STRING_LENGTH,
SEXP_OP_STRING_CURSOR_NEXT,
SEXP_OP_STRING_CURSOR_PREV,
SEXP_OP_STRING_SIZE,
SEXP_OP_MAKE_PROCEDURE,
SEXP_OP_MAKE_VECTOR,
SEXP_OP_MAKE_EXCEPTION,

View file

@ -1,6 +1,6 @@
;;;; loop.scm - the chibi loop (aka foof-loop)
;;
;; Copyright (c) 2009 Alex Shinn. All rights reserved.
;; Copyright (c) 2009-2011 Alex Shinn. All rights reserved.
;; BSD-style license: http://synthcode.com/license.txt
;; The loop API is compatible with Taylor Campbell's foof-loop, but
@ -194,34 +194,46 @@
(begin
(define-syntax in-type
(syntax-rules ()
((in-type ls next . rest)
(%in-idx >= + 0 (length tmp) ref tmp ls next . rest))))
((in-type seq next . rest)
(%in-idx >= (lambda (x i) (+ i 1)) (lambda (x) 0) length ref tmp seq next . rest))))
(define-syntax in-type-reverse
(syntax-rules ()
((in-type-reverse ls next . rest)
(%in-idx < - (- (length tmp) 1) 0 ref tmp ls next . rest))))
((in-type-reverse seq next . rest)
(%in-idx < (lambda (x i) (- i 1)) (lambda (x) (- (length x) 1)) (lambda (x) 0) ref tmp seq next . rest))))
))))
(define-in-indexed in-string in-string-reverse string-length string-ref)
(define-in-indexed in-vector in-vector-reverse vector-length vector-ref)
(define-syntax in-string
(syntax-rules ()
((in-string s next . rest)
(%in-idx string-cursor>=? string-cursor-next
string-cursor-start string-cursor-end string-cursor-ref
tmp s next . rest))))
(define-syntax in-string-reverse
(syntax-rules ()
((in-string-reverse s next . rest)
(%in-idx string-cursor<? string-cursor-prev
(lambda (x) (string-cursor-prev x (string-cursor-end x)))
string-cursor-start string-cursor-ref
tmp s next . rest))))
;; helper for the above string and vector iterators
(define-syntax %in-idx
(syntax-rules ()
;; cmp inc start end ref
((%in-idx ge + s e r tmp-vec ((var) (vec ...)) next . rest)
(%in-idx ge + s e r tmp-vec ((var vec-index) (vec ...)) next . rest))
((%in-idx ge + s e r tmp-vec ((var index) (vec)) next . rest)
(%in-idx ge + s e r tmp-vec ((var index) (vec s e 1)) next . rest))
((%in-idx ge + s e r tmp-vec ((var index) (vec from)) next . rest)
(%in-idx ge + s e r tmp-vec ((var index) (vec from e 1)) next . rest))
((%in-idx ge + s e r tmp-vec ((var index) (vec from to)) next . rest)
(%in-idx ge + s e r tmp-vec ((var index) (vec from to 1)) next . rest))
((%in-idx ge + s e r tmp-vec ((var index) (vec from to step)) next . rest)
(next ((tmp-vec vec) (end to))
((index from (+ index step)))
((%in-idx ge + s e r tmp ((var) (seq ...)) next . rest)
(%in-idx ge + s e r tmp ((var seq-index) (seq ...)) next . rest))
((%in-idx ge + s e r tmp ((var index) (seq)) next . rest)
(%in-idx ge + s e r tmp ((var index) (seq (s tmp) (e tmp))) next . rest))
((%in-idx ge + s e r tmp ((var index) (seq from)) next . rest)
(%in-idx ge + s e r tmp ((var index) (seq from (e tmp))) next . rest))
((%in-idx ge + s e r tmp ((var index) (seq from to)) next . rest)
(next ((tmp seq) (end to))
((index from (+ tmp index)))
((ge index end))
((var (r tmp-vec index)))
((var (r tmp index)))
()
. rest))
))

View file

@ -879,3 +879,22 @@
(set! computed? #t)))
result)))
(define (force x) (if (procedure? x) (x) x))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; string cursors
(define string-cursor<? <)
(define string-cursor<=? <=)
(define string-cursor>? >)
(define string-cursor>=? >=)
(define string-cursor=? =)
(define (string-cursor-start s) 0)
(cond-expand
(utf-8
(define string-cursor-end string-size))
(else
(define string-cursor-end string-length)
(define (string-cursor-next s i) (+ i 1))
(define (string-cursor-prev s i) (- i 1))))

View file

@ -32,6 +32,9 @@ _OP(SEXP_OPC_SETTER, SEXP_OP_BYTES_SET, 3, 0, SEXP_VOID, _I(SEXP_BYTES), _I(SEXP
_OP(SEXP_OPC_GETTER, SEXP_OP_BYTES_LENGTH, 1, 0, _I(SEXP_FIXNUM), _I(SEXP_BYTES), SEXP_FALSE, SEXP_FALSE, 0,"byte-vector-length", 0, NULL),
#if SEXP_USE_UTF8_STRINGS
_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_REF, 2, 0, _I(SEXP_CHAR), _I(SEXP_STRING), _I(SEXP_FIXNUM), SEXP_FALSE, 0,"string-cursor-ref", 0, NULL),
_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_CURSOR_NEXT, 2, 0, _I(SEXP_FIXNUM), _I(SEXP_STRING), _I(SEXP_FIXNUM), SEXP_FALSE, 0,"string-cursor-next", 0, NULL),
_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_CURSOR_PREV, 2, 0, _I(SEXP_FIXNUM), _I(SEXP_STRING), _I(SEXP_FIXNUM), SEXP_FALSE, 0,"string-cursor-prev", 0, NULL),
_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_SIZE, 1, 0, _I(SEXP_FIXNUM), _I(SEXP_STRING), SEXP_FALSE, SEXP_FALSE, 0,"string-size", 0, NULL),
#else
_OP(SEXP_OPC_GETTER, SEXP_OP_STRING_REF, 2, 0, _I(SEXP_CHAR), _I(SEXP_STRING), _I(SEXP_FIXNUM), SEXP_FALSE, 0,"string-ref", 0, NULL),
#endif

View file

@ -80,11 +80,6 @@
'(#\h #\e #\l #\l)
(loop ((for c (in-string "hello" 0 4)) (for res (listing c))) => res))
(test
"in-string with start, end and step"
'(#\e #\l)
(loop ((for c (in-string "hello" 1 4 2)) (for res (listing c))) => res))
(test
"in-string-reverse"
'(#\o #\l #\l #\e #\h)
@ -95,6 +90,11 @@
'(1 2 3)
(loop ((for x (in-vector '#(1 2 3))) (for res (listing x))) => res))
(test
"in-vector-reverse"
'(3 2 1)
(loop ((for x (in-vector-reverse '#(1 2 3))) (for res (listing x))) => res))
(test "up-from" '(5 6 7)
(loop ((for i (up-from 5 (to 8)))
(for res (listing i)))

View file

@ -40,4 +40,14 @@
(string-fill! s #\字)
s))
(import (chibi loop))
(test "in-string"
'(#\日 #\本 #\語)
(loop ((for c (in-string "日本語")) (for res (listing c))) => res))
(test "in-string-reverse"
'(#\語 #\本 #\日)
(loop ((for c (in-string-reverse "日本語")) (for res (listing c))) => res))
(test-end)

30
vm.c
View file

@ -964,6 +964,9 @@ sexp sexp_vm (sexp ctx, sexp proc) {
_ARG2 = sexp_string_ref(_ARG1, _ARG2);
#endif
top--;
#if SEXP_USE_UTF8_STRINGS
sexp_check_exception();
#endif
break;
case SEXP_OP_BYTES_SET:
case SEXP_OP_STRING_SET:
@ -992,6 +995,33 @@ sexp sexp_vm (sexp ctx, sexp proc) {
_ARG3 = SEXP_VOID;
top-=2;
break;
#if SEXP_USE_UTF8_STRINGS
case SEXP_OP_STRING_CURSOR_NEXT:
if (! sexp_stringp(_ARG1))
sexp_raise("string-cursor-next: not a string", sexp_list1(ctx, _ARG1));
else if (! sexp_fixnump(_ARG2))
sexp_raise("string-cursor-next: not an integer", sexp_list1(ctx, _ARG2));
i = sexp_unbox_fixnum(_ARG2);
_ARG2 = sexp_make_fixnum(i + sexp_utf8_initial_byte_count(((unsigned char*)sexp_string_data(_ARG1))[i]));
top--;
sexp_check_exception();
break;
case SEXP_OP_STRING_CURSOR_PREV:
if (! sexp_stringp(_ARG1))
sexp_raise("string-cursor-prev: not a string", sexp_list1(ctx, _ARG1));
else if (! sexp_fixnump(_ARG2))
sexp_raise("string-cursor-prev: not an integer", sexp_list1(ctx, _ARG2));
i = sexp_unbox_fixnum(_ARG2);
_ARG2 = sexp_make_fixnum(sexp_string_utf8_prev((unsigned char*)sexp_string_data(_ARG1)+i) - sexp_string_data(_ARG1));
top--;
sexp_check_exception();
break;
case SEXP_OP_STRING_SIZE:
if (! sexp_stringp(_ARG1))
sexp_raise("string-size: not a string", sexp_list1(ctx, _ARG1));
_ARG1 = sexp_make_fixnum(sexp_string_length(_ARG1));
break;
#endif
case SEXP_OP_BYTES_LENGTH:
if (! sexp_stringp(_ARG1))
sexp_raise("bytes-length: not a byte-vector", sexp_list1(ctx, _ARG1));