diff --git a/eval.c b/eval.c index 4fd78faa..82360178 100644 --- a/eval.c +++ b/eval.c @@ -1891,6 +1891,19 @@ sexp sexp_read_utf8_char (sexp ctx, sexp port, int i) { return sexp_make_character(i); } +void sexp_push_utf8_char (sexp ctx, int i, sexp port) { + unsigned char ch[6]; + int len = sexp_utf8_char_byte_count(i); + sexp_utf8_encode_char(ch, len, i); + if (sexp_port_stream(port)) { + while (len>0) + ungetc(ch[--len], sexp_port_stream(port)); + } else { + while (len>0) + sexp_port_buf(port)[--sexp_port_offset(port)] = ch[--len]; + } +} + #if SEXP_USE_MUTABLE_STRINGS void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) { diff --git a/include/chibi/eval.h b/include/chibi/eval.h index 3f6ca257..99e673ae 100644 --- a/include/chibi/eval.h +++ b/include/chibi/eval.h @@ -137,6 +137,7 @@ SEXP_API sexp sexp_make_promise (sexp ctx, sexp self, sexp_sint_t n, sexp done, #endif #if SEXP_USE_UTF8_STRINGS SEXP_API sexp sexp_read_utf8_char (sexp ctx, sexp port, int i); +SEXP_API void sexp_push_utf8_char (sexp ctx, int i, sexp port); SEXP_API void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch); SEXP_API sexp sexp_string_utf8_index_ref (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i); SEXP_API sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch); diff --git a/lib/chibi/io-test.sld b/lib/chibi/io-test.sld index 642569f8..1374e651 100644 --- a/lib/chibi/io-test.sld +++ b/lib/chibi/io-test.sld @@ -131,7 +131,7 @@ (lambda (bv start end) (do ((j start (+ j 1))) ((= j end)) - (bytevector-u8-set! bv j (modulo (+ j i) 256))) + (bytevector-u8-set! bv j (modulo (- (+ j i) start) 256))) (if (> end 0) (set! i (bytevector-u8-ref bv (- end 1)))) (- end start)))))) diff --git a/lib/chibi/io/io.scm b/lib/chibi/io/io.scm index 6d9cb4ea..0f6025d4 100644 --- a/lib/chibi/io/io.scm +++ b/lib/chibi/io/io.scm @@ -378,7 +378,7 @@ (set! buf "") (set! len 0) (set! offset 0) - (+ i start)) + i) (else (set! len (string-size buf)) (cond diff --git a/sexp.c b/sexp.c index 82d10c5e..1c485fd9 100644 --- a/sexp.c +++ b/sexp.c @@ -1338,6 +1338,9 @@ sexp sexp_make_cpointer (sexp ctx, sexp_uint_t type_id, void *value, /************************ reading and writing *************************/ +/* start 4 bytes in so we can always unread a utf8 char in peek-char */ +#define BUF_START 4 + int sexp_buffered_read_char (sexp ctx, sexp p) { sexp_gc_var2(tmp, origbytes); int res = 0; @@ -1346,29 +1349,29 @@ int sexp_buffered_read_char (sexp ctx, sexp p) { } else if (!sexp_port_openp(p)) { return EOF; } else if (sexp_port_stream(p)) { - res = fread(sexp_port_buf(p), 1, SEXP_PORT_BUFFER_SIZE, sexp_port_stream(p)); + res = fread(sexp_port_buf(p) + BUF_START, 1, SEXP_PORT_BUFFER_SIZE - BUF_START, sexp_port_stream(p)); if (res >= 0) { - sexp_port_offset(p) = 0; + sexp_port_offset(p) = BUF_START; sexp_port_size(p) = res; res = ((sexp_port_offset(p) < sexp_port_size(p)) ? ((unsigned char*)sexp_port_buf(p))[sexp_port_offset(p)++] : EOF); } } else if (sexp_filenop(sexp_port_fd(p))) { - res = read(sexp_port_fileno(p), sexp_port_buf(p), SEXP_PORT_BUFFER_SIZE); + res = read(sexp_port_fileno(p), sexp_port_buf(p) + BUF_START, SEXP_PORT_BUFFER_SIZE - BUF_START); if (res >= 0) { - sexp_port_offset(p) = 0; + sexp_port_offset(p) = BUF_START; sexp_port_size(p) = res; res = ((sexp_port_offset(p) < sexp_port_size(p)) ? ((unsigned char*)sexp_port_buf(p))[sexp_port_offset(p)++] : EOF); } } else if (sexp_port_customp(p)) { sexp_gc_preserve2(ctx, tmp, origbytes); - tmp = sexp_list2(ctx, SEXP_ZERO, sexp_make_fixnum(SEXP_PORT_BUFFER_SIZE)); + tmp = sexp_list2(ctx, sexp_make_fixnum(BUF_START), sexp_make_fixnum(SEXP_PORT_BUFFER_SIZE)); origbytes = sexp_port_binaryp(p) && !SEXP_USE_PACKED_STRINGS ? sexp_string_bytes(sexp_port_buffer(p)) : sexp_port_buffer(p); tmp = sexp_cons(ctx, origbytes, tmp); tmp = sexp_apply(ctx, sexp_port_reader(p), tmp); - if (sexp_fixnump(tmp) && sexp_unbox_fixnum(tmp) > 0) { - sexp_port_offset(p) = 0; + if (sexp_fixnump(tmp) && sexp_unbox_fixnum(tmp) > BUF_START) { + sexp_port_offset(p) = BUF_START; sexp_port_size(p) = sexp_unbox_fixnum(tmp); if (!sexp_port_binaryp(p) && !SEXP_USE_PACKED_STRINGS && origbytes != sexp_string_bytes(sexp_port_buffer(p))) { diff --git a/tests/r7rs-tests.scm b/tests/r7rs-tests.scm index e08412ff..20f8a512 100644 --- a/tests/r7rs-tests.scm +++ b/tests/r7rs-tests.scm @@ -1905,9 +1905,11 @@ (test "abc" (read-string 3 (open-input-string "abc\ndef\n"))) (let ((in (open-input-string (string #\x10F700 #\x10F701 #\x10F702)))) - (let* ((c1 (read-char in)) + (let* ((c0 (peek-char in)) + (c1 (read-char in)) (c2 (read-char in)) (c3 (read-char in))) + (test #\x10F700 c0) (test #\x10F700 c1) (test #\x10F701 c2) (test #\x10F702 c3))) diff --git a/vm.c b/vm.c index 960d7809..132f2236 100644 --- a/vm.c +++ b/vm.c @@ -2081,7 +2081,7 @@ sexp sexp_apply (sexp ctx, sexp proc, sexp args) { #endif if (i == EOF) { if (!sexp_port_openp(_ARG1)) - sexp_raise("peek-char: port is closed", _ARG1); + sexp_raise("read-char: port is closed", _ARG1); else #if SEXP_USE_GREEN_THREADS if ((sexp_port_stream(_ARG1) ? ferror(sexp_port_stream(_ARG1)) : 1) @@ -2113,7 +2113,7 @@ sexp sexp_apply (sexp ctx, sexp proc, sexp args) { i = sexp_read_char(ctx, _ARG1); if (i == EOF) { if (!sexp_port_openp(_ARG1)) - sexp_raise("read-char: port is closed", _ARG1); + sexp_raise("peek-char: port is closed", _ARG1); else #if SEXP_USE_GREEN_THREADS if ((sexp_port_stream(_ARG1) ? ferror(sexp_port_stream(_ARG1)) : 1) @@ -2128,6 +2128,12 @@ sexp sexp_apply (sexp ctx, sexp proc, sexp args) { } else #endif _ARG1 = SEXP_EOF; +#if SEXP_USE_UTF8_STRINGS + } else if (i >= 0x80) { + tmp1 = sexp_read_utf8_char(ctx, _ARG1, i); + sexp_push_utf8_char(ctx, sexp_unbox_character(tmp1), _ARG1); + _ARG1 = tmp1; +#endif } else { sexp_push_char(ctx, i, _ARG1); _ARG1 = sexp_make_character(i);