mirror of
https://github.com/ashinn/chibi-scheme.git
synced 2025-05-18 21:29:19 +02:00
fixing peek-char on non-ascii chars
This commit is contained in:
parent
3197969d3e
commit
da410523b0
7 changed files with 37 additions and 12 deletions
13
eval.c
13
eval.c
|
@ -1891,6 +1891,19 @@ sexp sexp_read_utf8_char (sexp ctx, sexp port, int i) {
|
||||||
return sexp_make_character(i);
|
return sexp_make_character(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void sexp_push_utf8_char (sexp ctx, int i, sexp port) {
|
||||||
|
unsigned char ch[6];
|
||||||
|
int len = sexp_utf8_char_byte_count(i);
|
||||||
|
sexp_utf8_encode_char(ch, len, i);
|
||||||
|
if (sexp_port_stream(port)) {
|
||||||
|
while (len>0)
|
||||||
|
ungetc(ch[--len], sexp_port_stream(port));
|
||||||
|
} else {
|
||||||
|
while (len>0)
|
||||||
|
sexp_port_buf(port)[--sexp_port_offset(port)] = ch[--len];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#if SEXP_USE_MUTABLE_STRINGS
|
#if SEXP_USE_MUTABLE_STRINGS
|
||||||
|
|
||||||
void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) {
|
void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch) {
|
||||||
|
|
|
@ -137,6 +137,7 @@ SEXP_API sexp sexp_make_promise (sexp ctx, sexp self, sexp_sint_t n, sexp done,
|
||||||
#endif
|
#endif
|
||||||
#if SEXP_USE_UTF8_STRINGS
|
#if SEXP_USE_UTF8_STRINGS
|
||||||
SEXP_API sexp sexp_read_utf8_char (sexp ctx, sexp port, int i);
|
SEXP_API sexp sexp_read_utf8_char (sexp ctx, sexp port, int i);
|
||||||
|
SEXP_API void sexp_push_utf8_char (sexp ctx, int i, sexp port);
|
||||||
SEXP_API void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch);
|
SEXP_API void sexp_string_utf8_set (sexp ctx, sexp str, sexp index, sexp ch);
|
||||||
SEXP_API sexp sexp_string_utf8_index_ref (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i);
|
SEXP_API sexp sexp_string_utf8_index_ref (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i);
|
||||||
SEXP_API sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch);
|
SEXP_API sexp sexp_string_utf8_index_set (sexp ctx, sexp self, sexp_sint_t n, sexp str, sexp i, sexp ch);
|
||||||
|
|
|
@ -131,7 +131,7 @@
|
||||||
(lambda (bv start end)
|
(lambda (bv start end)
|
||||||
(do ((j start (+ j 1)))
|
(do ((j start (+ j 1)))
|
||||||
((= j end))
|
((= j end))
|
||||||
(bytevector-u8-set! bv j (modulo (+ j i) 256)))
|
(bytevector-u8-set! bv j (modulo (- (+ j i) start) 256)))
|
||||||
(if (> end 0)
|
(if (> end 0)
|
||||||
(set! i (bytevector-u8-ref bv (- end 1))))
|
(set! i (bytevector-u8-ref bv (- end 1))))
|
||||||
(- end start))))))
|
(- end start))))))
|
||||||
|
|
|
@ -378,7 +378,7 @@
|
||||||
(set! buf "")
|
(set! buf "")
|
||||||
(set! len 0)
|
(set! len 0)
|
||||||
(set! offset 0)
|
(set! offset 0)
|
||||||
(+ i start))
|
i)
|
||||||
(else
|
(else
|
||||||
(set! len (string-size buf))
|
(set! len (string-size buf))
|
||||||
(cond
|
(cond
|
||||||
|
|
17
sexp.c
17
sexp.c
|
@ -1338,6 +1338,9 @@ sexp sexp_make_cpointer (sexp ctx, sexp_uint_t type_id, void *value,
|
||||||
|
|
||||||
/************************ reading and writing *************************/
|
/************************ reading and writing *************************/
|
||||||
|
|
||||||
|
/* start 4 bytes in so we can always unread a utf8 char in peek-char */
|
||||||
|
#define BUF_START 4
|
||||||
|
|
||||||
int sexp_buffered_read_char (sexp ctx, sexp p) {
|
int sexp_buffered_read_char (sexp ctx, sexp p) {
|
||||||
sexp_gc_var2(tmp, origbytes);
|
sexp_gc_var2(tmp, origbytes);
|
||||||
int res = 0;
|
int res = 0;
|
||||||
|
@ -1346,29 +1349,29 @@ int sexp_buffered_read_char (sexp ctx, sexp p) {
|
||||||
} else if (!sexp_port_openp(p)) {
|
} else if (!sexp_port_openp(p)) {
|
||||||
return EOF;
|
return EOF;
|
||||||
} else if (sexp_port_stream(p)) {
|
} else if (sexp_port_stream(p)) {
|
||||||
res = fread(sexp_port_buf(p), 1, SEXP_PORT_BUFFER_SIZE, sexp_port_stream(p));
|
res = fread(sexp_port_buf(p) + BUF_START, 1, SEXP_PORT_BUFFER_SIZE - BUF_START, sexp_port_stream(p));
|
||||||
if (res >= 0) {
|
if (res >= 0) {
|
||||||
sexp_port_offset(p) = 0;
|
sexp_port_offset(p) = BUF_START;
|
||||||
sexp_port_size(p) = res;
|
sexp_port_size(p) = res;
|
||||||
res = ((sexp_port_offset(p) < sexp_port_size(p))
|
res = ((sexp_port_offset(p) < sexp_port_size(p))
|
||||||
? ((unsigned char*)sexp_port_buf(p))[sexp_port_offset(p)++] : EOF);
|
? ((unsigned char*)sexp_port_buf(p))[sexp_port_offset(p)++] : EOF);
|
||||||
}
|
}
|
||||||
} else if (sexp_filenop(sexp_port_fd(p))) {
|
} else if (sexp_filenop(sexp_port_fd(p))) {
|
||||||
res = read(sexp_port_fileno(p), sexp_port_buf(p), SEXP_PORT_BUFFER_SIZE);
|
res = read(sexp_port_fileno(p), sexp_port_buf(p) + BUF_START, SEXP_PORT_BUFFER_SIZE - BUF_START);
|
||||||
if (res >= 0) {
|
if (res >= 0) {
|
||||||
sexp_port_offset(p) = 0;
|
sexp_port_offset(p) = BUF_START;
|
||||||
sexp_port_size(p) = res;
|
sexp_port_size(p) = res;
|
||||||
res = ((sexp_port_offset(p) < sexp_port_size(p))
|
res = ((sexp_port_offset(p) < sexp_port_size(p))
|
||||||
? ((unsigned char*)sexp_port_buf(p))[sexp_port_offset(p)++] : EOF);
|
? ((unsigned char*)sexp_port_buf(p))[sexp_port_offset(p)++] : EOF);
|
||||||
}
|
}
|
||||||
} else if (sexp_port_customp(p)) {
|
} else if (sexp_port_customp(p)) {
|
||||||
sexp_gc_preserve2(ctx, tmp, origbytes);
|
sexp_gc_preserve2(ctx, tmp, origbytes);
|
||||||
tmp = sexp_list2(ctx, SEXP_ZERO, sexp_make_fixnum(SEXP_PORT_BUFFER_SIZE));
|
tmp = sexp_list2(ctx, sexp_make_fixnum(BUF_START), sexp_make_fixnum(SEXP_PORT_BUFFER_SIZE));
|
||||||
origbytes = sexp_port_binaryp(p) && !SEXP_USE_PACKED_STRINGS ? sexp_string_bytes(sexp_port_buffer(p)) : sexp_port_buffer(p);
|
origbytes = sexp_port_binaryp(p) && !SEXP_USE_PACKED_STRINGS ? sexp_string_bytes(sexp_port_buffer(p)) : sexp_port_buffer(p);
|
||||||
tmp = sexp_cons(ctx, origbytes, tmp);
|
tmp = sexp_cons(ctx, origbytes, tmp);
|
||||||
tmp = sexp_apply(ctx, sexp_port_reader(p), tmp);
|
tmp = sexp_apply(ctx, sexp_port_reader(p), tmp);
|
||||||
if (sexp_fixnump(tmp) && sexp_unbox_fixnum(tmp) > 0) {
|
if (sexp_fixnump(tmp) && sexp_unbox_fixnum(tmp) > BUF_START) {
|
||||||
sexp_port_offset(p) = 0;
|
sexp_port_offset(p) = BUF_START;
|
||||||
sexp_port_size(p) = sexp_unbox_fixnum(tmp);
|
sexp_port_size(p) = sexp_unbox_fixnum(tmp);
|
||||||
if (!sexp_port_binaryp(p) && !SEXP_USE_PACKED_STRINGS
|
if (!sexp_port_binaryp(p) && !SEXP_USE_PACKED_STRINGS
|
||||||
&& origbytes != sexp_string_bytes(sexp_port_buffer(p))) {
|
&& origbytes != sexp_string_bytes(sexp_port_buffer(p))) {
|
||||||
|
|
|
@ -1905,9 +1905,11 @@
|
||||||
(test "abc" (read-string 3 (open-input-string "abc\ndef\n")))
|
(test "abc" (read-string 3 (open-input-string "abc\ndef\n")))
|
||||||
|
|
||||||
(let ((in (open-input-string (string #\x10F700 #\x10F701 #\x10F702))))
|
(let ((in (open-input-string (string #\x10F700 #\x10F701 #\x10F702))))
|
||||||
(let* ((c1 (read-char in))
|
(let* ((c0 (peek-char in))
|
||||||
|
(c1 (read-char in))
|
||||||
(c2 (read-char in))
|
(c2 (read-char in))
|
||||||
(c3 (read-char in)))
|
(c3 (read-char in)))
|
||||||
|
(test #\x10F700 c0)
|
||||||
(test #\x10F700 c1)
|
(test #\x10F700 c1)
|
||||||
(test #\x10F701 c2)
|
(test #\x10F701 c2)
|
||||||
(test #\x10F702 c3)))
|
(test #\x10F702 c3)))
|
||||||
|
|
10
vm.c
10
vm.c
|
@ -2081,7 +2081,7 @@ sexp sexp_apply (sexp ctx, sexp proc, sexp args) {
|
||||||
#endif
|
#endif
|
||||||
if (i == EOF) {
|
if (i == EOF) {
|
||||||
if (!sexp_port_openp(_ARG1))
|
if (!sexp_port_openp(_ARG1))
|
||||||
sexp_raise("peek-char: port is closed", _ARG1);
|
sexp_raise("read-char: port is closed", _ARG1);
|
||||||
else
|
else
|
||||||
#if SEXP_USE_GREEN_THREADS
|
#if SEXP_USE_GREEN_THREADS
|
||||||
if ((sexp_port_stream(_ARG1) ? ferror(sexp_port_stream(_ARG1)) : 1)
|
if ((sexp_port_stream(_ARG1) ? ferror(sexp_port_stream(_ARG1)) : 1)
|
||||||
|
@ -2113,7 +2113,7 @@ sexp sexp_apply (sexp ctx, sexp proc, sexp args) {
|
||||||
i = sexp_read_char(ctx, _ARG1);
|
i = sexp_read_char(ctx, _ARG1);
|
||||||
if (i == EOF) {
|
if (i == EOF) {
|
||||||
if (!sexp_port_openp(_ARG1))
|
if (!sexp_port_openp(_ARG1))
|
||||||
sexp_raise("read-char: port is closed", _ARG1);
|
sexp_raise("peek-char: port is closed", _ARG1);
|
||||||
else
|
else
|
||||||
#if SEXP_USE_GREEN_THREADS
|
#if SEXP_USE_GREEN_THREADS
|
||||||
if ((sexp_port_stream(_ARG1) ? ferror(sexp_port_stream(_ARG1)) : 1)
|
if ((sexp_port_stream(_ARG1) ? ferror(sexp_port_stream(_ARG1)) : 1)
|
||||||
|
@ -2128,6 +2128,12 @@ sexp sexp_apply (sexp ctx, sexp proc, sexp args) {
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
_ARG1 = SEXP_EOF;
|
_ARG1 = SEXP_EOF;
|
||||||
|
#if SEXP_USE_UTF8_STRINGS
|
||||||
|
} else if (i >= 0x80) {
|
||||||
|
tmp1 = sexp_read_utf8_char(ctx, _ARG1, i);
|
||||||
|
sexp_push_utf8_char(ctx, sexp_unbox_character(tmp1), _ARG1);
|
||||||
|
_ARG1 = tmp1;
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
sexp_push_char(ctx, i, _ARG1);
|
sexp_push_char(ctx, i, _ARG1);
|
||||||
_ARG1 = sexp_make_character(i);
|
_ARG1 = sexp_make_character(i);
|
||||||
|
|
Loading…
Add table
Reference in a new issue