cleanup json unicode escape code

This commit is contained in:
Alex Shinn 2020-05-22 11:32:19 +09:00
parent 278657eea4
commit bda192f071

View file

@ -4,6 +4,10 @@
#include <chibi/eval.h> #include <chibi/eval.h>
/* Convert a single hexadecimal digit character to its numeric value.
 * Anything up to '9' is treated as a decimal digit; everything else is
 * lowercased with sexp_tolower and measured from 'a' (so 'a'/'A' -> 10).
 * No validation is performed here: the result is only meaningful when c
 * is already known to be a hex digit. */
static int digit_value (int c) {
  if (c <= '9') {
    return c - '0';
  }
  return sexp_tolower(c) - 'a' + 10;
}
sexp parse_json (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len); sexp parse_json (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len);
sexp sexp_json_exception (sexp ctx, sexp self, const char* msg, sexp str, const int pos) { sexp sexp_json_exception (sexp ctx, sexp self, const char* msg, sexp str, const int pos) {
@ -57,14 +61,13 @@ sexp parse_json_literal (sexp ctx, sexp self, sexp str, const char* s, int* i, c
#define USEQ_LEN 4 #define USEQ_LEN 4
/* Decode the USEQ_LEN hexadecimal digits of a JSON \uXXXX escape.
 *
 * s points at the first digit following the "\u".  Returns the decoded
 * value as a non-negative long, or -1 if any of the USEQ_LEN characters is
 * not a hex digit.  A NUL byte is not a hex digit, so on a NUL-terminated
 * buffer the scan stops at the terminator instead of reading beyond it. */
long decode_useq(const char* s) {
  long result = 0;
  int i;
  for (i = 0; i < USEQ_LEN; i++) {
    /* <ctype.h> classifiers require a value representable as unsigned char
       (or EOF); a plain char holding a UTF-8 byte >= 0x80 can be negative,
       which would be undefined behaviour, so convert once up front */
    unsigned char ch = (unsigned char)s[i];
    if (!isxdigit(ch))
      return -1;
    /* shift in one nibble per digit, most significant first */
    result = (result << 4) + digit_value(ch);
  }
  return result;
}
sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len) { sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len) {
@ -73,7 +76,7 @@ sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, co
int from = *i, to = *i; int from = *i, to = *i;
long utfchar, utfchar2; long utfchar, utfchar2;
res = SEXP_NULL; res = SEXP_NULL;
for ( ; s[to] != '"'; ++to) { for ( ; s[to] != '"' && !sexp_exceptionp(res); ++to) {
if (to+1 >= len) { if (to+1 >= len) {
res = sexp_json_exception(ctx, self, "unterminated string in json started at", str, *i); res = sexp_json_exception(ctx, self, "unterminated string in json started at", str, *i);
break; break;
@ -94,28 +97,23 @@ sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, co
break; break;
case 'u': case 'u':
utfchar = decode_useq(s+to+1); utfchar = decode_useq(s+to+1);
if (utfchar == -1) { to += USEQ_LEN;
res = sexp_json_exception(ctx, self, "invalid \\u sequence at", str, *i); if (0xd800 <= utfchar && utfchar <= 0xdbff && s[to+1] == '\\' && s[to+2] == 'u') {
goto except; /* high surrogate followed by another unicode escape */
}
to = to+USEQ_LEN;
if ( 0xd800 <= utfchar && utfchar <= 0xdbff && s[to+2] == 'u') {
utfchar2 = decode_useq(s+to+3); utfchar2 = decode_useq(s+to+3);
if (0xdc00 <= utfchar2 && utfchar2 <= 0xdfff) {
if (utfchar2 == -1) { /* merge low surrogate (otherwise high is left unpaired) */
res = sexp_json_exception(ctx, self, "invalid \\u sequence at", str, *i);
goto except;
}
if ( 0xdc00 <= utfchar2 && utfchar <=0xdfff ) {
utfchar = 0x10000 + (((utfchar - 0xd800) << 10) | (utfchar2 - 0xdc00)); utfchar = 0x10000 + (((utfchar - 0xd800) << 10) | (utfchar2 - 0xdc00));
to = to + USEQ_LEN +2; to += USEQ_LEN + 2;
} }
} }
if (utfchar < 0) {
tmp = sexp_make_string(ctx, sexp_make_fixnum(1), sexp_make_character(utfchar)); res = sexp_json_exception(ctx, self, "invalid \\u sequence at", str, to - USEQ_LEN);
res = sexp_cons(ctx, tmp, res); } else {
from = to + 1; tmp = sexp_make_string(ctx, sexp_make_fixnum(1), sexp_make_character(utfchar));
res = sexp_cons(ctx, tmp, res);
from = to + 1;
}
break; break;
default: default:
from = to; from = to;
@ -123,7 +121,6 @@ sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, co
} }
} }
} }
except:
if (!sexp_exceptionp(res)) { if (!sexp_exceptionp(res)) {
tmp = sexp_c_string(ctx, s+from, to-from); tmp = sexp_c_string(ctx, s+from, to-from);
if (res == SEXP_NULL) { if (res == SEXP_NULL) {