diff --git a/lib/chibi/json-test.sld b/lib/chibi/json-test.sld index 0d6e0b27..c085fff9 100644 --- a/lib/chibi/json-test.sld +++ b/lib/chibi/json-test.sld @@ -8,6 +8,9 @@ (test 1 (parse-json "1")) (test 1.5 (parse-json "1.5")) (test 1000.0 (parse-json "1e3")) + (test "รก" (parse-json "\"\\u00e1\"")) + (test "๐ท" (parse-json "\"\\uD801\\uDC37\"")) + (test "๐Ÿ˜" (parse-json "\"\\uD83D\\uDE10\"")) (test '((glossary (title . "example glossary") (GlossDiv diff --git a/lib/chibi/json.c b/lib/chibi/json.c index 30834c24..516ad951 100644 --- a/lib/chibi/json.c +++ b/lib/chibi/json.c @@ -54,11 +54,24 @@ sexp parse_json_literal (sexp ctx, sexp self, sexp str, const char* s, int* i, c return res; } +#define USEQ_LEN 4 + +long decode_useq(const char* s) { + char utf_tmp[USEQ_LEN+1]; + for (int iter=0; iter!=USEQ_LEN; iter++) { + if (!isxdigit(s[iter])) { + return -1; + } + } + strncpy(utf_tmp, s, USEQ_LEN); + return strtol(utf_tmp, NULL, 16); +} + sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len) { sexp_gc_var2(res, tmp); sexp_gc_preserve2(ctx, res, tmp); - char utf_tmp[5]; int from = *i, to = *i; + long utfchar, utfchar2; res = SEXP_NULL; for ( ; s[to] != '"'; ++to) { if (to+1 >= len) { @@ -80,10 +93,29 @@ sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, co from = to+1; break; case 'u': - strncpy(utf_tmp, s+to+1, 5); - tmp = sexp_make_string(ctx, sexp_make_fixnum(1), sexp_make_character(strtoll(utf_tmp, NULL, 16))); + utfchar = decode_useq(s+to+1); + if (utfchar == -1) { + res = sexp_json_exception(ctx, self, "invalid \\u sequence at", str, *i); + goto except; + } + to = to+USEQ_LEN; + + if ( 0xd800 <= utfchar && utfchar <= 0xdbff && s[to+2] == 'u') { + utfchar2 = decode_useq(s+to+3); + + if (utfchar2 == -1) { + res = sexp_json_exception(ctx, self, "invalid \\u sequence at", str, *i); + goto except; + } + if ( 0xdc00 <= utfchar2 && utfchar <=0xdfff ) { + utfchar = 0x10000 + (((utfchar - 0xd800) << 10) | (utfchar2 - 0xdc00)); + to = to + USEQ_LEN +2; + } + } + + tmp = sexp_make_string(ctx, sexp_make_fixnum(1), sexp_make_character(utfchar)); res = sexp_cons(ctx, tmp, res); - from = to+5; + from = to + 1; break; default: from = to; @@ -91,6 +123,7 @@ sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, co } } } +except: if (!sexp_exceptionp(res)) { tmp = sexp_c_string(ctx, s+from, to-from); if (res == SEXP_NULL) { diff --git a/tests/lib-tests.scm b/tests/lib-tests.scm index 0b8edd91..0aa3bdcc 100644 --- a/tests/lib-tests.scm +++ b/tests/lib-tests.scm @@ -40,6 +40,7 @@ (rename (chibi generic-test) (run-tests run-generic-tests)) (rename (chibi io-test) (run-tests run-io-tests)) (rename (chibi iset-test) (run-tests run-iset-tests)) + (rename (chibi json-test) (run-tests run-json-tests)) (rename (chibi log-test) (run-tests run-log-tests)) (rename (chibi loop-test) (run-tests run-loop-tests)) (rename (chibi match-test) (run-tests run-match-tests)) @@ -100,6 +101,7 @@ (run-generic-tests) (run-io-tests) (run-iset-tests) +(run-json-tests) (run-log-tests) (run-loop-tests) (run-match-tests)