From e79d2aefeabf0ce5f466de4d5f4a1b7529eec9a4 Mon Sep 17 00:00:00 2001 From: Ekaitz Zarraga Date: Tue, 19 May 2020 05:41:10 +0200 Subject: [PATCH 1/3] Support multiple \u sequences and check errors --- lib/chibi/json.c | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/lib/chibi/json.c b/lib/chibi/json.c index 30834c24..88242a76 100644 --- a/lib/chibi/json.c +++ b/lib/chibi/json.c @@ -54,11 +54,24 @@ sexp parse_json_literal (sexp ctx, sexp self, sexp str, const char* s, int* i, c return res; } +#define USEQ_LEN 4 + +long decode_useq(const char* s){ + char utf_tmp[USEQ_LEN+1]; + for (int iter=0; iter!=USEQ_LEN; iter++){ + if (!isxdigit(s[iter])){ + return -1; + } + } + strncpy(utf_tmp, s, USEQ_LEN); + return strtol(utf_tmp, NULL, 16); +} + sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len) { sexp_gc_var2(res, tmp); sexp_gc_preserve2(ctx, res, tmp); - char utf_tmp[5]; int from = *i, to = *i; + long utfchar, utfchar2; res = SEXP_NULL; for ( ; s[to] != '"'; ++to) { if (to+1 >= len) { @@ -80,10 +93,29 @@ sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, co from = to+1; break; case 'u': - strncpy(utf_tmp, s+to+1, 5); - tmp = sexp_make_string(ctx, sexp_make_fixnum(1), sexp_make_character(strtoll(utf_tmp, NULL, 16))); + utfchar = decode_useq(s+to+1); + if (utfchar == -1){ + res = sexp_json_exception(ctx, self, "invalid \\u sequence at", str, *i); + goto except; + } + to = to+USEQ_LEN; + + if ( 0xd800 <= utfchar && utfchar <= 0xdbff && s[to+2] == 'u'){ + utfchar2 = decode_useq(s+to+3); + + if (utfchar2 == -1){ + res = sexp_json_exception(ctx, self, "invalid \\u sequence at", str, *i); + goto except; + } + if ( 0xdc00 <= utfchar2 && utfchar <=0xdfff ){ + utfchar = 0x10000 + (((utfchar - 0xd800) << 10) | (utfchar2 - 0xdc00)); + to = to + USEQ_LEN +2; + } + } + + tmp = sexp_make_string(ctx, sexp_make_fixnum(1), sexp_make_character(utfchar)); res = 
sexp_cons(ctx, tmp, res); - from = to+5; + from = to + 1; break; default: from = to; @@ -91,6 +123,7 @@ sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, co } } } +except: if (!sexp_exceptionp(res)) { tmp = sexp_c_string(ctx, s+from, to-from); if (res == SEXP_NULL) { From 64d04f063800c563c79ab3709994495363256438 Mon Sep 17 00:00:00 2001 From: Ekaitz Zarraga Date: Wed, 20 May 2020 13:11:12 +0200 Subject: [PATCH 2/3] Add JSON tests --- lib/chibi/json-test.sld | 3 +++ tests/lib-tests.scm | 2 ++ 2 files changed, 5 insertions(+) diff --git a/lib/chibi/json-test.sld b/lib/chibi/json-test.sld index 0d6e0b27..c085fff9 100644 --- a/lib/chibi/json-test.sld +++ b/lib/chibi/json-test.sld @@ -8,6 +8,9 @@ (test 1 (parse-json "1")) (test 1.5 (parse-json "1.5")) (test 1000.0 (parse-json "1e3")) + (test "á" (parse-json "\"\\u00e1\"")) + (test "𐐷" (parse-json "\"\\uD801\\uDC37\"")) + (test "😐" (parse-json "\"\\uD83D\\uDE10\"")) (test '((glossary (title . "example glossary") (GlossDiv diff --git a/tests/lib-tests.scm b/tests/lib-tests.scm index 0b8edd91..0aa3bdcc 100644 --- a/tests/lib-tests.scm +++ b/tests/lib-tests.scm @@ -40,6 +40,7 @@ (rename (chibi generic-test) (run-tests run-generic-tests)) (rename (chibi io-test) (run-tests run-io-tests)) (rename (chibi iset-test) (run-tests run-iset-tests)) + (rename (chibi json-test) (run-tests run-json-tests)) (rename (chibi log-test) (run-tests run-log-tests)) (rename (chibi loop-test) (run-tests run-loop-tests)) (rename (chibi match-test) (run-tests run-match-tests)) @@ -100,6 +101,7 @@ (run-generic-tests) (run-io-tests) (run-iset-tests) +(run-json-tests) (run-log-tests) (run-loop-tests) (run-match-tests) From 1cc24e37d9bd00b710f1c446621c60237e15d9e1 Mon Sep 17 00:00:00 2001 From: Ekaitz Zarraga Date: Fri, 22 May 2020 00:30:24 +0200 Subject: [PATCH 3/3] Fix formatting --- lib/chibi/json.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/chibi/json.c b/lib/chibi/json.c 
index 88242a76..516ad951 100644 --- a/lib/chibi/json.c +++ b/lib/chibi/json.c @@ -56,10 +56,10 @@ sexp parse_json_literal (sexp ctx, sexp self, sexp str, const char* s, int* i, c #define USEQ_LEN 4 -long decode_useq(const char* s){ +long decode_useq(const char* s) { char utf_tmp[USEQ_LEN+1]; - for (int iter=0; iter!=USEQ_LEN; iter++){ - if (!isxdigit(s[iter])){ + for (int iter=0; iter!=USEQ_LEN; iter++) { + if (!isxdigit(s[iter])) { return -1; } } @@ -94,20 +94,20 @@ sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, co break; case 'u': utfchar = decode_useq(s+to+1); - if (utfchar == -1){ + if (utfchar == -1) { res = sexp_json_exception(ctx, self, "invalid \\u sequence at", str, *i); goto except; } to = to+USEQ_LEN; - if ( 0xd800 <= utfchar && utfchar <= 0xdbff && s[to+2] == 'u'){ + if ( 0xd800 <= utfchar && utfchar <= 0xdbff && s[to+2] == 'u') { utfchar2 = decode_useq(s+to+3); - if (utfchar2 == -1){ + if (utfchar2 == -1) { res = sexp_json_exception(ctx, self, "invalid \\u sequence at", str, *i); goto except; } - if ( 0xdc00 <= utfchar2 && utfchar <=0xdfff ){ + if ( 0xdc00 <= utfchar2 && utfchar <=0xdfff ) { utfchar = 0x10000 + (((utfchar - 0xd800) << 10) | (utfchar2 - 0xdc00)); to = to + USEQ_LEN +2; }