updating (chibi json) to work on ports

This commit is contained in:
Alex Shinn 2020-07-08 17:27:05 +09:00
parent 2bdaebe8c7
commit e31e5ffbf3
3 changed files with 277 additions and 363 deletions

View file

@ -4,13 +4,14 @@
(export run-tests) (export run-tests)
(begin (begin
(define (run-tests) (define (run-tests)
(test-begin "json-parse") (test-begin "json")
(test 1 (parse-json "1")) (test-begin "string->json")
(test 1.5 (parse-json "1.5")) (test 1 (string->json "1"))
(test 1000.0 (parse-json "1e3")) (test 1.5 (string->json "1.5"))
(test "á" (parse-json "\"\\u00e1\"")) (test 1000.0 (string->json "1e3"))
(test "𐐷" (parse-json "\"\\uD801\\uDC37\"")) (test "á" (string->json "\"\\u00e1\""))
(test "😐" (parse-json "\"\\uD83D\\uDE10\"")) (test "𐐷" (string->json "\"\\uD801\\uDC37\""))
(test "😐" (string->json "\"\\uD83D\\uDE10\""))
(test '((glossary (test '((glossary
(title . "example glossary") (title . "example glossary")
(GlossDiv (GlossDiv
@ -26,7 +27,7 @@
(para . "A meta-markup language, used to create markup languages such as DocBook.") (para . "A meta-markup language, used to create markup languages such as DocBook.")
(GlossSeeAlso . #("GML" "XML"))) (GlossSeeAlso . #("GML" "XML")))
(GlossSee . "markup")))))) (GlossSee . "markup"))))))
(parse-json "{ (string->json "{
\"glossary\": { \"glossary\": {
\"title\": \"example glossary\", \"title\": \"example glossary\",
\"GlossDiv\": { \"GlossDiv\": {
@ -56,7 +57,7 @@
. #(((value . "New") (onclick . "CreateNewDoc()")) . #(((value . "New") (onclick . "CreateNewDoc()"))
((value . "Open") (onclick . "OpenDoc()")) ((value . "Open") (onclick . "OpenDoc()"))
((value . "Close") (onclick . "CloseDoc()"))))))) ((value . "Close") (onclick . "CloseDoc()")))))))
(parse-json "{\"menu\": { (string->json "{\"menu\": {
\"id\": \"file\", \"id\": \"file\",
\"value\": \"File\", \"value\": \"File\",
\"popup\": { \"popup\": {
@ -68,15 +69,15 @@
} }
}}")) }}"))
(test-end) (test-end)
(test-begin "json-unparse") (test-begin "json->string")
(test "1" (unparse-json 1)) (test "1" (json->string 1))
(test "1.5" (unparse-json 1.5)) (test "1.5" (json->string 1.5))
(test "1000" (unparse-json 1E3)) (test "1000" (json->string 1E3))
(test "\"\\u00E1\"" (unparse-json "á")) (test "\"\\u00E1\"" (json->string "á"))
(test "\"\\uD801\\uDC37\"" (unparse-json "𐐷")) (test "\"\\uD801\\uDC37\"" (json->string "𐐷"))
(test "\"\\uD83D\\uDE10\"" (unparse-json "😐")) (test "\"\\uD83D\\uDE10\"" (json->string "😐"))
(test "{\"menu\":{\"id\":\"file\",\"value\":\"File\",\"popup\":{\"menuitem\":[{\"value\":\"New\",\"onclick\":\"CreateNewDoc()\"},{\"value\":\"Open\",\"onclick\":\"OpenDoc()\"},{\"value\":\"Close\",\"onclick\":\"CloseDoc()\"}]}}}" (test "{\"menu\":{\"id\":\"file\",\"value\":\"File\",\"popup\":{\"menuitem\":[{\"value\":\"New\",\"onclick\":\"CreateNewDoc()\"},{\"value\":\"Open\",\"onclick\":\"OpenDoc()\"},{\"value\":\"Close\",\"onclick\":\"CloseDoc()\"}]}}}"
(unparse-json '((menu (json->string '((menu
(id . "file") (id . "file")
(value . "File") (value . "File")
(popup (popup
@ -85,7 +86,7 @@
((value . "Open") (onclick . "OpenDoc()")) ((value . "Open") (onclick . "OpenDoc()"))
((value . "Close") (onclick . "CloseDoc()"))))))))) ((value . "Close") (onclick . "CloseDoc()")))))))))
(test "{\"glossary\":{\"title\":\"example glossary\",\"GlossDiv\":{\"title\":\"S\",\"GlossList\":{\"GlossEntry\":{\"ID\":\"SGML\",\"SortAs\":\"SGML\",\"GlossTerm\":\"Standard Generalized Markup Language\",\"Acronym\":\"SGML\",\"Abbrev\":\"ISO 8879:1986\",\"GlossDef\":{\"para\":\"A meta-markup language, used to create markup languages such as DocBook.\",\"GlossSeeAlso\":[\"GML\",\"XML\"]},\"GlossSee\":\"markup\"}}}}}" (test "{\"glossary\":{\"title\":\"example glossary\",\"GlossDiv\":{\"title\":\"S\",\"GlossList\":{\"GlossEntry\":{\"ID\":\"SGML\",\"SortAs\":\"SGML\",\"GlossTerm\":\"Standard Generalized Markup Language\",\"Acronym\":\"SGML\",\"Abbrev\":\"ISO 8879:1986\",\"GlossDef\":{\"para\":\"A meta-markup language, used to create markup languages such as DocBook.\",\"GlossSeeAlso\":[\"GML\",\"XML\"]},\"GlossSee\":\"markup\"}}}}}"
(unparse-json '((glossary (json->string '((glossary
(title . "example glossary") (title . "example glossary")
(GlossDiv (GlossDiv
(title . "S") (title . "S")
@ -100,4 +101,6 @@
(para . "A meta-markup language, used to create markup languages such as DocBook.") (para . "A meta-markup language, used to create markup languages such as DocBook.")
(GlossSeeAlso . #("GML" "XML"))) (GlossSeeAlso . #("GML" "XML")))
(GlossSee . "markup")))))))) (GlossSee . "markup"))))))))
(test-end)))) (test-end)
(test-end)
)))

View file

@ -1,5 +1,5 @@
/* json.c -- fast json parser and unparser */ /* json.c -- fast json I/O */
/* Copyright (c) 2019 Alex Shinn. All rights reserved. */ /* Copyright (c) 2020 Alex Shinn. All rights reserved. */
/* Copyright (c) 2020 Ekaitz Zarraga. All rights reserved. */ /* Copyright (c) 2020 Ekaitz Zarraga. All rights reserved. */
/* BSD-style license: http://synthcode.com/license.txt */ /* BSD-style license: http://synthcode.com/license.txt */
@ -9,18 +9,23 @@ static int digit_value (int c) {
return (((c)<='9') ? ((c) - '0') : ((sexp_tolower(c) - 'a') + 10)); return (((c)<='9') ? ((c) - '0') : ((sexp_tolower(c) - 'a') + 10));
} }
sexp parse_json (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len); sexp json_read (sexp ctx, sexp self, sexp in);
sexp sexp_json_parse_exception (sexp ctx, sexp self, const char* msg, sexp str, const int pos) { sexp sexp_json_read_exception (sexp ctx, sexp self, const char* msg, sexp in, sexp ir) {
sexp_gc_var2(res, tmp); sexp res;
sexp_gc_preserve2(ctx, res, tmp); sexp_gc_var4(sym, name, str, irr);
tmp = sexp_list2(ctx, str, sexp_make_fixnum(pos)); sexp_gc_preserve4(ctx, sym, name, str, irr);
res = sexp_user_exception(ctx, self, msg, tmp); name = (sexp_port_name(in) ? sexp_port_name(in) : SEXP_FALSE);
sexp_gc_release2(ctx); name = sexp_cons(ctx, name, sexp_make_fixnum(sexp_port_line(in)));
str = sexp_c_string(ctx, msg, -1);
irr = ((sexp_pairp(ir) || sexp_nullp(ir)) ? ir : sexp_list1(ctx, ir));
res = sexp_make_exception(ctx, sym = sexp_intern(ctx, "json-read", -1),
str, irr, SEXP_FALSE, name);
sexp_gc_release4(ctx);
return res; return res;
} }
sexp sexp_json_unparse_exception (sexp ctx, sexp self, const char* msg, sexp obj) { sexp sexp_json_write_exception (sexp ctx, sexp self, const char* msg, sexp obj) {
sexp_gc_var2(res, tmp); sexp_gc_var2(res, tmp);
sexp_gc_preserve2(ctx, res, tmp); sexp_gc_preserve2(ctx, res, tmp);
tmp = sexp_list1(ctx, obj); tmp = sexp_list1(ctx, obj);
@ -29,161 +34,171 @@ sexp sexp_json_unparse_exception (sexp ctx, sexp self, const char* msg, sexp obj
return res; return res;
} }
sexp parse_json_number (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len) { sexp json_read_number (sexp ctx, sexp self, sexp in) {
double res = 0, scale = 1; double res = 0, scale = 1;
int j = *i, sign = 1, inexactp = 0, scale_sign = 1; int sign = 1, inexactp = 0, scale_sign = 1, ch;
if (s[j] == '+') { ch = sexp_read_char(ctx, in);
++j; if (ch == '+') {
} else if (s[j] == '-') { ch = sexp_read_char(ctx, in);
++j; } else if (ch == '-') {
ch = sexp_read_char(ctx, in);
sign = -1; sign = -1;
} }
while (j < len && isdigit(s[j])) for ( ; ch != EOF && isdigit(ch); ch = sexp_read_char(ctx, in))
res = res * 10 + s[j++] - '0'; res = res * 10 + ch - '0';
if (j < len && s[j] == '.') { if (ch == '.') {
inexactp = 1; inexactp = 1;
for (++j; j < len && isdigit(s[j]); scale *= 10) for (ch = sexp_read_char(ctx, in); isdigit(ch); scale *= 10, ch = sexp_read_char(ctx, in))
res = res * 10 + s[j++] - '0'; res = res * 10 + ch - '0';
res /= scale; res /= scale;
} else if (j < len && sexp_tolower(s[j]) == 'e') { } else if (ch == 'e') {
inexactp = 1; inexactp = 1;
if (j+1 < len) { ch = sexp_read_char(ctx, in);
if (s[j+1] == '+') { if (ch == '+') {
++j; ch = sexp_read_char(ctx, in);
} else if (s[j+1] == '-') { } else if (ch == '-') {
++j; ch = sexp_read_char(ctx, in);
scale_sign = -1; scale_sign = -1;
} }
} for (scale=0; isdigit(ch); ch = sexp_read_char(ctx, in))
for (++j, scale=0; j < len && isdigit(s[j]); ) scale = scale * 10 + ch - '0';
scale = scale * 10 + s[j++] - '0';
res *= pow(10.0, scale_sign * scale); res *= pow(10.0, scale_sign * scale);
} }
*i = j; if (ch != EOF) sexp_push_char(ctx, ch, in);
return (inexactp || fabs(res) > SEXP_MAX_FIXNUM) ? return (inexactp || fabs(res) > SEXP_MAX_FIXNUM) ?
sexp_make_flonum(ctx, sign * res) : sexp_make_flonum(ctx, sign * res) :
sexp_make_fixnum(sign * res); /* always return inexact? */ sexp_make_fixnum(sign * res); /* always return inexact? */
} }
sexp parse_json_literal (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len, const char* name, int namelen, sexp value) { sexp json_read_literal (sexp ctx, sexp self, sexp in, char* name, sexp value) {
sexp res; int ch;
if (strncasecmp(s+*i, name, namelen) == 0 && (*i+namelen >= len || !isalnum(s[*i+namelen]))) { for (++name; *name; )
res = value; if (*(name++) != (ch = sexp_read_char(ctx, in)))
*i += namelen; sexp_json_read_exception(ctx, self, "unexpected character in json", in, sexp_make_character(ch));
} else { return value;
res = sexp_json_parse_exception(ctx, self, "unexpected character in json at", str, *i);
}
return res;
} }
#define USEQ_LEN 4 #define USEQ_LEN 4
long decode_useq(const char* s) { long decode_useq(sexp ctx, sexp in) {
long result = 0, i; long result = 0, i, ch;
for (i=0; i < USEQ_LEN; i++) { for (i=0; i < USEQ_LEN; i++) {
if (!isxdigit(s[i])) ch = sexp_read_char(ctx, in);
if (!isxdigit(ch)) {
sexp_push_char(ctx, ch, in);
return -1; return -1;
result = (result << 4) + digit_value(s[i]); }
result = (result << 4) + digit_value(ch);
} }
return result; return result;
} }
sexp parse_json_string (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len) { #define INIT_STRING_BUFFER_SIZE 128
sexp_gc_var2(res, tmp);
sexp_gc_preserve2(ctx, res, tmp); sexp json_read_string (sexp ctx, sexp self, sexp in) {
int from = *i, to = *i; sexp_sint_t size=INIT_STRING_BUFFER_SIZE;
char initbuf[INIT_STRING_BUFFER_SIZE];
char *buf=initbuf, *tmp;
int i=0, ch, len;
long utfchar, utfchar2; long utfchar, utfchar2;
res = SEXP_NULL; sexp res = SEXP_VOID;
for ( ; s[to] != '"' && !sexp_exceptionp(res); ++to) { for (ch = sexp_read_char(ctx, in); ch != '"'; ch = sexp_read_char(ctx, in)) {
if (to+1 >= len) { if (ch == EOF) {
res = sexp_json_parse_exception(ctx, self, "unterminated string in json started at", str, *i); res = sexp_json_read_exception(ctx, self, "unterminated string in json", in, SEXP_NULL);
break; break;
} }
if (s[to] == '\\') { if (i+4 >= size) { /* expand buffer w/ malloc(), later free() it */
tmp = sexp_c_string(ctx, s+from, to-from); tmp = (char*) sexp_malloc(size*2);
res = sexp_stringp(res) ? sexp_list2(ctx, tmp, res) : sexp_cons(ctx, tmp, res); if (!tmp) {res = sexp_global(ctx, SEXP_G_OOM_ERROR); break;}
switch (s[++to]) { memcpy(tmp, buf, i);
if (size != INIT_STRING_BUFFER_SIZE) free(buf);
buf = tmp;
size *= 2;
}
if (ch == '\\') {
ch = sexp_read_char(ctx, in);
switch (ch) {
case 'n': case 'n':
tmp = sexp_c_string(ctx, "\n", -1); buf[i++] = '\n';
res = sexp_cons(ctx, tmp, res);
from = to+1;
break; break;
case 't': case 't':
tmp = sexp_c_string(ctx, "\t", -1); buf[i++] = '\t';
res = sexp_cons(ctx, tmp, res);
from = to+1;
break; break;
case 'u': case 'u':
utfchar = decode_useq(s+to+1); utfchar = decode_useq(ctx, in);
to += USEQ_LEN; if (0xd800 <= utfchar && utfchar <= 0xdbff) {
if (0xd800 <= utfchar && utfchar <= 0xdbff && s[to+1] == '\\' && s[to+2] == 'u') { ch = sexp_read_char(ctx, in);
if (ch == '\\') {
ch = sexp_read_char(ctx, in);
if (ch == 'u') {
/* high surrogate followed by another unicode escape */ /* high surrogate followed by another unicode escape */
utfchar2 = decode_useq(s+to+3); utfchar2 = decode_useq(ctx, in);
if (0xdc00 <= utfchar2 && utfchar2 <= 0xdfff) { if (0xdc00 <= utfchar2 && utfchar2 <= 0xdfff) {
/* merge low surrogate (otherwise high is left unpaired) */ /* merge low surrogate (otherwise high is left unpaired) */
utfchar = 0x10000 + (((utfchar - 0xd800) << 10) | (utfchar2 - 0xdc00)); utfchar = 0x10000 + (((utfchar - 0xd800) << 10) | (utfchar2 - 0xdc00));
to += USEQ_LEN + 2; } else {
return sexp_json_read_exception(ctx, self, "invalid \\u sequence", in, SEXP_NULL);
}
} else {
sexp_push_char(ctx, ch, in);
sexp_push_char(ctx, '\\', in);
}
} else {
sexp_push_char(ctx, ch, in);
} }
} }
if (utfchar < 0) { if (utfchar < 0) {
res = sexp_json_parse_exception(ctx, self, "invalid \\u sequence at", str, to - USEQ_LEN); return sexp_json_read_exception(ctx, self, "invalid \\u sequence", in, SEXP_NULL);
} else { } else {
tmp = sexp_make_string(ctx, sexp_make_fixnum(1), sexp_make_character(utfchar)); len = sexp_utf8_char_byte_count(utfchar);
res = sexp_cons(ctx, tmp, res); sexp_utf8_encode_char((unsigned char*)buf + i, len, utfchar);
from = to + 1; i += len;
} }
break; break;
default: default:
from = to; buf[i++] = ch;
break; break;
} }
} else {
buf[i++] = ch;
} }
} }
if (!sexp_exceptionp(res)) { if (!sexp_exceptionp(res)) {
tmp = sexp_c_string(ctx, s+from, to-from); buf[i] = '\0';
if (res == SEXP_NULL) { res = sexp_c_string(ctx, buf, i);
res = tmp; if (sexp_stringp(res)) sexp_immutablep(res) = 1;
} else {
res = sexp_stringp(res) ? sexp_list2(ctx, tmp, res) : sexp_cons(ctx, tmp, res);
res = sexp_nreverse(ctx, res);
res = sexp_string_concatenate(ctx, res, SEXP_FALSE);
} }
} if (size != INIT_STRING_BUFFER_SIZE) free(buf);
*i = to+1;
sexp_gc_release2(ctx);
return res; return res;
} }
sexp parse_json_array (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len) { sexp json_read_array (sexp ctx, sexp self, sexp in) {
sexp_gc_var2(res, tmp); sexp_gc_var2(res, tmp);
sexp_gc_preserve2(ctx, res, tmp); sexp_gc_preserve2(ctx, res, tmp);
int j = *i; int comma = 1, ch;
int comma = 1;
res = SEXP_NULL; res = SEXP_NULL;
while (1) { while (1) {
if (j >= len) { ch = sexp_read_char(ctx, in);
res = sexp_json_parse_exception(ctx, self, "unterminated array in json started at", str, *i); if (ch == EOF) {
res = sexp_json_read_exception(ctx, self, "unterminated array in json", in, SEXP_NULL);
break; break;
} else if (s[j] == ']') { } else if (ch == ']') {
if (comma && res != SEXP_NULL) { if (comma && res != SEXP_NULL) {
res = sexp_json_parse_exception(ctx, self, "missing value after comma in json array at", str, j); res = sexp_json_read_exception(ctx, self, "missing value after comma in json", in, SEXP_NULL);
} else { } else {
res = sexp_nreverse(ctx, res); res = sexp_nreverse(ctx, res);
res = sexp_list_to_vector(ctx, res); res = sexp_list_to_vector(ctx, res);
} }
++j;
break; break;
} else if (s[j] == ',' && comma) { } else if (ch == ',' && comma) {
res = sexp_json_parse_exception(ctx, self, "unexpected comma in json array at", str, j); res = sexp_json_read_exception(ctx, self, "unexpected comma in json array", in, SEXP_NULL);
break; break;
} else if (s[j] == ',') { } else if (ch == ',') {
comma = 1; comma = 1;
++j; } else if (!isspace(ch)) {
} else if (isspace(s[j])) {
++j;
} else {
if (comma) { if (comma) {
tmp = parse_json(ctx, self, str, s, &j, len); sexp_push_char(ctx, ch, in);
tmp = json_read(ctx, self, in);
if (sexp_exceptionp(tmp)) { if (sexp_exceptionp(tmp)) {
res = tmp; res = tmp;
break; break;
@ -191,45 +206,41 @@ sexp parse_json_array (sexp ctx, sexp self, sexp str, const char* s, int* i, con
res = sexp_cons(ctx, tmp, res); res = sexp_cons(ctx, tmp, res);
comma = 0; comma = 0;
} else { } else {
res = sexp_json_parse_exception(ctx, self, "unexpected value in json array at", str, j); res = sexp_json_read_exception(ctx, self, "unexpected value in json array", in, SEXP_NULL);
break; break;
} }
} }
} }
*i = j;
sexp_gc_release2(ctx); sexp_gc_release2(ctx);
return res; return res;
} }
sexp parse_json_object (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len) { sexp json_read_object (sexp ctx, sexp self, sexp in) {
sexp_gc_var2(res, tmp); sexp_gc_var2(res, tmp);
sexp_gc_preserve2(ctx, res, tmp); sexp_gc_preserve2(ctx, res, tmp);
int j = *i; int comma = 1, ch;
int comma = 1;
res = SEXP_NULL; res = SEXP_NULL;
while (1) { while (1) {
if (j >= len) { ch = sexp_read_char(ctx, in);
res = sexp_json_parse_exception(ctx, self, "unterminated object in json started at", str, *i); if (ch == EOF) {
res = sexp_json_read_exception(ctx, self, "unterminated object in json", in, SEXP_NULL);
break; break;
} else if (s[j] == '}') { } else if (ch == '}') {
if (comma && res != SEXP_NULL) { if (comma && res != SEXP_NULL) {
res = sexp_json_parse_exception(ctx, self, "missing value after comma in json object at", str, j); res = sexp_json_read_exception(ctx, self, "missing value after comma in json object", in, SEXP_NULL);
} else { } else {
res = sexp_nreverse(ctx, res); res = sexp_nreverse(ctx, res);
} }
++j;
break; break;
} else if (s[j] == ',' && comma) { } else if (ch == ',' && comma) {
res = sexp_json_parse_exception(ctx, self, "unexpected comma in json object at", str, j); res = sexp_json_read_exception(ctx, self, "unexpected comma in json object", in, SEXP_NULL);
break; break;
} else if (s[j] == ',') { } else if (ch == ',') {
comma = 1; comma = 1;
++j; } else if (!isspace(ch)) {
} else if (isspace(s[j])) {
++j;
} else {
if (comma) { if (comma) {
tmp = parse_json(ctx, self, str, s, &j, len); sexp_push_char(ctx, ch, in);
tmp = json_read(ctx, self, in);
if (sexp_exceptionp(tmp)) { if (sexp_exceptionp(tmp)) {
res = tmp; res = tmp;
break; break;
@ -237,14 +248,13 @@ sexp parse_json_object (sexp ctx, sexp self, sexp str, const char* s, int* i, co
tmp = sexp_string_to_symbol(ctx, tmp); tmp = sexp_string_to_symbol(ctx, tmp);
} }
tmp = sexp_cons(ctx, tmp, SEXP_VOID); tmp = sexp_cons(ctx, tmp, SEXP_VOID);
while (j < len && isspace(s[j])) for (ch = sexp_read_char(ctx, in); isspace(ch); ch = sexp_read_char(ctx, in))
++j; ;
if (s[j] != ':') { if (ch != ':') {
res = sexp_json_parse_exception(ctx, self, "missing colon in json object at", str, j); res = sexp_json_read_exception(ctx, self, "missing colon in json object", in, sexp_make_character(ch));
break; break;
} }
++j; sexp_cdr(tmp) = json_read(ctx, self, in);
sexp_cdr(tmp) = parse_json(ctx, self, str, s, &j, len);
if (sexp_exceptionp(sexp_cdr(tmp))) { if (sexp_exceptionp(sexp_cdr(tmp))) {
res = sexp_cdr(tmp); res = sexp_cdr(tmp);
break; break;
@ -252,324 +262,216 @@ sexp parse_json_object (sexp ctx, sexp self, sexp str, const char* s, int* i, co
res = sexp_cons(ctx, tmp, res); res = sexp_cons(ctx, tmp, res);
comma = 0; comma = 0;
} else { } else {
res = sexp_json_parse_exception(ctx, self, "unexpected value in json object at", str, j); res = sexp_json_read_exception(ctx, self, "unexpected value in json object", in, SEXP_NULL);
break; break;
} }
} }
} }
*i = j;
sexp_gc_release2(ctx); sexp_gc_release2(ctx);
return res; return res;
} }
sexp parse_json (sexp ctx, sexp self, sexp str, const char* s, int* i, const int len) { sexp json_read (sexp ctx, sexp self, sexp in) {
sexp res; sexp res;
int j = *i; int ch = ' ';
while (j < len && isspace(s[j])) while (isspace(ch))
++j; ch = sexp_read_char(ctx, in);
switch (s[j]) { switch (ch) {
case '{': case '{':
++j; res = json_read_object(ctx, self, in);
res = parse_json_object(ctx, self, str, s, &j, len);
break; break;
case '[': case '[':
++j; res = json_read_array(ctx, self, in);
res = parse_json_array(ctx, self, str, s, &j, len);
break; break;
case '"': case '"':
++j; res = json_read_string(ctx, self, in);
res = parse_json_string(ctx, self, str, s, &j, len);
break; break;
case '-': case '+': case '-': case '+':
case '0': case '1': case '2': case '3': case '4': case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '5': case '6': case '7': case '8': case '9':
res = parse_json_number(ctx, self, str, s, &j, len); sexp_push_char(ctx, ch, in);
res = json_read_number(ctx, self, in);
break; break;
case 'n': case 'N': case 'n': case 'N':
res = parse_json_literal(ctx, self, str, s, &j, len, "null", 4, SEXP_VOID); res = json_read_literal(ctx, self, in, "null", SEXP_VOID);
break; break;
case 't': case 'T': case 't': case 'T':
res = parse_json_literal(ctx, self, str, s, &j, len, "true", 4, SEXP_TRUE); res = json_read_literal(ctx, self, in, "true", SEXP_TRUE);
break; break;
case 'f': case 'F': case 'f': case 'F':
res = parse_json_literal(ctx, self, str, s, &j, len, "false", 5, SEXP_FALSE); res = json_read_literal(ctx, self, in, "false", SEXP_FALSE);
break; break;
case '}': case '}':
res = sexp_json_parse_exception(ctx, self, "unexpected closing brace in json at", str, j); res = sexp_json_read_exception(ctx, self, "unexpected closing brace in json", in, SEXP_NULL);
break; break;
case ']': case ']':
res = sexp_json_parse_exception(ctx, self, "unexpected closing bracket in json at", str, j); res = sexp_json_read_exception(ctx, self, "unexpected closing bracket in json", in, SEXP_NULL);
break; break;
default: default:
res = sexp_json_parse_exception(ctx, self, "unexpected character in json at", str, j); res = sexp_json_read_exception(ctx, self, "unexpected character in json", in, sexp_make_character(ch));
break; break;
} }
*i = j;
return res; return res;
} }
sexp sexp_parse_json (sexp ctx, sexp self, sexp_sint_t n, sexp str) { sexp sexp_json_read (sexp ctx, sexp self, sexp_sint_t n, sexp in) {
const char *s; sexp_assert_type(ctx, sexp_iportp, SEXP_IPORT, in);
int i=0, len; return json_read(ctx, self, in);
sexp_assert_type(ctx, sexp_stringp, SEXP_STRING, str);
s = sexp_string_data(str);
len = sexp_string_size(str);
return parse_json(ctx, self, str, s, &i, len);
} }
sexp json_write (sexp ctx, sexp self, sexp obj, sexp out);
sexp unparse_json (sexp ctx, sexp self, sexp obj);
sexp unparse_json_fixnum(sexp ctx, sexp self, const sexp obj) {
sexp_gc_var2(res, tmp);
sexp_gc_preserve2(ctx, res, tmp);
res = SEXP_VOID;
int sign = 1;
long num = sexp_unbox_fixnum(obj);
char digit;
if (num == 0) {
res = sexp_c_string(ctx, "0", -1);
} else {
if (num < 0) {
sign = -1;
num = labs(num);
}
while (num > 0) {
digit = '0' + num%10;
num /= 10;
tmp = sexp_c_string(ctx, &digit, 1);
res = sexp_cons(ctx, tmp, res);
}
if (sign==-1) {
tmp = sexp_c_string(ctx, "-", -1);
res = sexp_cons(ctx, tmp, res);
}
res = sexp_string_concatenate(ctx, res, SEXP_FALSE);
}
sexp_gc_release2(ctx);
return res;
}
#define FLONUM_SIGNIFICANT_DIGITS 10 #define FLONUM_SIGNIFICANT_DIGITS 10
#define FLONUM_EXP_MAX_DIGITS 3 #define FLONUM_EXP_MAX_DIGITS 3
sexp unparse_json_flonum(sexp ctx, sexp self, const sexp obj) { sexp json_write_flonum(sexp ctx, sexp self, const sexp obj, sexp out) {
sexp_gc_var2(res, tmp);
sexp_gc_preserve2(ctx, res, tmp);
res = SEXP_VOID;
char cout[FLONUM_SIGNIFICANT_DIGITS + FLONUM_EXP_MAX_DIGITS + 5];
// Extra space for signs (x2), dot, E and \0
if (sexp_infp(obj) || sexp_nanp(obj)) { if (sexp_infp(obj) || sexp_nanp(obj)) {
res = sexp_json_unparse_exception(ctx, self, "unable to encode number", obj); return sexp_json_write_exception(ctx, self, "unable to encode number", obj);
sexp_gc_release2(ctx);
return res;
} }
/* Extra space for signs (x2), dot, E and \0 */
char cout[FLONUM_SIGNIFICANT_DIGITS + FLONUM_EXP_MAX_DIGITS + 5];
snprintf(cout, sizeof(cout), "%.*G", FLONUM_SIGNIFICANT_DIGITS, sexp_flonum_value(obj)); snprintf(cout, sizeof(cout), "%.*G", FLONUM_SIGNIFICANT_DIGITS, sexp_flonum_value(obj));
res = sexp_c_string(ctx, cout, -1); sexp_write_string(ctx, cout, out);
sexp_gc_release2(ctx); return SEXP_VOID;
return res;
} }
sexp unparse_json_string(sexp ctx, sexp self, const sexp obj) { sexp json_write_string(sexp ctx, sexp self, const sexp obj, sexp out) {
sexp_gc_var2(res, tmp);
sexp_gc_preserve2(ctx, res, tmp);
res = SEXP_VOID;
tmp = sexp_c_string(ctx, "\"", -1);
res = sexp_cons(ctx, tmp, res);
char cout[32]; /* oversized to avoid snprintf warnings */ char cout[32]; /* oversized to avoid snprintf warnings */
unsigned long ch, chh, chl; sexp_sint_t ch, chh, chl;
sexp i, end = sexp_make_string_cursor(sexp_string_size(obj));
sexp_uint_t len = sexp_string_length(obj); sexp_write_char(ctx, '"', out);
for(sexp_uint_t i=0; i!= len; i++) { for (i = sexp_make_string_cursor(0); i < end;
ch = sexp_unbox_character(sexp_string_ref(ctx, obj, sexp_make_fixnum(i))); i = sexp_string_cursor_next(obj, i)) {
ch = sexp_unbox_character(sexp_string_cursor_ref(ctx, obj, i));
if (ch < 0x7F) { if (ch < 0x7F) {
switch (ch) { switch (ch) {
case '\\': case '\\':
snprintf(cout, sizeof(cout), "\\\\"); sexp_write_string(ctx, "\\\\", out);
break;
case '/':
snprintf(cout, sizeof(cout), "\\/");
break; break;
case '\b': case '\b':
snprintf(cout, sizeof(cout), "\\b"); sexp_write_string(ctx, "\\b", out);
break; break;
case '\f': case '\f':
snprintf(cout, sizeof(cout), "\\f"); sexp_write_string(ctx, "\\f", out);
break; break;
case '\n': case '\n':
snprintf(cout, sizeof(cout), "\\n"); sexp_write_string(ctx, "\\n", out);
break; break;
case '\r': case '\r':
snprintf(cout, sizeof(cout), "\\r"); sexp_write_string(ctx, "\\r", out);
break; break;
case '\t': case '\t':
snprintf(cout, sizeof(cout), "\\t"); sexp_write_string(ctx, "\\t", out);
break; break;
default: default:
snprintf(cout, sizeof(cout), "%c", (int)ch); sexp_write_char(ctx, ch, out);
break; break;
} }
} else if (ch <= 0xFFFF) { } else if (ch <= 0xFFFF) {
snprintf(cout, sizeof(cout), "\\u%04lX", ch); snprintf(cout, sizeof(cout), "\\u%04lX", ch);
sexp_write_string(ctx, cout, out);
} else { } else {
// Surrogate pair // Surrogate pair
chh = (0xD800 - (0x10000 >> 10) + ((ch) >> 10)); chh = (0xD800 - (0x10000 >> 10) + ((ch) >> 10));
chl = (0xDC00 + ((ch) & 0x3FF)); chl = (0xDC00 + ((ch) & 0x3FF));
if (chh > 0xFFFF || chl > 0xFFFF) { if (chh > 0xFFFF || chl > 0xFFFF) {
res = sexp_json_unparse_exception(ctx, self, "unable to encode string", obj); return sexp_json_write_exception(ctx, self, "unable to encode string", obj);
sexp_gc_release2(ctx);
return res;
} }
snprintf(cout, sizeof(cout), "\\u%04lX\\u%04lX", chh, chl); snprintf(cout, sizeof(cout), "\\u%04lX\\u%04lX", chh, chl);
sexp_write_string(ctx, cout, out);
} }
tmp = sexp_c_string(ctx, cout, -1); }
res = sexp_cons(ctx, tmp, res); sexp_write_char(ctx, '"', out);
return SEXP_VOID;
} }
tmp = sexp_c_string(ctx, "\"", -1); sexp json_write_array(sexp ctx, sexp self, const sexp obj, sexp out) {
res = sexp_cons(ctx, tmp, res); sexp tmp;
int len = sexp_vector_length(obj), i;
res = sexp_nreverse(ctx, res); sexp_write_string(ctx, "[", out);
res = sexp_string_concatenate(ctx, res, SEXP_FALSE); for (i = 0; i < len; ++i) {
tmp = json_write(ctx, self, sexp_vector_ref(obj, sexp_make_fixnum(i)), out);
sexp_gc_release2(ctx);
return res;
}
sexp unparse_json_array(sexp ctx, sexp self, const sexp obj) {
sexp_gc_var2(res, tmp);
sexp_gc_preserve2(ctx, res, tmp);
res = SEXP_VOID;
tmp = sexp_c_string(ctx, "[", -1);
res = sexp_cons(ctx, tmp, res);
int len = sexp_vector_length(obj);
for (int i=0; i!=len; i++) {
tmp = unparse_json(ctx, self, sexp_vector_ref(obj, sexp_make_fixnum(i)));
if (sexp_exceptionp(tmp)) { if (sexp_exceptionp(tmp)) {
sexp_gc_release2(ctx);
return tmp; return tmp;
} }
res = sexp_cons(ctx, tmp, res); if (i < len - 1) {
sexp_write_char(ctx, ',', out);
if (i != len-1) {
tmp = sexp_c_string(ctx, ",", -1);
res = sexp_cons(ctx, tmp, res);
} }
} }
sexp_write_string(ctx, "]", out);
tmp = sexp_c_string(ctx, "]", -1); return SEXP_VOID;
res = sexp_cons(ctx, tmp, res);
res = sexp_nreverse(ctx, res);
res = sexp_string_concatenate(ctx, res, SEXP_FALSE);
sexp_gc_release2(ctx);
return res;
} }
sexp json_write_object(sexp ctx, sexp self, const sexp obj, sexp out) {
sexp unparse_json_object(sexp ctx, sexp self, const sexp obj) { sexp ls, cur, key, val, tmp;
sexp_gc_var6(res, tmp, it, cur, key, val); if (sexp_length(ctx, obj) == SEXP_FALSE)
sexp_gc_preserve6(ctx, res, tmp, it, cur, key, val); return sexp_json_write_exception(ctx, self, "unable to encode circular list", obj);
res = SEXP_VOID; sexp_write_char(ctx, '{', out);
for (ls = obj; sexp_pairp(ls); ls = sexp_cdr(ls)) {
tmp = sexp_c_string(ctx, "{", -1); if (ls != obj)
res = sexp_cons(ctx, tmp, res); sexp_write_char(ctx, ',', out);
cur = sexp_car(ls);
int len = sexp_unbox_fixnum(sexp_length(ctx, obj)); if (!sexp_pairp(cur))
it = obj; return sexp_json_write_exception(ctx, self, "unable to encode key-value pair: not a pair", obj);
for (int i=0; i!=len; i++) {
cur = sexp_car(it);
if (!sexp_pairp(cur)) {
res = sexp_json_unparse_exception(ctx, self, "unable to encode key-value pair: not a pair", obj);
goto except;
}
// Key
key = sexp_car(cur); key = sexp_car(cur);
if (!(sexp_symbolp(key) /*|| sexp_stringp(key)*/)) { if (!sexp_symbolp(key))
res = sexp_json_unparse_exception(ctx, self, "unable to encode key: not a symbol", key); return sexp_json_write_exception(ctx, self, "unable to encode key: not a symbol", key);
goto except; tmp = json_write(ctx, self, key, out);
} if (sexp_exceptionp(tmp))
tmp = unparse_json(ctx, self, key); return tmp;
if (sexp_exceptionp(tmp)) { sexp_write_char(ctx, ':', out);
res = tmp;
goto except;
}
res = sexp_cons(ctx, tmp, res);
// Separator
tmp = sexp_c_string(ctx, ":", -1);
res = sexp_cons(ctx, tmp, res);
// Value
val = sexp_cdr(cur); val = sexp_cdr(cur);
tmp = unparse_json(ctx, self, val); tmp = json_write(ctx, self, val, out);
res = sexp_cons(ctx, tmp, res); if (sexp_exceptionp(tmp))
return tmp;
if (i != len-1) {
tmp = sexp_c_string(ctx, ",", -1);
res = sexp_cons(ctx, tmp, res);
} }
it = sexp_cdr(it); sexp_write_char(ctx, '}', out);
return SEXP_VOID;
} }
tmp = sexp_c_string(ctx, "}", -1); sexp json_write (sexp ctx, sexp self, const sexp obj, sexp out) {
res = sexp_cons(ctx, tmp, res);
res = sexp_nreverse(ctx, res);
res = sexp_string_concatenate(ctx, res, SEXP_FALSE);
except:
sexp_gc_release6(ctx);
return res;
}
sexp unparse_json (sexp ctx, sexp self, sexp obj) {
sexp_gc_var1(res); sexp_gc_var1(res);
sexp_gc_preserve1(ctx, res); sexp_gc_preserve1(ctx, res);
res = SEXP_VOID; res = SEXP_VOID;
if (sexp_symbolp(obj)) { if (sexp_symbolp(obj)) {
obj = sexp_symbol_to_string(ctx, obj); res = sexp_symbol_to_string(ctx, obj);
res = unparse_json_string(ctx, self, obj); res = json_write_string(ctx, self, res, out);
} else if (sexp_stringp(obj)) { } else if (sexp_stringp(obj)) {
res = unparse_json_string(ctx, self, obj); res = json_write_string(ctx, self, obj, out);
} else if (sexp_listp(ctx, obj) == SEXP_TRUE) { } else if (sexp_listp(ctx, obj) == SEXP_TRUE) {
res = unparse_json_object(ctx, self, obj); res = json_write_object(ctx, self, obj, out);
} else if (sexp_vectorp(obj)) { } else if (sexp_vectorp(obj)) {
res = unparse_json_array(ctx, self, obj); res = json_write_array(ctx, self, obj, out);
} else if (sexp_fixnump(obj)) { } else if (sexp_fixnump(obj)) {
res = unparse_json_fixnum(ctx, self, obj); res = sexp_write(ctx, obj, out);
} else if (sexp_flonump(obj)) { } else if (sexp_flonump(obj)) {
res = unparse_json_flonum(ctx, self, obj); // OTHER TYPES? bignum etc? res = json_write_flonum(ctx, self, obj, out);
#if SEXP_USE_BIGNUMS
} else if (sexp_bignump(obj)) {
res = sexp_make_flonum(ctx, sexp_bignum_to_double(obj));
res = json_write_flonum(ctx, self, res, out);
#endif
} else if (obj == SEXP_FALSE) { } else if (obj == SEXP_FALSE) {
res = sexp_c_string(ctx, "false", -1); sexp_write_string(ctx, "false", out);
} else if (obj == SEXP_TRUE) { } else if (obj == SEXP_TRUE) {
res = sexp_c_string(ctx, "true", -1); sexp_write_string(ctx, "true", out);
} else if (obj == SEXP_NULL) { } else if (obj == SEXP_NULL) {
res = sexp_c_string(ctx, "null", -1); sexp_write_string(ctx, "null", out);
} else if (sexp_pairp(obj)) { } else if (sexp_pairp(obj)) {
res = sexp_json_unparse_exception(ctx, self, "unable to encode elemente: key-value pair out of object", obj); res = sexp_json_write_exception(ctx, self, "unable to encode elemente: key-value pair out of object", obj);
} else { } else {
res = sexp_json_unparse_exception(ctx, self, "unable to encode element", obj); res = sexp_json_write_exception(ctx, self, "unable to encode element", obj);
} }
sexp_gc_release1(ctx); sexp_gc_release1(ctx);
return res; return res;
} }
sexp sexp_unparse_json (sexp ctx, sexp self, sexp_sint_t n, sexp obj) { sexp sexp_json_write (sexp ctx, sexp self, sexp_sint_t n, sexp obj, sexp out) {
return unparse_json(ctx, self, obj); sexp_assert_type(ctx, sexp_oportp, SEXP_OPORT, out);
return json_write(ctx, self, obj, out);
} }
@ -577,7 +479,7 @@ sexp sexp_init_library (sexp ctx, sexp self, sexp_sint_t n, sexp env, const char
if (!(sexp_version_compatible(ctx, version, sexp_version) if (!(sexp_version_compatible(ctx, version, sexp_version)
&& sexp_abi_compatible(ctx, abi, SEXP_ABI_IDENTIFIER))) && sexp_abi_compatible(ctx, abi, SEXP_ABI_IDENTIFIER)))
return SEXP_ABI_ERROR; return SEXP_ABI_ERROR;
sexp_define_foreign(ctx, env, "parse-json", 1, sexp_parse_json); sexp_define_foreign(ctx, env, "json-read", 1, sexp_json_read);
sexp_define_foreign(ctx, env, "unparse-json", 1, sexp_unparse_json); sexp_define_foreign(ctx, env, "json-write", 2, sexp_json_write);
return SEXP_VOID; return SEXP_VOID;
} }

View file

@ -1,6 +1,15 @@
(define-library (chibi json) (define-library (chibi json)
(import (scheme base)) (import (scheme base))
(export parse-json) (export string->json json->string json-read json-write)
(export unparse-json) (include-shared "json")
(include-shared "json")) (begin
(define (string->json str)
(let* ((in (open-input-string str))
(res (json-read in)))
(close-input-port in)
res))
(define (json->string json)
(let ((out (open-output-string)))
(json-write json out)
(get-output-string out)))))