json-parser.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. /*
  2. * JSON Parser
  3. *
  4. * Copyright IBM, Corp. 2009
  5. *
  6. * Authors:
  7. * Anthony Liguori <aliguori@us.ibm.com>
  8. *
  9. * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
  10. * See the COPYING.LIB file in the top-level directory.
  11. *
  12. */
  13. #include "qemu/osdep.h"
  14. #include "qemu/cutils.h"
  15. #include "qemu/unicode.h"
  16. #include "qapi/error.h"
  17. #include "qemu-common.h"
  18. #include "qapi/qmp/qbool.h"
  19. #include "qapi/qmp/qdict.h"
  20. #include "qapi/qmp/qlist.h"
  21. #include "qapi/qmp/qnull.h"
  22. #include "qapi/qmp/qnum.h"
  23. #include "qapi/qmp/qstring.h"
  24. #include "qapi/qmp/json-parser.h"
  25. #include "qapi/qmp/json-lexer.h"
  26. #include "qapi/qmp/json-streamer.h"
  27. typedef struct JSONParserContext
  28. {
  29. Error *err;
  30. JSONToken *current;
  31. GQueue *buf;
  32. } JSONParserContext;
  33. #define BUG_ON(cond) assert(!(cond))
  34. /**
  35. * TODO
  36. *
  37. * 0) make errors meaningful again
  38. * 1) add geometry information to tokens
  39. * 3) should we return a parsed size?
  40. * 4) deal with premature EOI
  41. */
  42. static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
  43. /**
  44. * Error handler
  45. */
  46. static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
  47. JSONToken *token, const char *msg, ...)
  48. {
  49. va_list ap;
  50. char message[1024];
  51. if (ctxt->err) {
  52. return;
  53. }
  54. va_start(ap, msg);
  55. vsnprintf(message, sizeof(message), msg, ap);
  56. va_end(ap);
  57. error_setg(&ctxt->err, "JSON parse error, %s", message);
  58. }
  /**
   * cvt4hex(): Convert four hexadecimal digits to an integer
   *
   * Decodes the first four characters of @s as a big-endian hexadecimal
   * number.  Upper- and lower-case digits are both accepted.
   *
   * Scanning stops at the first character that is not a hex digit, so a
   * string shorter than four characters is rejected at its terminating
   * NUL without reading beyond it.
   *
   * Returns: the value in the range 0..0xFFFF, or -1 if any of the first
   * four characters is not a hexadecimal digit.
   */
  static int cvt4hex(const char *s)
  {
      int cp = 0;
      int i;

      for (i = 0; i < 4; i++) {
          char c = s[i];
          int digit;

          /*
           * Classify with explicit ASCII ranges only: this is exactly the
           * set of hex digits, and it does not depend on the locale.
           */
          if (c >= '0' && c <= '9') {
              digit = c - '0';
          } else if (c >= 'a' && c <= 'f') {
              digit = 10 + c - 'a';
          } else if (c >= 'A' && c <= 'F') {
              digit = 10 + c - 'A';
          } else {
              return -1;
          }

          cp = (cp << 4) | digit;
      }

      return cp;
  }
  80. /**
  81. * parse_string(): Parse a JSON string
  82. *
  83. * From RFC 8259 "The JavaScript Object Notation (JSON) Data
  84. * Interchange Format":
  85. *
  86. * char = unescaped /
  87. * escape (
  88. * %x22 / ; " quotation mark U+0022
  89. * %x5C / ; \ reverse solidus U+005C
  90. * %x2F / ; / solidus U+002F
  91. * %x62 / ; b backspace U+0008
  92. * %x66 / ; f form feed U+000C
  93. * %x6E / ; n line feed U+000A
  94. * %x72 / ; r carriage return U+000D
  95. * %x74 / ; t tab U+0009
  96. * %x75 4HEXDIG ) ; uXXXX U+XXXX
  97. * escape = %x5C ; \
  98. * quotation-mark = %x22 ; "
  99. * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
  100. *
  101. * Extensions over RFC 8259:
  102. * - Extra escape sequence in strings:
  103. * 0x27 (apostrophe) is recognized after escape, too
  104. * - Single-quoted strings:
  105. * Like double-quoted strings, except they're delimited by %x27
  106. * (apostrophe) instead of %x22 (quotation mark), and can't contain
  107. * unescaped apostrophe, but can contain unescaped quotation mark.
  108. *
  109. * Note:
  110. * - Encoding is modified UTF-8.
  111. * - Invalid Unicode characters are rejected.
  112. * - Control characters \x00..\x1F are rejected by the lexer.
  113. */
  114. static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
  115. {
  116. const char *ptr = token->str;
  117. QString *str;
  118. char quote;
  119. const char *beg;
  120. int cp, trailing;
  121. char *end;
  122. ssize_t len;
  123. char utf8_buf[5];
  124. assert(*ptr == '"' || *ptr == '\'');
  125. quote = *ptr++;
  126. str = qstring_new();
  127. while (*ptr != quote) {
  128. assert(*ptr);
  129. if (*ptr == '\\') {
  130. beg = ptr++;
  131. switch (*ptr++) {
  132. case '"':
  133. qstring_append_chr(str, '"');
  134. break;
  135. case '\'':
  136. qstring_append_chr(str, '\'');
  137. break;
  138. case '\\':
  139. qstring_append_chr(str, '\\');
  140. break;
  141. case '/':
  142. qstring_append_chr(str, '/');
  143. break;
  144. case 'b':
  145. qstring_append_chr(str, '\b');
  146. break;
  147. case 'f':
  148. qstring_append_chr(str, '\f');
  149. break;
  150. case 'n':
  151. qstring_append_chr(str, '\n');
  152. break;
  153. case 'r':
  154. qstring_append_chr(str, '\r');
  155. break;
  156. case 't':
  157. qstring_append_chr(str, '\t');
  158. break;
  159. case 'u':
  160. cp = cvt4hex(ptr);
  161. ptr += 4;
  162. /* handle surrogate pairs */
  163. if (cp >= 0xD800 && cp <= 0xDBFF
  164. && ptr[0] == '\\' && ptr[1] == 'u') {
  165. /* leading surrogate followed by \u */
  166. cp = 0x10000 + ((cp & 0x3FF) << 10);
  167. trailing = cvt4hex(ptr + 2);
  168. if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
  169. /* followed by trailing surrogate */
  170. cp |= trailing & 0x3FF;
  171. ptr += 6;
  172. } else {
  173. cp = -1; /* invalid */
  174. }
  175. }
  176. if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
  177. parse_error(ctxt, token,
  178. "%.*s is not a valid Unicode character",
  179. (int)(ptr - beg), beg);
  180. goto out;
  181. }
  182. qstring_append(str, utf8_buf);
  183. break;
  184. default:
  185. parse_error(ctxt, token, "invalid escape sequence in string");
  186. goto out;
  187. }
  188. } else {
  189. cp = mod_utf8_codepoint(ptr, 6, &end);
  190. if (cp < 0) {
  191. parse_error(ctxt, token, "invalid UTF-8 sequence in string");
  192. goto out;
  193. }
  194. ptr = end;
  195. len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
  196. assert(len >= 0);
  197. qstring_append(str, utf8_buf);
  198. }
  199. }
  200. return str;
  201. out:
  202. qobject_unref(str);
  203. return NULL;
  204. }
  205. /* Note: the token object returned by parser_context_peek_token or
  206. * parser_context_pop_token is deleted as soon as parser_context_pop_token
  207. * is called again.
  208. */
  209. static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
  210. {
  211. g_free(ctxt->current);
  212. assert(!g_queue_is_empty(ctxt->buf));
  213. ctxt->current = g_queue_pop_head(ctxt->buf);
  214. return ctxt->current;
  215. }
  216. static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
  217. {
  218. assert(!g_queue_is_empty(ctxt->buf));
  219. return g_queue_peek_head(ctxt->buf);
  220. }
  221. /**
  222. * Parsing rules
  223. */
  224. static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
  225. {
  226. QObject *value;
  227. QString *key = NULL;
  228. JSONToken *peek, *token;
  229. peek = parser_context_peek_token(ctxt);
  230. if (peek == NULL) {
  231. parse_error(ctxt, NULL, "premature EOI");
  232. goto out;
  233. }
  234. key = qobject_to(QString, parse_value(ctxt, ap));
  235. if (!key) {
  236. parse_error(ctxt, peek, "key is not a string in object");
  237. goto out;
  238. }
  239. token = parser_context_pop_token(ctxt);
  240. if (token == NULL) {
  241. parse_error(ctxt, NULL, "premature EOI");
  242. goto out;
  243. }
  244. if (token->type != JSON_COLON) {
  245. parse_error(ctxt, token, "missing : in object pair");
  246. goto out;
  247. }
  248. value = parse_value(ctxt, ap);
  249. if (value == NULL) {
  250. parse_error(ctxt, token, "Missing value in dict");
  251. goto out;
  252. }
  253. qdict_put_obj(dict, qstring_get_str(key), value);
  254. qobject_unref(key);
  255. return 0;
  256. out:
  257. qobject_unref(key);
  258. return -1;
  259. }
  260. static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
  261. {
  262. QDict *dict = NULL;
  263. JSONToken *token, *peek;
  264. token = parser_context_pop_token(ctxt);
  265. assert(token && token->type == JSON_LCURLY);
  266. dict = qdict_new();
  267. peek = parser_context_peek_token(ctxt);
  268. if (peek == NULL) {
  269. parse_error(ctxt, NULL, "premature EOI");
  270. goto out;
  271. }
  272. if (peek->type != JSON_RCURLY) {
  273. if (parse_pair(ctxt, dict, ap) == -1) {
  274. goto out;
  275. }
  276. token = parser_context_pop_token(ctxt);
  277. if (token == NULL) {
  278. parse_error(ctxt, NULL, "premature EOI");
  279. goto out;
  280. }
  281. while (token->type != JSON_RCURLY) {
  282. if (token->type != JSON_COMMA) {
  283. parse_error(ctxt, token, "expected separator in dict");
  284. goto out;
  285. }
  286. if (parse_pair(ctxt, dict, ap) == -1) {
  287. goto out;
  288. }
  289. token = parser_context_pop_token(ctxt);
  290. if (token == NULL) {
  291. parse_error(ctxt, NULL, "premature EOI");
  292. goto out;
  293. }
  294. }
  295. } else {
  296. (void)parser_context_pop_token(ctxt);
  297. }
  298. return QOBJECT(dict);
  299. out:
  300. qobject_unref(dict);
  301. return NULL;
  302. }
  303. static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
  304. {
  305. QList *list = NULL;
  306. JSONToken *token, *peek;
  307. token = parser_context_pop_token(ctxt);
  308. assert(token && token->type == JSON_LSQUARE);
  309. list = qlist_new();
  310. peek = parser_context_peek_token(ctxt);
  311. if (peek == NULL) {
  312. parse_error(ctxt, NULL, "premature EOI");
  313. goto out;
  314. }
  315. if (peek->type != JSON_RSQUARE) {
  316. QObject *obj;
  317. obj = parse_value(ctxt, ap);
  318. if (obj == NULL) {
  319. parse_error(ctxt, token, "expecting value");
  320. goto out;
  321. }
  322. qlist_append_obj(list, obj);
  323. token = parser_context_pop_token(ctxt);
  324. if (token == NULL) {
  325. parse_error(ctxt, NULL, "premature EOI");
  326. goto out;
  327. }
  328. while (token->type != JSON_RSQUARE) {
  329. if (token->type != JSON_COMMA) {
  330. parse_error(ctxt, token, "expected separator in list");
  331. goto out;
  332. }
  333. obj = parse_value(ctxt, ap);
  334. if (obj == NULL) {
  335. parse_error(ctxt, token, "expecting value");
  336. goto out;
  337. }
  338. qlist_append_obj(list, obj);
  339. token = parser_context_pop_token(ctxt);
  340. if (token == NULL) {
  341. parse_error(ctxt, NULL, "premature EOI");
  342. goto out;
  343. }
  344. }
  345. } else {
  346. (void)parser_context_pop_token(ctxt);
  347. }
  348. return QOBJECT(list);
  349. out:
  350. qobject_unref(list);
  351. return NULL;
  352. }
  353. static QObject *parse_keyword(JSONParserContext *ctxt)
  354. {
  355. JSONToken *token;
  356. token = parser_context_pop_token(ctxt);
  357. assert(token && token->type == JSON_KEYWORD);
  358. if (!strcmp(token->str, "true")) {
  359. return QOBJECT(qbool_from_bool(true));
  360. } else if (!strcmp(token->str, "false")) {
  361. return QOBJECT(qbool_from_bool(false));
  362. } else if (!strcmp(token->str, "null")) {
  363. return QOBJECT(qnull());
  364. }
  365. parse_error(ctxt, token, "invalid keyword '%s'", token->str);
  366. return NULL;
  367. }
  368. static QObject *parse_interpolation(JSONParserContext *ctxt, va_list *ap)
  369. {
  370. JSONToken *token;
  371. token = parser_context_pop_token(ctxt);
  372. assert(token && token->type == JSON_INTERP);
  373. if (!strcmp(token->str, "%p")) {
  374. return va_arg(*ap, QObject *);
  375. } else if (!strcmp(token->str, "%i")) {
  376. return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
  377. } else if (!strcmp(token->str, "%d")) {
  378. return QOBJECT(qnum_from_int(va_arg(*ap, int)));
  379. } else if (!strcmp(token->str, "%ld")) {
  380. return QOBJECT(qnum_from_int(va_arg(*ap, long)));
  381. } else if (!strcmp(token->str, "%lld") ||
  382. !strcmp(token->str, "%I64d")) {
  383. return QOBJECT(qnum_from_int(va_arg(*ap, long long)));
  384. } else if (!strcmp(token->str, "%u")) {
  385. return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int)));
  386. } else if (!strcmp(token->str, "%lu")) {
  387. return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long)));
  388. } else if (!strcmp(token->str, "%llu") ||
  389. !strcmp(token->str, "%I64u")) {
  390. return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long)));
  391. } else if (!strcmp(token->str, "%s")) {
  392. return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
  393. } else if (!strcmp(token->str, "%f")) {
  394. return QOBJECT(qnum_from_double(va_arg(*ap, double)));
  395. }
  396. parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
  397. return NULL;
  398. }
  399. static QObject *parse_literal(JSONParserContext *ctxt)
  400. {
  401. JSONToken *token;
  402. token = parser_context_pop_token(ctxt);
  403. assert(token);
  404. switch (token->type) {
  405. case JSON_STRING:
  406. return QOBJECT(parse_string(ctxt, token));
  407. case JSON_INTEGER: {
  408. /*
  409. * Represent JSON_INTEGER as QNUM_I64 if possible, else as
  410. * QNUM_U64, else as QNUM_DOUBLE. Note that qemu_strtoi64()
  411. * and qemu_strtou64() fail with ERANGE when it's not
  412. * possible.
  413. *
  414. * qnum_get_int() will then work for any signed 64-bit
  415. * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
  416. * integer, and qnum_get_double() both for any JSON_INTEGER
  417. * and any JSON_FLOAT (with precision loss for integers beyond
  418. * 53 bits)
  419. */
  420. int ret;
  421. int64_t value;
  422. uint64_t uvalue;
  423. ret = qemu_strtoi64(token->str, NULL, 10, &value);
  424. if (!ret) {
  425. return QOBJECT(qnum_from_int(value));
  426. }
  427. assert(ret == -ERANGE);
  428. if (token->str[0] != '-') {
  429. ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
  430. if (!ret) {
  431. return QOBJECT(qnum_from_uint(uvalue));
  432. }
  433. assert(ret == -ERANGE);
  434. }
  435. /* fall through to JSON_FLOAT */
  436. }
  437. case JSON_FLOAT:
  438. /* FIXME dependent on locale; a pervasive issue in QEMU */
  439. /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN,
  440. * but those might be useful extensions beyond JSON */
  441. return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
  442. default:
  443. abort();
  444. }
  445. }
  446. static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
  447. {
  448. JSONToken *token;
  449. token = parser_context_peek_token(ctxt);
  450. if (token == NULL) {
  451. parse_error(ctxt, NULL, "premature EOI");
  452. return NULL;
  453. }
  454. switch (token->type) {
  455. case JSON_LCURLY:
  456. return parse_object(ctxt, ap);
  457. case JSON_LSQUARE:
  458. return parse_array(ctxt, ap);
  459. case JSON_INTERP:
  460. return parse_interpolation(ctxt, ap);
  461. case JSON_INTEGER:
  462. case JSON_FLOAT:
  463. case JSON_STRING:
  464. return parse_literal(ctxt);
  465. case JSON_KEYWORD:
  466. return parse_keyword(ctxt);
  467. default:
  468. parse_error(ctxt, token, "expecting value");
  469. return NULL;
  470. }
  471. }
  472. QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
  473. {
  474. JSONParserContext ctxt = { .buf = tokens };
  475. QObject *result;
  476. result = parse_value(&ctxt, ap);
  477. error_propagate(errp, ctxt.err);
  478. while (!g_queue_is_empty(ctxt.buf)) {
  479. parser_context_pop_token(&ctxt);
  480. }
  481. g_free(ctxt.current);
  482. g_queue_free(ctxt.buf);
  483. return result;
  484. }