json-parser.c 16 KB


  1. /*
  2. * JSON Parser
  3. *
  4. * Copyright IBM, Corp. 2009
  5. *
  6. * Authors:
  7. * Anthony Liguori <aliguori@us.ibm.com>
  8. *
  9. * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
  10. * See the COPYING.LIB file in the top-level directory.
  11. *
  12. */
  13. #include "qemu/osdep.h"
  14. #include "qemu/ctype.h"
  15. #include "qemu/cutils.h"
  16. #include "qemu/unicode.h"
  17. #include "qapi/error.h"
  18. #include "qapi/qmp/qbool.h"
  19. #include "qapi/qmp/qdict.h"
  20. #include "qapi/qmp/qlist.h"
  21. #include "qapi/qmp/qnull.h"
  22. #include "qapi/qmp/qnum.h"
  23. #include "qapi/qmp/qstring.h"
  24. #include "json-parser-int.h"
  25. struct JSONToken {
  26. JSONTokenType type;
  27. int x;
  28. int y;
  29. char str[];
  30. };
  31. typedef struct JSONParserContext {
  32. Error *err;
  33. JSONToken *current;
  34. GQueue *buf;
  35. va_list *ap;
  36. } JSONParserContext;
  /* Trip an assertion failure when @cond evaluates to true. */
  #define BUG_ON(cond) assert(!(cond))
  38. /**
  39. * TODO
  40. *
  41. * 0) make errors meaningful again
  42. * 1) add geometry information to tokens
  43. * 3) should we return a parsed size?
  44. * 4) deal with premature EOI
  45. */
  46. static QObject *parse_value(JSONParserContext *ctxt);
  47. /**
  48. * Error handler
  49. */
  50. static void G_GNUC_PRINTF(3, 4) parse_error(JSONParserContext *ctxt,
  51. JSONToken *token, const char *msg, ...)
  52. {
  53. va_list ap;
  54. char message[1024];
  55. if (ctxt->err) {
  56. return;
  57. }
  58. va_start(ap, msg);
  59. vsnprintf(message, sizeof(message), msg, ap);
  60. va_end(ap);
  61. error_setg(&ctxt->err, "JSON parse error, %s", message);
  62. }
  /**
   * cvt4hex(): Convert exactly four hexadecimal digits to an integer
   *
   * @s: text to convert; reading stops at the first character that is not
   *     a hexadecimal digit, so a NUL-terminated string shorter than four
   *     characters is safe to pass.
   *
   * Returns: the value 0..0xFFFF of the four digits (either letter case),
   * or -1 if any of the first four characters is not a hexadecimal digit.
   */
  static int cvt4hex(const char *s)
  {
      int cp = 0;
      int i;

      for (i = 0; i < 4; i++) {
          char c = s[i];
          int digit;

          /* One classification per character; anything else is an error. */
          if (c >= '0' && c <= '9') {
              digit = c - '0';
          } else if (c >= 'a' && c <= 'f') {
              digit = 10 + (c - 'a');
          } else if (c >= 'A' && c <= 'F') {
              digit = 10 + (c - 'A');
          } else {
              return -1;
          }
          cp = (cp << 4) | digit;
      }
      return cp;
  }
  84. /**
  85. * parse_string(): Parse a JSON string
  86. *
  87. * From RFC 8259 "The JavaScript Object Notation (JSON) Data
  88. * Interchange Format":
  89. *
  90. * char = unescaped /
  91. * escape (
  92. * %x22 / ; " quotation mark U+0022
  93. * %x5C / ; \ reverse solidus U+005C
  94. * %x2F / ; / solidus U+002F
  95. * %x62 / ; b backspace U+0008
  96. * %x66 / ; f form feed U+000C
  97. * %x6E / ; n line feed U+000A
  98. * %x72 / ; r carriage return U+000D
  99. * %x74 / ; t tab U+0009
  100. * %x75 4HEXDIG ) ; uXXXX U+XXXX
  101. * escape = %x5C ; \
  102. * quotation-mark = %x22 ; "
  103. * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
  104. *
  105. * Extensions over RFC 8259:
  106. * - Extra escape sequence in strings:
  107. * 0x27 (apostrophe) is recognized after escape, too
  108. * - Single-quoted strings:
  109. * Like double-quoted strings, except they're delimited by %x27
  110. * (apostrophe) instead of %x22 (quotation mark), and can't contain
  111. * unescaped apostrophe, but can contain unescaped quotation mark.
  112. *
  113. * Note:
  114. * - Encoding is modified UTF-8.
  115. * - Invalid Unicode characters are rejected.
  116. * - Control characters \x00..\x1F are rejected by the lexer.
  117. */
  118. static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
  119. {
  120. const char *ptr = token->str;
  121. GString *str;
  122. char quote;
  123. const char *beg;
  124. int cp, trailing;
  125. char *end;
  126. ssize_t len;
  127. char utf8_buf[5];
  128. assert(*ptr == '"' || *ptr == '\'');
  129. quote = *ptr++;
  130. str = g_string_new(NULL);
  131. while (*ptr != quote) {
  132. assert(*ptr);
  133. switch (*ptr) {
  134. case '\\':
  135. beg = ptr++;
  136. switch (*ptr++) {
  137. case '"':
  138. g_string_append_c(str, '"');
  139. break;
  140. case '\'':
  141. g_string_append_c(str, '\'');
  142. break;
  143. case '\\':
  144. g_string_append_c(str, '\\');
  145. break;
  146. case '/':
  147. g_string_append_c(str, '/');
  148. break;
  149. case 'b':
  150. g_string_append_c(str, '\b');
  151. break;
  152. case 'f':
  153. g_string_append_c(str, '\f');
  154. break;
  155. case 'n':
  156. g_string_append_c(str, '\n');
  157. break;
  158. case 'r':
  159. g_string_append_c(str, '\r');
  160. break;
  161. case 't':
  162. g_string_append_c(str, '\t');
  163. break;
  164. case 'u':
  165. cp = cvt4hex(ptr);
  166. ptr += 4;
  167. /* handle surrogate pairs */
  168. if (cp >= 0xD800 && cp <= 0xDBFF
  169. && ptr[0] == '\\' && ptr[1] == 'u') {
  170. /* leading surrogate followed by \u */
  171. cp = 0x10000 + ((cp & 0x3FF) << 10);
  172. trailing = cvt4hex(ptr + 2);
  173. if (trailing >= 0xDC00 && trailing <= 0xDFFF) {
  174. /* followed by trailing surrogate */
  175. cp |= trailing & 0x3FF;
  176. ptr += 6;
  177. } else {
  178. cp = -1; /* invalid */
  179. }
  180. }
  181. if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
  182. parse_error(ctxt, token,
  183. "%.*s is not a valid Unicode character",
  184. (int)(ptr - beg), beg);
  185. goto out;
  186. }
  187. g_string_append(str, utf8_buf);
  188. break;
  189. default:
  190. parse_error(ctxt, token, "invalid escape sequence in string");
  191. goto out;
  192. }
  193. break;
  194. case '%':
  195. if (ctxt->ap) {
  196. if (ptr[1] != '%') {
  197. parse_error(ctxt, token, "can't interpolate into string");
  198. goto out;
  199. }
  200. ptr++;
  201. }
  202. /* fall through */
  203. default:
  204. cp = mod_utf8_codepoint(ptr, 6, &end);
  205. if (cp < 0) {
  206. parse_error(ctxt, token, "invalid UTF-8 sequence in string");
  207. goto out;
  208. }
  209. ptr = end;
  210. len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
  211. assert(len >= 0);
  212. g_string_append(str, utf8_buf);
  213. }
  214. }
  215. return qstring_from_gstring(str);
  216. out:
  217. g_string_free(str, true);
  218. return NULL;
  219. }
  220. /* Note: the token object returned by parser_context_peek_token or
  221. * parser_context_pop_token is deleted as soon as parser_context_pop_token
  222. * is called again.
  223. */
  224. static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
  225. {
  226. g_free(ctxt->current);
  227. ctxt->current = g_queue_pop_head(ctxt->buf);
  228. return ctxt->current;
  229. }
  230. static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
  231. {
  232. return g_queue_peek_head(ctxt->buf);
  233. }
  234. /**
  235. * Parsing rules
  236. */
  237. static int parse_pair(JSONParserContext *ctxt, QDict *dict)
  238. {
  239. QObject *key_obj = NULL;
  240. QString *key;
  241. QObject *value;
  242. JSONToken *peek, *token;
  243. peek = parser_context_peek_token(ctxt);
  244. if (peek == NULL) {
  245. parse_error(ctxt, NULL, "premature EOI");
  246. goto out;
  247. }
  248. key_obj = parse_value(ctxt);
  249. key = qobject_to(QString, key_obj);
  250. if (!key) {
  251. parse_error(ctxt, peek, "key is not a string in object");
  252. goto out;
  253. }
  254. token = parser_context_pop_token(ctxt);
  255. if (token == NULL) {
  256. parse_error(ctxt, NULL, "premature EOI");
  257. goto out;
  258. }
  259. if (token->type != JSON_COLON) {
  260. parse_error(ctxt, token, "missing : in object pair");
  261. goto out;
  262. }
  263. value = parse_value(ctxt);
  264. if (value == NULL) {
  265. parse_error(ctxt, token, "Missing value in dict");
  266. goto out;
  267. }
  268. if (qdict_haskey(dict, qstring_get_str(key))) {
  269. parse_error(ctxt, token, "duplicate key");
  270. goto out;
  271. }
  272. qdict_put_obj(dict, qstring_get_str(key), value);
  273. qobject_unref(key_obj);
  274. return 0;
  275. out:
  276. qobject_unref(key_obj);
  277. return -1;
  278. }
  279. static QObject *parse_object(JSONParserContext *ctxt)
  280. {
  281. QDict *dict = NULL;
  282. JSONToken *token, *peek;
  283. token = parser_context_pop_token(ctxt);
  284. assert(token && token->type == JSON_LCURLY);
  285. dict = qdict_new();
  286. peek = parser_context_peek_token(ctxt);
  287. if (peek == NULL) {
  288. parse_error(ctxt, NULL, "premature EOI");
  289. goto out;
  290. }
  291. if (peek->type != JSON_RCURLY) {
  292. if (parse_pair(ctxt, dict) == -1) {
  293. goto out;
  294. }
  295. token = parser_context_pop_token(ctxt);
  296. if (token == NULL) {
  297. parse_error(ctxt, NULL, "premature EOI");
  298. goto out;
  299. }
  300. while (token->type != JSON_RCURLY) {
  301. if (token->type != JSON_COMMA) {
  302. parse_error(ctxt, token, "expected separator in dict");
  303. goto out;
  304. }
  305. if (parse_pair(ctxt, dict) == -1) {
  306. goto out;
  307. }
  308. token = parser_context_pop_token(ctxt);
  309. if (token == NULL) {
  310. parse_error(ctxt, NULL, "premature EOI");
  311. goto out;
  312. }
  313. }
  314. } else {
  315. (void)parser_context_pop_token(ctxt);
  316. }
  317. return QOBJECT(dict);
  318. out:
  319. qobject_unref(dict);
  320. return NULL;
  321. }
  322. static QObject *parse_array(JSONParserContext *ctxt)
  323. {
  324. QList *list = NULL;
  325. JSONToken *token, *peek;
  326. token = parser_context_pop_token(ctxt);
  327. assert(token && token->type == JSON_LSQUARE);
  328. list = qlist_new();
  329. peek = parser_context_peek_token(ctxt);
  330. if (peek == NULL) {
  331. parse_error(ctxt, NULL, "premature EOI");
  332. goto out;
  333. }
  334. if (peek->type != JSON_RSQUARE) {
  335. QObject *obj;
  336. obj = parse_value(ctxt);
  337. if (obj == NULL) {
  338. parse_error(ctxt, token, "expecting value");
  339. goto out;
  340. }
  341. qlist_append_obj(list, obj);
  342. token = parser_context_pop_token(ctxt);
  343. if (token == NULL) {
  344. parse_error(ctxt, NULL, "premature EOI");
  345. goto out;
  346. }
  347. while (token->type != JSON_RSQUARE) {
  348. if (token->type != JSON_COMMA) {
  349. parse_error(ctxt, token, "expected separator in list");
  350. goto out;
  351. }
  352. obj = parse_value(ctxt);
  353. if (obj == NULL) {
  354. parse_error(ctxt, token, "expecting value");
  355. goto out;
  356. }
  357. qlist_append_obj(list, obj);
  358. token = parser_context_pop_token(ctxt);
  359. if (token == NULL) {
  360. parse_error(ctxt, NULL, "premature EOI");
  361. goto out;
  362. }
  363. }
  364. } else {
  365. (void)parser_context_pop_token(ctxt);
  366. }
  367. return QOBJECT(list);
  368. out:
  369. qobject_unref(list);
  370. return NULL;
  371. }
  372. static QObject *parse_keyword(JSONParserContext *ctxt)
  373. {
  374. JSONToken *token;
  375. token = parser_context_pop_token(ctxt);
  376. assert(token && token->type == JSON_KEYWORD);
  377. if (!strcmp(token->str, "true")) {
  378. return QOBJECT(qbool_from_bool(true));
  379. } else if (!strcmp(token->str, "false")) {
  380. return QOBJECT(qbool_from_bool(false));
  381. } else if (!strcmp(token->str, "null")) {
  382. return QOBJECT(qnull());
  383. }
  384. parse_error(ctxt, token, "invalid keyword '%s'", token->str);
  385. return NULL;
  386. }
  387. static QObject *parse_interpolation(JSONParserContext *ctxt)
  388. {
  389. JSONToken *token;
  390. token = parser_context_pop_token(ctxt);
  391. assert(token && token->type == JSON_INTERP);
  392. if (!strcmp(token->str, "%p")) {
  393. return va_arg(*ctxt->ap, QObject *);
  394. } else if (!strcmp(token->str, "%i")) {
  395. return QOBJECT(qbool_from_bool(va_arg(*ctxt->ap, int)));
  396. } else if (!strcmp(token->str, "%d")) {
  397. return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int)));
  398. } else if (!strcmp(token->str, "%ld")) {
  399. return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long)));
  400. } else if (!strcmp(token->str, "%lld")) {
  401. return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, long long)));
  402. } else if (!strcmp(token->str, "%" PRId64)) {
  403. return QOBJECT(qnum_from_int(va_arg(*ctxt->ap, int64_t)));
  404. } else if (!strcmp(token->str, "%u")) {
  405. return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned int)));
  406. } else if (!strcmp(token->str, "%lu")) {
  407. return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long)));
  408. } else if (!strcmp(token->str, "%llu")) {
  409. return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, unsigned long long)));
  410. } else if (!strcmp(token->str, "%" PRIu64)) {
  411. return QOBJECT(qnum_from_uint(va_arg(*ctxt->ap, uint64_t)));
  412. } else if (!strcmp(token->str, "%s")) {
  413. return QOBJECT(qstring_from_str(va_arg(*ctxt->ap, const char *)));
  414. } else if (!strcmp(token->str, "%f")) {
  415. return QOBJECT(qnum_from_double(va_arg(*ctxt->ap, double)));
  416. }
  417. parse_error(ctxt, token, "invalid interpolation '%s'", token->str);
  418. return NULL;
  419. }
  420. static QObject *parse_literal(JSONParserContext *ctxt)
  421. {
  422. JSONToken *token;
  423. token = parser_context_pop_token(ctxt);
  424. assert(token);
  425. switch (token->type) {
  426. case JSON_STRING:
  427. return QOBJECT(parse_string(ctxt, token));
  428. case JSON_INTEGER: {
  429. /*
  430. * Represent JSON_INTEGER as QNUM_I64 if possible, else as
  431. * QNUM_U64, else as QNUM_DOUBLE. Note that qemu_strtoi64()
  432. * and qemu_strtou64() fail with ERANGE when it's not
  433. * possible.
  434. *
  435. * qnum_get_int() will then work for any signed 64-bit
  436. * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
  437. * integer, and qnum_get_double() both for any JSON_INTEGER
  438. * and any JSON_FLOAT (with precision loss for integers beyond
  439. * 53 bits)
  440. */
  441. int ret;
  442. int64_t value;
  443. uint64_t uvalue;
  444. ret = qemu_strtoi64(token->str, NULL, 10, &value);
  445. if (!ret) {
  446. return QOBJECT(qnum_from_int(value));
  447. }
  448. assert(ret == -ERANGE);
  449. if (token->str[0] != '-') {
  450. ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
  451. if (!ret) {
  452. return QOBJECT(qnum_from_uint(uvalue));
  453. }
  454. assert(ret == -ERANGE);
  455. }
  456. }
  457. /* fall through to JSON_FLOAT */
  458. case JSON_FLOAT:
  459. /* FIXME dependent on locale; a pervasive issue in QEMU */
  460. /* FIXME our lexer matches RFC 8259 in forbidding Inf or NaN,
  461. * but those might be useful extensions beyond JSON */
  462. return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
  463. default:
  464. abort();
  465. }
  466. }
  467. static QObject *parse_value(JSONParserContext *ctxt)
  468. {
  469. JSONToken *token;
  470. token = parser_context_peek_token(ctxt);
  471. if (token == NULL) {
  472. parse_error(ctxt, NULL, "premature EOI");
  473. return NULL;
  474. }
  475. switch (token->type) {
  476. case JSON_LCURLY:
  477. return parse_object(ctxt);
  478. case JSON_LSQUARE:
  479. return parse_array(ctxt);
  480. case JSON_INTERP:
  481. return parse_interpolation(ctxt);
  482. case JSON_INTEGER:
  483. case JSON_FLOAT:
  484. case JSON_STRING:
  485. return parse_literal(ctxt);
  486. case JSON_KEYWORD:
  487. return parse_keyword(ctxt);
  488. default:
  489. parse_error(ctxt, token, "expecting value");
  490. return NULL;
  491. }
  492. }
  493. JSONToken *json_token(JSONTokenType type, int x, int y, GString *tokstr)
  494. {
  495. JSONToken *token = g_malloc(sizeof(JSONToken) + tokstr->len + 1);
  496. token->type = type;
  497. memcpy(token->str, tokstr->str, tokstr->len);
  498. token->str[tokstr->len] = 0;
  499. token->x = x;
  500. token->y = y;
  501. return token;
  502. }
  503. QObject *json_parser_parse(GQueue *tokens, va_list *ap, Error **errp)
  504. {
  505. JSONParserContext ctxt = { .buf = tokens, .ap = ap };
  506. QObject *result;
  507. result = parse_value(&ctxt);
  508. assert(ctxt.err || g_queue_is_empty(ctxt.buf));
  509. error_propagate(errp, ctxt.err);
  510. while (!g_queue_is_empty(ctxt.buf)) {
  511. parser_context_pop_token(&ctxt);
  512. }
  513. g_free(ctxt.current);
  514. return result;
  515. }