json-parser.c 13 KB


  1. /*
  2. * JSON Parser
  3. *
  4. * Copyright IBM, Corp. 2009
  5. *
  6. * Authors:
  7. * Anthony Liguori <aliguori@us.ibm.com>
  8. *
  9. * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
  10. * See the COPYING.LIB file in the top-level directory.
  11. *
  12. */
  13. #include <stdarg.h>
  14. #include "qemu-common.h"
  15. #include "qstring.h"
  16. #include "qint.h"
  17. #include "qdict.h"
  18. #include "qlist.h"
  19. #include "qfloat.h"
  20. #include "qbool.h"
  21. #include "json-parser.h"
  22. #include "json-lexer.h"
  /*
   * Parser context handed to every parsing routine in this file.
   * It declares no members here.
   */
  typedef struct JSONParserContext {
  } JSONParserContext;

  /* Assert that @cond does not hold; a true @cond trips assert(). */
  #define BUG_ON(cond) assert(!(cond))
  27. /**
  28. * TODO
  29. *
  30. * 0) make errors meaningful again
  31. * 1) add geometry information to tokens
  32. * 3) should we return a parsed size?
  33. * 4) deal with premature EOI
  34. */
  35. static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
  36. /**
  37. * Token manipulators
  38. *
  39. * tokens are dictionaries that contain a type, a string value, and geometry information
  40. * about a token identified by the lexer. These are routines that make working with
  41. * these objects a bit easier.
  42. */
  43. static const char *token_get_value(QObject *obj)
  44. {
  45. return qdict_get_str(qobject_to_qdict(obj), "token");
  46. }
  47. static JSONTokenType token_get_type(QObject *obj)
  48. {
  49. return qdict_get_int(qobject_to_qdict(obj), "type");
  50. }
  51. static int token_is_operator(QObject *obj, char op)
  52. {
  53. const char *val;
  54. if (token_get_type(obj) != JSON_OPERATOR) {
  55. return 0;
  56. }
  57. val = token_get_value(obj);
  58. return (val[0] == op) && (val[1] == 0);
  59. }
  60. static int token_is_keyword(QObject *obj, const char *value)
  61. {
  62. if (token_get_type(obj) != JSON_KEYWORD) {
  63. return 0;
  64. }
  65. return strcmp(token_get_value(obj), value) == 0;
  66. }
  67. static int token_is_escape(QObject *obj, const char *value)
  68. {
  69. if (token_get_type(obj) != JSON_ESCAPE) {
  70. return 0;
  71. }
  72. return (strcmp(token_get_value(obj), value) == 0);
  73. }
  74. /**
  75. * Error handler
  76. */
  77. static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
  78. QObject *token, const char *msg, ...)
  79. {
  80. va_list ap;
  81. va_start(ap, msg);
  82. fprintf(stderr, "parse error: ");
  83. vfprintf(stderr, msg, ap);
  84. fprintf(stderr, "\n");
  85. va_end(ap);
  86. }
  87. /**
  88. * String helpers
  89. *
  90. * These helpers are used to unescape strings.
  91. */
  /**
   * wchar_to_utf8(): Encode one 16-bit value as a NUL-terminated UTF-8
   * sequence.
   *
   * @wchar:         value to encode; no range gets special treatment, so
   *                 surrogate values are encoded like any other value above
   *                 0x07FF.
   * @buffer:        destination for the encoded bytes plus the terminator.
   * @buffer_length: size of @buffer; BUG_ON() fires when it cannot hold the
   *                 sequence and its terminator (2, 3 or 4 bytes in total).
   */
  static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
  {
      char *out = buffer;

      if (wchar > 0x07FF) {
          /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
          BUG_ON(buffer_length < 4);
          *out++ = 0xE0 | ((wchar >> 12) & 0x0F);
          *out++ = 0x80 | ((wchar >> 6) & 0x3F);
          *out++ = 0x80 | (wchar & 0x3F);
      } else if (wchar > 0x007F) {
          /* Two bytes: 110xxxxx 10xxxxxx */
          BUG_ON(buffer_length < 3);
          *out++ = 0xC0 | ((wchar >> 6) & 0x1F);
          *out++ = 0x80 | (wchar & 0x3F);
      } else {
          /* One byte: 0xxxxxxx */
          BUG_ON(buffer_length < 2);
          *out++ = wchar & 0x7F;
      }

      *out = 0;
  }
  /**
   * hex2decimal(): Convert one hexadecimal digit character to its value.
   *
   * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other
   * character.
   */
  static int hex2decimal(char ch)
  {
      int value = -1;

      if ('0' <= ch && ch <= '9') {
          value = ch - '0';
      } else if ('a' <= ch && ch <= 'f') {
          value = ch - 'a' + 10;
      } else if ('A' <= ch && ch <= 'F') {
          value = ch - 'A' + 10;
      }

      return value;
  }
  122. /**
  123. * parse_string(): Parse a json string and return a QObject
  124. *
  125. * string
  126. * ""
  127. * " chars "
  128. * chars
  129. * char
  130. * char chars
  131. * char
  132. * any-Unicode-character-
  133. * except-"-or-\-or-
  134. * control-character
  135. * \"
  136. * \\
  137. * \/
  138. * \b
  139. * \f
  140. * \n
  141. * \r
  142. * \t
  143. * \u four-hex-digits
  144. */
  145. static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
  146. {
  147. const char *ptr = token_get_value(token);
  148. QString *str;
  149. int double_quote = 1;
  150. if (*ptr == '"') {
  151. double_quote = 1;
  152. } else {
  153. double_quote = 0;
  154. }
  155. ptr++;
  156. str = qstring_new();
  157. while (*ptr &&
  158. ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
  159. if (*ptr == '\\') {
  160. ptr++;
  161. switch (*ptr) {
  162. case '"':
  163. qstring_append(str, "\"");
  164. ptr++;
  165. break;
  166. case '\'':
  167. qstring_append(str, "'");
  168. ptr++;
  169. break;
  170. case '\\':
  171. qstring_append(str, "\\");
  172. ptr++;
  173. break;
  174. case '/':
  175. qstring_append(str, "/");
  176. ptr++;
  177. break;
  178. case 'b':
  179. qstring_append(str, "\b");
  180. ptr++;
  181. break;
  182. case 'f':
  183. qstring_append(str, "\f");
  184. ptr++;
  185. break;
  186. case 'n':
  187. qstring_append(str, "\n");
  188. ptr++;
  189. break;
  190. case 'r':
  191. qstring_append(str, "\r");
  192. ptr++;
  193. break;
  194. case 't':
  195. qstring_append(str, "\t");
  196. ptr++;
  197. break;
  198. case 'u': {
  199. uint16_t unicode_char = 0;
  200. char utf8_char[4];
  201. int i = 0;
  202. ptr++;
  203. for (i = 0; i < 4; i++) {
  204. if (qemu_isxdigit(*ptr)) {
  205. unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
  206. } else {
  207. parse_error(ctxt, token,
  208. "invalid hex escape sequence in string");
  209. goto out;
  210. }
  211. ptr++;
  212. }
  213. wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
  214. qstring_append(str, utf8_char);
  215. } break;
  216. default:
  217. parse_error(ctxt, token, "invalid escape sequence in string");
  218. goto out;
  219. }
  220. } else {
  221. char dummy[2];
  222. dummy[0] = *ptr++;
  223. dummy[1] = 0;
  224. qstring_append(str, dummy);
  225. }
  226. }
  227. return str;
  228. out:
  229. QDECREF(str);
  230. return NULL;
  231. }
  232. /**
  233. * Parsing rules
  234. */
  235. static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
  236. {
  237. QObject *key, *token = NULL, *value, *peek;
  238. QList *working = qlist_copy(*tokens);
  239. peek = qlist_peek(working);
  240. key = parse_value(ctxt, &working, ap);
  241. if (!key || qobject_type(key) != QTYPE_QSTRING) {
  242. parse_error(ctxt, peek, "key is not a string in object");
  243. goto out;
  244. }
  245. token = qlist_pop(working);
  246. if (!token_is_operator(token, ':')) {
  247. parse_error(ctxt, token, "missing : in object pair");
  248. goto out;
  249. }
  250. value = parse_value(ctxt, &working, ap);
  251. if (value == NULL) {
  252. parse_error(ctxt, token, "Missing value in dict");
  253. goto out;
  254. }
  255. qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
  256. qobject_decref(token);
  257. qobject_decref(key);
  258. QDECREF(*tokens);
  259. *tokens = working;
  260. return 0;
  261. out:
  262. qobject_decref(token);
  263. qobject_decref(key);
  264. QDECREF(working);
  265. return -1;
  266. }
  267. static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  268. {
  269. QDict *dict = NULL;
  270. QObject *token, *peek;
  271. QList *working = qlist_copy(*tokens);
  272. token = qlist_pop(working);
  273. if (!token_is_operator(token, '{')) {
  274. goto out;
  275. }
  276. qobject_decref(token);
  277. token = NULL;
  278. dict = qdict_new();
  279. peek = qlist_peek(working);
  280. if (!token_is_operator(peek, '}')) {
  281. if (parse_pair(ctxt, dict, &working, ap) == -1) {
  282. goto out;
  283. }
  284. token = qlist_pop(working);
  285. while (!token_is_operator(token, '}')) {
  286. if (!token_is_operator(token, ',')) {
  287. parse_error(ctxt, token, "expected separator in dict");
  288. goto out;
  289. }
  290. qobject_decref(token);
  291. token = NULL;
  292. if (parse_pair(ctxt, dict, &working, ap) == -1) {
  293. goto out;
  294. }
  295. token = qlist_pop(working);
  296. }
  297. qobject_decref(token);
  298. token = NULL;
  299. } else {
  300. token = qlist_pop(working);
  301. qobject_decref(token);
  302. token = NULL;
  303. }
  304. QDECREF(*tokens);
  305. *tokens = working;
  306. return QOBJECT(dict);
  307. out:
  308. qobject_decref(token);
  309. QDECREF(working);
  310. QDECREF(dict);
  311. return NULL;
  312. }
  313. static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  314. {
  315. QList *list = NULL;
  316. QObject *token, *peek;
  317. QList *working = qlist_copy(*tokens);
  318. token = qlist_pop(working);
  319. if (!token_is_operator(token, '[')) {
  320. goto out;
  321. }
  322. qobject_decref(token);
  323. token = NULL;
  324. list = qlist_new();
  325. peek = qlist_peek(working);
  326. if (!token_is_operator(peek, ']')) {
  327. QObject *obj;
  328. obj = parse_value(ctxt, &working, ap);
  329. if (obj == NULL) {
  330. parse_error(ctxt, token, "expecting value");
  331. goto out;
  332. }
  333. qlist_append_obj(list, obj);
  334. token = qlist_pop(working);
  335. while (!token_is_operator(token, ']')) {
  336. if (!token_is_operator(token, ',')) {
  337. parse_error(ctxt, token, "expected separator in list");
  338. goto out;
  339. }
  340. qobject_decref(token);
  341. token = NULL;
  342. obj = parse_value(ctxt, &working, ap);
  343. if (obj == NULL) {
  344. parse_error(ctxt, token, "expecting value");
  345. goto out;
  346. }
  347. qlist_append_obj(list, obj);
  348. token = qlist_pop(working);
  349. }
  350. qobject_decref(token);
  351. token = NULL;
  352. } else {
  353. token = qlist_pop(working);
  354. qobject_decref(token);
  355. token = NULL;
  356. }
  357. QDECREF(*tokens);
  358. *tokens = working;
  359. return QOBJECT(list);
  360. out:
  361. qobject_decref(token);
  362. QDECREF(working);
  363. QDECREF(list);
  364. return NULL;
  365. }
  366. static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
  367. {
  368. QObject *token, *ret;
  369. QList *working = qlist_copy(*tokens);
  370. token = qlist_pop(working);
  371. if (token_get_type(token) != JSON_KEYWORD) {
  372. goto out;
  373. }
  374. if (token_is_keyword(token, "true")) {
  375. ret = QOBJECT(qbool_from_int(true));
  376. } else if (token_is_keyword(token, "false")) {
  377. ret = QOBJECT(qbool_from_int(false));
  378. } else {
  379. parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
  380. goto out;
  381. }
  382. qobject_decref(token);
  383. QDECREF(*tokens);
  384. *tokens = working;
  385. return ret;
  386. out:
  387. qobject_decref(token);
  388. QDECREF(working);
  389. return NULL;
  390. }
  391. static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  392. {
  393. QObject *token = NULL, *obj;
  394. QList *working = qlist_copy(*tokens);
  395. if (ap == NULL) {
  396. goto out;
  397. }
  398. token = qlist_pop(working);
  399. if (token_is_escape(token, "%p")) {
  400. obj = va_arg(*ap, QObject *);
  401. } else if (token_is_escape(token, "%i")) {
  402. obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
  403. } else if (token_is_escape(token, "%d")) {
  404. obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
  405. } else if (token_is_escape(token, "%ld")) {
  406. obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
  407. } else if (token_is_escape(token, "%lld") ||
  408. token_is_escape(token, "%I64d")) {
  409. obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
  410. } else if (token_is_escape(token, "%s")) {
  411. obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
  412. } else if (token_is_escape(token, "%f")) {
  413. obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
  414. } else {
  415. goto out;
  416. }
  417. qobject_decref(token);
  418. QDECREF(*tokens);
  419. *tokens = working;
  420. return obj;
  421. out:
  422. qobject_decref(token);
  423. QDECREF(working);
  424. return NULL;
  425. }
  426. static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
  427. {
  428. QObject *token, *obj;
  429. QList *working = qlist_copy(*tokens);
  430. token = qlist_pop(working);
  431. switch (token_get_type(token)) {
  432. case JSON_STRING:
  433. obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
  434. break;
  435. case JSON_INTEGER:
  436. obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
  437. break;
  438. case JSON_FLOAT:
  439. /* FIXME dependent on locale */
  440. obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
  441. break;
  442. default:
  443. goto out;
  444. }
  445. qobject_decref(token);
  446. QDECREF(*tokens);
  447. *tokens = working;
  448. return obj;
  449. out:
  450. qobject_decref(token);
  451. QDECREF(working);
  452. return NULL;
  453. }
  454. static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  455. {
  456. QObject *obj;
  457. obj = parse_object(ctxt, tokens, ap);
  458. if (obj == NULL) {
  459. obj = parse_array(ctxt, tokens, ap);
  460. }
  461. if (obj == NULL) {
  462. obj = parse_escape(ctxt, tokens, ap);
  463. }
  464. if (obj == NULL) {
  465. obj = parse_keyword(ctxt, tokens);
  466. }
  467. if (obj == NULL) {
  468. obj = parse_literal(ctxt, tokens);
  469. }
  470. return obj;
  471. }
  472. QObject *json_parser_parse(QList *tokens, va_list *ap)
  473. {
  474. JSONParserContext ctxt = {};
  475. QList *working = qlist_copy(tokens);
  476. QObject *result;
  477. result = parse_value(&ctxt, &working, ap);
  478. QDECREF(working);
  479. return result;
  480. }