json-parser.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. /*
  2. * JSON Parser
  3. *
  4. * Copyright IBM, Corp. 2009
  5. *
  6. * Authors:
  7. * Anthony Liguori <aliguori@us.ibm.com>
  8. *
  9. * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
  10. * See the COPYING.LIB file in the top-level directory.
  11. *
  12. */
  13. #include <stdbool.h>
  14. #include "qemu-common.h"
  15. #include "qstring.h"
  16. #include "qint.h"
  17. #include "qdict.h"
  18. #include "qlist.h"
  19. #include "qfloat.h"
  20. #include "qbool.h"
  21. #include "json-parser.h"
  22. #include "json-lexer.h"
  23. typedef struct JSONParserContext
  24. {
  25. } JSONParserContext;
  26. #define BUG_ON(cond) assert(!(cond))
  27. /**
  28. * TODO
  29. *
  30. * 0) make errors meaningful again
  31. * 1) add geometry information to tokens
  32. * 3) should we return a parsed size?
  33. * 4) deal with premature EOI
  34. */
  35. static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
  36. /**
  37. * Token manipulators
  38. *
  39. * tokens are dictionaries that contain a type, a string value, and geometry information
  40. * about a token identified by the lexer. These are routines that make working with
  41. * these objects a bit easier.
  42. */
  43. static const char *token_get_value(QObject *obj)
  44. {
  45. return qdict_get_str(qobject_to_qdict(obj), "token");
  46. }
  47. static JSONTokenType token_get_type(QObject *obj)
  48. {
  49. return qdict_get_int(qobject_to_qdict(obj), "type");
  50. }
  51. static int token_is_operator(QObject *obj, char op)
  52. {
  53. const char *val;
  54. if (token_get_type(obj) != JSON_OPERATOR) {
  55. return 0;
  56. }
  57. val = token_get_value(obj);
  58. return (val[0] == op) && (val[1] == 0);
  59. }
  60. static int token_is_keyword(QObject *obj, const char *value)
  61. {
  62. if (token_get_type(obj) != JSON_KEYWORD) {
  63. return 0;
  64. }
  65. return strcmp(token_get_value(obj), value) == 0;
  66. }
  67. static int token_is_escape(QObject *obj, const char *value)
  68. {
  69. if (token_get_type(obj) != JSON_ESCAPE) {
  70. return 0;
  71. }
  72. return (strcmp(token_get_value(obj), value) == 0);
  73. }
  74. /**
  75. * Error handler
  76. */
  77. static void parse_error(JSONParserContext *ctxt, QObject *token, const char *msg, ...)
  78. {
  79. fprintf(stderr, "parse error: %s\n", msg);
  80. }
  81. /**
  82. * String helpers
  83. *
  84. * These helpers are used to unescape strings.
  85. */
  86. static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
  87. {
  88. if (wchar <= 0x007F) {
  89. BUG_ON(buffer_length < 2);
  90. buffer[0] = wchar & 0x7F;
  91. buffer[1] = 0;
  92. } else if (wchar <= 0x07FF) {
  93. BUG_ON(buffer_length < 3);
  94. buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
  95. buffer[1] = 0x80 | (wchar & 0x3F);
  96. buffer[2] = 0;
  97. } else {
  98. BUG_ON(buffer_length < 4);
  99. buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
  100. buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
  101. buffer[2] = 0x80 | (wchar & 0x3F);
  102. buffer[3] = 0;
  103. }
  104. }
  105. static int hex2decimal(char ch)
  106. {
  107. if (ch >= '0' && ch <= '9') {
  108. return (ch - '0');
  109. } else if (ch >= 'a' && ch <= 'f') {
  110. return 10 + (ch - 'a');
  111. } else if (ch >= 'A' && ch <= 'F') {
  112. return 10 + (ch - 'A');
  113. }
  114. return -1;
  115. }
  116. /**
  117. * parse_string(): Parse a json string and return a QObject
  118. *
  119. * string
  120. * ""
  121. * " chars "
  122. * chars
  123. * char
  124. * char chars
  125. * char
  126. * any-Unicode-character-
  127. * except-"-or-\-or-
  128. * control-character
  129. * \"
  130. * \\
  131. * \/
  132. * \b
  133. * \f
  134. * \n
  135. * \r
  136. * \t
  137. * \u four-hex-digits
  138. */
  139. static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
  140. {
  141. const char *ptr = token_get_value(token);
  142. QString *str;
  143. int double_quote = 1;
  144. if (*ptr == '"') {
  145. double_quote = 1;
  146. } else {
  147. double_quote = 0;
  148. }
  149. ptr++;
  150. str = qstring_new();
  151. while (*ptr &&
  152. ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
  153. if (*ptr == '\\') {
  154. ptr++;
  155. switch (*ptr) {
  156. case '"':
  157. qstring_append(str, "\"");
  158. ptr++;
  159. break;
  160. case '\'':
  161. qstring_append(str, "'");
  162. ptr++;
  163. break;
  164. case '\\':
  165. qstring_append(str, "\\");
  166. ptr++;
  167. break;
  168. case '/':
  169. qstring_append(str, "/");
  170. ptr++;
  171. break;
  172. case 'b':
  173. qstring_append(str, "\b");
  174. ptr++;
  175. break;
  176. case 'n':
  177. qstring_append(str, "\n");
  178. ptr++;
  179. break;
  180. case 'r':
  181. qstring_append(str, "\r");
  182. ptr++;
  183. break;
  184. case 't':
  185. qstring_append(str, "\t");
  186. ptr++;
  187. break;
  188. case 'u': {
  189. uint16_t unicode_char = 0;
  190. char utf8_char[4];
  191. int i = 0;
  192. ptr++;
  193. for (i = 0; i < 4; i++) {
  194. if (qemu_isxdigit(*ptr)) {
  195. unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
  196. } else {
  197. parse_error(ctxt, token,
  198. "invalid hex escape sequence in string");
  199. goto out;
  200. }
  201. ptr++;
  202. }
  203. wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
  204. qstring_append(str, utf8_char);
  205. } break;
  206. default:
  207. parse_error(ctxt, token, "invalid escape sequence in string");
  208. goto out;
  209. }
  210. } else {
  211. char dummy[2];
  212. dummy[0] = *ptr++;
  213. dummy[1] = 0;
  214. qstring_append(str, dummy);
  215. }
  216. }
  217. return str;
  218. out:
  219. QDECREF(str);
  220. return NULL;
  221. }
  222. /**
  223. * Parsing rules
  224. */
  225. static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
  226. {
  227. QObject *key, *token = NULL, *value, *peek;
  228. QList *working = qlist_copy(*tokens);
  229. peek = qlist_peek(working);
  230. key = parse_value(ctxt, &working, ap);
  231. if (!key || qobject_type(key) != QTYPE_QSTRING) {
  232. parse_error(ctxt, peek, "key is not a string in object");
  233. goto out;
  234. }
  235. token = qlist_pop(working);
  236. if (!token_is_operator(token, ':')) {
  237. parse_error(ctxt, token, "missing : in object pair");
  238. goto out;
  239. }
  240. value = parse_value(ctxt, &working, ap);
  241. if (value == NULL) {
  242. parse_error(ctxt, token, "Missing value in dict");
  243. goto out;
  244. }
  245. qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
  246. qobject_decref(token);
  247. qobject_decref(key);
  248. QDECREF(*tokens);
  249. *tokens = working;
  250. return 0;
  251. out:
  252. qobject_decref(token);
  253. qobject_decref(key);
  254. QDECREF(working);
  255. return -1;
  256. }
  257. static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  258. {
  259. QDict *dict = NULL;
  260. QObject *token, *peek;
  261. QList *working = qlist_copy(*tokens);
  262. token = qlist_pop(working);
  263. if (!token_is_operator(token, '{')) {
  264. goto out;
  265. }
  266. qobject_decref(token);
  267. token = NULL;
  268. dict = qdict_new();
  269. peek = qlist_peek(working);
  270. if (!token_is_operator(peek, '}')) {
  271. if (parse_pair(ctxt, dict, &working, ap) == -1) {
  272. goto out;
  273. }
  274. token = qlist_pop(working);
  275. while (!token_is_operator(token, '}')) {
  276. if (!token_is_operator(token, ',')) {
  277. parse_error(ctxt, token, "expected separator in dict");
  278. goto out;
  279. }
  280. qobject_decref(token);
  281. token = NULL;
  282. if (parse_pair(ctxt, dict, &working, ap) == -1) {
  283. goto out;
  284. }
  285. token = qlist_pop(working);
  286. }
  287. qobject_decref(token);
  288. token = NULL;
  289. } else {
  290. token = qlist_pop(working);
  291. qobject_decref(token);
  292. token = NULL;
  293. }
  294. QDECREF(*tokens);
  295. *tokens = working;
  296. return QOBJECT(dict);
  297. out:
  298. qobject_decref(token);
  299. QDECREF(working);
  300. QDECREF(dict);
  301. return NULL;
  302. }
  303. static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  304. {
  305. QList *list = NULL;
  306. QObject *token, *peek;
  307. QList *working = qlist_copy(*tokens);
  308. token = qlist_pop(working);
  309. if (!token_is_operator(token, '[')) {
  310. goto out;
  311. }
  312. qobject_decref(token);
  313. token = NULL;
  314. list = qlist_new();
  315. peek = qlist_peek(working);
  316. if (!token_is_operator(peek, ']')) {
  317. QObject *obj;
  318. obj = parse_value(ctxt, &working, ap);
  319. if (obj == NULL) {
  320. parse_error(ctxt, token, "expecting value");
  321. goto out;
  322. }
  323. qlist_append_obj(list, obj);
  324. token = qlist_pop(working);
  325. while (!token_is_operator(token, ']')) {
  326. if (!token_is_operator(token, ',')) {
  327. parse_error(ctxt, token, "expected separator in list");
  328. goto out;
  329. }
  330. qobject_decref(token);
  331. token = NULL;
  332. obj = parse_value(ctxt, &working, ap);
  333. if (obj == NULL) {
  334. parse_error(ctxt, token, "expecting value");
  335. goto out;
  336. }
  337. qlist_append_obj(list, obj);
  338. token = qlist_pop(working);
  339. }
  340. qobject_decref(token);
  341. token = NULL;
  342. } else {
  343. token = qlist_pop(working);
  344. qobject_decref(token);
  345. token = NULL;
  346. }
  347. QDECREF(*tokens);
  348. *tokens = working;
  349. return QOBJECT(list);
  350. out:
  351. qobject_decref(token);
  352. QDECREF(working);
  353. QDECREF(list);
  354. return NULL;
  355. }
  356. static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
  357. {
  358. QObject *token, *ret;
  359. QList *working = qlist_copy(*tokens);
  360. token = qlist_pop(working);
  361. if (token_get_type(token) != JSON_KEYWORD) {
  362. goto out;
  363. }
  364. if (token_is_keyword(token, "true")) {
  365. ret = QOBJECT(qbool_from_int(true));
  366. } else if (token_is_keyword(token, "false")) {
  367. ret = QOBJECT(qbool_from_int(false));
  368. } else {
  369. parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
  370. goto out;
  371. }
  372. qobject_decref(token);
  373. QDECREF(*tokens);
  374. *tokens = working;
  375. return ret;
  376. out:
  377. qobject_decref(token);
  378. QDECREF(working);
  379. return NULL;
  380. }
  381. static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  382. {
  383. QObject *token = NULL, *obj;
  384. QList *working = qlist_copy(*tokens);
  385. if (ap == NULL) {
  386. goto out;
  387. }
  388. token = qlist_pop(working);
  389. if (token_is_escape(token, "%p")) {
  390. obj = va_arg(*ap, QObject *);
  391. } else if (token_is_escape(token, "%i")) {
  392. obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
  393. } else if (token_is_escape(token, "%d")) {
  394. obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
  395. } else if (token_is_escape(token, "%ld")) {
  396. obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
  397. } else if (token_is_escape(token, "%lld") ||
  398. token_is_escape(token, "%I64d")) {
  399. obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
  400. } else if (token_is_escape(token, "%s")) {
  401. obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
  402. } else if (token_is_escape(token, "%f")) {
  403. obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
  404. } else {
  405. goto out;
  406. }
  407. qobject_decref(token);
  408. QDECREF(*tokens);
  409. *tokens = working;
  410. return obj;
  411. out:
  412. qobject_decref(token);
  413. QDECREF(working);
  414. return NULL;
  415. }
  416. static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
  417. {
  418. QObject *token, *obj;
  419. QList *working = qlist_copy(*tokens);
  420. token = qlist_pop(working);
  421. switch (token_get_type(token)) {
  422. case JSON_STRING:
  423. obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
  424. break;
  425. case JSON_INTEGER:
  426. obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
  427. break;
  428. case JSON_FLOAT:
  429. /* FIXME dependent on locale */
  430. obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
  431. break;
  432. default:
  433. goto out;
  434. }
  435. qobject_decref(token);
  436. QDECREF(*tokens);
  437. *tokens = working;
  438. return obj;
  439. out:
  440. qobject_decref(token);
  441. QDECREF(working);
  442. return NULL;
  443. }
  444. static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
  445. {
  446. QObject *obj;
  447. obj = parse_object(ctxt, tokens, ap);
  448. if (obj == NULL) {
  449. obj = parse_array(ctxt, tokens, ap);
  450. }
  451. if (obj == NULL) {
  452. obj = parse_escape(ctxt, tokens, ap);
  453. }
  454. if (obj == NULL) {
  455. obj = parse_keyword(ctxt, tokens);
  456. }
  457. if (obj == NULL) {
  458. obj = parse_literal(ctxt, tokens);
  459. }
  460. return obj;
  461. }
  462. QObject *json_parser_parse(QList *tokens, va_list *ap)
  463. {
  464. JSONParserContext ctxt = {};
  465. QList *working = qlist_copy(tokens);
  466. QObject *result;
  467. result = parse_value(&ctxt, &working, ap);
  468. QDECREF(working);
  469. return result;
  470. }