преди 7 години · b2da4a4d75
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -80,6 +80,8 @@
 
															  *    escape = %x5C              ; \
														
 
															  *    quotation-mark = %x22      ; "
														
 
															  *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
														
 
															+ *    [This lexer accepts any non-control character after escape, and
														
 
															+ *    leaves rejecting invalid ones to the parser.]
														
 
															  *
														
 
															  *
														
 
															  * Extensions over RFC 8259:
														
@@ -99,16 +101,8 @@
 
															 enum json_lexer_state {
														
 
															     IN_ERROR = 0,               /* must really be 0, see json_lexer[] */
														
 
															-    IN_DQ_UCODE3,
														
 
															-    IN_DQ_UCODE2,
														
 
															-    IN_DQ_UCODE1,
														
 
															-    IN_DQ_UCODE0,
														
 
															     IN_DQ_STRING_ESCAPE,
														
 
															     IN_DQ_STRING,
														
 
															-    IN_SQ_UCODE3,
														
 
															-    IN_SQ_UCODE2,
														
 
															-    IN_SQ_UCODE1,
														
 
															-    IN_SQ_UCODE0,
														
 
															     IN_SQ_STRING_ESCAPE,
														
 
															     IN_SQ_STRING,
														
 
															     IN_ZERO,
														
@@ -144,37 +138,8 @@ static const uint8_t json_lexer[][256] =  {
 
															     /* Relies on default initialization to IN_ERROR! */
														
 
															     /* double quote string */
														
 
															-    [IN_DQ_UCODE3] = {
														
 
															-        ['0' ... '9'] = IN_DQ_STRING,
														
 
															-        ['a' ... 'f'] = IN_DQ_STRING,
														
 
															-        ['A' ... 'F'] = IN_DQ_STRING,
														
 
															-    },
														
 
															-    [IN_DQ_UCODE2] = {
														
 
															-        ['0' ... '9'] = IN_DQ_UCODE3,
														
 
															-        ['a' ... 'f'] = IN_DQ_UCODE3,
														
 
															-        ['A' ... 'F'] = IN_DQ_UCODE3,
														
 
															-    },
														
 
															-    [IN_DQ_UCODE1] = {
														
 
															-        ['0' ... '9'] = IN_DQ_UCODE2,
														
 
															-        ['a' ... 'f'] = IN_DQ_UCODE2,
														
 
															-        ['A' ... 'F'] = IN_DQ_UCODE2,
														
 
															-    },
														
 
															-    [IN_DQ_UCODE0] = {
														
 
															-        ['0' ... '9'] = IN_DQ_UCODE1,
														
 
															-        ['a' ... 'f'] = IN_DQ_UCODE1,
														
 
															-        ['A' ... 'F'] = IN_DQ_UCODE1,
														
 
															-    },
														
 
															     [IN_DQ_STRING_ESCAPE] = {
														
 
															-        ['b'] = IN_DQ_STRING,
														
 
															-        ['f'] =  IN_DQ_STRING,
														
 
															-        ['n'] =  IN_DQ_STRING,
														
 
															-        ['r'] =  IN_DQ_STRING,
														
 
															-        ['t'] =  IN_DQ_STRING,
														
 
															-        ['/'] = IN_DQ_STRING,
														
 
															-        ['\\'] = IN_DQ_STRING,
														
 
															-        ['\''] = IN_DQ_STRING,
														
 
															-        ['\"'] = IN_DQ_STRING,
														
 
															-        ['u'] = IN_DQ_UCODE0,
														
 
															+        [0x20 ... 0xFD] = IN_DQ_STRING,
														
 
															     },
														
 
															     [IN_DQ_STRING] = {
														
 
															         [0x20 ... 0xFD] = IN_DQ_STRING,
														
@@ -183,37 +148,8 @@ static const uint8_t json_lexer[][256] =  {
 
															     },
														
 
															     /* single quote string */
														
 
															-    [IN_SQ_UCODE3] = {
														
 
															-        ['0' ... '9'] = IN_SQ_STRING,
														
 
															-        ['a' ... 'f'] = IN_SQ_STRING,
														
 
															-        ['A' ... 'F'] = IN_SQ_STRING,
														
 
															-    },
														
 
															-    [IN_SQ_UCODE2] = {
														
 
															-        ['0' ... '9'] = IN_SQ_UCODE3,
														
 
															-        ['a' ... 'f'] = IN_SQ_UCODE3,
														
 
															-        ['A' ... 'F'] = IN_SQ_UCODE3,
														
 
															-    },
														
 
															-    [IN_SQ_UCODE1] = {
														
 
															-        ['0' ... '9'] = IN_SQ_UCODE2,
														
 
															-        ['a' ... 'f'] = IN_SQ_UCODE2,
														
 
															-        ['A' ... 'F'] = IN_SQ_UCODE2,
														
 
															-    },
														
 
															-    [IN_SQ_UCODE0] = {
														
 
															-        ['0' ... '9'] = IN_SQ_UCODE1,
														
 
															-        ['a' ... 'f'] = IN_SQ_UCODE1,
														
 
															-        ['A' ... 'F'] = IN_SQ_UCODE1,
														
 
															-    },
														
 
															     [IN_SQ_STRING_ESCAPE] = {
														
 
															-        ['b'] = IN_SQ_STRING,
														
 
															-        ['f'] =  IN_SQ_STRING,
														
 
															-        ['n'] =  IN_SQ_STRING,
														
 
															-        ['r'] =  IN_SQ_STRING,
														
 
															-        ['t'] =  IN_SQ_STRING,
														
 
															-        ['/'] = IN_SQ_STRING,
														
 
															-        ['\\'] = IN_SQ_STRING,
														
 
															-        ['\''] = IN_SQ_STRING,
														
 
															-        ['\"'] = IN_SQ_STRING,
														
 
															-        ['u'] = IN_SQ_UCODE0,
														
 
															+        [0x20 ... 0xFD] = IN_SQ_STRING,
														
 
															     },
														
 
															     [IN_SQ_STRING] = {
														
 
															         [0x20 ... 0xFD] = IN_SQ_STRING,
														
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -106,30 +106,40 @@ static int hex2decimal(char ch)
 
															 }
														
 
															 /**
														
 
															- * parse_string(): Parse a json string and return a QObject
														
 
															+ * parse_string(): Parse a JSON string
														
 
															  *
														
 
															- *  string
														
 
															- *      ""
														
 
															- *      " chars "
														
 
															- *  chars
														
 
															- *      char
														
 
															- *      char chars
														
 
															- *  char
														
 
															- *      any-Unicode-character-
														
 
															- *          except-"-or-\-or-
														
 
															- *          control-character
														
 
															- *      \"
														
 
															- *      \\
														
 
															- *      \/
														
 
															- *      \b
														
 
															- *      \f
														
 
															- *      \n
														
 
															- *      \r
														
 
															- *      \t
														
 
															- *      \u four-hex-digits 
														
 
															+ * From RFC 8259 "The JavaScript Object Notation (JSON) Data
														
 
															+ * Interchange Format":
														
 
															+ *
														
 
															+ *    char = unescaped /
														
 
															+ *        escape (
														
 
															+ *            %x22 /          ; "    quotation mark  U+0022
														
 
															+ *            %x5C /          ; \    reverse solidus U+005C
														
 
															+ *            %x2F /          ; /    solidus         U+002F
														
 
															+ *            %x62 /          ; b    backspace       U+0008
														
 
															+ *            %x66 /          ; f    form feed       U+000C
														
 
															+ *            %x6E /          ; n    line feed       U+000A
														
 
															+ *            %x72 /          ; r    carriage return U+000D
														
 
															+ *            %x74 /          ; t    tab             U+0009
														
 
															+ *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
														
 
															+ *    escape = %x5C              ; \
														
 
															+ *    quotation-mark = %x22      ; "
														
 
															+ *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
														
 
															+ *
														
 
															+ * Extensions over RFC 8259:
														
 
															+ * - Extra escape sequence in strings:
														
 
															+ *   0x27 (apostrophe) is recognized after escape, too
														
 
															+ * - Single-quoted strings:
														
 
															+ *   Like double-quoted strings, except they're delimited by %x27
														
 
															+ *   (apostrophe) instead of %x22 (quotation mark), and can't contain
														
 
															+ *   unescaped apostrophe, but can contain unescaped quotation mark.
														
 
															+ *
														
 
															+ * Note:
														
 
															+ * - Encoding is modified UTF-8.
														
 
															+ * - Invalid Unicode characters are rejected.
														
 
															+ * - Control characters \x00..\x1F are rejected by the lexer.
														
 
															  */
														
 
															-static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
														
 
															-                                         JSONToken *token)
														
 
															+static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
														
 
															 {
														
 
															     const char *ptr = token->str;
														
 
															     QString *str;
														
@@ -495,7 +505,7 @@ static QObject *parse_literal(JSONParserContext *ctxt)
 
															     switch (token->type) {
														
 
															     case JSON_STRING:
														
 
															-        return QOBJECT(qstring_from_escaped_str(ctxt, token));
														
 
															+        return QOBJECT(parse_string(ctxt, token));
														
 
															     case JSON_INTEGER: {
														
 
															         /*
														
 
															          * Represent JSON_INTEGER as QNUM_I64 if possible, else as