xref: /illumos-kvm-cmd/json-parser.c (revision 68396ea9)
1 /*
2  * JSON Parser
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13 
14 #include <stdarg.h>
15 
16 #include "qemu-common.h"
17 #include "qstring.h"
18 #include "qint.h"
19 #include "qdict.h"
20 #include "qlist.h"
21 #include "qfloat.h"
22 #include "qbool.h"
23 #include "json-parser.h"
24 #include "json-lexer.h"
25 
/*
 * Parser context handed to every parse_*() routine and to parse_error().
 * It currently has no members; it only reserves a place for future state.
 */
typedef struct JSONParserContext
{
} JSONParserContext;
29 
/* Assert that @cond is false; a true @cond indicates an internal bug. */
#define BUG_ON(cond) assert(!(cond))
31 
32 /**
33  * TODO
34  *
35  * 0) make errors meaningful again
36  * 1) add geometry information to tokens
37  * 3) should we return a parsed size?
38  * 4) deal with premature EOI
39  */
40 
41 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap);
42 
43 /**
44  * Token manipulators
45  *
46  * tokens are dictionaries that contain a type, a string value, and geometry information
47  * about a token identified by the lexer.  These are routines that make working with
48  * these objects a bit easier.
49  */
/* Return the string stored under the "token" key of the token dictionary. */
static const char *token_get_value(QObject *obj)
{
    const char *text;

    text = qdict_get_str(qobject_to_qdict(obj), "token");

    return text;
}
54 
token_get_type(QObject * obj)55 static JSONTokenType token_get_type(QObject *obj)
56 {
57     return qdict_get_int(qobject_to_qdict(obj), "type");
58 }
59 
/*
 * Return 1 if @obj is a JSON_OPERATOR token whose text is exactly the single
 * character @op, 0 otherwise.
 */
static int token_is_operator(QObject *obj, char op)
{
    const char *text;

    if (token_get_type(obj) == JSON_OPERATOR) {
        text = token_get_value(obj);

        /* first character matches and the string ends right after it */
        return (text[0] == op) && (text[1] == 0);
    }

    return 0;
}
72 
/*
 * Return 1 if @obj is a JSON_KEYWORD token whose text equals @value,
 * 0 otherwise.  The token text is only fetched once the type matches.
 */
static int token_is_keyword(QObject *obj, const char *value)
{
    int right_type = (token_get_type(obj) == JSON_KEYWORD);

    return right_type && strcmp(token_get_value(obj), value) == 0;
}
81 
/*
 * Return 1 if @obj is a JSON_ESCAPE token whose text equals @value,
 * 0 otherwise.
 */
static int token_is_escape(QObject *obj, const char *value)
{
    if (token_get_type(obj) == JSON_ESCAPE) {
        return (strcmp(token_get_value(obj), value) == 0);
    }

    return 0;
}
90 
91 /**
92  * Error handler
93  */
parse_error(JSONParserContext * ctxt,QObject * token,const char * msg,...)94 static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
95                                            QObject *token, const char *msg, ...)
96 {
97     va_list ap;
98     va_start(ap, msg);
99     fprintf(stderr, "parse error: ");
100     vfprintf(stderr, msg, ap);
101     fprintf(stderr, "\n");
102     va_end(ap);
103 }
104 
105 /**
106  * String helpers
107  *
108  * These helpers are used to unescape strings.
109  */
/*
 * Encode the 16-bit code unit @wchar as a NUL-terminated UTF-8 sequence in
 * @buffer.
 *
 * The result needs 2, 3 or 4 bytes including the terminator, depending on
 * the magnitude of @wchar; @buffer_length is asserted to be large enough
 * before anything is written.
 */
static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
{
    char *out = buffer;

    if (wchar > 0x07FF) {
        /* three payload bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        assert(!(buffer_length < 4));

        *out++ = 0xE0 | ((wchar >> 12) & 0x0F);
        *out++ = 0x80 | ((wchar >> 6) & 0x3F);
        *out++ = 0x80 | (wchar & 0x3F);
    } else if (wchar > 0x007F) {
        /* two payload bytes: 110xxxxx 10xxxxxx */
        assert(!(buffer_length < 3));

        *out++ = 0xC0 | ((wchar >> 6) & 0x1F);
        *out++ = 0x80 | (wchar & 0x3F);
    } else {
        /* plain 7-bit character */
        assert(!(buffer_length < 2));

        *out++ = wchar & 0x7F;
    }

    *out = 0;
}
132 
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other
 * character.
 */
static int hex2decimal(char ch)
{
    int value = -1;

    if (ch >= 'A' && ch <= 'F') {
        value = (ch - 'A') + 10;
    } else if (ch >= 'a' && ch <= 'f') {
        value = (ch - 'a') + 10;
    } else if (ch >= '0' && ch <= '9') {
        value = ch - '0';
    }

    return value;
}
145 
146 /**
 * qstring_from_escaped_str(): Unescape a JSON string token and return a QString
148  *
149  *  string
150  *      ""
151  *      " chars "
152  *  chars
153  *      char
154  *      char chars
155  *  char
156  *      any-Unicode-character-
157  *          except-"-or-\-or-
158  *          control-character
159  *      \"
160  *      \\
161  *      \/
162  *      \b
163  *      \f
164  *      \n
165  *      \r
166  *      \t
167  *      \u four-hex-digits
168  */
qstring_from_escaped_str(JSONParserContext * ctxt,QObject * token)169 static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
170 {
171     const char *ptr = token_get_value(token);
172     QString *str;
173     int double_quote = 1;
174 
175     if (*ptr == '"') {
176         double_quote = 1;
177     } else {
178         double_quote = 0;
179     }
180     ptr++;
181 
182     str = qstring_new();
183     while (*ptr &&
184            ((double_quote && *ptr != '"') || (!double_quote && *ptr != '\''))) {
185         if (*ptr == '\\') {
186             ptr++;
187 
188             switch (*ptr) {
189             case '"':
190                 qstring_append(str, "\"");
191                 ptr++;
192                 break;
193             case '\'':
194                 qstring_append(str, "'");
195                 ptr++;
196                 break;
197             case '\\':
198                 qstring_append(str, "\\");
199                 ptr++;
200                 break;
201             case '/':
202                 qstring_append(str, "/");
203                 ptr++;
204                 break;
205             case 'b':
206                 qstring_append(str, "\b");
207                 ptr++;
208                 break;
209             case 'f':
210                 qstring_append(str, "\f");
211                 ptr++;
212                 break;
213             case 'n':
214                 qstring_append(str, "\n");
215                 ptr++;
216                 break;
217             case 'r':
218                 qstring_append(str, "\r");
219                 ptr++;
220                 break;
221             case 't':
222                 qstring_append(str, "\t");
223                 ptr++;
224                 break;
225             case 'u': {
226                 uint16_t unicode_char = 0;
227                 char utf8_char[4];
228                 int i = 0;
229 
230                 ptr++;
231 
232                 for (i = 0; i < 4; i++) {
233                     if (qemu_isxdigit(*ptr)) {
234                         unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
235                     } else {
236                         parse_error(ctxt, token,
237                                     "invalid hex escape sequence in string");
238                         goto out;
239                     }
240                     ptr++;
241                 }
242 
243                 wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
244                 qstring_append(str, utf8_char);
245             }   break;
246             default:
247                 parse_error(ctxt, token, "invalid escape sequence in string");
248                 goto out;
249             }
250         } else {
251             char dummy[2];
252 
253             dummy[0] = *ptr++;
254             dummy[1] = 0;
255 
256             qstring_append(str, dummy);
257         }
258     }
259 
260     return str;
261 
262 out:
263     QDECREF(str);
264     return NULL;
265 }
266 
267 /**
268  * Parsing rules
269  */
/**
 * parse_pair(): Parse one `key : value` member and store it in @dict.
 *
 * @ctxt:   parser context, forwarded to the sub-parsers and parse_error()
 * @dict:   dictionary that receives the parsed member
 * @tokens: in/out token list; on success it is released and replaced by the
 *          list of remaining tokens, on failure it is left untouched
 * @ap:     optional argument list forwarded to parse_value()
 *
 * The key must parse to a QTYPE_QSTRING value.  Running out of tokens is
 * reported as "premature EOI" instead of handing a NULL token to the
 * token_*() helpers.
 *
 * Returns 0 on success and -1 on failure.
 */
static int parse_pair(JSONParserContext *ctxt, QDict *dict, QList **tokens, va_list *ap)
{
    /* key starts out NULL so the early exit below can safely release it */
    QObject *key = NULL, *token = NULL, *value, *peek;
    QList *working = qlist_copy(*tokens);

    /* NOTE(review): relies on qlist_peek()/qlist_pop() yielding NULL for an
     * exhausted list; that behaviour lives in the QList code, not here. */
    peek = qlist_peek(working);
    if (peek == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    key = parse_value(ctxt, &working, ap);
    if (!key || qobject_type(key) != QTYPE_QSTRING) {
        parse_error(ctxt, peek, "key is not a string in object");
        goto out;
    }

    token = qlist_pop(working);
    if (token == NULL) {
        parse_error(ctxt, NULL, "premature EOI");
        goto out;
    }

    if (!token_is_operator(token, ':')) {
        parse_error(ctxt, token, "missing : in object pair");
        goto out;
    }

    value = parse_value(ctxt, &working, ap);
    if (value == NULL) {
        parse_error(ctxt, token, "Missing value in dict");
        goto out;
    }

    qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);

    qobject_decref(token);
    qobject_decref(key);

    /* commit: the caller continues with whatever is left in working */
    QDECREF(*tokens);
    *tokens = working;

    return 0;

out:
    /* token and key may still be NULL here */
    qobject_decref(token);
    qobject_decref(key);
    QDECREF(working);

    return -1;
}
310 
parse_object(JSONParserContext * ctxt,QList ** tokens,va_list * ap)311 static QObject *parse_object(JSONParserContext *ctxt, QList **tokens, va_list *ap)
312 {
313     QDict *dict = NULL;
314     QObject *token, *peek;
315     QList *working = qlist_copy(*tokens);
316 
317     token = qlist_pop(working);
318     if (!token_is_operator(token, '{')) {
319         goto out;
320     }
321     qobject_decref(token);
322     token = NULL;
323 
324     dict = qdict_new();
325 
326     peek = qlist_peek(working);
327     if (!token_is_operator(peek, '}')) {
328         if (parse_pair(ctxt, dict, &working, ap) == -1) {
329             goto out;
330         }
331 
332         token = qlist_pop(working);
333         while (!token_is_operator(token, '}')) {
334             if (!token_is_operator(token, ',')) {
335                 parse_error(ctxt, token, "expected separator in dict");
336                 goto out;
337             }
338             qobject_decref(token);
339             token = NULL;
340 
341             if (parse_pair(ctxt, dict, &working, ap) == -1) {
342                 goto out;
343             }
344 
345             token = qlist_pop(working);
346         }
347         qobject_decref(token);
348         token = NULL;
349     } else {
350         token = qlist_pop(working);
351         qobject_decref(token);
352         token = NULL;
353     }
354 
355     QDECREF(*tokens);
356     *tokens = working;
357 
358     return QOBJECT(dict);
359 
360 out:
361     qobject_decref(token);
362     QDECREF(working);
363     QDECREF(dict);
364     return NULL;
365 }
366 
parse_array(JSONParserContext * ctxt,QList ** tokens,va_list * ap)367 static QObject *parse_array(JSONParserContext *ctxt, QList **tokens, va_list *ap)
368 {
369     QList *list = NULL;
370     QObject *token, *peek;
371     QList *working = qlist_copy(*tokens);
372 
373     token = qlist_pop(working);
374     if (!token_is_operator(token, '[')) {
375         goto out;
376     }
377     qobject_decref(token);
378     token = NULL;
379 
380     list = qlist_new();
381 
382     peek = qlist_peek(working);
383     if (!token_is_operator(peek, ']')) {
384         QObject *obj;
385 
386         obj = parse_value(ctxt, &working, ap);
387         if (obj == NULL) {
388             parse_error(ctxt, token, "expecting value");
389             goto out;
390         }
391 
392         qlist_append_obj(list, obj);
393 
394         token = qlist_pop(working);
395         while (!token_is_operator(token, ']')) {
396             if (!token_is_operator(token, ',')) {
397                 parse_error(ctxt, token, "expected separator in list");
398                 goto out;
399             }
400 
401             qobject_decref(token);
402             token = NULL;
403 
404             obj = parse_value(ctxt, &working, ap);
405             if (obj == NULL) {
406                 parse_error(ctxt, token, "expecting value");
407                 goto out;
408             }
409 
410             qlist_append_obj(list, obj);
411 
412             token = qlist_pop(working);
413         }
414 
415         qobject_decref(token);
416         token = NULL;
417     } else {
418         token = qlist_pop(working);
419         qobject_decref(token);
420         token = NULL;
421     }
422 
423     QDECREF(*tokens);
424     *tokens = working;
425 
426     return QOBJECT(list);
427 
428 out:
429     qobject_decref(token);
430     QDECREF(working);
431     QDECREF(list);
432     return NULL;
433 }
434 
parse_keyword(JSONParserContext * ctxt,QList ** tokens)435 static QObject *parse_keyword(JSONParserContext *ctxt, QList **tokens)
436 {
437     QObject *token, *ret;
438     QList *working = qlist_copy(*tokens);
439 
440     token = qlist_pop(working);
441 
442     if (token_get_type(token) != JSON_KEYWORD) {
443         goto out;
444     }
445 
446     if (token_is_keyword(token, "true")) {
447         ret = QOBJECT(qbool_from_int(true));
448     } else if (token_is_keyword(token, "false")) {
449         ret = QOBJECT(qbool_from_int(false));
450     } else {
451         parse_error(ctxt, token, "invalid keyword `%s'", token_get_value(token));
452         goto out;
453     }
454 
455     qobject_decref(token);
456     QDECREF(*tokens);
457     *tokens = working;
458 
459     return ret;
460 
461 out:
462     qobject_decref(token);
463     QDECREF(working);
464 
465     return NULL;
466 }
467 
parse_escape(JSONParserContext * ctxt,QList ** tokens,va_list * ap)468 static QObject *parse_escape(JSONParserContext *ctxt, QList **tokens, va_list *ap)
469 {
470     QObject *token = NULL, *obj;
471     QList *working = qlist_copy(*tokens);
472 
473     if (ap == NULL) {
474         goto out;
475     }
476 
477     token = qlist_pop(working);
478 
479     if (token_is_escape(token, "%p")) {
480         obj = va_arg(*ap, QObject *);
481     } else if (token_is_escape(token, "%i")) {
482         obj = QOBJECT(qbool_from_int(va_arg(*ap, int)));
483     } else if (token_is_escape(token, "%d")) {
484         obj = QOBJECT(qint_from_int(va_arg(*ap, int)));
485     } else if (token_is_escape(token, "%ld")) {
486         obj = QOBJECT(qint_from_int(va_arg(*ap, long)));
487     } else if (token_is_escape(token, "%lld") ||
488                token_is_escape(token, "%I64d")) {
489         obj = QOBJECT(qint_from_int(va_arg(*ap, long long)));
490     } else if (token_is_escape(token, "%s")) {
491         obj = QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
492     } else if (token_is_escape(token, "%f")) {
493         obj = QOBJECT(qfloat_from_double(va_arg(*ap, double)));
494     } else {
495         goto out;
496     }
497 
498     qobject_decref(token);
499     QDECREF(*tokens);
500     *tokens = working;
501 
502     return obj;
503 
504 out:
505     qobject_decref(token);
506     QDECREF(working);
507 
508     return NULL;
509 }
510 
parse_literal(JSONParserContext * ctxt,QList ** tokens)511 static QObject *parse_literal(JSONParserContext *ctxt, QList **tokens)
512 {
513     QObject *token, *obj;
514     QList *working = qlist_copy(*tokens);
515 
516     token = qlist_pop(working);
517     switch (token_get_type(token)) {
518     case JSON_STRING:
519         obj = QOBJECT(qstring_from_escaped_str(ctxt, token));
520         break;
521     case JSON_INTEGER:
522         obj = QOBJECT(qint_from_int(strtoll(token_get_value(token), NULL, 10)));
523         break;
524     case JSON_FLOAT:
525         /* FIXME dependent on locale */
526         obj = QOBJECT(qfloat_from_double(strtod(token_get_value(token), NULL)));
527         break;
528     default:
529         goto out;
530     }
531 
532     qobject_decref(token);
533     QDECREF(*tokens);
534     *tokens = working;
535 
536     return obj;
537 
538 out:
539     qobject_decref(token);
540     QDECREF(working);
541 
542     return NULL;
543 }
544 
parse_value(JSONParserContext * ctxt,QList ** tokens,va_list * ap)545 static QObject *parse_value(JSONParserContext *ctxt, QList **tokens, va_list *ap)
546 {
547     QObject *obj;
548 
549     obj = parse_object(ctxt, tokens, ap);
550     if (obj == NULL) {
551         obj = parse_array(ctxt, tokens, ap);
552     }
553     if (obj == NULL) {
554         obj = parse_escape(ctxt, tokens, ap);
555     }
556     if (obj == NULL) {
557         obj = parse_keyword(ctxt, tokens);
558     }
559     if (obj == NULL) {
560         obj = parse_literal(ctxt, tokens);
561     }
562 
563     return obj;
564 }
565 
json_parser_parse(QList * tokens,va_list * ap)566 QObject *json_parser_parse(QList *tokens, va_list *ap)
567 {
568     JSONParserContext ctxt = {};
569     QList *working = qlist_copy(tokens);
570     QObject *result;
571 
572     result = parse_value(&ctxt, &working, ap);
573 
574     QDECREF(working);
575 
576     return result;
577 }
578