/* $NetBSD: is_json.c,v 1.3 2019/05/22 17:26:05 christos Exp $ */
/*-
* Copyright (c) 2018 Christos Zoulas
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Parse JSON object serialization format (RFC-7159)
*/
#ifndef TEST
#include "file.h"
#ifndef lint
#if 0
FILE_RCSID("@(#)$File: is_json.c,v 1.13 2019/03/02 01:08:10 christos Exp $")
#else
__RCSID("$NetBSD: is_json.c,v 1.3 2019/05/22 17:26:05 christos Exp $");
#endif
#endif
#include <string.h>
#include "magic.h"
#endif
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c) \
printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
#else
#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0)
#endif
#define JSON_ARRAY 0
#define JSON_CONSTANT 1
#define JSON_NUMBER 2
#define JSON_OBJECT 3
#define JSON_STRING 4
#define JSON_ARRAYN 5
#define JSON_MAX 6
/*
* if JSON_COUNT != 0:
* count all the objects, require that we have the whole data file
* otherwise:
* stop if we find an object or an array
*/
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif
static int json_parse(const unsigned char **, const unsigned char *, size_t *,
size_t);
static int
json_isspace(const unsigned char uc)
{
switch (uc) {
case ' ':
case '\n':
case '\r':
case '\t':
return 1;
default:
return 0;
}
}
static int
json_isdigit(unsigned char uc)
{
switch (uc) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
return 1;
default:
return 0;
}
}
static int
json_isxdigit(unsigned char uc)
{
if (json_isdigit(uc))
return 1;
switch (uc) {
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
return 1;
default:
return 0;
}
}
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
while (uc < ue && json_isspace(*uc))
uc++;
return uc;
}
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue)
{
const unsigned char *uc = *ucp;
size_t i;
DPRINTF("Parse string: ", uc, *ucp);
while (uc < ue) {
switch (*uc++) {
case '\0':
goto out;
case '\\':
if (uc == ue)
goto out;
switch (*uc++) {
case '\0':
goto out;
case '"':
case '\\':
case '/':
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
continue;
case 'u':
if (ue - uc < 4) {
uc = ue;
goto out;
}
for (i = 0; i < 4; i++)
if (!json_isxdigit(*uc++))
goto out;
continue;
default:
goto out;
}
case '"':
*ucp = uc;
return 1;
default:
continue;
}
}
out:
DPRINTF("Bad string: ", uc, *ucp);
*ucp = uc;
return 0;
}
static int
json_parse_array(const unsigned char **ucp, const unsigned char *ue,
size_t *st, size_t lvl)
{
const unsigned char *uc = *ucp;
int more = 0; /* Array has more than 1 element */
DPRINTF("Parse array: ", uc, *ucp);
while (uc < ue) {
if (!json_parse(&uc, ue, st, lvl + 1))
goto out;
if (uc == ue)
goto out;
switch (*uc) {
case ',':
more++;
uc++;
continue;
case ']':
if (more)
st[JSON_ARRAYN]++;
*ucp = uc + 1;
return 1;
default:
goto out;
}
}
out:
DPRINTF("Bad array: ", uc, *ucp);
*ucp = uc;
return 0;
}
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
size_t *st, size_t lvl)
{
const unsigned char *uc = *ucp;
DPRINTF("Parse object: ", uc, *ucp);
while (uc < ue) {
uc = json_skip_space(uc, ue);
if (uc == ue)
goto out;
if (*uc++ != '"') {
DPRINTF("not string", uc, *ucp);
goto out;
}
DPRINTF("next field", uc, *ucp);
if (!json_parse_string(&uc, ue)) {
DPRINTF("not string", uc, *ucp);
goto out;
}
uc = json_skip_space(uc, ue);
if (uc == ue)
goto out;
if (*uc++ != ':') {
DPRINTF("not colon", uc, *ucp);
goto out;
}
if (!json_parse(&uc, ue, st, lvl + 1)) {
DPRINTF("not json", uc, *ucp);
goto out;
}
if (uc == ue)
goto out;
switch (*uc++) {
case ',':
continue;
case '}': /* { */
*ucp = uc;
DPRINTF("Good object: ", uc, *ucp);
return 1;
default:
*ucp = uc - 1;
DPRINTF("not more", uc, *ucp);
goto out;
}
}
out:
DPRINTF("Bad object: ", uc, *ucp);
*ucp = uc;
return 0;
}
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue)
{
const unsigned char *uc = *ucp;
int got = 0;
DPRINTF("Parse number: ", uc, *ucp);
if (uc == ue)
return 0;
if (*uc == '-')
uc++;
for (; uc < ue; uc++) {
if (!json_isdigit(*uc))
break;
got = 1;
}
if (uc == ue)
goto out;
if (*uc == '.')
uc++;
for (; uc < ue; uc++) {
if (!json_isdigit(*uc))
break;
got = 1;
}
if (uc == ue)
goto out;
if (got && (*uc == 'e' || *uc == 'E')) {
uc++;
got = 0;
if (uc == ue)
goto out;
if (*uc == '+' || *uc == '-')
uc++;
for (; uc < ue; uc++) {
if (!json_isdigit(*uc))
break;
got = 1;
}
}
out:
if (!got)
DPRINTF("Bad number: ", uc, *ucp);
else
DPRINTF("Good number: ", uc, *ucp);
*ucp = uc;
return got;
}
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
const char *str, size_t len)
{
const unsigned char *uc = *ucp;
DPRINTF("Parse const: ", uc, *ucp);
for (len--; uc < ue && --len;) {
if (*uc++ == *++str)
continue;
}
if (len)
DPRINTF("Bad const: ", uc, *ucp);
*ucp = uc;
return len == 0;
}
static int
json_parse(const unsigned char **ucp, const unsigned char *ue,
size_t *st, size_t lvl)
{
const unsigned char *uc;
int rv = 0;
int t;
uc = json_skip_space(*ucp, ue);
if (uc == ue)
goto out;
// Avoid recursion
if (lvl > 20)
return 0;
#if JSON_COUNT
/* bail quickly if not counting */
if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
return 1;
#endif
DPRINTF("Parse general: ", uc, *ucp);
switch (*uc++) {
case '"':
rv = json_parse_string(&uc, ue);
t = JSON_STRING;
break;
case '[':
rv = json_parse_array(&uc, ue, st, lvl + 1);
t = JSON_ARRAY;
break;
case '{': /* '}' */
rv = json_parse_object(&uc, ue, st, lvl + 1);
t = JSON_OBJECT;
break;
case 't':
rv = json_parse_const(&uc, ue, "true", sizeof("true"));
t = JSON_CONSTANT;
break;
case 'f':
rv = json_parse_const(&uc, ue, "false", sizeof("false"));
t = JSON_CONSTANT;
break;
case 'n':
rv = json_parse_const(&uc, ue, "null", sizeof("null"));
t = JSON_CONSTANT;
break;
default:
--uc;
rv = json_parse_number(&uc, ue);
t = JSON_NUMBER;
break;
}
if (rv)
st[t]++;
uc = json_skip_space(uc, ue);
out:
*ucp = uc;
DPRINTF("End general: ", uc, *ucp);
if (lvl == 0)
return rv && (st[JSON_ARRAYN] || st[JSON_OBJECT]);
return rv;
}
#ifndef TEST
int
file_is_json(struct magic_set *ms, const struct buffer *b)
{
const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
const unsigned char *ue = uc + b->flen;
size_t st[JSON_MAX];
int mime = ms->flags & MAGIC_MIME;
if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
return 0;
memset(st, 0, sizeof(st));
if (!json_parse(&uc, ue, st, 0))
return 0;
if (mime == MAGIC_MIME_ENCODING)
return 1;
if (mime) {
if (file_printf(ms, "application/json") == -1)
return -1;
return 1;
}
if (file_printf(ms, "JSON data") == -1)
return -1;
#if JSON_COUNT
#define P(n) st[n], st[n] > 1 ? "s" : ""
if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
"u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
"u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
"u >1array%s)",
P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
P(JSON_NUMBER), P(JSON_ARRAYN))
== -1)
return -1;
#endif
return 1;
}
#else
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <err.h>
int
main(int argc, char *argv[])
{
int fd, rv;
struct stat st;
unsigned char *p;
size_t stats[JSON_MAX];
if ((fd = open(argv[1], O_RDONLY)) == -1)
err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
if (fstat(fd, &st) == -1)
err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
if ((p = malloc(st.st_size)) == NULL)
err(EXIT_FAILURE, "Can't allocate %jd bytes",
(intmax_t)st.st_size);
if (read(fd, p, st.st_size) != st.st_size)
err(EXIT_FAILURE, "Can't read %jd bytes",
(intmax_t)st.st_size);
memset(stats, 0, sizeof(stats));
printf("is json %d\n", json_parse((const unsigned char **)&p,
p + st.st_size, stats, 0));
return 0;
}
#endif