Skip to content

Commit e072bc1

Browse files
CBOR-to-JSON: do properly escape JSON strings
We hadn't bothered, as this was just example-like code to show how one could convert from CBOR to JSON. But as it was added to the library (no extra dependency), we should Do The Right Thing (DTRT) and escape. This patch could have used cbor_value_to_pretty() to print the string, which has better support for UTF-8 escaping and thus checks for UTF-8 correctness, but that would make map_to_json()'s metadata functionality much more complex, especially since we cannot rely on open_memstream() always being available. Therefore, we are partially duplicating cborpretty.c's utf8EscapedDump(). Signed-off-by: Thiago Macieira <[email protected]>
1 parent e692445 commit e072bc1

File tree

4 files changed

+145
-12
lines changed

4 files changed

+145
-12
lines changed

src/cbortojson.c

Lines changed: 110 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -170,9 +170,16 @@ typedef struct ConversionStatus {
170170
static CborError value_to_json(FILE *out, CborValue *it, int flags, CborType type,
171171
int nestingLevel, ConversionStatus *status);
172172

173-
static CborError dump_bytestring_base16(char **result, CborValue *it)
173+
/*
 * Writes the two lowercase hexadecimal digits of `byte` to the first two
 * bytes of `buffer` (high nibble first). No NUL terminator is written, so the
 * caller must provide at least two writable bytes and terminate the string
 * itself if needed.
 */
static void append_hex(void *buffer, uint8_t byte)
{
    static const char digits[] = "0123456789abcdef";
    char *out = buffer;
    unsigned high = byte / 16u;     /* upper nibble */
    unsigned low = byte % 16u;      /* lower nibble */

    out[0] = digits[high];
    out[1] = digits[low];
}
180+
181+
static CborError dump_bytestring_base16(char **result, CborValue *it)
182+
{
176183
size_t i;
177184
size_t n = 0;
178185
uint8_t *buffer;
@@ -195,8 +202,7 @@ static CborError dump_bytestring_base16(char **result, CborValue *it)
195202

196203
for (i = 0; i < n; ++i) {
197204
uint8_t byte = buffer[n + i];
198-
buffer[2*i] = characters[byte >> 4];
199-
buffer[2*i + 1] = characters[byte & 0xf];
205+
append_hex(buffer + 2 * i, byte);
200206
}
201207
return CborNoError;
202208
}
@@ -293,6 +299,96 @@ static CborError dump_bytestring_base64url(char **result, CborValue *it)
293299
return generic_dump_base64(result, it, alphabet);
294300
}
295301

302+
static CborError escape_text_string(char **str, size_t *alloc, size_t *offsetp, const char *input, size_t len)
303+
{
304+
/* JSON requires escaping some characters in strings, so we iterate and
305+
* escape as necessary
306+
* https://www.rfc-editor.org/rfc/rfc8259#section-7:
307+
* All Unicode characters may be placed within the
308+
* quotation marks, except for the characters that MUST be escaped:
309+
* quotation mark, reverse solidus, and the control characters (U+0000
310+
* through U+001F).
311+
* We additionally choose to escape BS, HT, CR, LF and FF.
312+
*/
313+
char *buf = *str;
314+
315+
/* Ensure we have enough space for this chunk. In the worst case, we
316+
* have 6 escaped characters per input character.
317+
*
318+
* The overflow checking here is only practically useful for 32-bit
319+
* machines, as SIZE_MAX/6 for a 64-bit machine is 2.6667 exabytes.
320+
* That is much more than any current architecture can even address and
321+
* cbor_value_get_text_string_chunk() only works for data already
322+
* loaded into memory.
323+
*/
324+
size_t needed;
325+
size_t offset = offsetp ? *offsetp : 0;
326+
if (mul_check_overflow(len, 6, &needed) || add_check_overflow(needed, offset, &needed)
327+
|| add_check_overflow(needed, 1, &needed)) {
328+
return CborErrorDataTooLarge;
329+
}
330+
if (!alloc || needed > *alloc) {
331+
buf = cbor_realloc(buf, needed);
332+
if (!buf)
333+
return CborErrorOutOfMemory;
334+
if (alloc)
335+
*alloc = needed;
336+
}
337+
338+
for (size_t i = 0; i < len; ++i) {
339+
static const char escapeChars[] = "\b\t\n\r\f\"\\";
340+
static const char escapedChars[] = "btnrf\"\\";
341+
unsigned char c = input[i];
342+
343+
char *esc = c > 0 ? strchr(escapeChars, c) : NULL;
344+
if (esc) {
345+
buf[offset++] = '\\';
346+
buf[offset++] = escapedChars[esc - escapeChars];
347+
} else if (c <= 0x1F) {
348+
buf[offset++] = '\\';
349+
buf[offset++] = 'u';
350+
buf[offset++] = '0';
351+
buf[offset++] = '0';
352+
append_hex(buf + offset, c);
353+
offset += 2;
354+
} else {
355+
buf[offset++] = c;
356+
}
357+
}
358+
buf[offset] = '\0';
359+
*str = buf;
360+
if (offsetp)
361+
*offsetp = offset;
362+
return CborNoError;
363+
}
364+
365+
/*
 * Builds a newly allocated, NUL-terminated, JSON-escaped copy of the text
 * string at `it`, reading it chunk by chunk and escaping each chunk with
 * escape_text_string().
 *
 *  str  out: receives the buffer on success; the caller releases it with
 *       cbor_free(). Set to NULL on every error return.
 *  it   the value to read; it is also passed as the "next" iterator to the
 *       chunk getter, and the string iteration is finished on it on success.
 *
 * Returns CborNoError on success, CborErrorOutOfMemory if an allocation
 * fails, or whichever error the iteration or escaping step reported.
 */
static CborError text_string_to_escaped(char **str, CborValue *it)
{
    size_t alloc = 0, offset = 0;
    CborError err;

    *str = NULL;
    err = cbor_value_begin_string_iteration(it);
    while (err == CborNoError) {
        const char *chunk;
        size_t len;
        err = cbor_value_get_text_string_chunk(it, &chunk, &len, it);
        if (err == CborNoError)
            err = escape_text_string(str, &alloc, &offset, chunk, len);
    }

    if (likely(err == CborErrorNoMoreStringChunks)) {
        /* success */
        if (!*str) {
            /* No chunk was escaped, so nothing was allocated: hand back an
             * empty string. It is allocated with cbor_malloc() so that it
             * matches the cbor_free() used to release it (wasteful, but very
             * atypical). */
            *str = cbor_malloc(1);
            if (!*str)
                return CborErrorOutOfMemory;
            **str = '\0';
        }

        err = cbor_value_finish_string_iteration(it);
        if (err == CborNoError)
            return CborNoError;
        /* finishing failed: fall through so the buffer is not leaked */
    }

    cbor_free(*str);
    *str = NULL;
    return err;
}
391+
296392
static CborError add_value_metadata(FILE *out, CborType type, const ConversionStatus *status)
297393
{
298394
int flags = status->flags;
@@ -420,14 +516,20 @@ static CborError stringify_map_key(char **key, CborValue *it, int flags, CborTyp
420516
return CborErrorJsonNotImplemented;
421517
#else
422518
size_t size;
519+
char *stringified;
423520

424-
FILE *memstream = open_memstream(key, &size);
521+
FILE *memstream = open_memstream(&stringified, &size);
425522
if (memstream == NULL)
426523
return CborErrorOutOfMemory; /* could also be EMFILE, but it's unlikely */
427524
CborError err = cbor_value_to_pretty_advance(memstream, it);
428525

429-
if (unlikely(fclose(memstream) < 0 || *key == NULL))
526+
if (unlikely(fclose(memstream) < 0 || stringified == NULL))
430527
return CborErrorInternalError;
528+
if (err == CborNoError) {
529+
/* escape the stringified CBOR stream */
530+
err = escape_text_string(key, NULL, NULL, stringified, size);
531+
}
532+
cbor_free(stringified);
431533
return err;
432534
#endif
433535
}
@@ -452,15 +554,14 @@ static CborError map_to_json(FILE *out, CborValue *it, int flags, int nestingLev
452554
const char *comma = "";
453555
CborError err;
454556
while (!cbor_value_at_end(it)) {
455-
char *key;
557+
char *key = NULL;
456558
if (fprintf(out, "%s", comma) < 0)
457559
return CborErrorIO;
458560
comma = ",";
459561

460562
CborType keyType = cbor_value_get_type(it);
461563
if (likely(keyType == CborTextStringType)) {
462-
size_t n = 0;
463-
err = cbor_value_dup_text_string(it, &key, &n, it);
564+
err = text_string_to_escaped(&key, it);
464565
} else if (flags & CborConvertStringifyMapKeys) {
465566
err = stringify_map_key(&key, it, flags, keyType);
466567
} else {
@@ -570,8 +671,7 @@ static CborError value_to_json(FILE *out, CborValue *it, int flags, CborType typ
570671
err = dump_bytestring_base64url(&str, it);
571672
status->flags = TypeWasNotNative;
572673
} else {
573-
size_t n = 0;
574-
err = cbor_value_dup_text_string(it, &str, &n, it);
674+
err = text_string_to_escaped(&str, it);
575675
}
576676
if (err)
577677
return err;

src/compilersupport_p.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,4 +234,15 @@ static inline bool add_check_overflow(size_t v1, size_t v2, size_t *r)
234234
#endif
235235
}
236236

237+
/*
 * Computes v1 * v2 and stores the (possibly wrapped) product in *r.
 * Returns true if the multiplication overflowed size_t, false if *r holds
 * the exact mathematical product.
 */
static inline bool mul_check_overflow(size_t v1, size_t v2, size_t *r)
{
#if ((defined(__GNUC__) && (__GNUC__ >= 5)) && !defined(__INTEL_COMPILER)) || __has_builtin(__builtin_mul_overflow)
    return __builtin_mul_overflow(v1, v2, r);
#else
    /* unsigned multiplications are well-defined (they wrap modulo 2^N) */
    *r = v1 * v2;
    /* the product wrapped if and only if dividing it back by one factor does
     * not give the other; v1 == 0 can never overflow and must not be used as
     * a divisor */
    return v1 != 0 && *r / v1 != v2;
#endif
}
247+
237248
#endif /* COMPILERSUPPORT_H */

src/memory.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
# include CBOR_CUSTOM_ALLOC_INCLUDE
2727
#else
2828
# include <stdlib.h>
29-
# define cbor_malloc malloc
30-
# define cbor_free free
29+
# define cbor_malloc malloc
30+
# define cbor_realloc realloc
31+
# define cbor_free free
3132
#endif

tests/tojson/tst_tojson.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,18 @@ void addTextStringsData()
160160
QTest::newRow("_textstring5*2") << raw("\x7f\x63Hel\x62lo\xff") << "\"Hello\"";
161161
QTest::newRow("_textstring5*5") << raw("\x7f\x61H\x61""e\x61l\x61l\x61o\xff") << "\"Hello\"";
162162
QTest::newRow("_textstring5*6") << raw("\x7f\x61H\x61""e\x61l\x60\x61l\x61o\xff") << "\"Hello\"";
163+
164+
// strings containing characters that are escaped in JSON
165+
QTest::newRow("null") << raw("\x61\0") << R"("\u0000")";
166+
QTest::newRow("bell") << raw("\x61\7") << R"("\u0007")"; // not \\a
167+
QTest::newRow("backspace") << raw("\x61\b") << R"("\b")";
168+
QTest::newRow("tab") << raw("\x61\t") << R"("\t")";
169+
QTest::newRow("carriage-return") << raw("\x61\r") << R"("\r")";
170+
QTest::newRow("line-feed") << raw("\x61\n") << R"("\n")";
171+
QTest::newRow("form-feed") << raw("\x61\f") << R"("\f")";
172+
QTest::newRow("esc") << raw("\x61\x1f") << R"("\u001f")";
173+
QTest::newRow("quote") << raw("\x61\"") << R"("\"")";
174+
QTest::newRow("backslash") << raw("\x61\\") << R"("\\")";
163175
}
164176

165177
void addNonJsonData()
@@ -412,6 +424,15 @@ void tst_ToJson::nonStringKeyMaps_data()
412424
QTest::newRow("map-24-0") << raw("\xa1\x18\x18\0") << "{24: 0}";
413425
QTest::newRow("_map-0-24") << raw("\xbf\0\x18\x18\xff") << "{_ 0: 24}";
414426
QTest::newRow("_map-24-0") << raw("\xbf\x18\x18\0\xff") << "{_ 24: 0}";
427+
428+
// nested strings ought to be escaped
429+
QTest::newRow("array-emptystring") << raw("\x81\x60") << R"([\"\"])";
430+
QTest::newRow("array-string1") << raw("\x81\x61 ") << R"([\" \"])";
431+
432+
// and escaped characters in strings end up doubly escaped
433+
QTest::newRow("array-string-null") << raw("\x81\x61\0") << R"([\"\\u0000\"])";
434+
QTest::newRow("array-string-quote") << raw("\x81\x61\"") << R"([\"\\\"\"])";
435+
QTest::newRow("array-string-backslash") << raw("\x81\x61\\") << R"([\"\\\\\"])";
415436
}
416437

417438
void tst_ToJson::nonStringKeyMaps()

0 commit comments

Comments
 (0)