Skip to content

Commit e5b9cdf

Browse files
authored
CDRIVER-5713 fix bson_utf8_escape_for_json crash (#1725)
* add tests * add fuzz test * apply fix * fix docs * correctly handle alternative NULL terminator * addressing minor comments * adding proper boundary tests * refactor tests
1 parent 0a8e8c6 commit e5b9cdf

File tree

6 files changed

+96
-13
lines changed

6 files changed

+96
-13
lines changed

src/libbson/doc/bson_utf8_escape_for_json.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,4 @@ byte is found before ``utf8_len`` bytes, it is converted to
3131
Returns
3232
-------
3333

34-
A newly allocated string that should be freed with :symbol:`bson_free()`.
34+
A newly allocated string that should be freed with :symbol:`bson_free()` when ``utf8`` is a valid UTF-8 string, or ``NULL`` if the (possibly invalid UTF-8) string could not be escaped.

src/libbson/doc/bson_utf8_get_char.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Synopsis
1414
Parameters
1515
----------
1616

17-
* ``utf8``: A UTF-8 encoded string.
17+
* ``utf8``: A validated UTF-8 encoded string.
1818

1919
Description
2020
-----------

src/libbson/fuzz/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,6 @@ target_link_libraries(fuzz_test_init_from_json PRIVATE fuzz_test_properties)
77

88
add_executable(fuzz_test_validate EXCLUDE_FROM_ALL fuzz_test_validate.c)
99
target_link_libraries(fuzz_test_validate PRIVATE fuzz_test_properties)
10+
11+
add_executable(fuzz_test_bson_utf8_escape_for_json EXCLUDE_FROM_ALL fuzz_test_bson_utf8_escape_for_json.c)
12+
target_link_libraries(fuzz_test_bson_utf8_escape_for_json PRIVATE fuzz_test_properties)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#include <bson/bson.h>
2+
3+
int
4+
LLVMFuzzerTestOneInput (const uint8_t *data, size_t size)
5+
{
6+
// Reject inputs with lengths greater than ssize_t
7+
if (bson_cmp_greater_us (size, SSIZE_MAX)) {
8+
return -1;
9+
}
10+
11+
// Check if input crashes program (ok if return NULL)
12+
char *got = bson_utf8_escape_for_json ((const char *) data, (ssize_t) size);
13+
14+
// If UTF-8 is valid, we should get some non-null response
15+
if (bson_utf8_validate ((const char *) data, size, true)) {
16+
BSON_ASSERT (got);
17+
}
18+
19+
bson_free (got);
20+
return 0;
21+
}

src/libbson/src/bson/bson-utf8.c

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,15 @@ bson_utf8_escape_for_json (const char *utf8, /* IN */
278278
end = utf8 + utf8_len;
279279

280280
while (utf8 < end) {
281+
uint8_t mask;
282+
uint8_t length_of_char;
283+
284+
// Check if expected char length goes past end
285+
_bson_utf8_get_sequence (utf8, &length_of_char, &mask);
286+
if (utf8 > end - length_of_char) {
287+
goto invalid_utf8;
288+
}
289+
281290
c = bson_utf8_get_char (utf8);
282291

283292
switch (c) {
@@ -313,18 +322,25 @@ bson_utf8_escape_for_json (const char *utf8, /* IN */
313322
if (c) {
314323
utf8 = bson_utf8_next_char (utf8);
315324
} else {
316-
if (length_provided && !*utf8) {
317-
/* we escaped nil as '\u0000', now advance past it */
318-
utf8++;
325+
if (length_provided) {
326+
// Check if current character is null character,
327+
// if so skip past it as we've already added '\\u0000' above
328+
if (*utf8 == '\0' || (*utf8 == '\xc0' && *(utf8 + 1) == '\x80')) {
329+
utf8 += (*utf8 == '\0') ? 1 : 2;
330+
} else {
331+
goto invalid_utf8;
332+
}
319333
} else {
320-
/* invalid UTF-8 */
321-
bson_string_free (str, true);
322-
return NULL;
334+
goto invalid_utf8;
323335
}
324336
}
325337
}
326338

327339
return bson_string_free (str, false);
340+
341+
invalid_utf8:
342+
bson_string_free (str, true);
343+
return NULL;
328344
}
329345

330346

src/libbson/tests/test-utf8.c

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,24 +63,67 @@ test_bson_utf8_escape_for_json (void)
6363
char *unescaped = "\x0e";
6464

6565
str = bson_utf8_escape_for_json ("my\0key", 6);
66-
BSON_ASSERT (0 == memcmp (str, "my\\u0000key", 7));
66+
ASSERT_CMPSTR (str, "my\\u0000key");
67+
bson_free (str);
68+
69+
str = bson_utf8_escape_for_json ("my\xc0\x80key", 7);
70+
ASSERT_CMPSTR (str, "my\\u0000key");
6771
bson_free (str);
6872

6973
str = bson_utf8_escape_for_json ("my\"key", 6);
70-
BSON_ASSERT (0 == memcmp (str, "my\\\"key", 8));
74+
ASSERT_CMPSTR (str, "my\\\"key");
7175
bson_free (str);
7276

7377
str = bson_utf8_escape_for_json ("my\\key", 6);
74-
BSON_ASSERT (0 == memcmp (str, "my\\\\key", 8));
78+
ASSERT_CMPSTR (str, "my\\\\key");
7579
bson_free (str);
7680

7781
str = bson_utf8_escape_for_json ("\\\"\\\"", -1);
78-
BSON_ASSERT (0 == memcmp (str, "\\\\\\\"\\\\\\\"", 9));
82+
ASSERT_CMPSTR (str, "\\\\\\\"\\\\\\\"");
7983
bson_free (str);
8084

8185
str = bson_utf8_escape_for_json (unescaped, -1);
82-
BSON_ASSERT (0 == memcmp (str, "\\u000e", 7));
86+
ASSERT_CMPSTR (str, "\\u000e");
8387
bson_free (str);
88+
89+
// 2 bytes expected
90+
{
91+
str = bson_utf8_escape_for_json ("\xc2", -1);
92+
ASSERT (!str);
93+
94+
str = bson_utf8_escape_for_json ("\xc3\xa9", -1);
95+
ASSERT_CMPSTR (str, "é");
96+
bson_free (str);
97+
}
98+
99+
// 3 bytes expected
100+
{
101+
str = bson_utf8_escape_for_json ("\xed", -1);
102+
ASSERT (!str);
103+
104+
str = bson_utf8_escape_for_json ("\xed\x90", -1);
105+
ASSERT (!str);
106+
107+
str = bson_utf8_escape_for_json ("\xe4\xb8\xad", -1);
108+
ASSERT_CMPSTR (str, "中");
109+
bson_free (str);
110+
}
111+
112+
// 4 bytes expected
113+
{
114+
str = bson_utf8_escape_for_json ("\xf0", -1);
115+
ASSERT (!str);
116+
117+
str = bson_utf8_escape_for_json ("\xf0\x9f", -1);
118+
ASSERT (!str);
119+
120+
str = bson_utf8_escape_for_json ("\xf0\x9f\x9f", -1);
121+
ASSERT (!str);
122+
123+
str = bson_utf8_escape_for_json ("\xf0\xa0\x9c\x8f", -1);
124+
ASSERT_CMPSTR (str, "𠜏");
125+
bson_free (str);
126+
}
84127
}
85128

86129

0 commit comments

Comments
 (0)