Skip to content

Commit fef05f0

Browse files
committed
1 parent df701e8 commit fef05f0

File tree

7 files changed

+153
-71
lines changed

7 files changed

+153
-71
lines changed

src/libbson/src/bson/bson-json.c

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,34 @@ _bson_json_parse_binary_elem (bson_json_reader_t *reader,
970970
}
971971
}
972972

973+
static bool
974+
_bson_json_allow_embedded_nulls (bson_json_reader_t const *reader)
975+
{
976+
const bson_json_read_state_t read_state = reader->bson.read_state;
977+
const bson_json_read_bson_state_t bson_state = reader->bson.bson_state;
978+
979+
if (read_state == BSON_JSON_IN_BSON_TYPE_REGEX_VALUES) {
980+
if (bson_state == BSON_JSON_LF_REGULAR_EXPRESSION_PATTERN ||
981+
bson_state == BSON_JSON_LF_REGULAR_EXPRESSION_OPTIONS) {
982+
/* Prohibit embedded NULL bytes for canonical extended regex:
983+
* { $regularExpression: { pattern: "pattern", options: "options" } }
984+
*/
985+
return false;
986+
}
987+
}
988+
989+
if (read_state == BSON_JSON_IN_BSON_TYPE) {
990+
if (bson_state == BSON_JSON_LF_REGEX ||
991+
bson_state == BSON_JSON_LF_OPTIONS) {
992+
/* Prohibit embedded NULL bytes for legacy regex:
993+
* { $regex: "pattern", $options: "options" } */
994+
return false;
995+
}
996+
}
997+
998+
/* Embedded nulls are okay in any other context */
999+
return true;
1000+
}
9731001

9741002
static void
9751003
_bson_json_read_string (bson_json_reader_t *reader, /* IN */
@@ -978,13 +1006,14 @@ _bson_json_read_string (bson_json_reader_t *reader, /* IN */
9781006
{
9791007
bson_json_read_state_t rs;
9801008
bson_json_read_bson_state_t bs;
1009+
const bool allow_null = _bson_json_allow_embedded_nulls (reader);
9811010

9821011
BASIC_CB_PREAMBLE;
9831012

9841013
rs = bson->read_state;
9851014
bs = bson->bson_state;
9861015

987-
if (!bson_utf8_validate ((const char *) val, vlen, true /*allow null*/)) {
1016+
if (!bson_utf8_validate ((const char *) val, vlen, allow_null)) {
9881017
_bson_json_read_corrupt (reader, "invalid bytes in UTF8 string");
9891018
return;
9901019
}
@@ -1269,7 +1298,7 @@ _bson_json_read_map_key (bson_json_reader_t *reader, /* IN */
12691298
{
12701299
bson_json_reader_bson_t *bson = &reader->bson;
12711300

1272-
if (!bson_utf8_validate ((const char *) val, len, true /* allow null */)) {
1301+
if (!bson_utf8_validate ((const char *) val, len, false /* allow null */)) {
12731302
_bson_json_read_corrupt (reader, "invalid bytes in UTF8 string");
12741303
return;
12751304
}

src/libbson/src/bson/bson.c

Lines changed: 49 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,28 @@ _bson_append (bson_t *bson, /* IN */
415415
return ok;
416416
}
417417

418+
/*
 * Report whether any of the first `len` bytes starting at `str` is a NUL
 * byte. Returns false when `len` is zero; `str` is not read in that case.
 */
static BSON_INLINE bool
_string_contains_null (const char *str, size_t len)
{
   size_t i;

   for (i = 0; i < len; i++) {
      if (str[i] == '\0') {
         return true;
      }
   }

   return false;
}
428+
429+
/*
 * HANDLE_KEY_LENGTH (key, key_length)
 *
 * Normalizes and validates a key length at the top of an append function.
 *
 *  - If key_length is negative, key is treated as NUL-terminated and
 *    key_length is overwritten with strlen (key).
 *  - Otherwise the first key_length bytes of key are scanned and, if any of
 *    them is a NUL byte, the ENCLOSING FUNCTION returns false.
 *
 * key_length must be a modifiable int lvalue. Both arguments may be
 * evaluated more than once, so they must be free of side effects. Because
 * the expansion contains `return false`, the macro may only be used inside
 * a function where that is a valid return statement.
 */
#define HANDLE_KEY_LENGTH(key, key_length)                                 \
   do {                                                                    \
      if ((key_length) < 0) {                                              \
         (key_length) = (int) strlen ((key));                              \
      } else {                                                             \
         /* Necessary to validate embedded NULL is not present in key. */  \
         if (_string_contains_null ((key), (size_t) (key_length))) {       \
            return false;                                                  \
         }                                                                 \
      }                                                                    \
   } while (0)
418440

419441
/*
420442
*--------------------------------------------------------------------------
@@ -457,9 +479,7 @@ _bson_append_bson_begin (bson_t *bson, /* IN */
457479
(child_type == BSON_TYPE_ARRAY));
458480
BSON_ASSERT (child);
459481

460-
if (key_length < 0) {
461-
key_length = (int) strlen (key);
462-
}
482+
HANDLE_KEY_LENGTH (key, key_length);
463483

464484
/*
465485
* If the parent is an inline bson_t, then we need to convert
@@ -743,9 +763,7 @@ bson_append_array (bson_t *bson, /* IN */
743763
BSON_ASSERT (key);
744764
BSON_ASSERT (array);
745765

746-
if (key_length < 0) {
747-
key_length = (int) strlen (key);
748-
}
766+
HANDLE_KEY_LENGTH (key, key_length);
749767

750768
/*
751769
* Let's be a bit pedantic and ensure the array has properly formatted key
@@ -818,9 +836,7 @@ bson_append_binary (bson_t *bson, /* IN */
818836
BSON_ASSERT (bson);
819837
BSON_ASSERT (key);
820838

821-
if (key_length < 0) {
822-
key_length = (int) strlen (key);
823-
}
839+
HANDLE_KEY_LENGTH (key, key_length);
824840

825841
subtype8 = subtype;
826842

@@ -896,9 +912,7 @@ bson_append_bool (bson_t *bson, /* IN */
896912
BSON_ASSERT (bson);
897913
BSON_ASSERT (key);
898914

899-
if (key_length < 0) {
900-
key_length = (int) strlen (key);
901-
}
915+
HANDLE_KEY_LENGTH (key, key_length);
902916

903917
return _bson_append (bson,
904918
4,
@@ -950,9 +964,7 @@ bson_append_code (bson_t *bson, /* IN */
950964
BSON_ASSERT (key);
951965
BSON_ASSERT (javascript);
952966

953-
if (key_length < 0) {
954-
key_length = (int) strlen (key);
955-
}
967+
HANDLE_KEY_LENGTH (key, key_length);
956968

957969
length = (int) strlen (javascript) + 1;
958970
length_le = BSON_UINT32_TO_LE (length);
@@ -1011,9 +1023,7 @@ bson_append_code_with_scope (bson_t *bson, /* IN */
10111023
return bson_append_code (bson, key, key_length, javascript);
10121024
}
10131025

1014-
if (key_length < 0) {
1015-
key_length = (int) strlen (key);
1016-
}
1026+
HANDLE_KEY_LENGTH (key, key_length);
10171027

10181028
js_length = (int) strlen (javascript) + 1;
10191029
js_length_le = BSON_UINT32_TO_LE (js_length);
@@ -1075,9 +1085,7 @@ bson_append_dbpointer (bson_t *bson, /* IN */
10751085
BSON_ASSERT (collection);
10761086
BSON_ASSERT (oid);
10771087

1078-
if (key_length < 0) {
1079-
key_length = (int) strlen (key);
1080-
}
1088+
HANDLE_KEY_LENGTH (key, key_length);
10811089

10821090
length = (int) strlen (collection) + 1;
10831091
length_le = BSON_UINT32_TO_LE (length);
@@ -1134,9 +1142,7 @@ bson_append_document (bson_t *bson, /* IN */
11341142
BSON_ASSERT (key);
11351143
BSON_ASSERT (value);
11361144

1137-
if (key_length < 0) {
1138-
key_length = (int) strlen (key);
1139-
}
1145+
HANDLE_KEY_LENGTH (key, key_length);
11401146

11411147
return _bson_append (bson,
11421148
4,
@@ -1160,9 +1166,7 @@ bson_append_double (bson_t *bson, const char *key, int key_length, double value)
11601166
BSON_ASSERT (bson);
11611167
BSON_ASSERT (key);
11621168

1163-
if (key_length < 0) {
1164-
key_length = (int) strlen (key);
1165-
}
1169+
HANDLE_KEY_LENGTH (key, key_length);
11661170

11671171
#if BSON_BYTE_ORDER == BSON_BIG_ENDIAN
11681172
value = BSON_DOUBLE_TO_LE (value);
@@ -1191,9 +1195,7 @@ bson_append_int32 (bson_t *bson, const char *key, int key_length, int32_t value)
11911195
BSON_ASSERT (bson);
11921196
BSON_ASSERT (key);
11931197

1194-
if (key_length < 0) {
1195-
key_length = (int) strlen (key);
1196-
}
1198+
HANDLE_KEY_LENGTH (key, key_length);
11971199

11981200
value_le = BSON_UINT32_TO_LE (value);
11991201

@@ -1220,9 +1222,7 @@ bson_append_int64 (bson_t *bson, const char *key, int key_length, int64_t value)
12201222
BSON_ASSERT (bson);
12211223
BSON_ASSERT (key);
12221224

1223-
if (key_length < 0) {
1224-
key_length = (int) strlen (key);
1225-
}
1225+
HANDLE_KEY_LENGTH (key, key_length);
12261226

12271227
value_le = BSON_UINT64_TO_LE (value);
12281228

@@ -1253,9 +1253,7 @@ bson_append_decimal128 (bson_t *bson,
12531253
BSON_ASSERT (key);
12541254
BSON_ASSERT (value);
12551255

1256-
if (key_length < 0) {
1257-
key_length = (int) strlen (key);
1258-
}
1256+
HANDLE_KEY_LENGTH (key, key_length);
12591257

12601258
value_le[0] = BSON_UINT64_TO_LE (value->low);
12611259
value_le[1] = BSON_UINT64_TO_LE (value->high);
@@ -1439,9 +1437,7 @@ bson_append_maxkey (bson_t *bson, const char *key, int key_length)
14391437
BSON_ASSERT (bson);
14401438
BSON_ASSERT (key);
14411439

1442-
if (key_length < 0) {
1443-
key_length = (int) strlen (key);
1444-
}
1440+
HANDLE_KEY_LENGTH (key, key_length);
14451441

14461442
return _bson_append (
14471443
bson, 3, (1 + key_length + 1), 1, &type, key_length, key, 1, &gZero);
@@ -1456,9 +1452,7 @@ bson_append_minkey (bson_t *bson, const char *key, int key_length)
14561452
BSON_ASSERT (bson);
14571453
BSON_ASSERT (key);
14581454

1459-
if (key_length < 0) {
1460-
key_length = (int) strlen (key);
1461-
}
1455+
HANDLE_KEY_LENGTH (key, key_length);
14621456

14631457
return _bson_append (
14641458
bson, 3, (1 + key_length + 1), 1, &type, key_length, key, 1, &gZero);
@@ -1473,9 +1467,7 @@ bson_append_null (bson_t *bson, const char *key, int key_length)
14731467
BSON_ASSERT (bson);
14741468
BSON_ASSERT (key);
14751469

1476-
if (key_length < 0) {
1477-
key_length = (int) strlen (key);
1478-
}
1470+
HANDLE_KEY_LENGTH (key, key_length);
14791471

14801472
return _bson_append (
14811473
bson, 3, (1 + key_length + 1), 1, &type, key_length, key, 1, &gZero);
@@ -1494,9 +1486,7 @@ bson_append_oid (bson_t *bson,
14941486
BSON_ASSERT (key);
14951487
BSON_ASSERT (value);
14961488

1497-
if (key_length < 0) {
1498-
key_length = (int) strlen (key);
1499-
}
1489+
HANDLE_KEY_LENGTH (key, key_length);
15001490

15011491
return _bson_append (bson,
15021492
4,
@@ -1573,12 +1563,15 @@ bson_append_regex_w_len (bson_t *bson,
15731563
BSON_ASSERT (bson);
15741564
BSON_ASSERT (key);
15751565

1576-
if (key_length < 0) {
1577-
key_length = (int) strlen (key);
1578-
}
1566+
HANDLE_KEY_LENGTH (key, key_length);
15791567

15801568
if (regex_length < 0) {
15811569
regex_length = (int) strlen (regex);
1570+
} else {
1571+
/* Necessary to validate embedded NULL is not present in regex. */
1572+
if (_string_contains_null (regex, regex_length)) {
1573+
return false;
1574+
}
15821575
}
15831576

15841577
if (!regex) {
@@ -1630,9 +1623,7 @@ bson_append_utf8 (
16301623
return bson_append_null (bson, key, key_length);
16311624
}
16321625

1633-
if (BSON_UNLIKELY (key_length < 0)) {
1634-
key_length = (int) strlen (key);
1635-
}
1626+
HANDLE_KEY_LENGTH (key, key_length);
16361627

16371628
if (BSON_UNLIKELY (length < 0)) {
16381629
length = (int) strlen (value);
@@ -1672,9 +1663,7 @@ bson_append_symbol (
16721663
return bson_append_null (bson, key, key_length);
16731664
}
16741665

1675-
if (key_length < 0) {
1676-
key_length = (int) strlen (key);
1677-
}
1666+
HANDLE_KEY_LENGTH (key, key_length);
16781667

16791668
if (length < 0) {
16801669
length = (int) strlen (value);
@@ -1729,9 +1718,7 @@ bson_append_timestamp (bson_t *bson,
17291718
BSON_ASSERT (bson);
17301719
BSON_ASSERT (key);
17311720

1732-
if (key_length < 0) {
1733-
key_length = (int) strlen (key);
1734-
}
1721+
HANDLE_KEY_LENGTH (key, key_length);
17351722

17361723
value = ((((uint64_t) timestamp) << 32) | ((uint64_t) increment));
17371724
value = BSON_UINT64_TO_LE (value);
@@ -1773,9 +1760,7 @@ bson_append_date_time (bson_t *bson,
17731760
BSON_ASSERT (bson);
17741761
BSON_ASSERT (key);
17751762

1776-
if (key_length < 0) {
1777-
key_length = (int) strlen (key);
1778-
}
1763+
HANDLE_KEY_LENGTH (key, key_length);
17791764

17801765
value_le = BSON_UINT64_TO_LE (value);
17811766

@@ -1819,9 +1804,7 @@ bson_append_undefined (bson_t *bson, const char *key, int key_length)
18191804
BSON_ASSERT (bson);
18201805
BSON_ASSERT (key);
18211806

1822-
if (key_length < 0) {
1823-
key_length = (int) strlen (key);
1824-
}
1807+
HANDLE_KEY_LENGTH (key, key_length);
18251808

18261809
return _bson_append (
18271810
bson, 3, (1 + key_length + 1), 1, &type, key_length, key, 1, &gZero);

src/libbson/tests/json/bson_corpus/document.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@
5151
{
5252
"description": "Invalid subdocument: bad string length in field",
5353
"bson": "1C00000003666F6F001200000002626172000500000062617A000000"
54+
},
55+
{
56+
"description": "Null byte in sub-document key",
57+
"bson": "150000000378000D00000010610000010000000000"
5458
}
5559
]
5660
}

src/libbson/tests/json/bson_corpus/regex.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,11 @@
5454
],
5555
"decodeErrors": [
5656
{
57-
"description": "embedded null in pattern",
57+
"description": "Null byte in pattern string",
5858
"bson": "0F0000000B610061006300696D0000"
5959
},
6060
{
61-
"description": "embedded null in flags",
61+
"description": "Null byte in flags string",
6262
"bson": "100000000B61006162630069006D0000"
6363
}
6464
]
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"description": "Top-level document validity - libmongoc-specific tests. TODO (CDRIVER-4016) move this file.",
3+
"bson_type": "0x00",
4+
"valid": [],
5+
"decodeErrors": [],
6+
"parseErrors": [
7+
{
8+
"description": "Null byte in legacy $regex string",
9+
"string": "{\"a\" : {\"$regex\" : \"b\\u0000\", \"$options\" : \"i\"}}"
10+
},
11+
{
12+
"description": "Null byte in legacy $options string",
13+
"string": "{\"a\" : {\"$regex\" : \"b\", \"$options\" : \"i\\u0000\"}}"
14+
}
15+
]
16+
}

0 commit comments

Comments
 (0)