Skip to content

Commit da0e8a7

Browse files
authored
CDRIVER-3792 Support parsing $uuid as extended JSON (#759)
* Sync bson-corpus spec tests for binary data
* Test parse errors for binary types
* Support parsing $uuid into a binary UUID
* Fix wrong handling of length
* Fix wrong return value when failing to parse UUID
* Make sscanf invocation compatible with more operating systems
* Reformat SSCANF invocation
* Change fall through comment for consistency
1 parent b4f84b8 commit da0e8a7

File tree

3 files changed

+117
-4
lines changed

3 files changed

+117
-4
lines changed

src/libbson/src/bson/bson-json.c

Lines changed: 88 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ static const char *read_state_names[] = {FOREACH_READ_STATE (GENERATE_STRING)};
103103
BS (DECIMAL128) \
104104
BS (DBPOINTER) \
105105
BS (SYMBOL) \
106-
BS (DBREF)
106+
BS (DBREF) \
107+
BS (UUID)
107108

108109
typedef enum {
109110
FOREACH_BSON_STATE (BSON_STATE_ENUM)
@@ -706,6 +707,7 @@ _bson_json_read_integer (bson_json_reader_t *reader, uint64_t val, int64_t sign)
706707
case BSON_JSON_LF_OID:
707708
case BSON_JSON_LF_BINARY:
708709
case BSON_JSON_LF_TYPE:
710+
case BSON_JSON_LF_UUID:
709711
case BSON_JSON_LF_UNDEFINED:
710712
case BSON_JSON_LF_DOUBLE:
711713
case BSON_JSON_LF_DECIMAL128:
@@ -833,8 +835,29 @@ _bson_json_read_int64_or_set_error (bson_json_reader_t *reader, /* IN */
833835
return true;
834836
}
835837

838+
/* Map one hexadecimal digit character to its numeric value (0-15).
 *
 * Returns -1 when `c` is not one of 0-9, a-f or A-F. Plain range comparisons
 * are used instead of <ctype.h> so the result never depends on the locale. */
static int
_uuid_hex_digit_value (char c)
{
   if (c >= '0' && c <= '9') {
      return c - '0';
   }
   if (c >= 'a' && c <= 'f') {
      return c - 'a' + 10;
   }
   if (c >= 'A' && c <= 'F') {
      return c - 'A' + 10;
   }
   return -1;
}

/* Decode a UUID given as exactly 32 hexadecimal digits (dashes already
 * removed) into its 16 raw bytes.
 *
 * uuid: NUL-terminated string to decode.
 * out:  destination buffer; on success its first 16 bytes hold the UUID.
 * max:  capacity of `out` in bytes; must be at least 16.
 *
 * Returns true only if `uuid` is exactly 32 characters long, every character
 * is a hexadecimal digit, and `out` can hold 16 bytes. On failure the
 * contents of `out` are unspecified.
 *
 * Each character is validated individually rather than scanned with "%2x":
 * the scanf family skips leading whitespace and accepts a sign for %x, which
 * would let strings such as "+3..." or "... 4" pass as valid UUID digits. */
static bool
_unhexlify_uuid (const char *uuid, uint8_t *out, size_t max)
{
   enum { UUID_HEX_LEN = 32, UUID_BYTE_LEN = 16 };
   size_t i;

   if (strlen (uuid) != (size_t) UUID_HEX_LEN ||
       max < (size_t) UUID_BYTE_LEN) {
      return false;
   }

   for (i = 0; i < (size_t) UUID_BYTE_LEN; i++) {
      const int hi = _uuid_hex_digit_value (uuid[2 * i]);
      const int lo = _uuid_hex_digit_value (uuid[2 * i + 1]);

      if (hi < 0 || lo < 0) {
         return false;
      }

      out[i] = (uint8_t) ((hi << 4) | lo);
   }

   return true;
}
858+
859+
/* parse a value for "base64", "subType", legacy "$binary" or "$type", or
860+
* "$uuid" */
838861
static void
839862
_bson_json_parse_binary_elem (bson_json_reader_t *reader,
840863
const char *val_w_null,
@@ -894,6 +917,64 @@ _bson_json_parse_binary_elem (bson_json_reader_t *reader,
894917
(int) vlen);
895918
}
896919
}
920+
} else if (bs == BSON_JSON_LF_UUID) {
921+
int nread = 0;
922+
char uuid[33];
923+
924+
data->binary.has_binary = true;
925+
data->binary.has_subtype = true;
926+
data->binary.type = BSON_SUBTYPE_UUID;
927+
928+
/* Validate the UUID and extract relevant portions */
929+
/* We can't use %x here as it allows +, -, and 0x prefixes */
930+
#ifdef _MSC_VER
931+
SSCANF (val_w_null,
932+
"%8c-%4c-%4c-%4c-%12c%n",
933+
&uuid[0],
934+
8,
935+
&uuid[8],
936+
4,
937+
&uuid[12],
938+
4,
939+
&uuid[16],
940+
4,
941+
&uuid[20],
942+
12,
943+
&nread);
944+
#else
945+
SSCANF (val_w_null,
946+
"%8c-%4c-%4c-%4c-%12c%n",
947+
&uuid[0],
948+
&uuid[8],
949+
&uuid[12],
950+
&uuid[16],
951+
&uuid[20],
952+
&nread);
953+
#endif
954+
955+
uuid[32] = '\0';
956+
957+
if (nread != 36 || val_w_null[nread] != '\0') {
958+
_bson_json_read_set_error (reader,
959+
"Invalid input string \"%s\", looking for "
960+
"a dash-separated UUID string",
961+
val_w_null);
962+
963+
return;
964+
}
965+
966+
binary_len = 16;
967+
_bson_json_buf_ensure (&bson->bson_type_buf[0], (size_t) binary_len + 1);
968+
969+
if (!_unhexlify_uuid (
970+
&uuid[0], bson->bson_type_buf[0].buf, (size_t) binary_len)) {
971+
_bson_json_read_set_error (reader,
972+
"Invalid input string \"%s\", looking for "
973+
"a dash-separated UUID string",
974+
val_w_null);
975+
}
976+
977+
bson->bson_type_buf[0].len = (size_t) binary_len;
897978
}
898979
}
899980

@@ -967,6 +1048,8 @@ _bson_json_read_string (bson_json_reader_t *reader, /* IN */
9671048
case BSON_JSON_LF_BINARY:
9681049
case BSON_JSON_LF_TYPE:
9691050
bson->bson_type_data.binary.is_legacy = true;
1051+
/* FALL THROUGH */
1052+
case BSON_JSON_LF_UUID:
9701053
_bson_json_parse_binary_elem (reader, val_w_null, vlen);
9711054
break;
9721055
case BSON_JSON_LF_INT32: {
@@ -1123,7 +1206,7 @@ _is_known_key (const char *key, size_t len)
11231206
IS_KEY ("$numberDouble") || IS_KEY ("$numberDecimal") ||
11241207
IS_KEY ("$numberInt") || IS_KEY ("$numberLong") ||
11251208
IS_KEY ("$numberDouble") || IS_KEY ("$numberDecimal") ||
1126-
IS_KEY ("$dbPointer") || IS_KEY ("$symbol"));
1209+
IS_KEY ("$dbPointer") || IS_KEY ("$symbol") || IS_KEY ("$uuid"));
11271210

11281211
#undef IS_KEY
11291212

@@ -1241,6 +1324,8 @@ _bson_json_read_map_key (bson_json_reader_t *reader, /* IN */
12411324
HANDLE_OPTION ("$binary", BSON_TYPE_BINARY, BSON_JSON_LF_BINARY)
12421325
else if
12431326
HANDLE_OPTION ("$type", BSON_TYPE_BINARY, BSON_JSON_LF_TYPE)
1327+
else if
1328+
HANDLE_OPTION ("$uuid", BSON_TYPE_BINARY, BSON_JSON_LF_UUID)
12441329
else if
12451330
HANDLE_OPTION ("$date", BSON_TYPE_DATE_TIME, BSON_JSON_LF_DATE)
12461331
else if

src/libbson/tests/json/bson_corpus/binary.json

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@
3939
"canonical_bson": "1D000000057800100000000473FFD26444B34C6990E8E7D1DFC035D400",
4040
"canonical_extjson": "{\"x\" : { \"$binary\" : {\"base64\" : \"c//SZESzTGmQ6OfR38A11A==\", \"subType\" : \"04\"}}}"
4141
},
42+
{
43+
"description": "subtype 0x04 UUID",
44+
"canonical_bson": "1D000000057800100000000473FFD26444B34C6990E8E7D1DFC035D400",
45+
"canonical_extjson": "{\"x\" : { \"$binary\" : {\"base64\" : \"c//SZESzTGmQ6OfR38A11A==\", \"subType\" : \"04\"}}}",
46+
"degenerate_extjson": "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4\"}}"
47+
},
4248
{
4349
"description": "subtype 0x05",
4450
"canonical_bson": "1D000000057800100000000573FFD26444B34C6990E8E7D1DFC035D400",
@@ -81,5 +87,27 @@
8187
"description": "subtype 0x02 length negative one",
8288
"bson": "130000000578000600000002FFFFFFFFFFFF00"
8389
}
90+
],
91+
"parseErrors": [
92+
{
93+
"description": "$uuid wrong type",
94+
"string": "{\"x\" : { \"$uuid\" : { \"data\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4\"}}}"
95+
},
96+
{
97+
"description": "$uuid invalid value--too short",
98+
"string": "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-90e8-e7d1dfc035d4\"}}"
99+
},
100+
{
101+
"description": "$uuid invalid value--too long",
102+
"string": "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4-789e4\"}}"
103+
},
104+
{
105+
"description": "$uuid invalid value--misplaced hyphens",
106+
"string": "{\"x\" : { \"$uuid\" : \"73ff-d26444b-34c6-990e8e-7d1dfc035d4\"}}"
107+
},
108+
{
109+
"description": "$uuid invalid value--too many hyphens",
110+
"string": "{\"x\" : { \"$uuid\" : \"----d264-44b3-4--9-90e8-e7d1dfc0----\"}}"
111+
}
84112
]
85113
}

src/libbson/tests/test-bson-corpus.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ test_bson_corpus_parse_error (test_bson_parse_error_type_t *test)
239239

240240
switch (test->bson_type) {
241241
case BSON_TYPE_EOD: /* top-level document to be parsed as JSON */
242+
case BSON_TYPE_BINARY:
242243
ASSERT (!bson_new_from_json ((uint8_t *) test->str, test->str_len, NULL));
243244
break;
244245
case BSON_TYPE_DECIMAL128: {
@@ -251,7 +252,6 @@ test_bson_corpus_parse_error (test_bson_parse_error_type_t *test)
251252
case BSON_TYPE_UTF8:
252253
case BSON_TYPE_DOCUMENT:
253254
case BSON_TYPE_ARRAY:
254-
case BSON_TYPE_BINARY:
255255
case BSON_TYPE_UNDEFINED:
256256
case BSON_TYPE_OID:
257257
case BSON_TYPE_BOOL:

0 commit comments

Comments
 (0)