Skip to content

Commit de35f93

Browse files
benjirewis (Benjamin Rewis)
authored and committed
Support parsing $uuid as extended JSON representation for subtype 4 binary (#560)
GODRIVER-1746 GODRIVER-1840 GODRIVER-1850
1 parent 6e83de7 commit de35f93

35 files changed

+158
-5568
lines changed

bson/bson_corpus_spec_test.go

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ import (
2323
"github.com/stretchr/testify/require"
2424
"github.com/tidwall/pretty"
2525
"go.mongodb.org/mongo-driver/bson/bsoncodec"
26-
"go.mongodb.org/mongo-driver/bson/bsonrw"
2726
"go.mongodb.org/mongo-driver/bson/primitive"
27+
"go.mongodb.org/mongo-driver/internal/testutil/assert"
2828
)
2929

3030
type testCase struct {
@@ -59,7 +59,7 @@ type parseErrorTestCase struct {
5959
String string `json:"string"`
6060
}
6161

62-
const dataDir = "../data"
62+
const dataDir = "../data/bson-corpus/"
6363

6464
var dvd bsoncodec.DefaultValueDecoders
6565
var dve bsoncodec.DefaultValueEncoders
@@ -354,19 +354,14 @@ func runTest(t *testing.T, file string) {
354354

355355
s := unescapeUnicode(p.String, test.BsonType)
356356
if test.BsonType == "0x13" {
357-
s = fmt.Sprintf(`{"$numberDecimal": "%s"}`, s)
357+
s = fmt.Sprintf(`{"decimal128": {"$numberDecimal": "%s"}}`, s)
358358
}
359359

360360
switch test.BsonType {
361-
case "0x00":
361+
case "0x00", "0x05", "0x13":
362362
var doc D
363363
err := UnmarshalExtJSON([]byte(s), true, &doc)
364364
expectError(t, err, fmt.Sprintf("%s: expected parse error", p.Description))
365-
case "0x13":
366-
ejvr, err := bsonrw.NewExtJSONValueReader(strings.NewReader(s), true)
367-
expectNoError(t, err, fmt.Sprintf("error creating value reader: %s", err))
368-
_, err = ejvr.ReadDecimal128()
369-
expectError(t, err, fmt.Sprintf("%s: expected parse error", p.Description))
370365
default:
371366
t.Errorf("Update test to check for parse errors for type %s", test.BsonType)
372367
t.Fail()
@@ -398,3 +393,61 @@ func expectError(t *testing.T, err error, desc string) {
398393
t.FailNow()
399394
}
400395
}
396+
397+
func TestRelaxedUUIDValidation(t *testing.T) {
398+
testCases := []struct {
399+
description string
400+
canonicalExtJSON string
401+
degenerateExtJSON string
402+
expectedErr string
403+
}{
404+
{
405+
"valid uuid",
406+
"{\"x\" : { \"$binary\" : {\"base64\" : \"c//SZESzTGmQ6OfR38A11A==\", \"subType\" : \"04\"}}}",
407+
"{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4\"}}",
408+
"",
409+
},
410+
{
411+
"invalid uuid--no hyphens",
412+
"",
413+
"{\"x\" : { \"$uuid\" : \"73ffd26444b34c6990e8e7d1dfc035d4\"}}",
414+
"$uuid value does not follow RFC 4122 format regarding length and hyphens",
415+
},
416+
{
417+
"invalid uuid--trailing hyphens",
418+
"",
419+
"{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035--\"}}",
420+
"$uuid value does not follow RFC 4122 format regarding length and hyphens",
421+
},
422+
{
423+
"invalid uuid--malformed hex",
424+
"",
425+
"{\"x\" : { \"$uuid\" : \"q3@fd26l-44b3-4c69-90e8-e7d1dfc035d4\"}}",
426+
"$uuid value does not follow RFC 4122 format regarding hex bytes: encoding/hex: invalid byte: U+0071 'q'",
427+
},
428+
}
429+
430+
for _, tc := range testCases {
431+
t.Run(tc.description, func(t *testing.T) {
432+
// get canonical extended JSON
433+
cEJ := unescapeUnicode(string(pretty.Ugly([]byte(tc.canonicalExtJSON))), "0x05")
434+
435+
// get degenerate extended JSON
436+
dEJ := unescapeUnicode(string(pretty.Ugly([]byte(tc.degenerateExtJSON))), "0x05")
437+
438+
// convert dEJ to native doc
439+
var doc D
440+
err := UnmarshalExtJSON([]byte(dEJ), true, &doc)
441+
442+
if tc.expectedErr != "" {
443+
assert.Equal(t, tc.expectedErr, err.Error(), "expected error %v, got %v", tc.expectedErr, err)
444+
} else {
445+
assert.Nil(t, err, "expected no error, got error: %v", err)
446+
447+
// Marshal doc into extended JSON and compare with cEJ
448+
nativeToJSON(t, cEJ, doc, tc.description, "degenerate canonical", "cEJ", "json_to_native(dEJ)")
449+
}
450+
})
451+
}
452+
453+
}

bson/bsonrw/extjson_parser.go

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,12 @@
77
package bsonrw
88

99
import (
10+
"encoding/base64"
11+
"encoding/hex"
1012
"errors"
1113
"fmt"
1214
"io"
15+
"strings"
1316

1417
"go.mongodb.org/mongo-driver/bson/bsontype"
1518
)
@@ -66,6 +69,7 @@ type extJSONParser struct {
6669
maxDepth int
6770

6871
emptyObject bool
72+
relaxedUUID bool
6973
}
7074

7175
// newExtJSONParser returns a new extended JSON parser, ready to begin
@@ -119,6 +123,12 @@ func (ejp *extJSONParser) peekType() (bsontype.Type, error) {
119123
}
120124
t = wrapperKeyBSONType(ejp.k)
121125

126+
// if $uuid is encountered, parse as binary subtype 4
127+
if ejp.k == "$uuid" {
128+
ejp.relaxedUUID = true
129+
t = bsontype.Binary
130+
}
131+
122132
switch t {
123133
case bsontype.JavaScript:
124134
// just saw $code, need to check for $scope at same level
@@ -273,6 +283,64 @@ func (ejp *extJSONParser) readValue(t bsontype.Type) (*extJSONValue, error) {
273283

274284
ejp.advanceState()
275285
if t == bsontype.Binary && ejp.s == jpsSawValue {
286+
// convert relaxed $uuid format
287+
if ejp.relaxedUUID {
288+
defer func() { ejp.relaxedUUID = false }()
289+
uuid, err := ejp.v.parseSymbol()
290+
if err != nil {
291+
return nil, err
292+
}
293+
294+
// RFC 4122 defines the length of a UUID as 36 and the hyphens in a UUID as appearing
295+
// in the 8th, 13th, 18th, and 23rd characters.
296+
//
297+
// See https://tools.ietf.org/html/rfc4122#section-3
298+
valid := len(uuid) == 36 &&
299+
string(uuid[8]) == "-" &&
300+
string(uuid[13]) == "-" &&
301+
string(uuid[18]) == "-" &&
302+
string(uuid[23]) == "-"
303+
if !valid {
304+
return nil, fmt.Errorf("$uuid value does not follow RFC 4122 format regarding length and hyphens")
305+
}
306+
307+
// remove hyphens
308+
uuidNoHyphens := strings.Replace(uuid, "-", "", -1)
309+
if len(uuidNoHyphens) != 32 {
310+
return nil, fmt.Errorf("$uuid value does not follow RFC 4122 format regarding length and hyphens")
311+
}
312+
313+
// convert hex to bytes
314+
bytes, err := hex.DecodeString(uuidNoHyphens)
315+
if err != nil {
316+
return nil, fmt.Errorf("$uuid value does not follow RFC 4122 format regarding hex bytes: %v", err)
317+
}
318+
319+
ejp.advanceState()
320+
if ejp.s != jpsSawEndObject {
321+
return nil, invalidJSONErrorForType("$uuid and value and then }", bsontype.Binary)
322+
}
323+
324+
base64 := &extJSONValue{
325+
t: bsontype.String,
326+
v: base64.StdEncoding.EncodeToString(bytes),
327+
}
328+
subType := &extJSONValue{
329+
t: bsontype.String,
330+
v: "04",
331+
}
332+
333+
v = &extJSONValue{
334+
t: bsontype.EmbeddedDocument,
335+
v: &extJSONObject{
336+
keys: []string{"base64", "subType"},
337+
values: []*extJSONValue{base64, subType},
338+
},
339+
}
340+
341+
break
342+
}
343+
276344
// convert legacy $binary format
277345
base64 := ejp.v
278346

data/array.json

Lines changed: 0 additions & 43 deletions
This file was deleted.

data/binary.json

Lines changed: 0 additions & 85 deletions
This file was deleted.

data/boolean.json

Lines changed: 0 additions & 27 deletions
This file was deleted.

0 commit comments

Comments
 (0)