Commit 429d612

Implement new extended JSON parser
GODRIVER-514 Change-Id: I59e18824178c36ab37af5c3cc97f0575c0c2f709
1 parent 4cb519e commit 429d612

31 files changed (+4487, -1680 lines)

benchmark/bson.go

Lines changed: 3 additions & 1 deletion
@@ -2,6 +2,7 @@ package benchmark
 
 import (
 	"errors"
+	"github.com/mongodb/mongo-go-driver/bson/bsoncodec"
 	"io/ioutil"
 	"path/filepath"
 
@@ -23,7 +24,8 @@ func loadSourceDocument(pathParts ...string) (*bson.Document, error) {
 	if err != nil {
 		return nil, err
 	}
-	doc, err := bson.ParseExtJSONObject(string(data))
+	doc := bson.NewDocument()
+	err = bsoncodec.UnmarshalExtJSON(data, true, &doc)
 	if err != nil {
 		return nil, err
 	}
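
The benchmark now builds an empty bson.Document and fills it with bsoncodec.UnmarshalExtJSON instead of calling the removed bson.ParseExtJSONObject. A minimal usage sketch of that call path, assuming the UnmarshalExtJSON(data []byte, canonical bool, dst interface{}) error signature implied by the diff above; everything outside the two driver packages is illustrative.

package main

import (
	"fmt"
	"log"

	"github.com/mongodb/mongo-go-driver/bson"
	"github.com/mongodb/mongo-go-driver/bson/bsoncodec"
)

func main() {
	// Canonical extended JSON input; the boolean argument selects
	// canonical (true) versus relaxed (false) parsing, per the diff above.
	data := []byte(`{"x": {"$numberInt": "42"}}`)

	doc := bson.NewDocument()
	if err := bsoncodec.UnmarshalExtJSON(data, true, &doc); err != nil {
		log.Fatal(err)
	}

	fmt.Println(doc)
}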
Lines changed: 336 additions & 0 deletions
@@ -0,0 +1,336 @@
// Copyright (C) MongoDB, Inc. 2017-present.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

package bsoncodec

import (
	"bytes"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"path"
	"strconv"
	"strings"
	"testing"
	"unicode"
	"unicode/utf8"

	"github.com/google/go-cmp/cmp"
	"github.com/mongodb/mongo-go-driver/bson"
	"github.com/stretchr/testify/require"
	"github.com/tidwall/pretty"
)

type testCase struct {
	Description  string                `json:"description"`
	BsonType     string                `json:"bson_type"`
	TestKey      *string               `json:"test_key"`
	Valid        []validityTestCase    `json:"valid"`
	DecodeErrors []decodeErrorTestCase `json:"decodeErrors"`
	ParseErrors  []parseErrorTestCase  `json:"parseErrors"`
	Deprecated   *bool                 `json:"deprecated"`
}

type validityTestCase struct {
	Description       string  `json:"description"`
	CanonicalBson     string  `json:"canonical_bson"`
	CanonicalExtJSON  string  `json:"canonical_extjson"`
	RelaxedExtJSON    *string `json:"relaxed_extjson"`
	DegenerateBSON    *string `json:"degenerate_bson"`
	DegenerateExtJSON *string `json:"degenerate_extjson"`
	ConvertedBSON     *string `json:"converted_bson"`
	ConvertedExtJSON  *string `json:"converted_extjson"`
	Lossy             *bool   `json:"lossy"`
}

type decodeErrorTestCase struct {
	Description string `json:"description"`
	Bson        string `json:"bson"`
}

type parseErrorTestCase struct {
	Description string `json:"description"`
	String      string `json:"string"`
}

const dataDir = "../../data"

var dvd DefaultValueDecoders
var dve DefaultValueEncoders

var dc = DecodeContext{Registry: NewRegistryBuilder().Build()}
var ec = EncodeContext{Registry: NewRegistryBuilder().Build()}

func findJSONFilesInDir(t *testing.T, dir string) []string {
	files := make([]string, 0)

	entries, err := ioutil.ReadDir(dir)
	require.NoError(t, err)

	for _, entry := range entries {
		if entry.IsDir() || path.Ext(entry.Name()) != ".json" {
			continue
		}

		files = append(files, entry.Name())
	}

	return files
}

func needsEscapedUnicode(bsonType string) bool {
	return bsonType == "0x02" || bsonType == "0x0D" || bsonType == "0x0E" || bsonType == "0x0F"
}

func unescapeUnicode(s, bsonType string) string {
	if !needsEscapedUnicode(bsonType) {
		return s
	}

	newS := ""

	for i := 0; i < len(s); i++ {
		c := s[i]
		switch c {
		case '\\':
			switch s[i+1] {
			case 'u':
				us := s[i : i+6]
				u, err := strconv.Unquote(strings.Replace(strconv.Quote(us), `\\u`, `\u`, 1))
				if err != nil {
					return ""
				}
				for _, r := range u {
					if r < ' ' {
						newS += fmt.Sprintf(`\u%04x`, r)
					} else {
						newS += string(r)
					}
				}
				i += 5
			default:
				newS += string(c)
			}
		default:
			if c > unicode.MaxASCII {
				r, size := utf8.DecodeRune([]byte(s[i:]))
				newS += string(r)
				i += size - 1
			} else {
				newS += string(c)
			}
		}
	}

	return newS
}

func normalizeCanonicalDouble(t *testing.T, key string, cEJ string) string {
	// Unmarshal string into map
	cEJMap := make(map[string]map[string]string)
	err := json.Unmarshal([]byte(cEJ), &cEJMap)
	require.NoError(t, err)

	// Parse the float contained by the map.
	expectedString := cEJMap[key]["$numberDouble"]
	expectedFloat, err := strconv.ParseFloat(expectedString, 64)

	// Normalize the string
	return fmt.Sprintf(`{"%s":{"$numberDouble":"%s"}}`, key, formatDouble(expectedFloat))
}

func normalizeRelaxedDouble(t *testing.T, key string, rEJ string) string {
	// Unmarshal string into map
	rEJMap := make(map[string]float64)
	err := json.Unmarshal([]byte(rEJ), &rEJMap)
	if err != nil {
		return normalizeCanonicalDouble(t, key, rEJ)
	}

	// Parse the float contained by the map.
	expectedFloat := rEJMap[key]

	// Normalize the string
	return fmt.Sprintf(`{"%s":%s}`, key, formatDouble(expectedFloat))
}

// bsonToNative decodes the BSON bytes (b) into a native bson.Document
func bsonToNative(t *testing.T, b []byte, bType, testDesc string) *bson.Document {
	doc := bson.NewDocument()
	err := dvd.DocumentDecodeValue(dc, NewValueReader(b), &doc)
	expectNoError(t, err, fmt.Sprintf("%s: decoding %s BSON", testDesc, bType))
	return doc
}

// nativeToBSON encodes the native bson.Document (doc) into canonical BSON and compares it to the expected
// canonical BSON (cB)
func nativeToBSON(t *testing.T, cB []byte, doc *bson.Document, testDesc, bType, docSrcDesc string) {
	actualB := new(bytes.Buffer)
	vw, err := NewBSONValueWriter(actualB)
	expectNoError(t, err, fmt.Sprintf("%s: creating ValueWriter", testDesc))
	err = dve.DocumentEncodeValue(ec, vw, doc)
	expectNoError(t, err, fmt.Sprintf("%s: encoding %s BSON", testDesc, bType))

	if diff := cmp.Diff(cB, actualB.Bytes()); diff != "" {
		t.Errorf("%s: 'native_to_bson(%s) = cB' failed (-want, +got):\n-%v\n+%v\n",
			testDesc, docSrcDesc, cB, actualB.Bytes())
		t.FailNow()
	}
}

// jsonToNative decodes the extended JSON string (ej) into a native bson.Document
func jsonToNative(t *testing.T, ej, ejType, testDesc string) *bson.Document {
	doc := bson.NewDocument()
	err := UnmarshalExtJSON([]byte(ej), ejType != "relaxed", &doc)
	expectNoError(t, err, fmt.Sprintf("%s: decoding %s extended JSON", testDesc, ejType))
	return doc
}

// nativeToJSON encodes the native bson.Document (doc) into an extended JSON string
func nativeToJSON(t *testing.T, ej string, doc *bson.Document, testDesc, ejType, ejShortName, docSrcDesc string) {
	actualEJ, err := MarshalExtJSON(doc, ejType != "relaxed", true)
	expectNoError(t, err, fmt.Sprintf("%s: encoding %s extended JSON", testDesc, ejType))

	if diff := cmp.Diff(ej, string(actualEJ)); diff != "" {
		t.Errorf("%s: 'native_to_%s_extended_json(%s) = %s' failed (-want, +got):\n%s\n",
			testDesc, ejType, docSrcDesc, ejShortName, diff)
		t.FailNow()
	}
}

func runTest(t *testing.T, file string) {
	filepath := path.Join(dataDir, file)
	content, err := ioutil.ReadFile(filepath)
	require.NoError(t, err)

	// Remove ".json" from filename.
	file = file[:len(file)-5]
	testName := "bson_corpus--" + file

	t.Run(testName, func(t *testing.T) {
		var test testCase
		require.NoError(t, json.Unmarshal(content, &test))

		for _, v := range test.Valid {
			// get canonical BSON
			cB, err := hex.DecodeString(v.CanonicalBson)
			expectNoError(t, err, fmt.Sprintf("%s: reading canonical BSON", v.Description))

			// get canonical extended JSON
			cEJ := unescapeUnicode(string(pretty.Ugly([]byte(v.CanonicalExtJSON))), test.BsonType)
			if test.BsonType == "0x01" {
				cEJ = normalizeCanonicalDouble(t, *test.TestKey, cEJ)
			}

			/*** canonical BSON round-trip tests ***/
			doc := bsonToNative(t, cB, "canonical", v.Description)

			// native_to_bson(bson_to_native(cB)) = cB
			nativeToBSON(t, cB, doc, v.Description, "canonical", "bson_to_native(cB)")

			// native_to_canonical_extended_json(bson_to_native(cB)) = cEJ
			nativeToJSON(t, cEJ, doc, v.Description, "canonical", "cEJ", "bson_to_native(cB)")

			// native_to_relaxed_extended_json(bson_to_native(cB)) = rEJ (if rEJ exists)
			if v.RelaxedExtJSON != nil {
				rEJ := unescapeUnicode(string(pretty.Ugly([]byte(*v.RelaxedExtJSON))), test.BsonType)
				if test.BsonType == "0x01" {
					rEJ = normalizeRelaxedDouble(t, *test.TestKey, rEJ)
				}

				nativeToJSON(t, rEJ, doc, v.Description, "relaxed", "rEJ", "bson_to_native(cB)")

				/*** relaxed extended JSON round-trip tests (if exists) ***/
				doc = jsonToNative(t, rEJ, "relaxed", v.Description)

				// native_to_relaxed_extended_json(json_to_native(rEJ)) = rEJ
				nativeToJSON(t, rEJ, doc, v.Description, "relaxed", "eJR", "json_to_native(rEJ)")
			}

			/*** canonical extended JSON round-trip tests ***/
			doc = jsonToNative(t, cEJ, "canonical", v.Description)

			// native_to_canonical_extended_json(json_to_native(cEJ)) = cEJ
			nativeToJSON(t, cEJ, doc, v.Description, "canonical", "cEJ", "json_to_native(cEJ)")

			// native_to_bson(json_to_native(cEJ)) = cb (unless lossy)
			if v.Lossy == nil || !*v.Lossy {
				nativeToBSON(t, cB, doc, v.Description, "canonical", "json_to_native(cEJ)")
			}

			/*** degenerate BSON round-trip tests (if exists) ***/
			if v.DegenerateBSON != nil {
				dB, err := hex.DecodeString(*v.DegenerateBSON)
				expectNoError(t, err, fmt.Sprintf("%s: reading degenerate BSON", v.Description))

				doc = bsonToNative(t, dB, "degenerate", v.Description)

				// native_to_bson(bson_to_native(dB)) = cB
				nativeToBSON(t, cB, doc, v.Description, "degenerate", "bson_to_native(dB)")
			}

			/*** degenerate JSON round-trip tests (if exists) ***/
			if v.DegenerateExtJSON != nil {
				dEJ := unescapeUnicode(string(pretty.Ugly([]byte(*v.DegenerateExtJSON))), test.BsonType)
				if test.BsonType == "0x01" {
					dEJ = normalizeCanonicalDouble(t, *test.TestKey, dEJ)
				}

				doc = jsonToNative(t, dEJ, "degenerate canonical", v.Description)

				// native_to_canonical_extended_json(json_to_native(dEJ)) = cEJ
				nativeToJSON(t, cEJ, doc, v.Description, "degenerate canonical", "cEJ", "json_to_native(dEJ)")

				// native_to_bson(json_to_native(dEJ)) = cB (unless lossy)
				if v.Lossy == nil || !*v.Lossy {
					nativeToBSON(t, cB, doc, v.Description, "canonical", "json_to_native(dEJ)")
				}
			}
		}

		for _, d := range test.DecodeErrors {
			b, err := hex.DecodeString(d.Bson)
			expectNoError(t, err, d.Description)

			doc := bson.NewDocument()
			err = dvd.DocumentDecodeValue(dc, NewValueReader(b), &doc)
			expectError(t, err, fmt.Sprintf("%s: expected decode error", d.Description))
		}

		for _, p := range test.ParseErrors {
			// skip DBRef tests
			if strings.Contains(p.Description, "Bad DBRef") {
				continue
			}

			s := unescapeUnicode(p.String, test.BsonType)
			if test.BsonType == "0x13" {
				s = fmt.Sprintf(`{"$numberDecimal": "%s"}`, s)
			}

			switch test.BsonType {
			case "0x00":
				doc := bson.NewDocument()
				err := UnmarshalExtJSON([]byte(s), true, &doc)
				expectError(t, err, fmt.Sprintf("%s: expected parse error", p.Description))
			case "0x13":
				ejvr := newExtJSONValueReader(strings.NewReader(s), true)
				_, err := ejvr.ReadDecimal128()
				expectError(t, err, fmt.Sprintf("%s: expected parse error", p.Description))
			default:
				t.Errorf("Update test to check for parse errors for type %s", test.BsonType)
				t.Fail()
			}
		}
	})
}

func Test_BsonCorpus(t *testing.T) {
	for _, file := range findJSONFilesInDir(t, dataDir) {
		runTest(t, file)
	}
}
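
The corpus test above enforces the round-trip identities from the BSON corpus spec, e.g. native_to_canonical_extended_json(json_to_native(cEJ)) = cEJ. A condensed sketch of that canonical round trip, using only the UnmarshalExtJSON and MarshalExtJSON entry points called in the test; the helper name and error handling are illustrative, and the input is assumed to already be in normalized (uglified, unescaped) form.

package bsoncodec

import (
	"bytes"
	"fmt"

	"github.com/mongodb/mongo-go-driver/bson"
)

// canonicalRoundTrip is a hypothetical helper mirroring the corpus check
// native_to_canonical_extended_json(json_to_native(cEJ)) = cEJ.
func canonicalRoundTrip(cEJ []byte) error {
	// json_to_native(cEJ)
	doc := bson.NewDocument()
	if err := UnmarshalExtJSON(cEJ, true, &doc); err != nil {
		return err
	}

	// native_to_canonical_extended_json: the second argument selects
	// canonical output, matching the MarshalExtJSON call in the test above.
	out, err := MarshalExtJSON(doc, true, true)
	if err != nil {
		return err
	}

	// The corpus requires the re-encoded output to equal the normalized input.
	if !bytes.Equal(cEJ, out) {
		return fmt.Errorf("round trip mismatch: want %s, got %s", cEJ, out)
	}
	return nil
}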

bson/bsoncodec/decoder.go

Lines changed: 3 additions & 2 deletions
@@ -8,7 +8,7 @@ import (
 )
 
 // This pool is used to keep the allocations of Decoders down. This is only used for the Marshal*
-// methods and is not consumable from outside of this package. The Encoders retrieved from this pool
+// methods and is not consumable from outside of this package. The Decoders retrieved from this pool
 // must have both Reset and SetRegistry called on them.
 var decPool = sync.Pool{
 	New: func() interface{} {
@@ -43,7 +43,8 @@ func NewDecoder(r *Registry, vr ValueReader) (*Decoder, error) {
 // The documentation for Unmarshal contains details about of BSON into a Go
 // value.
 func (d *Decoder) Decode(val interface{}) error {
-	if unmarshaler, ok := val.(Unmarshaler); ok {
+	_, vrOK := d.vr.(*extJSONValueReader)
+	if unmarshaler, ok := val.(Unmarshaler); ok && !vrOK {
 		// TODO(skriptble): Reuse a []byte here and use the AppendDocumentBytes method.
 		buf, err := Copier{r: d.r}.CopyDocumentToBytes(d.vr)
 		if err != nil {
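
With the new guard, a Decoder whose ValueReader is the extended JSON reader skips the Unmarshaler shortcut (which copies raw BSON bytes) and decodes through the registry instead. A sketch, in the same package, of the path this affects, built from the constructors used elsewhere in this commit; the wrapper function itself is illustrative and assumes the extended JSON reader satisfies ValueReader.

package bsoncodec

import (
	"strings"

	"github.com/mongodb/mongo-go-driver/bson"
)

// decodeExtJSON is a hypothetical wrapper showing the code path the vrOK
// guard affects: with an *extJSONValueReader the Unmarshaler fast path is
// bypassed and decoding runs through the registry's codecs.
func decodeExtJSON(ej string) (*bson.Document, error) {
	vr := newExtJSONValueReader(strings.NewReader(ej), true) // canonical mode
	dec, err := NewDecoder(NewRegistryBuilder().Build(), vr)
	if err != nil {
		return nil, err
	}

	doc := bson.NewDocument()
	if err := dec.Decode(&doc); err != nil {
		return nil, err
	}
	return doc, nil
}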

bson/bsoncodec/encoder.go

Lines changed: 2 additions & 1 deletion
@@ -41,7 +41,8 @@ func NewEncoder(r *Registry, vw ValueWriter) (*Encoder, error) {
 // The documentation for Marshal contains details about the conversion of Go
 // values to BSON.
 func (e *Encoder) Encode(val interface{}) error {
-	if marshaler, ok := val.(Marshaler); ok {
+	_, vwOK := e.vw.(*extJSONValueWriter)
+	if marshaler, ok := val.(Marshaler); ok && !vwOK {
 		// TODO(skriptble): Should we have a MarshalAppender interface so that we can have []byte reuse?
 		buf, err := marshaler.MarshalBSON()
 		if err != nil {
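
The encoder change mirrors the decoder one: a type assertion on the concrete writer disables the Marshaler shortcut, so values headed for an extended JSON writer are encoded field by field rather than having their raw MarshalBSON output copied in. A stand-alone sketch of that gating pattern with hypothetical stand-in types (extWriter, plainWriter, myDoc); it is not the driver's code.

package main

import "fmt"

// Marshaler is a stand-in for the driver's Marshaler interface.
type Marshaler interface {
	MarshalBSON() ([]byte, error)
}

// extWriter stands in for *extJSONValueWriter: raw BSON bytes must not be
// copied into it, so the shortcut is disabled.
type extWriter struct{}

type plainWriter struct{}

type myDoc struct{}

func (myDoc) MarshalBSON() ([]byte, error) { return []byte{}, nil }

func encode(w interface{}, val interface{}) {
	_, vwOK := w.(*extWriter)
	if _, ok := val.(Marshaler); ok && !vwOK {
		fmt.Println("fast path: copy val.MarshalBSON() output directly")
		return
	}
	fmt.Println("general path: encode through the registry's codecs")
}

func main() {
	encode(&extWriter{}, myDoc{})   // general path
	encode(&plainWriter{}, myDoc{}) // fast path
}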
