Skip to content

Commit 43e7c40

Browse files
committed
added support for reading multiple top-level documents.
Change-Id: I36ebc5b18c1ed83cfb02716a50bf81318263cd32
1 parent 6c8f481 commit 43e7c40

File tree

3 files changed

+134
-9
lines changed

3 files changed

+134
-9
lines changed

bson/bsonrw/extjson_parser.go

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,12 @@ func (ejp *extJSONParser) readKey() (string, bsontype.Type, error) {
163163
case jpsSawValue, jpsSawEndObject, jpsSawEndArray:
164164
ejp.advanceState()
165165
switch ejp.s {
166-
case jpsSawComma:
166+
case jpsSawBeginObject, jpsSawComma:
167167
ejp.advanceState()
168-
case jpsSawEndObject, jpsDoneState:
168+
case jpsSawEndObject:
169169
return "", 0, ErrEOD
170+
case jpsDoneState:
171+
return "", 0, io.EOF
170172
case jpsInvalidState:
171173
return "", 0, ejp.err
172174
default:
@@ -552,6 +554,12 @@ var jpsValidTransitionTokens = map[jsonParseState]map[jsonTokenType]bool{
552554

553555
func (ejp *extJSONParser) validateToken(jtt jsonTokenType) bool {
554556
switch ejp.s {
557+
case jpsSawEndObject:
558+
// if we are at depth zero and the next token is a '{',
559+
// we can consider it valid only if we are not in array mode.
560+
if jtt == jttBeginObject && ejp.depth == 0 {
561+
return ejp.peekMode() != jpmArrayMode
562+
}
555563
case jpsSawComma:
556564
switch ejp.peekMode() {
557565
// the only valid next token after a comma inside a document is a string (a key)
@@ -560,13 +568,10 @@ func (ejp *extJSONParser) validateToken(jtt jsonTokenType) bool {
560568
case jpmInvalidMode:
561569
return false
562570
}
563-
564-
// fallthrough for commas in arrays
565-
fallthrough
566-
default:
567-
_, ok := jpsValidTransitionTokens[ejp.s][jtt]
568-
return ok
569571
}
572+
573+
_, ok := jpsValidTransitionTokens[ejp.s][jtt]
574+
return ok
570575
}
571576

572577
// ensureExtValueType returns true if the current value has the expected

bson/bsonrw/extjson_parser_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
package bsonrw
88

99
import (
10+
"io"
1011
"strings"
1112
"testing"
1213

@@ -19,6 +20,7 @@ var (
1920
typDiff = specificDiff("type")
2021
valDiff = specificDiff("value")
2122

23+
expectErrEOF = expectSpecificError(io.EOF)
2224
expectErrEOD = expectSpecificError(ErrEOD)
2325
expectErrEOA = expectSpecificError(ErrEOA)
2426
)
@@ -694,7 +696,7 @@ func TestExtJSONParserAllTypes(t *testing.T) {
694696

695697
// expect end of whole document: read EOF
696698
k, typ, err = ejp.readKey()
697-
readKeyDiff(t, "", k, bsontype.Type(0), typ, err, expectErrEOD, "")
699+
readKeyDiff(t, "", k, bsontype.Type(0), typ, err, expectErrEOF, "")
698700
if diff := cmp.Diff(jpsDoneState, ejp.s); diff != "" {
699701
t.Errorf("expected parser to be in done state but instead is in %v\n", ejp.s)
700702
t.FailNow()

bson/bsonrw/extjson_reader_test.go

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
package bsonrw
88

99
import (
10+
"fmt"
11+
"io"
12+
"strings"
1013
"testing"
1114

1215
"github.com/google/go-cmp/cmp"
@@ -45,3 +48,118 @@ func TestExtJSONReader(t *testing.T) {
4548
})
4649
})
4750
}
51+
52+
func TestReadMultipleTopLevelDocuments(t *testing.T) {
53+
testCases := []struct {
54+
name string
55+
input string
56+
expected [][]byte
57+
}{
58+
{
59+
"single top-level document",
60+
"{\"foo\":1}",
61+
[][]byte{
62+
{0x0E, 0x00, 0x00, 0x00, 0x10, 'f', 'o', 'o', 0x00, 0x01, 0x00, 0x00, 0x00, 0x00},
63+
},
64+
},
65+
{
66+
"single top-level document with leading and trailing whitespace",
67+
"\n\n {\"foo\":1} \n",
68+
[][]byte{
69+
{0x0E, 0x00, 0x00, 0x00, 0x10, 'f', 'o', 'o', 0x00, 0x01, 0x00, 0x00, 0x00, 0x00},
70+
},
71+
},
72+
{
73+
"two top-level documents",
74+
"{\"foo\":1}{\"foo\":2}",
75+
[][]byte{
76+
{0x0E, 0x00, 0x00, 0x00, 0x10, 'f', 'o', 'o', 0x00, 0x01, 0x00, 0x00, 0x00, 0x00},
77+
{0x0E, 0x00, 0x00, 0x00, 0x10, 'f', 'o', 'o', 0x00, 0x02, 0x00, 0x00, 0x00, 0x00},
78+
},
79+
},
80+
{
81+
"two top-level documents with leading and trailing whitespace and whitespace separation ",
82+
"\n\n {\"foo\":1}\n{\"foo\":2}\n ",
83+
[][]byte{
84+
{0x0E, 0x00, 0x00, 0x00, 0x10, 'f', 'o', 'o', 0x00, 0x01, 0x00, 0x00, 0x00, 0x00},
85+
{0x0E, 0x00, 0x00, 0x00, 0x10, 'f', 'o', 'o', 0x00, 0x02, 0x00, 0x00, 0x00, 0x00},
86+
},
87+
},
88+
{
89+
"top-level array with single document",
90+
"[{\"foo\":1}]",
91+
[][]byte{
92+
{0x0E, 0x00, 0x00, 0x00, 0x10, 'f', 'o', 'o', 0x00, 0x01, 0x00, 0x00, 0x00, 0x00},
93+
},
94+
},
95+
{
96+
"top-level array with 2 documents",
97+
"[{\"foo\":1},{\"foo\":2}]",
98+
[][]byte{
99+
{0x0E, 0x00, 0x00, 0x00, 0x10, 'f', 'o', 'o', 0x00, 0x01, 0x00, 0x00, 0x00, 0x00},
100+
{0x0E, 0x00, 0x00, 0x00, 0x10, 'f', 'o', 'o', 0x00, 0x02, 0x00, 0x00, 0x00, 0x00},
101+
},
102+
},
103+
}
104+
105+
for _, tc := range testCases {
106+
t.Run(tc.name, func(t *testing.T) {
107+
r := strings.NewReader(tc.input)
108+
vr := NewExtJSONValueReader(r, false)
109+
110+
actual, err := readAllDocuments(vr)
111+
if err != nil {
112+
t.Fatalf("expected no error, but got %v", err)
113+
}
114+
115+
if diff := cmp.Diff(tc.expected, actual); diff != "" {
116+
t.Fatalf("expected does not match actual: %v", diff)
117+
}
118+
})
119+
}
120+
}
121+
122+
func readAllDocuments(vr ValueReader) ([][]byte, error) {
123+
c := NewCopier()
124+
var actual [][]byte
125+
126+
switch vr.Type() {
127+
case bsontype.EmbeddedDocument:
128+
for {
129+
result, err := c.CopyDocumentToBytes(vr)
130+
if err != nil {
131+
if err == io.EOF {
132+
break
133+
}
134+
return nil, err
135+
}
136+
137+
actual = append(actual, result)
138+
}
139+
case bsontype.Array:
140+
ar, err := vr.ReadArray()
141+
if err != nil {
142+
return nil, err
143+
}
144+
for {
145+
evr, err := ar.ReadValue()
146+
if err != nil {
147+
if err == ErrEOA {
148+
break
149+
}
150+
return nil, err
151+
}
152+
153+
result, err := c.CopyDocumentToBytes(evr)
154+
if err != nil {
155+
return nil, err
156+
}
157+
158+
actual = append(actual, result)
159+
}
160+
default:
161+
return nil, fmt.Errorf("expected an array or a document, but got %s", vr.Type())
162+
}
163+
164+
return actual, nil
165+
}

0 commit comments

Comments
 (0)