Skip to content

Commit b241b58

Browse files
authored
Merge pull request #8534 from dolthub/nicktobey/json_fix
Fix issue where JSON diff would fail under specific circumstances.
2 parents 7d9a638 + ad631c6 commit b241b58

File tree

3 files changed

+81
-45
lines changed

3 files changed

+81
-45
lines changed

go/store/prolly/tree/indexed_json_diff.go

Lines changed: 59 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -44,61 +44,74 @@ func NewIndexedJsonDiffer(ctx context.Context, from, to IndexedJsonDocument) (*I
4444
differ.fromStop = differ.fromStop.parent
4545
differ.toStop = differ.toStop.parent
4646

47-
if differ.from == nil || differ.to == nil {
48-
// This can happen when either document fits in a single chunk.
49-
// We don't use the chunk differ in this case, and instead we create the cursors without it.
47+
var currentFromCursor, currentToCursor *JsonCursor
48+
if differ.from == nil {
49+
// The "from" document fits inside a single chunk.
50+
// We can't create the "from" cursor from the differ, so we create it here instead.
5051
diffKey := []byte{byte(startOfValue)}
51-
currentFromCursor, err := newJsonCursorAtStartOfChunk(ctx, from.m.NodeStore, from.m.Root, diffKey)
52+
currentFromCursor, err = newJsonCursorAtStartOfChunk(ctx, from.m.NodeStore, from.m.Root, diffKey)
5253
if err != nil {
5354
return nil, err
5455
}
55-
currentToCursor, err := newJsonCursorAtStartOfChunk(ctx, to.m.NodeStore, to.m.Root, diffKey)
56+
// Advance the cursor past the "beginning of document" location, so that it aligns with the "to" cursor no matter what.
57+
err = advanceCursor(ctx, &currentFromCursor)
58+
if err != nil {
59+
return nil, err
60+
}
61+
}
62+
63+
if differ.to == nil {
64+
// The "to" document fits inside a single chunk.
65+
// We can't create the "from" cursor from the differ, so we create it here instead.
66+
diffKey := []byte{byte(startOfValue)}
67+
currentToCursor, err = newJsonCursorAtStartOfChunk(ctx, to.m.NodeStore, to.m.Root, diffKey)
68+
if err != nil {
69+
return nil, err
70+
}
71+
// Advance the cursor past the "beginning of document" location, so that it aligns with the "from" cursor no matter what.
72+
err = advanceCursor(ctx, &currentToCursor)
5673
if err != nil {
5774
return nil, err
5875
}
59-
return &IndexedJsonDiffer{
60-
differ: differ,
61-
from: from,
62-
to: to,
63-
currentFromCursor: currentFromCursor,
64-
currentToCursor: currentToCursor,
65-
}, nil
6676
}
6777

6878
return &IndexedJsonDiffer{
69-
differ: differ,
70-
from: from,
71-
to: to,
79+
differ: differ,
80+
from: from,
81+
to: to,
82+
currentFromCursor: currentFromCursor,
83+
currentToCursor: currentToCursor,
7284
}, nil
7385
}
7486

87+
func advanceCursor(ctx context.Context, jCur **JsonCursor) (err error) {
88+
if (*jCur).jsonScanner.atEndOfChunk() {
89+
err = (*jCur).cur.advance(ctx)
90+
if err != nil {
91+
return err
92+
}
93+
*jCur = nil
94+
} else {
95+
err = (*jCur).jsonScanner.AdvanceToNextLocation()
96+
if err != nil {
97+
return err
98+
}
99+
}
100+
return nil
101+
}
102+
75103
// Next computes the next diff between the two JSON documents.
76104
// To accomplish this, it uses the underlying Differ to find chunks that have changed, and uses a pair of JsonCursors
77105
// to walk corresponding chunks.
78106
func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error) {
79107
// helper function to advance a JsonCursor and set it to nil if it reaches the end of a chunk
80-
advanceCursor := func(jCur **JsonCursor) (err error) {
81-
if (*jCur).jsonScanner.atEndOfChunk() {
82-
err = (*jCur).cur.advance(ctx)
83-
if err != nil {
84-
return err
85-
}
86-
*jCur = nil
87-
} else {
88-
err = (*jCur).jsonScanner.AdvanceToNextLocation()
89-
if err != nil {
90-
return err
91-
}
92-
}
93-
return nil
94-
}
95108

96109
newAddedDiff := func(key []byte) (JsonDiff, error) {
97110
addedValue, err := jd.currentToCursor.NextValue(ctx)
98111
if err != nil {
99112
return JsonDiff{}, err
100113
}
101-
err = advanceCursor(&jd.currentToCursor)
114+
err = advanceCursor(ctx, &jd.currentToCursor)
102115
if err != nil {
103116
return JsonDiff{}, err
104117
}
@@ -114,7 +127,7 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
114127
if err != nil {
115128
return JsonDiff{}, err
116129
}
117-
err = advanceCursor(&jd.currentFromCursor)
130+
err = advanceCursor(ctx, &jd.currentFromCursor)
118131
if err != nil {
119132
return JsonDiff{}, err
120133
}
@@ -150,29 +163,35 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
150163
return JsonDiff{}, err
151164
}
152165
} else if jd.currentFromCursor == nil {
153-
// We exhausted the current `from` chunk but not the `to` chunk. Since the chunk boundaries don't align on
166+
// We exhausted the current `from` chunk but not the current `to` chunk. Since the chunk boundaries don't align on
154167
// the same key, we need to continue into the next chunk.
155168

169+
// Alternatively, the "to" cursor was created during construction because the "to" document fit in a single chunk,
170+
// and the "from" cursor hasn't been created yet.
171+
156172
jd.currentFromCursor, err = newJsonCursorFromCursor(ctx, jd.differ.from)
157173
if err != nil {
158174
return JsonDiff{}, err
159175
}
160176

161-
err = advanceCursor(&jd.currentFromCursor)
177+
err = advanceCursor(ctx, &jd.currentFromCursor)
162178
if err != nil {
163179
return JsonDiff{}, err
164180
}
165181
continue
166182
} else if jd.currentToCursor == nil {
167-
// We exhausted the current `to` chunk but not the `from` chunk. Since the chunk boundaries don't align on
183+
// We exhausted the current `to` chunk but not the current `from` chunk. Since the chunk boundaries don't align on
168184
// the same key, we need to continue into the next chunk.
169185

186+
// Alternatively, the "from" cursor was created during construction because the "from" document fit in a single chunk,
187+
// and the "to" cursor hasn't been created yet.
188+
170189
jd.currentToCursor, err = newJsonCursorFromCursor(ctx, jd.differ.to)
171190
if err != nil {
172191
return JsonDiff{}, err
173192
}
174193

175-
err = advanceCursor(&jd.currentToCursor)
194+
err = advanceCursor(ctx, &jd.currentToCursor)
176195
if err != nil {
177196
return JsonDiff{}, err
178197
}
@@ -209,11 +228,11 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
209228
if compareJsonLocations(fromCurrentLocation, toCurrentLocation) != 0 {
210229
return JsonDiff{}, jsonParseError
211230
}
212-
err = advanceCursor(&jd.currentFromCursor)
231+
err = advanceCursor(ctx, &jd.currentFromCursor)
213232
if err != nil {
214233
return JsonDiff{}, err
215234
}
216-
err = advanceCursor(&jd.currentToCursor)
235+
err = advanceCursor(ctx, &jd.currentToCursor)
217236
if err != nil {
218237
return JsonDiff{}, err
219238
}
@@ -230,11 +249,11 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
230249
// Otherwise, compare them and possibly return a modification.
231250
if (fromScanner.current() == '{' && toScanner.current() == '{') ||
232251
(fromScanner.current() == '[' && toScanner.current() == '[') {
233-
err = advanceCursor(&jd.currentFromCursor)
252+
err = advanceCursor(ctx, &jd.currentFromCursor)
234253
if err != nil {
235254
return JsonDiff{}, err
236255
}
237-
err = advanceCursor(&jd.currentToCursor)
256+
err = advanceCursor(ctx, &jd.currentToCursor)
238257
if err != nil {
239258
return JsonDiff{}, err
240259
}

go/store/prolly/tree/json_diff_test.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -287,10 +287,13 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest {
287287
ctx := sql.NewEmptyContext()
288288
ns := NewTestNodeStore()
289289

290+
emptyDocument := types.JSONDocument{Val: types.JsonObject{}}
291+
290292
insert := func(document types.MutableJSON, path string, val interface{}) types.MutableJSON {
291293
jsonVal, inRange, err := types.JSON.Convert(val)
292294
require.NoError(t, err)
293295
require.True(t, (bool)(inRange))
296+
document = document.Clone(ctx).(types.MutableJSON)
294297
newDoc, changed, err := document.Insert(ctx, path, jsonVal.(sql.JSONWrapper))
295298
require.NoError(t, err)
296299
require.True(t, changed)
@@ -400,6 +403,18 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest {
400403
},
401404
},
402405
},
406+
{
407+
// This is a regression test.
408+
// If:
409+
// - One document fits in a single chunk and the other doesn't
410+
// - The location of the chunk boundary in the larger document is also present in the smaller document
411+
// - The chunk boundary doesn't fall at the beginning of value.
412+
// Then the differ would fail to advance the prolly tree cursor and would incorrectly see the larger document as corrupt.
413+
// The values in this test case are specifically chosen to meet these conditions.
414+
name: "no error when diffing large doc with small doc",
415+
from: largeObject,
416+
to: insert(emptyDocument, "$.level6", insert(emptyDocument, "$.level4", lookup(largeObject, "$.level6.level4"))),
417+
},
403418
}
404419
}
405420

@@ -473,9 +488,11 @@ func runTest(t *testing.T, test jsonDiffTest) {
473488

474489
return cmp == 0
475490
}
476-
require.Equal(t, len(test.expectedDiffs), len(actualDiffs))
477-
for i, expected := range test.expectedDiffs {
478-
actual := actualDiffs[i]
479-
require.True(t, diffsEqual(expected, actual), fmt.Sprintf("Expected: %v\nActual: %v", expected, actual))
491+
if test.expectedDiffs != nil {
492+
require.Equal(t, len(test.expectedDiffs), len(actualDiffs))
493+
for i, expected := range test.expectedDiffs {
494+
actual := actualDiffs[i]
495+
require.True(t, diffsEqual(expected, actual), fmt.Sprintf("Expected: %v\nActual: %v", expected, actual))
496+
}
480497
}
481498
}

go/store/prolly/tree/json_scanner.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ type JsonScanner struct {
3838
valueOffset int
3939
}
4040

41-
var jsonParseError = fmt.Errorf("an error occurred while reading or writing JSON to/from the database. This is most likely a bug, but could indicate database corruption")
41+
var jsonParseError = fmt.Errorf("encountered invalid JSON while reading JSON from the database, or while preparing to write JSON to the database. This is most likely a bug in JSON diffing")
4242

4343
func (j JsonScanner) Clone() JsonScanner {
4444
return JsonScanner{

0 commit comments

Comments
 (0)