Skip to content

Commit 1785ef9

Browse files
authored
Merge pull request #8538 from dolthub/nicktobey/json2
Fix JSON merge issue that would report imprecise diffs in some situations
2 parents b3853f6 + c9b6269 commit 1785ef9

File tree

3 files changed

+72
-12
lines changed

3 files changed

+72
-12
lines changed

go/store/prolly/tree/indexed_json_diff.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,10 +245,24 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
245245
case 0:
246246
key := fromCurrentLocation.Clone().key
247247

248+
fromNextCharacter, err := jd.currentFromCursor.nextCharacter(ctx)
249+
if err == io.EOF {
250+
return JsonDiff{}, jsonParseError
251+
}
252+
if err != nil {
253+
return JsonDiff{}, err
254+
}
255+
toNextCharacter, err := jd.currentToCursor.nextCharacter(ctx)
256+
if err == io.EOF {
257+
return JsonDiff{}, jsonParseError
258+
}
259+
if err != nil {
260+
return JsonDiff{}, err
261+
}
248262
// Both sides have the same key. If they're both an object or both an array, continue.
249263
// Otherwise, compare them and possibly return a modification.
250-
if (fromScanner.current() == '{' && toScanner.current() == '{') ||
251-
(fromScanner.current() == '[' && toScanner.current() == '[') {
264+
if (fromNextCharacter == '{' && toNextCharacter == '{') ||
265+
(fromNextCharacter == '[' && toNextCharacter == '[') {
252266
err = advanceCursor(ctx, &jd.currentFromCursor)
253267
if err != nil {
254268
return JsonDiff{}, err

go/store/prolly/tree/json_cursor.go

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -195,20 +195,28 @@ func (j *JsonCursor) AdvanceToLocation(ctx context.Context, path jsonLocation, f
195195
return true, nil
196196
}
197197

198+
func (j *JsonCursor) advanceCursor(ctx context.Context) error {
199+
err := j.cur.advance(ctx)
200+
if err != nil {
201+
return err
202+
}
203+
if !j.cur.Valid() {
204+
// We hit the end of the tree. This shouldn't happen.
205+
return io.EOF
206+
}
207+
j.jsonScanner = ScanJsonFromMiddle(j.cur.currentValue(), j.jsonScanner.currentPath)
208+
return nil
209+
}
210+
198211
func (j *JsonCursor) AdvanceToNextLocation(ctx context.Context) (crossedBoundary bool, err error) {
199212
err = j.jsonScanner.AdvanceToNextLocation()
200213
if err == io.EOF {
201214
crossedBoundary = true
202215
// We hit the end of the chunk, load the next one
203-
err = j.cur.advance(ctx)
216+
err = j.advanceCursor(ctx)
204217
if err != nil {
205-
return
206-
}
207-
if !j.cur.Valid() {
208-
// We hit the end of the tree. This shouldn't happen.
209-
return true, io.EOF
218+
return false, err
210219
}
211-
j.jsonScanner = ScanJsonFromMiddle(j.cur.currentValue(), j.jsonScanner.currentPath)
212220
return true, j.jsonScanner.AdvanceToNextLocation()
213221
} else if err != nil {
214222
return
@@ -221,6 +229,12 @@ func (j *JsonCursor) GetCurrentPath() jsonLocation {
221229
return j.jsonScanner.currentPath
222230
}
223231

224-
func (j *JsonCursor) nextCharacter() byte {
225-
return j.jsonScanner.jsonBuffer[j.jsonScanner.valueOffset]
232+
func (j *JsonCursor) nextCharacter(ctx context.Context) (byte, error) {
233+
if j.jsonScanner.atEndOfChunk() {
234+
err := j.advanceCursor(ctx)
235+
if err != nil {
236+
return 255, err
237+
}
238+
}
239+
return j.jsonScanner.jsonBuffer[j.jsonScanner.valueOffset], nil
226240
}

go/store/prolly/tree/json_diff_test.go

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/dolthub/go-mysql-server/sql/expression/function/json"
2727

2828
"github.com/dolthub/go-mysql-server/sql/types"
29+
"github.com/stretchr/testify/assert"
2930
"github.com/stretchr/testify/require"
3031
)
3132

@@ -415,6 +416,34 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest {
415416
from: largeObject,
416417
to: insert(emptyDocument, "$.level6", insert(emptyDocument, "$.level4", lookup(largeObject, "$.level6.level4"))),
417418
},
419+
{
420+
// This is a regression test.
421+
//
422+
// If:
423+
// - A chunk begins with an object "A"
424+
// - A value "A.b" within this object was modified
425+
// - The previous chunk was also modified
426+
// Then the differ would incorrectly report that the entire "A" object had been modified, instead of the sub-value "A.b"
427+
// The values in this test case are specifically chosen to meet these conditions,
428+
// as there is a chunk boundary immediately before "$.level5.level3.level1"
429+
name: "correctly diff object that begins on chunk boundary",
430+
from: largeObject,
431+
to: set(set(largeObject, "$.level5.level2.number", 2), "$.level5.level3.level1.number", 2),
432+
expectedDiffs: []JsonDiff{
433+
{
434+
Key: makeJsonPathKey(`level5`, `level2`, `number`),
435+
From: types.JSONDocument{Val: 1},
436+
To: types.JSONDocument{Val: 2},
437+
Type: ModifiedDiff,
438+
},
439+
{
440+
Key: makeJsonPathKey(`level5`, `level3`, `level1`, `number`),
441+
From: types.JSONDocument{Val: 1},
442+
To: types.JSONDocument{Val: 2},
443+
Type: ModifiedDiff,
444+
},
445+
},
446+
},
418447
}
419448
}
420449

@@ -489,7 +518,10 @@ func runTest(t *testing.T, test jsonDiffTest) {
489518
return cmp == 0
490519
}
491520
if test.expectedDiffs != nil {
492-
require.Equal(t, len(test.expectedDiffs), len(actualDiffs))
521+
522+
if !assert.Equal(t, len(test.expectedDiffs), len(actualDiffs)) {
523+
require.Fail(t, "Diffs don't match", "Expected: %v\nActual: %v", test.expectedDiffs, actualDiffs)
524+
}
493525
for i, expected := range test.expectedDiffs {
494526
actual := actualDiffs[i]
495527
require.True(t, diffsEqual(expected, actual), fmt.Sprintf("Expected: %v\nActual: %v", expected, actual))

0 commit comments

Comments
 (0)