Skip to content

Commit 20f2fe9

Browse files
committed
fix: apply_diff finds closest match from start_line instead of buffer midpoint (#2657)
1 parent 9c08857 commit 20f2fe9

File tree

3 files changed

+153
-18
lines changed

3 files changed

+153
-18
lines changed

src/core/diff/strategies/__tests__/multi-search-replace.spec.ts

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -780,6 +780,104 @@ function five() {
780780
}`)
781781
}
782782
})
783+
// Tests for Issue #2657 - Strange behavior with slightly wrong start_line
784+
// This test directly reproduces the bug where apply_diff finds line 21 instead of line 33
785+
// when start_line is 34 (one line off from the actual target at line 33)
786+
it("should find line 33 (closer match) instead of line 21 when start_line is 34", async () => {
787+
const strategy = new MultiSearchReplaceDiffStrategy(1.0, 40) // Default buffer of 40 lines
788+
789+
// Create content with "}" at lines 21 and 33
790+
const lines: string[] = []
791+
for (let i = 1; i <= 50; i++) {
792+
if (i === 21 || i === 33) {
793+
lines.push("}")
794+
} else {
795+
lines.push(`line ${i}`)
796+
}
797+
}
798+
const originalContent = lines.join("\n")
799+
800+
// The diff with start_line: 34 (one line off from actual line 33)
801+
const diffContent = `
802+
<<<<<<< SEARCH
803+
:start_line:34
804+
-------
805+
}
806+
=======
807+
} // modified
808+
>>>>>>> REPLACE`
809+
810+
const result = await strategy.applyDiff(originalContent, diffContent)
811+
812+
// The issue reports that it finds line 21 instead of line 33
813+
// Line 33 is closer to start_line 34 than line 21
814+
expect(result.success).toBe(true)
815+
816+
if (result.success) {
817+
const resultLines = result.content.split("\n")
818+
819+
// Check which line was modified
820+
const line21Modified = resultLines[20] === "} // modified"
821+
const line33Modified = resultLines[32] === "} // modified"
822+
823+
// After the fix, line33Modified should be true and line21Modified should be false
824+
expect(line21Modified).toBe(false) // Fix prevents this
825+
expect(line33Modified).toBe(true) // Fix causes this
826+
}
827+
})
828+
829+
// This test verifies the fix works correctly by ensuring the closest match
830+
// is found when multiple identical lines exist in the file
831+
it("should find closest match when multiple identical lines exist", async () => {
832+
const strategy = new MultiSearchReplaceDiffStrategy(1.0, 40)
833+
834+
// Create content with identical lines at positions 10, 25, 30, 45
835+
const lines: string[] = []
836+
for (let i = 1; i <= 50; i++) {
837+
if ([10, 25, 30, 45].includes(i)) {
838+
lines.push("duplicate line")
839+
} else {
840+
lines.push(`line ${i}`)
841+
}
842+
}
843+
const originalContent = lines.join("\n")
844+
845+
// Test 1: start_line 28 should find line 30 (distance 2)
846+
const diffContent1 = `
847+
<<<<<<< SEARCH
848+
:start_line:28
849+
-------
850+
duplicate line
851+
=======
852+
duplicate line // modified at 30
853+
>>>>>>> REPLACE`
854+
855+
const result1 = await strategy.applyDiff(originalContent, diffContent1)
856+
expect(result1.success).toBe(true)
857+
if (result1.success) {
858+
const resultLines = result1.content.split("\n")
859+
expect(resultLines[29]).toBe("duplicate line // modified at 30") // line 30 (0-indexed: 29)
860+
expect(resultLines[24]).toBe("duplicate line") // line 25 unchanged
861+
}
862+
863+
// Test 2: start_line 12 should find line 10 (distance 2)
864+
const diffContent2 = `
865+
<<<<<<< SEARCH
866+
:start_line:12
867+
-------
868+
duplicate line
869+
=======
870+
duplicate line // modified at 10
871+
>>>>>>> REPLACE`
872+
873+
const result2 = await strategy.applyDiff(originalContent, diffContent2)
874+
expect(result2.success).toBe(true)
875+
if (result2.success) {
876+
const resultLines = result2.content.split("\n")
877+
expect(resultLines[9]).toBe("duplicate line // modified at 10") // line 10 (0-indexed: 9)
878+
expect(resultLines[24]).toBe("duplicate line") // line 25 unchanged
879+
}
880+
})
783881
})
784882
})
785883

src/core/diff/strategies/multi-file-search-replace.ts

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,20 @@ function getSimilarity(original: string, search: string): number {
3333
* Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find
3434
* the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text.
3535
*/
36-
function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number) {
36+
function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number, targetLine?: number) {
3737
let bestScore = 0
3838
let bestMatchIndex = -1
3939
let bestMatchContent = ""
4040

4141
const searchLen = searchChunk.split(/\r?\n/).length
4242

43-
// Middle-out from the midpoint
44-
const midPoint = Math.floor((startIndex + endIndex) / 2)
45-
let leftIndex = midPoint
46-
let rightIndex = midPoint + 1
43+
// If targetLine is provided, start from there; otherwise use midpoint
44+
const searchStart =
45+
targetLine !== undefined
46+
? Math.max(startIndex, Math.min(targetLine, endIndex - searchLen))
47+
: Math.floor((startIndex + endIndex) / 2)
48+
let leftIndex = searchStart
49+
let rightIndex = searchStart + 1
4750

4851
while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) {
4952
if (leftIndex >= startIndex) {
@@ -581,7 +584,13 @@ Each file requires its own path, start_line, and diff elements.
581584
bestScore,
582585
bestMatchIndex,
583586
bestMatchContent: midContent,
584-
} = fuzzySearch(resultLines, searchChunk, searchStartIndex, searchEndIndex)
587+
} = fuzzySearch(
588+
resultLines,
589+
searchChunk,
590+
searchStartIndex,
591+
searchEndIndex,
592+
startLine ? startLine - 1 : undefined,
593+
)
585594

586595
matchIndex = bestMatchIndex
587596
bestMatchScore = bestScore
@@ -601,7 +610,13 @@ Each file requires its own path, start_line, and diff elements.
601610
bestScore,
602611
bestMatchIndex,
603612
bestMatchContent: aggContent,
604-
} = fuzzySearch(resultLines, aggressiveSearchChunk, searchStartIndex, searchEndIndex)
613+
} = fuzzySearch(
614+
resultLines,
615+
aggressiveSearchChunk,
616+
searchStartIndex,
617+
searchEndIndex,
618+
startLine ? startLine - 1 : undefined,
619+
)
605620

606621
if (bestMatchIndex !== -1 && bestScore >= this.fuzzyThreshold) {
607622
matchIndex = bestMatchIndex

src/core/diff/strategies/multi-search-replace.ts

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,17 +35,21 @@ function getSimilarity(original: string, search: string): number {
3535
/**
3636
* Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find
3737
* the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text.
38+
* If targetLine is provided, the search starts from that line instead of the midpoint.
3839
*/
39-
function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number) {
40+
function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number, targetLine?: number) {
4041
let bestScore = 0
4142
let bestMatchIndex = -1
4243
let bestMatchContent = ""
4344
const searchLen = searchChunk.split(/\r?\n/).length
4445

45-
// Middle-out from the midpoint
46-
const midPoint = Math.floor((startIndex + endIndex) / 2)
47-
let leftIndex = midPoint
48-
let rightIndex = midPoint + 1
46+
// If targetLine is provided, start from there; otherwise use midpoint
47+
const searchStart =
48+
targetLine !== undefined
49+
? Math.max(startIndex, Math.min(targetLine, endIndex - searchLen))
50+
: Math.floor((startIndex + endIndex) / 2)
51+
let leftIndex = searchStart
52+
let rightIndex = searchStart + 1
4953

5054
while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) {
5155
if (leftIndex >= startIndex) {
@@ -378,7 +382,7 @@ Only use a single line of '=======' between search and replacement content, beca
378382

379383
let matches = [
380384
...diffContent.matchAll(
381-
/(?:^|\n)(?<!\\)<<<<<<< SEARCH\s*\n((?:\:start_line:\s*(\d+)\s*\n))?((?:\:end_line:\s*(\d+)\s*\n))?((?<!\\)-------\s*\n)?([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)=======\s*\n)([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)/g,
385+
/(?:^|\n)(?<!\\)<<<<<<< SEARCH\s*\n((?:\:start_line:\s*(-?\d+)\s*\n))?((?:\:end_line:\s*(-?\d+)\s*\n))?((?<!\\)-------\s*\n)?([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)=======\s*\n)([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)/g,
382386
),
383387
]
384388

@@ -464,8 +468,11 @@ Only use a single line of '=======' between search and replacement content, beca
464468

465469
// Validate and handle line range if provided
466470
if (startLine) {
471+
// Clamp startLine to valid range (1 to resultLines.length)
472+
const clampedStartLine = Math.max(1, Math.min(startLine, resultLines.length))
473+
467474
// Convert to 0-based index
468-
const exactStartIndex = startLine - 1
475+
const exactStartIndex = clampedStartLine - 1
469476
const searchLen = searchLines.length
470477
const exactEndIndex = exactStartIndex + searchLen - 1
471478

@@ -478,8 +485,11 @@ Only use a single line of '=======' between search and replacement content, beca
478485
bestMatchContent = originalChunk
479486
} else {
480487
// Set bounds for buffered search
481-
searchStartIndex = Math.max(0, startLine - (this.bufferLines + 1))
482-
searchEndIndex = Math.min(resultLines.length, startLine + searchLines.length + this.bufferLines)
488+
searchStartIndex = Math.max(0, clampedStartLine - (this.bufferLines + 1))
489+
searchEndIndex = Math.min(
490+
resultLines.length,
491+
clampedStartLine + searchLines.length + this.bufferLines,
492+
)
483493
}
484494
}
485495

@@ -489,7 +499,13 @@ Only use a single line of '=======' between search and replacement content, beca
489499
bestScore,
490500
bestMatchIndex,
491501
bestMatchContent: midContent,
492-
} = fuzzySearch(resultLines, searchChunk, searchStartIndex, searchEndIndex)
502+
} = fuzzySearch(
503+
resultLines,
504+
searchChunk,
505+
searchStartIndex,
506+
searchEndIndex,
507+
startLine ? Math.max(0, Math.min(startLine - 1, resultLines.length - 1)) : undefined,
508+
)
493509
matchIndex = bestMatchIndex
494510
bestMatchScore = bestScore
495511
bestMatchContent = midContent
@@ -509,7 +525,13 @@ Only use a single line of '=======' between search and replacement content, beca
509525
bestScore,
510526
bestMatchIndex,
511527
bestMatchContent: aggContent,
512-
} = fuzzySearch(resultLines, aggressiveSearchChunk, searchStartIndex, searchEndIndex)
528+
} = fuzzySearch(
529+
resultLines,
530+
aggressiveSearchChunk,
531+
searchStartIndex,
532+
searchEndIndex,
533+
startLine ? startLine - 1 : undefined,
534+
)
513535
if (bestMatchIndex !== -1 && bestScore >= this.fuzzyThreshold) {
514536
matchIndex = bestMatchIndex
515537
bestMatchScore = bestScore

0 commit comments

Comments
 (0)