Skip to content

Commit c18e25f

Browse files
authored
Fall back on aggressive line number stripping in diffs (#2453)
* Add option for aggressive line number stripping * Fall back on aggressive line number stripping in diffs
1 parent 4e71626 commit c18e25f

File tree

4 files changed

+219
-89
lines changed

4 files changed

+219
-89
lines changed

src/core/diff/strategies/__tests__/multi-search-replace.test.ts

Lines changed: 53 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -815,23 +815,6 @@ function five() {
815815
}
816816
})
817817

818-
it("should not strip when not all lines have numbers in either section", async () => {
819-
const originalContent = "function test() {\n return true;\n}\n"
820-
const diffContent = `test.ts
821-
<<<<<<< SEARCH
822-
1 | function test() {
823-
2 | return true;
824-
3 | }
825-
=======
826-
1 | function test() {
827-
return false;
828-
3 | }
829-
>>>>>>> REPLACE`
830-
831-
const result = await strategy.applyDiff(originalContent, diffContent)
832-
expect(result.success).toBe(false)
833-
})
834-
835818
it("should preserve content that naturally starts with pipe", async () => {
836819
const originalContent = "|header|another|\n|---|---|\n|data|more|\n"
837820
const diffContent = `test.ts
@@ -852,6 +835,59 @@ function five() {
852835
}
853836
})
854837

838+
describe("aggressive line number stripping fallback", () => {
839+
// Tests for aggressive line number stripping fallback
840+
it("should use aggressive line number stripping when line numbers are inconsistent", async () => {
841+
const originalContent = "function test() {\n return true;\n}\n"
842+
843+
const diffContent = [
844+
"<<<<<<< SEARCH",
845+
":start_line:1",
846+
":end_line:3",
847+
"-------",
848+
"1 | function test() {",
849+
" return true;", // missing line number
850+
"3 | }",
851+
"=======",
852+
"function test() {",
853+
" return fallback;",
854+
"}",
855+
">>>>>>> REPLACE",
856+
].join("\n")
857+
858+
const result = await strategy.applyDiff(originalContent, diffContent)
859+
expect(result.success).toBe(true)
860+
if (result.success) {
861+
expect(result.content).toBe("function test() {\n return fallback;\n}\n")
862+
}
863+
})
864+
865+
it("should handle pipe characters without numbers using aggressive fallback", async () => {
866+
const originalContent = "function test() {\n return true;\n}\n"
867+
868+
const diffContent = [
869+
"<<<<<<< SEARCH",
870+
":start_line:1",
871+
":end_line:3",
872+
"-------",
873+
"| function test() {",
874+
"| return true;",
875+
"| }",
876+
"=======",
877+
"function test() {",
878+
" return piped;",
879+
"}",
880+
">>>>>>> REPLACE",
881+
].join("\n")
882+
883+
const result = await strategy.applyDiff(originalContent, diffContent)
884+
expect(result.success).toBe(true)
885+
if (result.success) {
886+
expect(result.content).toBe("function test() {\n return piped;\n}\n")
887+
}
888+
})
889+
})
890+
855891
it("should preserve indentation when stripping line numbers", async () => {
856892
const originalContent = " function test() {\n return true;\n }\n"
857893
const diffContent = `test.ts

src/core/diff/strategies/multi-search-replace.ts

Lines changed: 115 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,48 @@ function getSimilarity(original: string, search: string): number {
2929
return 1 - dist / maxLength
3030
}
3131

32+
/**
33+
* Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find
34+
* the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text.
35+
*/
36+
function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number) {
37+
let bestScore = 0
38+
let bestMatchIndex = -1
39+
let bestMatchContent = ""
40+
const searchLen = searchChunk.split(/\r?\n/).length
41+
42+
// Middle-out from the midpoint
43+
const midPoint = Math.floor((startIndex + endIndex) / 2)
44+
let leftIndex = midPoint
45+
let rightIndex = midPoint + 1
46+
47+
while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) {
48+
if (leftIndex >= startIndex) {
49+
const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n")
50+
const similarity = getSimilarity(originalChunk, searchChunk)
51+
if (similarity > bestScore) {
52+
bestScore = similarity
53+
bestMatchIndex = leftIndex
54+
bestMatchContent = originalChunk
55+
}
56+
leftIndex--
57+
}
58+
59+
if (rightIndex <= endIndex - searchLen) {
60+
const originalChunk = lines.slice(rightIndex, rightIndex + searchLen).join("\n")
61+
const similarity = getSimilarity(originalChunk, searchChunk)
62+
if (similarity > bestScore) {
63+
bestScore = similarity
64+
bestMatchIndex = rightIndex
65+
bestMatchContent = originalChunk
66+
}
67+
rightIndex++
68+
}
69+
}
70+
71+
return { bestScore, bestMatchIndex, bestMatchContent }
72+
}
73+
3274
export class MultiSearchReplaceDiffStrategy implements DiffStrategy {
3375
private fuzzyThreshold: number
3476
private bufferLines: number
@@ -253,7 +295,9 @@ Only use a single line of '=======' between search and replacement content, beca
253295
? { success: true }
254296
: {
255297
success: false,
256-
error: `ERROR: Unexpected end of sequence: Expected '${state.current === State.AFTER_SEARCH ? SEP : REPLACE}' was not found.`,
298+
error: `ERROR: Unexpected end of sequence: Expected '${
299+
state.current === State.AFTER_SEARCH ? "=======" : ">>>>>>> REPLACE"
300+
}' was not found.`,
257301
}
258302
}
259303

@@ -329,19 +373,21 @@ Only use a single line of '=======' between search and replacement content, beca
329373
}))
330374
.sort((a, b) => a.startLine - b.startLine)
331375

332-
for (let { searchContent, replaceContent, startLine, endLine } of replacements) {
333-
startLine += startLine === 0 ? 0 : delta
334-
endLine += delta
376+
for (const replacement of replacements) {
377+
let { searchContent, replaceContent } = replacement
378+
let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta)
379+
let endLine = replacement.endLine + delta
335380

336381
// First unescape any escaped markers in the content
337382
searchContent = this.unescapeMarkers(searchContent)
338383
replaceContent = this.unescapeMarkers(replaceContent)
339384

340385
// Strip line numbers from search and replace content if every line starts with a line number
341-
if (
386+
const hasAllLineNumbers =
342387
(everyLineHasLineNumbers(searchContent) && everyLineHasLineNumbers(replaceContent)) ||
343388
(everyLineHasLineNumbers(searchContent) && replaceContent.trim() === "")
344-
) {
389+
390+
if (hasAllLineNumbers) {
345391
searchContent = stripLineNumbers(searchContent)
346392
replaceContent = stripLineNumbers(replaceContent)
347393
}
@@ -360,8 +406,8 @@ Only use a single line of '=======' between search and replacement content, beca
360406
}
361407

362408
// Split content into lines, handling both \n and \r\n
363-
const searchLines = searchContent === "" ? [] : searchContent.split(/\r?\n/)
364-
const replaceLines = replaceContent === "" ? [] : replaceContent.split(/\r?\n/)
409+
let searchLines = searchContent === "" ? [] : searchContent.split(/\r?\n/)
410+
let replaceLines = replaceContent === "" ? [] : replaceContent.split(/\r?\n/)
365411

366412
// Validate that empty search requires start line
367413
if (searchLines.length === 0 && !startLine) {
@@ -385,7 +431,7 @@ Only use a single line of '=======' between search and replacement content, beca
385431
let matchIndex = -1
386432
let bestMatchScore = 0
387433
let bestMatchContent = ""
388-
const searchChunk = searchLines.join("\n")
434+
let searchChunk = searchLines.join("\n")
389435

390436
// Determine search bounds
391437
let searchStartIndex = 0
@@ -421,68 +467,70 @@ Only use a single line of '=======' between search and replacement content, beca
421467

422468
// If no match found yet, try middle-out search within bounds
423469
if (matchIndex === -1) {
424-
const midPoint = Math.floor((searchStartIndex + searchEndIndex) / 2)
425-
let leftIndex = midPoint
426-
let rightIndex = midPoint + 1
427-
428-
// Search outward from the middle within bounds
429-
while (leftIndex >= searchStartIndex || rightIndex <= searchEndIndex - searchLines.length) {
430-
// Check left side if still in range
431-
if (leftIndex >= searchStartIndex) {
432-
const originalChunk = resultLines.slice(leftIndex, leftIndex + searchLines.length).join("\n")
433-
const similarity = getSimilarity(originalChunk, searchChunk)
434-
if (similarity > bestMatchScore) {
435-
bestMatchScore = similarity
436-
matchIndex = leftIndex
437-
bestMatchContent = originalChunk
438-
}
439-
leftIndex--
440-
}
441-
442-
// Check right side if still in range
443-
if (rightIndex <= searchEndIndex - searchLines.length) {
444-
const originalChunk = resultLines.slice(rightIndex, rightIndex + searchLines.length).join("\n")
445-
const similarity = getSimilarity(originalChunk, searchChunk)
446-
if (similarity > bestMatchScore) {
447-
bestMatchScore = similarity
448-
matchIndex = rightIndex
449-
bestMatchContent = originalChunk
450-
}
451-
rightIndex++
452-
}
453-
}
470+
const {
471+
bestScore,
472+
bestMatchIndex,
473+
bestMatchContent: midContent,
474+
} = fuzzySearch(resultLines, searchChunk, searchStartIndex, searchEndIndex)
475+
matchIndex = bestMatchIndex
476+
bestMatchScore = bestScore
477+
bestMatchContent = midContent
454478
}
455479

456-
// Require similarity to meet threshold
480+
// Try aggressive line number stripping as a fallback if regular matching fails
457481
if (matchIndex === -1 || bestMatchScore < this.fuzzyThreshold) {
458-
const searchChunk = searchLines.join("\n")
459-
const originalContentSection =
460-
startLine !== undefined && endLine !== undefined
461-
? `\n\nOriginal Content:\n${addLineNumbers(
462-
resultLines
463-
.slice(
464-
Math.max(0, startLine - 1 - this.bufferLines),
465-
Math.min(resultLines.length, endLine + this.bufferLines),
466-
)
467-
.join("\n"),
468-
Math.max(1, startLine - this.bufferLines),
469-
)}`
470-
: `\n\nOriginal Content:\n${addLineNumbers(resultLines.join("\n"))}`
471-
472-
const bestMatchSection = bestMatchContent
473-
? `\n\nBest Match Found:\n${addLineNumbers(bestMatchContent, matchIndex + 1)}`
474-
: `\n\nBest Match Found:\n(no match)`
475-
476-
const lineRange =
477-
startLine || endLine
478-
? ` at ${startLine ? `start: ${startLine}` : "start"} to ${endLine ? `end: ${endLine}` : "end"}`
479-
: ""
482+
// Strip both search and replace content once (simultaneously)
483+
const aggressiveSearchContent = stripLineNumbers(searchContent, true)
484+
const aggressiveReplaceContent = stripLineNumbers(replaceContent, true)
485+
486+
const aggressiveSearchLines = aggressiveSearchContent ? aggressiveSearchContent.split(/\r?\n/) : []
487+
const aggressiveSearchChunk = aggressiveSearchLines.join("\n")
488+
489+
// Try middle-out search again with aggressive stripped content (respecting the same search bounds)
490+
const {
491+
bestScore,
492+
bestMatchIndex,
493+
bestMatchContent: aggContent,
494+
} = fuzzySearch(resultLines, aggressiveSearchChunk, searchStartIndex, searchEndIndex)
495+
if (bestMatchIndex !== -1 && bestScore >= this.fuzzyThreshold) {
496+
matchIndex = bestMatchIndex
497+
bestMatchScore = bestScore
498+
bestMatchContent = aggContent
499+
// Replace the original search/replace with their stripped versions
500+
searchContent = aggressiveSearchContent
501+
replaceContent = aggressiveReplaceContent
502+
searchLines = aggressiveSearchLines
503+
replaceLines = replaceContent ? replaceContent.split(/\r?\n/) : []
504+
} else {
505+
// No match found with either method
506+
const originalContentSection =
507+
startLine !== undefined && endLine !== undefined
508+
? `\n\nOriginal Content:\n${addLineNumbers(
509+
resultLines
510+
.slice(
511+
Math.max(0, startLine - 1 - this.bufferLines),
512+
Math.min(resultLines.length, endLine + this.bufferLines),
513+
)
514+
.join("\n"),
515+
Math.max(1, startLine - this.bufferLines),
516+
)}`
517+
: `\n\nOriginal Content:\n${addLineNumbers(resultLines.join("\n"))}`
518+
519+
const bestMatchSection = bestMatchContent
520+
? `\n\nBest Match Found:\n${addLineNumbers(bestMatchContent, matchIndex + 1)}`
521+
: `\n\nBest Match Found:\n(no match)`
522+
523+
const lineRange =
524+
startLine || endLine
525+
? ` at ${startLine ? `start: ${startLine}` : "start"} to ${endLine ? `end: ${endLine}` : "end"}`
526+
: ""
480527

481-
diffResults.push({
482-
success: false,
483-
error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(this.fuzzyThreshold * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(this.fuzzyThreshold * 100)}%\n- Search Range: ${startLine && endLine ? `lines ${startLine}-${endLine}` : "start to end"}\n- Tip: Use the read_file tool to get the latest content of the file before attempting to use the apply_diff tool again, as the file content may have changed\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`,
484-
})
485-
continue
528+
diffResults.push({
529+
success: false,
530+
error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(this.fuzzyThreshold * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(this.fuzzyThreshold * 100)}%\n- Search Range: ${startLine && endLine ? `lines ${startLine}-${endLine}` : "start to end"}\n- Tried both standard and aggressive line number stripping\n- Tip: Use the read_file tool to get the latest content of the file before attempting to use the apply_diff tool again, as the file content may have changed\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`,
531+
})
532+
continue
533+
}
486534
}
487535

488536
// Get the matched lines from the original content

src/integrations/misc/__tests__/extract-text.test.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,46 @@ describe("stripLineNumbers", () => {
137137
const expected = "line one\nline two\nline three"
138138
expect(stripLineNumbers(input)).toBe(expected)
139139
})
140+
141+
describe("aggressive mode", () => {
142+
it("should strip content with just a pipe character", () => {
143+
const input = "| line one\n| line two\n| line three"
144+
const expected = "line one\nline two\nline three"
145+
expect(stripLineNumbers(input, true)).toBe(expected)
146+
})
147+
148+
it("should strip content with mixed formats in aggressive mode", () => {
149+
const input = "1 | line one\n| line two\n123 | line three"
150+
const expected = "line one\nline two\nline three"
151+
expect(stripLineNumbers(input, true)).toBe(expected)
152+
})
153+
154+
it("should not strip content with pipe characters not at start in aggressive mode", () => {
155+
const input = "text | more text\nx | y"
156+
expect(stripLineNumbers(input, true)).toBe(input)
157+
})
158+
159+
it("should handle empty content in aggressive mode", () => {
160+
expect(stripLineNumbers("", true)).toBe("")
161+
})
162+
163+
it("should preserve padding after pipe in aggressive mode", () => {
164+
const input = "| line with extra spaces\n1 | indented content"
165+
const expected = " line with extra spaces\n indented content"
166+
expect(stripLineNumbers(input, true)).toBe(expected)
167+
})
168+
169+
it("should preserve windows-style line endings in aggressive mode", () => {
170+
const input = "| line one\r\n| line two\r\n| line three"
171+
const expected = "line one\r\nline two\r\nline three"
172+
expect(stripLineNumbers(input, true)).toBe(expected)
173+
})
174+
175+
it("should not affect regular content when using aggressive mode", () => {
176+
const input = "regular line\nanother line\nno pipes here"
177+
expect(stripLineNumbers(input, true)).toBe(input)
178+
})
179+
})
140180
})
141181

142182
describe("truncateOutput", () => {

src/integrations/misc/extract-text.ts

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,17 +86,23 @@ export function everyLineHasLineNumbers(content: string): boolean {
8686
return lines.length > 0 && lines.every((line) => /^\s*\d+\s+\|(?!\|)/.test(line))
8787
}
8888

89-
// Strips line numbers from content while preserving the actual content
90-
// Handles formats like "1 | content", " 12 | content", "123 | content"
91-
// Preserves content that naturally starts with pipe characters
92-
export function stripLineNumbers(content: string): string {
89+
/**
90+
* Strips line numbers from content while preserving the actual content.
91+
*
92+
* @param content The content to process
93+
* @param aggressive When false (default): Only strips lines with clear number patterns like "123 | content"
94+
* When true: Uses a more lenient pattern that also matches lines with just a pipe character,
95+
* which can be useful when LLMs don't perfectly format the line numbers in diffs
96+
* @returns The content with line numbers removed
97+
*/
98+
export function stripLineNumbers(content: string, aggressive: boolean = false): string {
9399
// Split into lines to handle each line individually
94100
const lines = content.split(/\r?\n/)
95101

96102
// Process each line
97103
const processedLines = lines.map((line) => {
98104
// Match line number pattern and capture everything after the pipe
99-
const match = line.match(/^\s*\d+\s+\|(?!\|)\s?(.*)$/)
105+
const match = aggressive ? line.match(/^\s*(?:\d+\s)?\|\s(.*)$/) : line.match(/^\s*\d+\s+\|(?!\|)\s?(.*)$/)
100106
return match ? match[1] : line
101107
})
102108

0 commit comments

Comments
 (0)