Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 53 additions & 17 deletions src/core/diff/strategies/__tests__/multi-search-replace.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -815,23 +815,6 @@ function five() {
}
})

it("should not strip when not all lines have numbers in either section", async () => {
const originalContent = "function test() {\n return true;\n}\n"
const diffContent = `test.ts
<<<<<<< SEARCH
1 | function test() {
2 | return true;
3 | }
=======
1 | function test() {
return false;
3 | }
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(false)
})

it("should preserve content that naturally starts with pipe", async () => {
const originalContent = "|header|another|\n|---|---|\n|data|more|\n"
const diffContent = `test.ts
Expand All @@ -852,6 +835,59 @@ function five() {
}
})

describe("aggressive line number stripping fallback", () => {
// Tests for aggressive line number stripping fallback
it("should use aggressive line number stripping when line numbers are inconsistent", async () => {
const originalContent = "function test() {\n return true;\n}\n"

const diffContent = [
"<<<<<<< SEARCH",
":start_line:1",
":end_line:3",
"-------",
"1 | function test() {",
" return true;", // missing line number
"3 | }",
"=======",
"function test() {",
" return fallback;",
"}",
">>>>>>> REPLACE",
].join("\n")

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe("function test() {\n return fallback;\n}\n")
}
})

it("should handle pipe characters without numbers using aggressive fallback", async () => {
const originalContent = "function test() {\n return true;\n}\n"

const diffContent = [
"<<<<<<< SEARCH",
":start_line:1",
":end_line:3",
"-------",
"| function test() {",
"| return true;",
"| }",
"=======",
"function test() {",
" return piped;",
"}",
">>>>>>> REPLACE",
].join("\n")

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe("function test() {\n return piped;\n}\n")
}
})
})

it("should preserve indentation when stripping line numbers", async () => {
const originalContent = " function test() {\n return true;\n }\n"
const diffContent = `test.ts
Expand Down
182 changes: 115 additions & 67 deletions src/core/diff/strategies/multi-search-replace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,48 @@ function getSimilarity(original: string, search: string): number {
return 1 - dist / maxLength
}

/**
* Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find
* the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text.
*/
function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number) {
let bestScore = 0
let bestMatchIndex = -1
let bestMatchContent = ""
const searchLen = searchChunk.split(/\r?\n/).length

// Middle-out from the midpoint
const midPoint = Math.floor((startIndex + endIndex) / 2)
let leftIndex = midPoint
let rightIndex = midPoint + 1

while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) {
if (leftIndex >= startIndex) {
const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n")
const similarity = getSimilarity(originalChunk, searchChunk)
if (similarity > bestScore) {
bestScore = similarity
bestMatchIndex = leftIndex
bestMatchContent = originalChunk
}
leftIndex--
}

if (rightIndex <= endIndex - searchLen) {
const originalChunk = lines.slice(rightIndex, rightIndex + searchLen).join("\n")
const similarity = getSimilarity(originalChunk, searchChunk)
if (similarity > bestScore) {
bestScore = similarity
bestMatchIndex = rightIndex
bestMatchContent = originalChunk
}
rightIndex++
}
}

return { bestScore, bestMatchIndex, bestMatchContent }
}

export class MultiSearchReplaceDiffStrategy implements DiffStrategy {
private fuzzyThreshold: number
private bufferLines: number
Expand Down Expand Up @@ -253,7 +295,9 @@ Only use a single line of '=======' between search and replacement content, beca
? { success: true }
: {
success: false,
error: `ERROR: Unexpected end of sequence: Expected '${state.current === State.AFTER_SEARCH ? SEP : REPLACE}' was not found.`,
error: `ERROR: Unexpected end of sequence: Expected '${
state.current === State.AFTER_SEARCH ? "=======" : ">>>>>>> REPLACE"
}' was not found.`,
}
}

Expand Down Expand Up @@ -329,19 +373,21 @@ Only use a single line of '=======' between search and replacement content, beca
}))
.sort((a, b) => a.startLine - b.startLine)

for (let { searchContent, replaceContent, startLine, endLine } of replacements) {
startLine += startLine === 0 ? 0 : delta
endLine += delta
for (const replacement of replacements) {
let { searchContent, replaceContent } = replacement
let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta)
let endLine = replacement.endLine + delta

// First unescape any escaped markers in the content
searchContent = this.unescapeMarkers(searchContent)
replaceContent = this.unescapeMarkers(replaceContent)

// Strip line numbers from search and replace content if every line starts with a line number
if (
const hasAllLineNumbers =
(everyLineHasLineNumbers(searchContent) && everyLineHasLineNumbers(replaceContent)) ||
(everyLineHasLineNumbers(searchContent) && replaceContent.trim() === "")
) {

if (hasAllLineNumbers) {
searchContent = stripLineNumbers(searchContent)
replaceContent = stripLineNumbers(replaceContent)
}
Expand All @@ -360,8 +406,8 @@ Only use a single line of '=======' between search and replacement content, beca
}

// Split content into lines, handling both \n and \r\n
const searchLines = searchContent === "" ? [] : searchContent.split(/\r?\n/)
const replaceLines = replaceContent === "" ? [] : replaceContent.split(/\r?\n/)
let searchLines = searchContent === "" ? [] : searchContent.split(/\r?\n/)
let replaceLines = replaceContent === "" ? [] : replaceContent.split(/\r?\n/)

// Validate that empty search requires start line
if (searchLines.length === 0 && !startLine) {
Expand All @@ -385,7 +431,7 @@ Only use a single line of '=======' between search and replacement content, beca
let matchIndex = -1
let bestMatchScore = 0
let bestMatchContent = ""
const searchChunk = searchLines.join("\n")
let searchChunk = searchLines.join("\n")

// Determine search bounds
let searchStartIndex = 0
Expand Down Expand Up @@ -421,68 +467,70 @@ Only use a single line of '=======' between search and replacement content, beca

// If no match found yet, try middle-out search within bounds
if (matchIndex === -1) {
const midPoint = Math.floor((searchStartIndex + searchEndIndex) / 2)
let leftIndex = midPoint
let rightIndex = midPoint + 1

// Search outward from the middle within bounds
while (leftIndex >= searchStartIndex || rightIndex <= searchEndIndex - searchLines.length) {
// Check left side if still in range
if (leftIndex >= searchStartIndex) {
const originalChunk = resultLines.slice(leftIndex, leftIndex + searchLines.length).join("\n")
const similarity = getSimilarity(originalChunk, searchChunk)
if (similarity > bestMatchScore) {
bestMatchScore = similarity
matchIndex = leftIndex
bestMatchContent = originalChunk
}
leftIndex--
}

// Check right side if still in range
if (rightIndex <= searchEndIndex - searchLines.length) {
const originalChunk = resultLines.slice(rightIndex, rightIndex + searchLines.length).join("\n")
const similarity = getSimilarity(originalChunk, searchChunk)
if (similarity > bestMatchScore) {
bestMatchScore = similarity
matchIndex = rightIndex
bestMatchContent = originalChunk
}
rightIndex++
}
}
const {
bestScore,
bestMatchIndex,
bestMatchContent: midContent,
} = fuzzySearch(resultLines, searchChunk, searchStartIndex, searchEndIndex)
matchIndex = bestMatchIndex
bestMatchScore = bestScore
bestMatchContent = midContent
}

// Require similarity to meet threshold
// Try aggressive line number stripping as a fallback if regular matching fails
if (matchIndex === -1 || bestMatchScore < this.fuzzyThreshold) {
const searchChunk = searchLines.join("\n")
const originalContentSection =
startLine !== undefined && endLine !== undefined
? `\n\nOriginal Content:\n${addLineNumbers(
resultLines
.slice(
Math.max(0, startLine - 1 - this.bufferLines),
Math.min(resultLines.length, endLine + this.bufferLines),
)
.join("\n"),
Math.max(1, startLine - this.bufferLines),
)}`
: `\n\nOriginal Content:\n${addLineNumbers(resultLines.join("\n"))}`

const bestMatchSection = bestMatchContent
? `\n\nBest Match Found:\n${addLineNumbers(bestMatchContent, matchIndex + 1)}`
: `\n\nBest Match Found:\n(no match)`

const lineRange =
startLine || endLine
? ` at ${startLine ? `start: ${startLine}` : "start"} to ${endLine ? `end: ${endLine}` : "end"}`
: ""
// Strip both search and replace content once (simultaneously)
const aggressiveSearchContent = stripLineNumbers(searchContent, true)
const aggressiveReplaceContent = stripLineNumbers(replaceContent, true)

const aggressiveSearchLines = aggressiveSearchContent ? aggressiveSearchContent.split(/\r?\n/) : []
const aggressiveSearchChunk = aggressiveSearchLines.join("\n")

// Try middle-out search again with aggressive stripped content (respecting the same search bounds)
const {
bestScore,
bestMatchIndex,
bestMatchContent: aggContent,
} = fuzzySearch(resultLines, aggressiveSearchChunk, searchStartIndex, searchEndIndex)
if (bestMatchIndex !== -1 && bestScore >= this.fuzzyThreshold) {
matchIndex = bestMatchIndex
bestMatchScore = bestScore
bestMatchContent = aggContent
// Replace the original search/replace with their stripped versions
searchContent = aggressiveSearchContent
replaceContent = aggressiveReplaceContent
searchLines = aggressiveSearchLines
replaceLines = replaceContent ? replaceContent.split(/\r?\n/) : []
} else {
// No match found with either method
const originalContentSection =
startLine !== undefined && endLine !== undefined
? `\n\nOriginal Content:\n${addLineNumbers(
resultLines
.slice(
Math.max(0, startLine - 1 - this.bufferLines),
Math.min(resultLines.length, endLine + this.bufferLines),
)
.join("\n"),
Math.max(1, startLine - this.bufferLines),
)}`
: `\n\nOriginal Content:\n${addLineNumbers(resultLines.join("\n"))}`

const bestMatchSection = bestMatchContent
? `\n\nBest Match Found:\n${addLineNumbers(bestMatchContent, matchIndex + 1)}`
: `\n\nBest Match Found:\n(no match)`

const lineRange =
startLine || endLine
? ` at ${startLine ? `start: ${startLine}` : "start"} to ${endLine ? `end: ${endLine}` : "end"}`
: ""

diffResults.push({
success: false,
error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(this.fuzzyThreshold * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(this.fuzzyThreshold * 100)}%\n- Search Range: ${startLine && endLine ? `lines ${startLine}-${endLine}` : "start to end"}\n- Tip: Use the read_file tool to get the latest content of the file before attempting to use the apply_diff tool again, as the file content may have changed\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`,
})
continue
diffResults.push({
success: false,
error: `No sufficiently similar match found${lineRange} (${Math.floor(bestMatchScore * 100)}% similar, needs ${Math.floor(this.fuzzyThreshold * 100)}%)\n\nDebug Info:\n- Similarity Score: ${Math.floor(bestMatchScore * 100)}%\n- Required Threshold: ${Math.floor(this.fuzzyThreshold * 100)}%\n- Search Range: ${startLine && endLine ? `lines ${startLine}-${endLine}` : "start to end"}\n- Tried both standard and aggressive line number stripping\n- Tip: Use the read_file tool to get the latest content of the file before attempting to use the apply_diff tool again, as the file content may have changed\n\nSearch Content:\n${searchChunk}${bestMatchSection}${originalContentSection}`,
})
continue
}
}

// Get the matched lines from the original content
Expand Down
40 changes: 40 additions & 0 deletions src/integrations/misc/__tests__/extract-text.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,46 @@ describe("stripLineNumbers", () => {
const expected = "line one\nline two\nline three"
expect(stripLineNumbers(input)).toBe(expected)
})

describe("aggressive mode", () => {
it("should strip content with just a pipe character", () => {
const input = "| line one\n| line two\n| line three"
const expected = "line one\nline two\nline three"
expect(stripLineNumbers(input, true)).toBe(expected)
})

it("should strip content with mixed formats in aggressive mode", () => {
const input = "1 | line one\n| line two\n123 | line three"
const expected = "line one\nline two\nline three"
expect(stripLineNumbers(input, true)).toBe(expected)
})

it("should not strip content with pipe characters not at start in aggressive mode", () => {
const input = "text | more text\nx | y"
expect(stripLineNumbers(input, true)).toBe(input)
})

it("should handle empty content in aggressive mode", () => {
expect(stripLineNumbers("", true)).toBe("")
})

it("should preserve padding after pipe in aggressive mode", () => {
const input = "| line with extra spaces\n1 | indented content"
const expected = " line with extra spaces\n indented content"
expect(stripLineNumbers(input, true)).toBe(expected)
})

it("should preserve windows-style line endings in aggressive mode", () => {
const input = "| line one\r\n| line two\r\n| line three"
const expected = "line one\r\nline two\r\nline three"
expect(stripLineNumbers(input, true)).toBe(expected)
})

it("should not affect regular content when using aggressive mode", () => {
const input = "regular line\nanother line\nno pipes here"
expect(stripLineNumbers(input, true)).toBe(input)
})
})
})

describe("truncateOutput", () => {
Expand Down
16 changes: 11 additions & 5 deletions src/integrations/misc/extract-text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,23 @@ export function everyLineHasLineNumbers(content: string): boolean {
return lines.length > 0 && lines.every((line) => /^\s*\d+\s+\|(?!\|)/.test(line))
}

// Strips line numbers from content while preserving the actual content
// Handles formats like "1 | content", " 12 | content", "123 | content"
// Preserves content that naturally starts with pipe characters
export function stripLineNumbers(content: string): string {
/**
* Strips line numbers from content while preserving the actual content.
*
* @param content The content to process
* @param aggressive When false (default): Only strips lines with clear number patterns like "123 | content"
* When true: Uses a more lenient pattern that also matches lines with just a pipe character,
* which can be useful when LLMs don't perfectly format the line numbers in diffs
* @returns The content with line numbers removed
*/
export function stripLineNumbers(content: string, aggressive: boolean = false): string {
// Split into lines to handle each line individually
const lines = content.split(/\r?\n/)

// Process each line
const processedLines = lines.map((line) => {
// Match line number pattern and capture everything after the pipe
const match = line.match(/^\s*\d+\s+\|(?!\|)\s?(.*)$/)
const match = aggressive ? line.match(/^\s*(?:\d+\s)?\|\s(.*)$/) : line.match(/^\s*\d+\s+\|(?!\|)\s?(.*)$/)
return match ? match[1] : line
})

Expand Down