From cb03f337111e5b80a98a1c516fd3234c23d1295a Mon Sep 17 00:00:00 2001 From: Ruakij Date: Fri, 30 May 2025 08:44:14 +0200 Subject: [PATCH 1/3] Enhance MultiSearchReplaceDiffStrategy to handle line duplication heuristics in applyDiff method --- .../__tests__/multi-search-replace.test.ts | 163 ++++++++++++++++++ .../diff/strategies/multi-search-replace.ts | 37 +++- 2 files changed, 198 insertions(+), 2 deletions(-) diff --git a/src/core/diff/strategies/__tests__/multi-search-replace.test.ts b/src/core/diff/strategies/__tests__/multi-search-replace.test.ts index 37114830f3..e0573c64cc 100644 --- a/src/core/diff/strategies/__tests__/multi-search-replace.test.ts +++ b/src/core/diff/strategies/__tests__/multi-search-replace.test.ts @@ -2686,4 +2686,167 @@ function two() { expect(result.error).toContain(":start_line:5 <-- Invalid location") }) }) + + describe("Generalized line duplication heuristic", () => { + let strategy: MultiSearchReplaceDiffStrategy + + beforeEach(() => { + strategy = new MultiSearchReplaceDiffStrategy() + }) + + it("should correctly append content and avoid duplicating a closing brace", async () => { + const originalContent = `function test() { + console.log("original"); +}` + + const diffContent = `<<<<<<< SEARCH +:start_line:2 +------- + console.log("original"); +======= + console.log("original"); +} +function newFunction() { + console.log("appended"); +} +>>>>>>> REPLACE` + + const result = await strategy.applyDiff(originalContent, diffContent) + + expect(result.success).toBe(true) + if (result.success) + expect(result.content).toBe(`function test() { + console.log("original"); +} +function newFunction() { + console.log("appended"); +}`) + }) + + it("should correctly append content and avoid duplicating a common line", async () => { + const originalContent = `Section A +Common Line to keep +Section B` + + const diffContent = `<<<<<<< SEARCH +:start_line:1 +------- +Section A +======= +Section A +Common Line to keep +New Appended Content +>>>>>>> REPLACE` + + const result = await strategy.applyDiff(originalContent, diffContent) + + expect(result.success).toBe(true) + if (result.success) + expect(result.content).toBe(`Section A +Common Line to keep +New Appended Content +Section B`) + }) + + it("should not alter behavior when the line after search prefix in REPLACE differs from original line after SEARCH", async () => { + const originalContent = `Line 1 +Line 2 (original next) +Line 3` + + const diffContent = `<<<<<<< SEARCH +:start_line:1 +------- +Line 1 +======= +Line 1 +Line X (different next in replace) +Appended Content +>>>>>>> REPLACE` + + const result = await strategy.applyDiff(originalContent, diffContent) + + expect(result.success).toBe(true) + if (result.success) + expect(result.content).toBe(`Line 1 +Line X (different next in replace) +Appended Content +Line 2 (original next) +Line 3`) + }) + + it("should not trigger heuristic if SEARCH content is not a prefix of REPLACE content", async () => { + const originalContent = `Alpha +Bravo +Charlie` + + const diffContent = `<<<<<<< SEARCH +:start_line:1 +------- +Alpha +======= +Completely New Content +Even More New Content +Bravo +>>>>>>> REPLACE` + + const result = await strategy.applyDiff(originalContent, diffContent) + + expect(result.success).toBe(true) + if (result.success) + expect(result.content).toBe(`Completely New Content +Even More New Content +Bravo +Bravo +Charlie`) + }) + + it("should handle edge case where heuristic conditions are met but no line follows search in original", async () => { + const originalContent = `Last Line` + + const diffContent = `<<<<<<< SEARCH +:start_line:1 +------- +Last Line +======= +Last Line +New Line +Added Content +>>>>>>> REPLACE` + + const result = await strategy.applyDiff(originalContent, diffContent) + + expect(result.success).toBe(true) + if (result.success) + expect(result.content).toBe(`Last Line +New Line +Added Content`) + }) + + it("should handle case where similarity threshold is not met", async () => { + const originalContent = `Original Content +Next Line +Final Line` + + const diffContent = `<<<<<<< SEARCH +:start_line:1 +------- +Original Content +======= +Completely Different Content +Next Line +Appended Content +>>>>>>> REPLACE` + + const result = await strategy.applyDiff(originalContent, diffContent) + + expect(result.success).toBe(true) + // Should behave as normal replacement since similarity < 0.95 + if (result.success) + expect(result.content).toBe(`Completely Different Content +Next Line +Appended Content +Next Line +Final Line`) + }) + }) }) diff --git a/src/core/diff/strategies/multi-search-replace.ts b/src/core/diff/strategies/multi-search-replace.ts index 9e740a6571..fb980e161e 100644 --- a/src/core/diff/strategies/multi-search-replace.ts +++ b/src/core/diff/strategies/multi-search-replace.ts @@ -588,11 +588,44 @@ Only use a single line of '=======' between search and replacement content, beca return finalIndent + line.trim() }) + // Initialize effectiveSearchLinesCount (determines how many lines from original are considered "replaced") + let effectiveSearchLinesCount = searchLines.length // Default + + // Heuristic to adjust effectiveSearchLinesCount for superfluous duplicated line pattern + if (searchLines.length > 0 && replaceLines.length > searchLines.length) { + const searchBlockContent = searchLines.join("\n") + // Ensure replaceLines has enough elements before slicing + const firstPartOfReplaceBlock = replaceLines.slice(0, searchLines.length).join("\n") + + // Check if the search content is highly similar to the beginning of the replace content + if (getSimilarity(searchBlockContent, firstPartOfReplaceBlock) > 0.95) { + // Ensure there's a line in replaceLines immediately after the part that matches searchLines + if (searchLines.length < replaceLines.length) { + const lineInReplaceAfterPrefix = replaceLines[searchLines.length] + + // Ensure there's a line in the original content (resultLines) immediately after the matched search block + if (matchIndex + searchLines.length < resultLines.length) { + const lineInOriginalAfterMatchedSearch = resultLines[matchIndex + searchLines.length] + + // If the line in the replace block (after the prefix) is identical (ignoring leading/trailing whitespace) + // to the line in the original content (after the search match), + // it's likely a duplicated line scenario. + if (lineInReplaceAfterPrefix.trim() === lineInOriginalAfterMatchedSearch.trim()) { + effectiveSearchLinesCount = searchLines.length + 1 // Consume the duplicated line from the original + } + } + } + } + } + // Construct the final content const beforeMatch = resultLines.slice(0, matchIndex) - const afterMatch = resultLines.slice(matchIndex + searchLines.length) + // Use effectiveSearchLinesCount here to determine the slice point + const afterMatch = resultLines.slice(matchIndex + effectiveSearchLinesCount) + resultLines = [...beforeMatch, ...indentedReplaceLines, ...afterMatch] - delta = delta - matchedLines.length + replaceLines.length + // Use effectiveSearchLinesCount for delta calculation + delta = delta - effectiveSearchLinesCount + replaceLines.length appliedCount++ } const finalContent = resultLines.join(lineEnding) From f3bbf9590b48b5c069eff27b892673a34b499d27 Mon Sep 17 00:00:00 2001 From: Ruakij Date: Fri, 30 May 2025 08:44:14 +0200 Subject: [PATCH 2/3] Move to dedicated replacement-engine which is applied at beginning of processing every replacement-block --- .../superfluous-duplicated-line.engine.ts | 46 ++++++++++++++++++ .../diff/strategies/multi-search-replace.ts | 47 ++++++++----------- 2 files changed, 66 insertions(+), 27 deletions(-) create mode 100644 src/core/diff/strategies/engines/superfluous-duplicated-line.engine.ts diff --git a/src/core/diff/strategies/engines/superfluous-duplicated-line.engine.ts b/src/core/diff/strategies/engines/superfluous-duplicated-line.engine.ts new file mode 100644 index 0000000000..e15a0c8efc --- /dev/null +++ b/src/core/diff/strategies/engines/superfluous-duplicated-line.engine.ts @@ -0,0 +1,46 @@ +import { SearchReplaceContext } from "../multi-search-replace" + +export class SuperfluousDuplicatedLineEngine { + public static process(originalContent: string, context: SearchReplaceContext): SearchReplaceContext { + const { startLine, searchContent, replaceContent } = context + + // Early detection of superfluous duplicated line pattern + // Check if replace content has more lines than search content + const searchLines = searchContent.split(/\r?\n/) + const replaceLines = replaceContent.split(/\r?\n/) + const originalLines = originalContent.split(/\r?\n/) + + if (searchLines.length > 0 && replaceLines.length > searchLines.length && startLine > 0) { + // Check if the first part of replace content is similar to search content + const firstPartOfReplace = replaceLines.slice(0, searchLines.length).join("\n") + + // Simple similarity check (we can make this more sophisticated later) + const isFirstPartSimilar = firstPartOfReplace === searchContent + + if (isFirstPartSimilar && replaceLines.length > searchLines.length) { + // Get the line that comes after the search block in replace content + const lineAfterSearchInReplace = replaceLines[searchLines.length] + + // Get the line that comes after the search block in original content + const lineIndexInOriginal = startLine - 1 + searchLines.length + if (lineIndexInOriginal < originalLines.length) { + const lineAfterSearchInOriginal = originalLines[lineIndexInOriginal] + + // If they match, it's likely a superfluous duplicated line scenario + // We can modify the search content to include the extra line + if (lineAfterSearchInReplace.trim() === lineAfterSearchInOriginal.trim()) { + const modifiedSearchContent = searchContent + "\n" + lineAfterSearchInOriginal + return { + startLine, + searchContent: modifiedSearchContent, + replaceContent, + } + } + } + } + } + + // No modification needed, return as-is + return context + } +} diff --git a/src/core/diff/strategies/multi-search-replace.ts b/src/core/diff/strategies/multi-search-replace.ts index fb980e161e..3e31f43a9e 100644 --- a/src/core/diff/strategies/multi-search-replace.ts +++ b/src/core/diff/strategies/multi-search-replace.ts @@ -7,6 +7,13 @@ import { ToolProgressStatus } from "@roo-code/types" import { addLineNumbers, everyLineHasLineNumbers, stripLineNumbers } from "../../../integrations/misc/extract-text" import { ToolUse, DiffStrategy, DiffResult } from "../../../shared/tools" import { normalizeString } from "../../../utils/text-normalization" +import { SuperfluousDuplicatedLineEngine } from "./engines/superfluous-duplicated-line.engine" + +export interface SearchReplaceContext { + startLine: number + searchContent: string + replaceContent: string +} const BUFFER_LINES = 40 // Number of extra context lines to show before and after matches @@ -406,6 +413,19 @@ Only use a single line of '=======' between search and replacement content, beca let { searchContent, replaceContent } = replacement let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta) + // --- Start: Replacement Engine Processing --- + let context = SuperfluousDuplicatedLineEngine.process(originalContent, { + startLine, + searchContent, + replaceContent, + }) + + // Update variables from engine processing + startLine = context.startLine + searchContent = context.searchContent + replaceContent = context.replaceContent + // --- End: Replacement Engine Processing --- + // First unescape any escaped markers in the content searchContent = this.unescapeMarkers(searchContent) replaceContent = this.unescapeMarkers(replaceContent) @@ -591,33 +611,6 @@ Only use a single line of '=======' between search and replacement content, beca // Initialize effectiveSearchLinesCount (determines how many lines from original are considered "replaced") let effectiveSearchLinesCount = searchLines.length // Default - // Heuristic to adjust effectiveSearchLinesCount for superfluous duplicated line pattern - if (searchLines.length > 0 && replaceLines.length > searchLines.length) { - const searchBlockContent = searchLines.join("\n") - // Ensure replaceLines has enough elements before slicing - const firstPartOfReplaceBlock = replaceLines.slice(0, searchLines.length).join("\n") - - // Check if the search content is highly similar to the beginning of the replace content - if (getSimilarity(searchBlockContent, firstPartOfReplaceBlock) > 0.95) { - // Ensure there's a line in replaceLines immediately after the part that matches searchLines - if (searchLines.length < replaceLines.length) { - const lineInReplaceAfterPrefix = replaceLines[searchLines.length] - - // Ensure there's a line in the original content (resultLines) immediately after the matched search block - if (matchIndex + searchLines.length < resultLines.length) { - const lineInOriginalAfterMatchedSearch = resultLines[matchIndex + searchLines.length] - - // If the line in the replace block (after the prefix) is identical (ignoring leading/trailing whitespace) - // to the line in the original content (after the search match), - // it's likely a duplicated line scenario. - if (lineInReplaceAfterPrefix.trim() === lineInOriginalAfterMatchedSearch.trim()) { - effectiveSearchLinesCount = searchLines.length + 1 // Consume the duplicated line from the original - } - } - } - } - } - // Construct the final content const beforeMatch = resultLines.slice(0, matchIndex) // Use effectiveSearchLinesCount here to determine the slice point From 42879fbf481a50722d7ea7494a831623bd821de4 Mon Sep 17 00:00:00 2001 From: Ruakij Date: Fri, 30 May 2025 08:44:14 +0200 Subject: [PATCH 3/3] Generalize duplicated line detection (>=1), use match index, add tests --- .../__tests__/multi-search-replace.test.ts | 36 ++++++++++++ .../superfluous-duplicated-line.engine.ts | 57 ++++++++++++------- .../diff/strategies/multi-search-replace.ts | 30 +++++----- 3 files changed, 88 insertions(+), 35 deletions(-) diff --git a/src/core/diff/strategies/__tests__/multi-search-replace.test.ts b/src/core/diff/strategies/__tests__/multi-search-replace.test.ts index e0573c64cc..58497556b0 100644 --- a/src/core/diff/strategies/__tests__/multi-search-replace.test.ts +++ b/src/core/diff/strategies/__tests__/multi-search-replace.test.ts @@ -2748,6 +2748,42 @@ New Appended Content Section B`) }) + it("should correctly append content and avoid duplicating multiple lines with nested braces", async () => { + const originalContent = `function devConfig() { + return { + name: "test" + }; +}` + + const diffContent = `<<<<<<< SEARCH + name: "test" +======= + name: "test" + }; +} +function prodConfig() { + return { + name: "prod" + }; +} +>>>>>>> REPLACE` + + const result = await strategy.applyDiff(originalContent, diffContent) + + expect(result.success).toBe(true) + if (result.success) + expect(result.content).toBe(`function devConfig() { + return { + name: "test" + }; +} +function prodConfig() { + return { + name: "prod" + }; +}`) + }) + it("should not alter behavior when the line after search prefix in REPLACE differs from original line after SEARCH", async () => { const originalContent = `Line 1 Line 2 (original next) diff --git a/src/core/diff/strategies/engines/superfluous-duplicated-line.engine.ts b/src/core/diff/strategies/engines/superfluous-duplicated-line.engine.ts index e15a0c8efc..6d72b7b9cd 100644 --- a/src/core/diff/strategies/engines/superfluous-duplicated-line.engine.ts +++ b/src/core/diff/strategies/engines/superfluous-duplicated-line.engine.ts @@ -1,7 +1,15 @@ import { SearchReplaceContext } from "../multi-search-replace" export class SuperfluousDuplicatedLineEngine { - public static process(originalContent: string, context: SearchReplaceContext): SearchReplaceContext { + /** + * Processes a search/replace context to detect superfluous duplicated lines. + * + * @param originalContent The complete original content being modified + * @param context The search/replace context containing startLine, searchContent, and replaceContent + * @returns The number of additional lines from the original content that should be consumed + * to prevent duplication. Returns 0 if no additional lines should be consumed. + */ + public static process(originalContent: string, context: SearchReplaceContext): number { const { startLine, searchContent, replaceContent } = context // Early detection of superfluous duplicated line pattern @@ -18,29 +26,38 @@ export class SuperfluousDuplicatedLineEngine { const isFirstPartSimilar = firstPartOfReplace === searchContent if (isFirstPartSimilar && replaceLines.length > searchLines.length) { - // Get the line that comes after the search block in replace content - const lineAfterSearchInReplace = replaceLines[searchLines.length] - - // Get the line that comes after the search block in original content - const lineIndexInOriginal = startLine - 1 + searchLines.length - if (lineIndexInOriginal < originalLines.length) { - const lineAfterSearchInOriginal = originalLines[lineIndexInOriginal] - - // If they match, it's likely a superfluous duplicated line scenario - // We can modify the search content to include the extra line - if (lineAfterSearchInReplace.trim() === lineAfterSearchInOriginal.trim()) { - const modifiedSearchContent = searchContent + "\n" + lineAfterSearchInOriginal - return { - startLine, - searchContent: modifiedSearchContent, - replaceContent, - } + // Check for any number of consecutive matching lines after the search block + let matchingLinesCount = 0 + const searchEndIndex = startLine - 1 + searchLines.length + const maxPossibleMatches = Math.min( + replaceLines.length - searchLines.length, // Available lines in replace content + originalLines.length - searchEndIndex, // Available lines in original content after search + ) + + for (let i = 0; i < maxPossibleMatches; i++) { + const replaceLineIndex = searchLines.length + i + const originalLineIndex = searchEndIndex + i + + const lineInReplace = replaceLines[replaceLineIndex] + const lineInOriginal = originalLines[originalLineIndex] + + // Check if lines match (trimmed comparison to handle whitespace differences) + if (lineInReplace && lineInOriginal && lineInReplace.trim() === lineInOriginal.trim()) { + matchingLinesCount++ + } else { + // Stop at the first non-matching line + break } } + + // If we found any matching lines, return the count + if (matchingLinesCount > 0) { + return matchingLinesCount + } } } - // No modification needed, return as-is - return context + // No additional lines to consume + return 0 } } diff --git a/src/core/diff/strategies/multi-search-replace.ts b/src/core/diff/strategies/multi-search-replace.ts index 3e31f43a9e..ea5d704198 100644 --- a/src/core/diff/strategies/multi-search-replace.ts +++ b/src/core/diff/strategies/multi-search-replace.ts @@ -401,7 +401,7 @@ Only use a single line of '=======' between search and replacement content, beca let delta = 0 let diffResults: DiffResult[] = [] let appliedCount = 0 - const replacements = matches + const replacements: SearchReplaceContext[] = matches .map((match) => ({ startLine: Number(match[2] ?? 0), searchContent: match[6], @@ -413,19 +413,6 @@ Only use a single line of '=======' between search and replacement content, beca let { searchContent, replaceContent } = replacement let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta) - // --- Start: Replacement Engine Processing --- - let context = SuperfluousDuplicatedLineEngine.process(originalContent, { - startLine, - searchContent, - replaceContent, - }) - - // Update variables from engine processing - startLine = context.startLine - searchContent = context.searchContent - replaceContent = context.replaceContent - // --- End: Replacement Engine Processing --- - // First unescape any escaped markers in the content searchContent = this.unescapeMarkers(searchContent) replaceContent = this.unescapeMarkers(replaceContent) @@ -568,6 +555,19 @@ Only use a single line of '=======' between search and replacement content, beca } } + // --- Start: Replacement Engine Processing --- + // Now that we found the match, call the engine with the actual match location + const engineContext = { + startLine: matchIndex + 1, // Convert back to 1-based index for the engine + searchContent: replacement.searchContent, + replaceContent: replacement.replaceContent, + } + const additionalLinesToConsume = SuperfluousDuplicatedLineEngine.process( + resultLines.join("\n"), + engineContext, + ) + // --- End: Replacement Engine Processing --- + // Get the matched lines from the original content const matchedLines = resultLines.slice(matchIndex, matchIndex + searchLines.length) @@ -609,7 +609,7 @@ Only use a single line of '=======' between search and replacement content, beca }) // Initialize effectiveSearchLinesCount (determines how many lines from original are considered "replaced") - let effectiveSearchLinesCount = searchLines.length // Default + let effectiveSearchLinesCount = searchLines.length + additionalLinesToConsume // Construct the final content const beforeMatch = resultLines.slice(0, matchIndex)