diff --git a/src/core/diff/strategies/__tests__/multi-search-replace-hanging.spec.ts b/src/core/diff/strategies/__tests__/multi-search-replace-hanging.spec.ts new file mode 100644 index 00000000000..0f1af972910 --- /dev/null +++ b/src/core/diff/strategies/__tests__/multi-search-replace-hanging.spec.ts @@ -0,0 +1,274 @@ +import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace" +import { MultiFileSearchReplaceDiffStrategy } from "../multi-file-search-replace" +import { DiffResult } from "../../../../shared/tools" + +describe("MultiSearchReplaceDiffStrategy Hanging Issue #4852", () => { + describe("reproduce exact issue scenario", () => { + let strategy: MultiSearchReplaceDiffStrategy + let multiFileStrategy: MultiFileSearchReplaceDiffStrategy + + beforeEach(() => { + // Use exact settings that might cause the issue + strategy = new MultiSearchReplaceDiffStrategy(1.0, 40) // Exact match, 40 line buffer + multiFileStrategy = new MultiFileSearchReplaceDiffStrategy(1.0, 40) + }) + + it("should handle the exact XML from issue #4852 without hanging", async () => { + // This is the exact XML content from the issue + const issueXmlContent = ` + + + + + + + + + + + + + Value 1 + Value 2 + Value 3 + + + + This is deeply nested content + More content here + Even more content + + + Another deep element + And another one + + + + + More deeply nested + Content continues + + + + Value 4 + Value 5 + + + + Complex nested structure + + + Another complex structure + + + + + Pattern A + Pattern A + Pattern A + Pattern B + Pattern B + Pattern B + + This content has multiple possible matches + and can cause the regex to try many combinations + especially when looking for specific patterns + ======= + This looks like a separator but it's not + >>>>>>> + These patterns can confuse the regex + <<<<<<< + Causing it to backtrack extensively + + + + + + + + + + + + +` + + // Test multiple concurrent edits as described in the issue + const diffItems = [ + { + content: `<<<<<<< SEARCH + Pattern A +======= + Pattern X +>>>>>>> REPLACE`, + startLine: undefined, + }, + { + content: `<<<<<<< SEARCH + Pattern B +======= + Pattern Y +>>>>>>> REPLACE`, + startLine: undefined, + }, + { + content: `<<<<<<< SEARCH + Complex nested structure +======= + Updated nested structure +>>>>>>> REPLACE`, + startLine: undefined, + }, + ] + + console.log("Starting multi-file diff application...") + const startTime = Date.now() + + // Apply all diffs using the multi-file strategy + const result = await multiFileStrategy.applyDiff(issueXmlContent, diffItems) + + const endTime = Date.now() + const duration = endTime - startTime + console.log(`Multi-file diff completed in ${duration}ms`) + + // Check that it completed in reasonable time + expect(duration).toBeLessThan(2000) // 2 seconds max + + // Verify the result + expect(result.success).toBe(true) + if (result.success && result.content) { + // Count occurrences + const patternACount = (result.content.match(/Pattern A/g) || []).length + const patternBCount = (result.content.match(/Pattern B/g) || []).length + const patternXCount = (result.content.match(/Pattern X/g) || []).length + const patternYCount = (result.content.match(/Pattern Y/g) || []).length + + // Should have replaced all occurrences + expect(patternACount).toBe(0) + expect(patternBCount).toBe(0) + expect(patternXCount).toBe(3) + expect(patternYCount).toBe(3) + expect(result.content).toContain("Updated nested structure") + } + }, 10000) // 10 second timeout + + it("should handle worst-case scenario with ambiguous patterns", async () => { + // Create a pathological case with many ambiguous patterns + const lines = [] + + // Add many similar lines that could match + for (let i = 0; i < 200; i++) { + lines.push(` Similar content with slight variation ${i % 5}`) + } + + // Add the target in the middle + lines.splice(100, 0, ` Target pattern to replace`) + + // Add more similar lines + for (let i = 0; i < 200; i++) { + lines.push(` More similar content ${i % 5}`) + } + + const pathologicalContent = lines.join("\n") + + // Try to replace without line number hint (worst case) + const diffContent = `<<<<<<< SEARCH + Target pattern to replace +======= + Successfully replaced target +>>>>>>> REPLACE` + + console.log("Starting pathological case test...") + const startTime = Date.now() + + const result = await strategy.applyDiff(pathologicalContent, diffContent) + + const endTime = Date.now() + const duration = endTime - startTime + console.log(`Pathological case completed in ${duration}ms`) + + // Should complete even in worst case + expect(duration).toBeLessThan(5000) // 5 seconds max + expect(result.success).toBe(true) + if (result.success && result.content) { + expect(result.content).toContain("Successfully replaced target") + expect(result.content).not.toContain("Target pattern to replace") + } + }, 10000) + + it("should handle extremely deep nesting efficiently", async () => { + // Create extremely deep nesting that could cause stack issues + const depth = 100 + let content = '\n' + + // Open tags + for (let i = 0; i < depth; i++) { + content += `${" ".repeat(i)}\n` + } + + // Add content at deepest level + content += `${" ".repeat(depth)}Deep content to replace\n` + + // Close tags + for (let i = depth - 1; i >= 0; i--) { + content += `${" ".repeat(i)}\n` + } + + const diffContent = `<<<<<<< SEARCH +${" ".repeat(depth)}Deep content to replace +======= +${" ".repeat(depth)}Replaced deep content +>>>>>>> REPLACE` + + console.log("Starting deep nesting test...") + const startTime = Date.now() + + const result = await strategy.applyDiff(content, diffContent) + + const endTime = Date.now() + const duration = endTime - startTime + console.log(`Deep nesting test completed in ${duration}ms`) + + expect(duration).toBeLessThan(1000) // Should be fast + expect(result.success).toBe(true) + if (result.success && result.content) { + expect(result.content).toContain("Replaced deep content") + } + }, 10000) + + it("should handle the ambiguous content markers that look like diff markers", async () => { + const contentWithFakeMarkers = ` + + Normal content + + This has fake markers + ======= + Not a real separator + >>>>>>> + Also not real + <<<<<<< + Just content + + Replace this content + +` + + const diffContent = `<<<<<<< SEARCH + Replace this content +======= + Successfully replaced +>>>>>>> REPLACE` + + const result = await strategy.applyDiff(contentWithFakeMarkers, diffContent) + + expect(result.success).toBe(true) + if (result.success && result.content) { + expect(result.content).toContain("Successfully replaced") + // Should not have affected the fake markers + expect(result.content).toContain("=======") + expect(result.content).toContain(">>>>>>>") + expect(result.content).toContain("<<<<<<<") + } + }) + }) +}) diff --git a/src/core/diff/strategies/__tests__/multi-search-replace-performance.spec.ts b/src/core/diff/strategies/__tests__/multi-search-replace-performance.spec.ts new file mode 100644 index 00000000000..6f15c191dde --- /dev/null +++ b/src/core/diff/strategies/__tests__/multi-search-replace-performance.spec.ts @@ -0,0 +1,354 @@ +import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace" +import { MultiFileSearchReplaceDiffStrategy } from "../multi-file-search-replace" +import { DiffResult } from "../../../../shared/tools" + +describe("MultiSearchReplaceDiffStrategy Performance", () => { + describe("large XML file handling", () => { + let strategy: MultiSearchReplaceDiffStrategy + let multiFileStrategy: MultiFileSearchReplaceDiffStrategy + + beforeEach(() => { + strategy = new MultiSearchReplaceDiffStrategy(1.0, 40) // Default settings + multiFileStrategy = new MultiFileSearchReplaceDiffStrategy(1.0, 40) + }) + + it("should handle large complex XML files without hanging", async () => { + // Generate the large XML content from the issue + const largeXmlContent = ` + + + + + + + + + + + + + Value 1 + Value 2 + Value 3 + + + + This is deeply nested content + More content here + Even more content + + + Another deep element + And another one + + + + + More deeply nested + Content continues + + + + Value 4 + Value 5 + + + + Complex nested structure + + + Another complex structure + + + + + Pattern A + Pattern A + Pattern A + Pattern B + Pattern B + Pattern B + + This content has multiple possible matches + and can cause the regex to try many combinations + especially when looking for specific patterns + ======= + This looks like a separator but it's not + >>>>>>> + These patterns can confuse the regex + <<<<<<< + Causing it to backtrack extensively + + + + + + + + + + + + +` + + // Create diff content to change Pattern A to Pattern X and Pattern B to Pattern Y + const diffContent = ` +<<<<<<< SEARCH + Pattern A + Pattern A + Pattern A + Pattern B + Pattern B + Pattern B +======= + Pattern X + Pattern X + Pattern X + Pattern Y + Pattern Y + Pattern Y +>>>>>>> REPLACE + +<<<<<<< SEARCH + Complex nested structure +======= + Updated nested structure +>>>>>>> REPLACE` + + // Set a timeout to ensure the test doesn't hang indefinitely + const startTime = Date.now() + const timeout = 5000 // 5 seconds timeout + + const resultPromise = strategy.applyDiff(largeXmlContent, diffContent) + + // Use Promise.race to implement timeout + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => reject(new Error("Operation timed out")), timeout) + }) + + try { + const result = await Promise.race([resultPromise, timeoutPromise]) + const endTime = Date.now() + const duration = endTime - startTime + + // Ensure the operation completed within reasonable time + expect(duration).toBeLessThan(timeout) + + // Verify the result + const diffResult = result as DiffResult + expect(diffResult).toBeDefined() + expect(diffResult.success).toBe(true) + if (diffResult.success && diffResult.content) { + expect(diffResult.content).toContain("Pattern X") + expect(diffResult.content).toContain("Pattern Y") + expect(diffResult.content).toContain("Updated nested structure") + expect(diffResult.content).not.toContain("Pattern A") + expect(diffResult.content).not.toContain("Pattern B") + expect(diffResult.content).not.toContain("Complex nested structure") + } + } catch (error) { + if (error instanceof Error && error.message === "Operation timed out") { + throw new Error("applyDiff operation timed out - this indicates the hanging issue") + } else { + throw error + } + } + }, 10000) // Jest timeout of 10 seconds + + it("should handle multiple simultaneous edits on large XML files", async () => { + // Test the multi-file strategy with the same large XML content + const largeXmlContent = ` + + + + + + + + + + + + + Value 1 + Value 2 + Value 3 + + + + This is deeply nested content + More content here + Even more content + + + + Value 4 + Value 5 + + + Complex nested structure + + + + + Pattern A + Pattern A + Pattern A + Pattern B + Pattern B + Pattern B + + + + + + + + + + + +` + + // Create multiple diff items + const diffItems = [ + { + content: `<<<<<<< SEARCH + Pattern A + Pattern A + Pattern A +======= + Pattern X + Pattern X + Pattern X +>>>>>>> REPLACE`, + startLine: undefined, + }, + { + content: `<<<<<<< SEARCH + Pattern B + Pattern B + Pattern B +======= + Pattern Y + Pattern Y + Pattern Y +>>>>>>> REPLACE`, + startLine: undefined, + }, + { + content: `<<<<<<< SEARCH + Complex nested structure +======= + Updated nested structure +>>>>>>> REPLACE`, + startLine: undefined, + }, + ] + + const startTime = Date.now() + const timeout = 5000 // 5 seconds timeout + + const resultPromise = multiFileStrategy.applyDiff(largeXmlContent, diffItems) + + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => reject(new Error("Operation timed out")), timeout) + }) + + try { + const result = await Promise.race([resultPromise, timeoutPromise]) + const endTime = Date.now() + const duration = endTime - startTime + + expect(duration).toBeLessThan(timeout) + const diffResult = result as DiffResult + expect(diffResult).toBeDefined() + expect(diffResult.success).toBe(true) + if (diffResult.success && diffResult.content) { + expect(diffResult.content).toContain("Pattern X") + expect(diffResult.content).toContain("Pattern Y") + expect(diffResult.content).toContain("Updated nested structure") + } + } catch (error) { + if (error instanceof Error && error.message === "Operation timed out") { + throw new Error("Multi-file applyDiff operation timed out - this indicates the hanging issue") + } else { + throw error + } + } + }, 10000) + + it("should handle pathological cases with many similar patterns", async () => { + // Create content with many similar patterns that could cause excessive backtracking + const lines = [] + for (let i = 0; i < 100; i++) { + lines.push(` Similar content ${i % 10}`) + } + const pathologicalContent = lines.join("\n") + + // Try to replace a pattern in the middle + const diffContent = ` +<<<<<<< SEARCH + Similar content 5 +======= + Updated content 5 +>>>>>>> REPLACE` + + const startTime = Date.now() + const result = await strategy.applyDiff(pathologicalContent, diffContent) + const endTime = Date.now() + const duration = endTime - startTime + + // Should complete quickly even with many similar patterns + expect(duration).toBeLessThan(1000) // 1 second max + expect(result.success).toBe(true) + if (result.success && result.content) { + // Should update exactly one occurrence + const updatedCount = (result.content.match(/Updated content 5/g) || []).length + expect(updatedCount).toBe(1) + } + }) + + it("should handle deeply nested content with line number hints efficiently", async () => { + const deeplyNestedXml = ` + +${Array(50) + .fill(0) + .map((_, i) => ` ${" ".repeat(i)}`) + .join("\n")} +${Array(50) + .fill(0) + .map((_, i) => ` ${" ".repeat(49 - i)}Content at level ${49 - i}`) + .join("\n")} +${Array(50) + .fill(0) + .map((_, i) => ` ${" ".repeat(49 - i)}`) + .join("\n")} +` + + // Try to replace content at a specific level with line number hint + const diffContent = ` +<<<<<<< SEARCH +:start_line:30 +------- + Content at level 20 +======= + Updated content at level 20 +>>>>>>> REPLACE` + + const startTime = Date.now() + const result = await strategy.applyDiff(deeplyNestedXml, diffContent) + const endTime = Date.now() + const duration = endTime - startTime + + // Should be fast with line number hint + expect(duration).toBeLessThan(500) // 500ms max + expect(result.success).toBe(true) + if (result.success && result.content) { + expect(result.content).toContain("Updated content at level 20") + expect(result.content).not.toContain("Content at level 20") + } + }) + }) +}) diff --git a/src/core/diff/strategies/multi-file-search-replace.ts b/src/core/diff/strategies/multi-file-search-replace.ts index d875d723a15..d9ab41d9704 100644 --- a/src/core/diff/strategies/multi-file-search-replace.ts +++ b/src/core/diff/strategies/multi-file-search-replace.ts @@ -40,34 +40,61 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e const searchLen = searchChunk.split(/\r?\n/).length + // Early return if search range is invalid + if (startIndex < 0 || endIndex > lines.length || startIndex >= endIndex || searchLen > endIndex - startIndex) { + return { bestScore, bestMatchIndex, bestMatchContent } + } + // Middle-out from the midpoint const midPoint = Math.floor((startIndex + endIndex) / 2) let leftIndex = midPoint let rightIndex = midPoint + 1 - while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) { - if (leftIndex >= startIndex) { + // Add a maximum iteration count to prevent infinite loops + const maxIterations = endIndex - startIndex + let iterations = 0 + + while ((leftIndex >= startIndex || rightIndex <= endIndex - searchLen) && iterations < maxIterations) { + iterations++ + + // Check left side + if (leftIndex >= startIndex && leftIndex + searchLen <= endIndex) { const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n") const similarity = getSimilarity(originalChunk, searchChunk) + // Early termination if we find an exact match + if (similarity === 1.0) { + return { bestScore: similarity, bestMatchIndex: leftIndex, bestMatchContent: originalChunk } + } + if (similarity > bestScore) { bestScore = similarity bestMatchIndex = leftIndex bestMatchContent = originalChunk } leftIndex-- + } else { + leftIndex = startIndex - 1 // Force it out of bounds } - if (rightIndex <= endIndex - searchLen) { + // Check right side + if (rightIndex <= endIndex - searchLen && rightIndex >= startIndex) { const originalChunk = lines.slice(rightIndex, rightIndex + searchLen).join("\n") const similarity = getSimilarity(originalChunk, searchChunk) + // Early termination if we find an exact match + if (similarity === 1.0) { + return { bestScore: similarity, bestMatchIndex: rightIndex, bestMatchContent: originalChunk } + } + if (similarity > bestScore) { bestScore = similarity bestMatchIndex = rightIndex bestMatchContent = originalChunk } rightIndex++ + } else { + rightIndex = endIndex + 1 // Force it out of bounds } } @@ -446,6 +473,10 @@ Each file requires its own path, start_line, and diff elements. diffContent: string, _paramStartLine?: number, ): Promise { + // Add a timeout mechanism to prevent indefinite hanging + const DIFF_TIMEOUT_MS = 30000 // 30 seconds timeout + const startTime = Date.now() + const validseq = this.validateMarkerSequencing(diffContent) if (!validseq.success) { return { @@ -494,6 +525,15 @@ Each file requires its own path, start_line, and diff elements. .sort((a, b) => a.startLine - b.startLine) for (const replacement of replacements) { + // Check for timeout + if (Date.now() - startTime > DIFF_TIMEOUT_MS) { + return { + success: false, + error: `Operation timed out after ${DIFF_TIMEOUT_MS / 1000} seconds. This may indicate the file is too large or complex for the current search pattern.`, + failParts: diffResults, + } + } + let { searchContent, replaceContent } = replacement let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta) diff --git a/src/core/diff/strategies/multi-search-replace.ts b/src/core/diff/strategies/multi-search-replace.ts index b90ef4072d5..528a0f60e5c 100644 --- a/src/core/diff/strategies/multi-search-replace.ts +++ b/src/core/diff/strategies/multi-search-replace.ts @@ -42,32 +42,61 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e let bestMatchContent = "" const searchLen = searchChunk.split(/\r?\n/).length + // Early return if search range is invalid + if (startIndex < 0 || endIndex > lines.length || startIndex >= endIndex || searchLen > endIndex - startIndex) { + return { bestScore, bestMatchIndex, bestMatchContent } + } + // Middle-out from the midpoint const midPoint = Math.floor((startIndex + endIndex) / 2) let leftIndex = midPoint let rightIndex = midPoint + 1 - while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) { - if (leftIndex >= startIndex) { + // Add a maximum iteration count to prevent infinite loops + const maxIterations = endIndex - startIndex + let iterations = 0 + + while ((leftIndex >= startIndex || rightIndex <= endIndex - searchLen) && iterations < maxIterations) { + iterations++ + + // Check left side + if (leftIndex >= startIndex && leftIndex + searchLen <= endIndex) { const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n") const similarity = getSimilarity(originalChunk, searchChunk) + + // Early termination if we find an exact match + if (similarity === 1.0) { + return { bestScore: similarity, bestMatchIndex: leftIndex, bestMatchContent: originalChunk } + } + if (similarity > bestScore) { bestScore = similarity bestMatchIndex = leftIndex bestMatchContent = originalChunk } leftIndex-- + } else { + leftIndex = startIndex - 1 // Force it out of bounds } - if (rightIndex <= endIndex - searchLen) { + // Check right side + if (rightIndex <= endIndex - searchLen && rightIndex >= startIndex) { const originalChunk = lines.slice(rightIndex, rightIndex + searchLen).join("\n") const similarity = getSimilarity(originalChunk, searchChunk) + + // Early termination if we find an exact match + if (similarity === 1.0) { + return { bestScore: similarity, bestMatchIndex: rightIndex, bestMatchContent: originalChunk } + } + if (similarity > bestScore) { bestScore = similarity bestMatchIndex = rightIndex bestMatchContent = originalChunk } rightIndex++ + } else { + rightIndex = endIndex + 1 // Force it out of bounds } } @@ -337,6 +366,10 @@ Only use a single line of '=======' between search and replacement content, beca _paramStartLine?: number, _paramEndLine?: number, ): Promise { + // Add a timeout mechanism to prevent indefinite hanging + const DIFF_TIMEOUT_MS = 30000 // 30 seconds timeout + const startTime = Date.now() + const validseq = this.validateMarkerSequencing(diffContent) if (!validseq.success) { return { @@ -403,6 +436,15 @@ Only use a single line of '=======' between search and replacement content, beca .sort((a, b) => a.startLine - b.startLine) for (const replacement of replacements) { + // Check for timeout + if (Date.now() - startTime > DIFF_TIMEOUT_MS) { + return { + success: false, + error: `Operation timed out after ${DIFF_TIMEOUT_MS / 1000} seconds. This may indicate the file is too large or complex for the current search pattern.`, + failParts: diffResults, + } + } + let { searchContent, replaceContent } = replacement let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta)