diff --git a/src/core/diff/strategies/__tests__/fuzzy-search-performance.spec.ts b/src/core/diff/strategies/__tests__/fuzzy-search-performance.spec.ts new file mode 100644 index 0000000000..6666f1030c --- /dev/null +++ b/src/core/diff/strategies/__tests__/fuzzy-search-performance.spec.ts @@ -0,0 +1,183 @@ +import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace" +import { MultiFileSearchReplaceDiffStrategy } from "../multi-file-search-replace" + +describe("FuzzySearch Performance Tests", () => { + describe("MultiSearchReplaceDiffStrategy", () => { + it("should not hang on large XML files", async () => { + const strategy = new MultiSearchReplaceDiffStrategy() + + // Create a large XML-like content (simulating a 1000+ line file) + const largeXmlContent = Array.from( + { length: 1000 }, + (_, i) => + ` + Item ${i} + This is a description for item ${i} + ${i * 10} + `, + ).join("\n") + + const originalContent = ` + +${largeXmlContent} +` + + // Create a diff that searches for content that doesn't exist + // This would previously cause the fuzzySearch to hang + const diffContent = `<<<<<<< SEARCH +:start_line:500 +------- + + Non-existent Item + This item does not exist + 999999 + +======= + + Updated Non-existent Item + This item still does not exist + 999999 + +>>>>>>> REPLACE` + + const startTime = Date.now() + + // This should complete within a reasonable time (not hang) + const result = await strategy.applyDiff(originalContent, diffContent) + + const endTime = Date.now() + const duration = endTime - startTime + + // Should complete within 10 seconds (was hanging indefinitely before) + expect(duration).toBeLessThan(10000) + + // Should fail to find the match (which is expected) + expect(result.success).toBe(false) + if (!result.success) { + // Check if there's a direct error or error in failParts + const errorMessage = + result.error || + (result.failParts?.[0] && !result.failParts[0].success ? result.failParts[0].error : undefined) + expect(errorMessage).toContain("No sufficiently similar match found") + } + }, 15000) // 15 second timeout for the test itself + + it("should handle complex XML structure efficiently", async () => { + const strategy = new MultiSearchReplaceDiffStrategy() + + // Create complex nested XML structure + const complexXml = Array.from( + { length: 500 }, + (_, i) => + `
+
+ Section ${i} + + 2024-01-${(i % 28) + 1} + Author ${i % 10} + +
+ + This is paragraph 1 of section ${i} + This is paragraph 2 of section ${i} + + Item 1 + Item 2 + Item 3 + + +
`, + ).join("\n") + + const originalContent = ` + +
+ Large Document +
+ +${complexXml} + +
` + + // Search for an actual existing section to replace + const diffContent = `<<<<<<< SEARCH +:start_line:10 +------- +
+
+ Section 1 + + 2024-01-2 + Author 1 + +
+======= +
+
+ Updated Section 1 + + 2024-01-2 + Author 1 + 2024-12-18 + +
+>>>>>>> REPLACE` + + const startTime = Date.now() + + const result = await strategy.applyDiff(originalContent, diffContent) + + const endTime = Date.now() + const duration = endTime - startTime + + // Should complete quickly + expect(duration).toBeLessThan(5000) + + // Should successfully find and replace the content + expect(result.success).toBe(true) + if (result.success) { + expect(result.content).toContain("Updated Section 1") + expect(result.content).toContain("2024-12-18") + } + }, 10000) + }) + + describe("MultiFileSearchReplaceDiffStrategy", () => { + it("should not hang on large files with array-based diff input", async () => { + const strategy = new MultiFileSearchReplaceDiffStrategy() + + // Create a large file content + const largeContent = Array.from( + { length: 2000 }, + (_, i) => + `Line ${i}: This is a long line with some content that might be searched for in a large file.`, + ).join("\n") + + // Create diff items that search for non-existent content + const diffItems = [ + { + content: `<<<<<<< SEARCH +Line 99999: This line does not exist +======= +Line 99999: This line has been updated +>>>>>>> REPLACE`, + startLine: 1000, + }, + ] + + const startTime = Date.now() + + // This should complete within a reasonable time (not hang) + const result = await strategy.applyDiff(largeContent, diffItems) + + const endTime = Date.now() + const duration = endTime - startTime + + // Should complete within 10 seconds + expect(duration).toBeLessThan(10000) + + // Should fail to find the match + expect(result.success).toBe(false) + }, 15000) + }) +}) diff --git a/src/core/diff/strategies/multi-file-search-replace.ts b/src/core/diff/strategies/multi-file-search-replace.ts index d35f32685e..e071dd3e69 100644 --- a/src/core/diff/strategies/multi-file-search-replace.ts +++ b/src/core/diff/strategies/multi-file-search-replace.ts @@ -32,6 +32,11 @@ function getSimilarity(original: string, search: string): number { /** * Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find * the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text. + * + * Performance safeguards: + * - Maximum iteration limit to prevent hanging on large files + * - Early exit when perfect match is found + * - Timeout mechanism for very large files */ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number) { let bestScore = 0 @@ -40,12 +45,25 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e const searchLen = searchChunk.split(/\r?\n/).length + // Performance safeguards for large files + const searchRange = endIndex - startIndex + const MAX_ITERATIONS = Math.min(searchRange, 10000) // Limit iterations to prevent hanging + const TIMEOUT_MS = 5000 // 5 second timeout for very large files + const startTime = Date.now() + // Middle-out from the midpoint const midPoint = Math.floor((startIndex + endIndex) / 2) let leftIndex = midPoint let rightIndex = midPoint + 1 + let iterations = 0 + + while ((leftIndex >= startIndex || rightIndex <= endIndex - searchLen) && iterations < MAX_ITERATIONS) { + // Check for timeout on large files to prevent hanging + if (iterations % 100 === 0 && Date.now() - startTime > TIMEOUT_MS) { + console.warn(`[fuzzySearch] Timeout reached after ${iterations} iterations on large file search`) + break + } - while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) { if (leftIndex >= startIndex) { const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n") const similarity = getSimilarity(originalChunk, searchChunk) @@ -54,6 +72,11 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e bestScore = similarity bestMatchIndex = leftIndex bestMatchContent = originalChunk + + // Early exit for perfect matches to improve performance + if (similarity >= 1.0) { + break + } } leftIndex-- } @@ -66,9 +89,23 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e bestScore = similarity bestMatchIndex = rightIndex bestMatchContent = originalChunk + + // Early exit for perfect matches to improve performance + if (similarity >= 1.0) { + break + } } rightIndex++ } + + iterations++ + } + + // Log performance metrics for debugging large file issues + if (iterations >= MAX_ITERATIONS || Date.now() - startTime > 1000) { + console.warn( + `[fuzzySearch] Performance warning: ${iterations} iterations, ${Date.now() - startTime}ms, range: ${searchRange} lines`, + ) } return { bestScore, bestMatchIndex, bestMatchContent } diff --git a/src/core/diff/strategies/multi-search-replace.ts b/src/core/diff/strategies/multi-search-replace.ts index 9e740a6571..62b83a9a5d 100644 --- a/src/core/diff/strategies/multi-search-replace.ts +++ b/src/core/diff/strategies/multi-search-replace.ts @@ -35,6 +35,11 @@ function getSimilarity(original: string, search: string): number { /** * Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find * the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text. + * + * Performance safeguards: + * - Maximum iteration limit to prevent hanging on large files + * - Early exit when perfect match is found + * - Timeout mechanism for very large files */ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number) { let bestScore = 0 @@ -42,12 +47,25 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e let bestMatchContent = "" const searchLen = searchChunk.split(/\r?\n/).length + // Performance safeguards for large files + const searchRange = endIndex - startIndex + const MAX_ITERATIONS = Math.min(searchRange, 10000) // Limit iterations to prevent hanging + const TIMEOUT_MS = 5000 // 5 second timeout for very large files + const startTime = Date.now() + // Middle-out from the midpoint const midPoint = Math.floor((startIndex + endIndex) / 2) let leftIndex = midPoint let rightIndex = midPoint + 1 + let iterations = 0 + + while ((leftIndex >= startIndex || rightIndex <= endIndex - searchLen) && iterations < MAX_ITERATIONS) { + // Check for timeout on large files to prevent hanging + if (iterations % 100 === 0 && Date.now() - startTime > TIMEOUT_MS) { + console.warn(`[fuzzySearch] Timeout reached after ${iterations} iterations on large file search`) + break + } - while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) { if (leftIndex >= startIndex) { const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n") const similarity = getSimilarity(originalChunk, searchChunk) @@ -55,6 +73,11 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e bestScore = similarity bestMatchIndex = leftIndex bestMatchContent = originalChunk + + // Early exit for perfect matches to improve performance + if (similarity >= 1.0) { + break + } } leftIndex-- } @@ -66,9 +89,23 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e bestScore = similarity bestMatchIndex = rightIndex bestMatchContent = originalChunk + + // Early exit for perfect matches to improve performance + if (similarity >= 1.0) { + break + } } rightIndex++ } + + iterations++ + } + + // Log performance metrics for debugging large file issues + if (iterations >= MAX_ITERATIONS || Date.now() - startTime > 1000) { + console.warn( + `[fuzzySearch] Performance warning: ${iterations} iterations, ${Date.now() - startTime}ms, range: ${searchRange} lines`, + ) } return { bestScore, bestMatchIndex, bestMatchContent }