diff --git a/src/core/diff/strategies/__tests__/fuzzy-search-performance.spec.ts b/src/core/diff/strategies/__tests__/fuzzy-search-performance.spec.ts
new file mode 100644
index 0000000000..6666f1030c
--- /dev/null
+++ b/src/core/diff/strategies/__tests__/fuzzy-search-performance.spec.ts
@@ -0,0 +1,183 @@
+import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace"
+import { MultiFileSearchReplaceDiffStrategy } from "../multi-file-search-replace"
+
+describe("FuzzySearch Performance Tests", () => {
+ describe("MultiSearchReplaceDiffStrategy", () => {
+ it("should not hang on large XML files", async () => {
+ const strategy = new MultiSearchReplaceDiffStrategy()
+
+ // Create a large XML-like content (simulating a 1000+ line file)
+ const largeXmlContent = Array.from(
+ { length: 1000 },
+ (_, i) =>
+ ` -
+ Item ${i}
+ This is a description for item ${i}
+ ${i * 10}
+
`,
+ ).join("\n")
+
+ const originalContent = `
+
+${largeXmlContent}
+`
+
+ // Create a diff that searches for content that doesn't exist
+ // This would previously cause the fuzzySearch to hang
+ const diffContent = `<<<<<<< SEARCH
+:start_line:500
+-------
+ -
+ Non-existent Item
+ This item does not exist
+ 999999
+
+=======
+ -
+ Updated Non-existent Item
+ This item still does not exist
+ 999999
+
+>>>>>>> REPLACE`
+
+ const startTime = Date.now()
+
+ // This should complete within a reasonable time (not hang)
+ const result = await strategy.applyDiff(originalContent, diffContent)
+
+ const endTime = Date.now()
+ const duration = endTime - startTime
+
+ // Should complete within 10 seconds (was hanging indefinitely before)
+ expect(duration).toBeLessThan(10000)
+
+ // Should fail to find the match (which is expected)
+ expect(result.success).toBe(false)
+ if (!result.success) {
+ // Check if there's a direct error or error in failParts
+ const errorMessage =
+ result.error ||
+ (result.failParts?.[0] && !result.failParts[0].success ? result.failParts[0].error : undefined)
+ expect(errorMessage).toContain("No sufficiently similar match found")
+ }
+ }, 15000) // 15 second timeout for the test itself
+
+ it("should handle complex XML structure efficiently", async () => {
+ const strategy = new MultiSearchReplaceDiffStrategy()
+
+ // Create complex nested XML structure
+ const complexXml = Array.from(
+ { length: 500 },
+ (_, i) =>
+ `
+
+ Section ${i}
+
+ 2024-01-${(i % 28) + 1}
+ Author ${i % 10}
+
+
+
+ This is paragraph 1 of section ${i}
+ This is paragraph 2 of section ${i}
+
+ - Item 1
+ - Item 2
+ - Item 3
+
+
+ `,
+ ).join("\n")
+
+ const originalContent = `
+
+
+
+${complexXml}
+
+`
+
+ // Search for an actual existing section to replace
+ const diffContent = `<<<<<<< SEARCH
+:start_line:10
+-------
+
+
+ Section 1
+
+ 2024-01-2
+ Author 1
+
+
+=======
+
+
+ Updated Section 1
+
+ 2024-01-2
+ Author 1
+ 2024-12-18
+
+
+>>>>>>> REPLACE`
+
+ const startTime = Date.now()
+
+ const result = await strategy.applyDiff(originalContent, diffContent)
+
+ const endTime = Date.now()
+ const duration = endTime - startTime
+
+ // Should complete quickly
+ expect(duration).toBeLessThan(5000)
+
+ // Should successfully find and replace the content
+ expect(result.success).toBe(true)
+ if (result.success) {
+ expect(result.content).toContain("Updated Section 1")
+ expect(result.content).toContain("2024-12-18")
+ }
+ }, 10000)
+ })
+
+ describe("MultiFileSearchReplaceDiffStrategy", () => {
+ it("should not hang on large files with array-based diff input", async () => {
+ const strategy = new MultiFileSearchReplaceDiffStrategy()
+
+ // Create a large file content
+ const largeContent = Array.from(
+ { length: 2000 },
+ (_, i) =>
+ `Line ${i}: This is a long line with some content that might be searched for in a large file.`,
+ ).join("\n")
+
+ // Create diff items that search for non-existent content
+ const diffItems = [
+ {
+ content: `<<<<<<< SEARCH
+Line 99999: This line does not exist
+=======
+Line 99999: This line has been updated
+>>>>>>> REPLACE`,
+ startLine: 1000,
+ },
+ ]
+
+ const startTime = Date.now()
+
+ // This should complete within a reasonable time (not hang)
+ const result = await strategy.applyDiff(largeContent, diffItems)
+
+ const endTime = Date.now()
+ const duration = endTime - startTime
+
+ // Should complete within 10 seconds
+ expect(duration).toBeLessThan(10000)
+
+ // Should fail to find the match
+ expect(result.success).toBe(false)
+ }, 15000)
+ })
+})
diff --git a/src/core/diff/strategies/multi-file-search-replace.ts b/src/core/diff/strategies/multi-file-search-replace.ts
index d35f32685e..e071dd3e69 100644
--- a/src/core/diff/strategies/multi-file-search-replace.ts
+++ b/src/core/diff/strategies/multi-file-search-replace.ts
@@ -32,6 +32,11 @@ function getSimilarity(original: string, search: string): number {
/**
* Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find
* the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text.
+ *
+ * Performance safeguards:
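+ * - Iteration cap: the loop runs at most 10,000 passes (fewer for small ranges), probing up to one window per side each pass
+ * - Early exit as soon as a window reaches a similarity of 1.0
+ * - Timeout: after 5 seconds (checked every 100 passes) the scan stops and the best match found so far is returned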
*/
function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number) {
let bestScore = 0
@@ -40,12 +45,25 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e
const searchLen = searchChunk.split(/\r?\n/).length
+ // Performance safeguards: bound the middle-out scan by pass count and by wall-clock time
+ const searchRange = endIndex - startIndex
+ const MAX_ITERATIONS = Math.max(1, Math.min(searchRange, 10000)) // Cap on loop passes; floor of 1 keeps the single probe for an empty range
+ const TIMEOUT_MS = 5000 // 5 second timeout for very large files
+ const startTime = Date.now()
+
// Middle-out from the midpoint
const midPoint = Math.floor((startIndex + endIndex) / 2)
let leftIndex = midPoint
let rightIndex = midPoint + 1
+ let iterations = 0
+
+ while ((leftIndex >= startIndex || rightIndex <= endIndex - searchLen) && iterations < MAX_ITERATIONS) {
+ // Check for timeout on large files to prevent hanging
+ if (iterations % 100 === 0 && Date.now() - startTime > TIMEOUT_MS) {
+ console.warn(`[fuzzySearch] Timeout reached after ${iterations} iterations on large file search`)
+ break
+ }
- while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) {
if (leftIndex >= startIndex) {
const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n")
const similarity = getSimilarity(originalChunk, searchChunk)
@@ -54,6 +72,11 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e
bestScore = similarity
bestMatchIndex = leftIndex
bestMatchContent = originalChunk
+
+ // Early exit for perfect matches to improve performance
+ if (similarity >= 1.0) {
+ break
+ }
}
leftIndex--
}
@@ -66,9 +89,23 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e
bestScore = similarity
bestMatchIndex = rightIndex
bestMatchContent = originalChunk
+
+ // Early exit for perfect matches to improve performance
+ if (similarity >= 1.0) {
+ break
+ }
}
rightIndex++
}
+
+ iterations++
+ }
+
+ const cutShort = iterations >= MAX_ITERATIONS && (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) // cap hit with windows still unvisited
+ if (cutShort || Date.now() - startTime > 1000) {
+ console.warn(
+ `[fuzzySearch] Performance warning: ${iterations} iterations, ${Date.now() - startTime}ms, range: ${searchRange} lines`,
+ )
}
return { bestScore, bestMatchIndex, bestMatchContent }
diff --git a/src/core/diff/strategies/multi-search-replace.ts b/src/core/diff/strategies/multi-search-replace.ts
index 9e740a6571..62b83a9a5d 100644
--- a/src/core/diff/strategies/multi-search-replace.ts
+++ b/src/core/diff/strategies/multi-search-replace.ts
@@ -35,6 +35,11 @@ function getSimilarity(original: string, search: string): number {
/**
* Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find
* the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text.
+ *
+ * Performance safeguards:
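+ * - Iteration cap: the loop runs at most 10,000 passes (fewer for small ranges), probing up to one window per side each pass
+ * - Early exit as soon as a window reaches a similarity of 1.0
+ * - Timeout: after 5 seconds (checked every 100 passes) the scan stops and the best match found so far is returned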
*/
function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number) {
let bestScore = 0
@@ -42,12 +47,25 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e
let bestMatchContent = ""
const searchLen = searchChunk.split(/\r?\n/).length
+ // Performance safeguards: bound the middle-out scan by pass count and by wall-clock time
+ const searchRange = endIndex - startIndex
+ const MAX_ITERATIONS = Math.max(1, Math.min(searchRange, 10000)) // Cap on loop passes; floor of 1 keeps the single probe for an empty range
+ const TIMEOUT_MS = 5000 // 5 second timeout for very large files
+ const startTime = Date.now()
+
// Middle-out from the midpoint
const midPoint = Math.floor((startIndex + endIndex) / 2)
let leftIndex = midPoint
let rightIndex = midPoint + 1
+ let iterations = 0
+
+ while ((leftIndex >= startIndex || rightIndex <= endIndex - searchLen) && iterations < MAX_ITERATIONS) {
+ // Check for timeout on large files to prevent hanging
+ if (iterations % 100 === 0 && Date.now() - startTime > TIMEOUT_MS) {
+ console.warn(`[fuzzySearch] Timeout reached after ${iterations} iterations on large file search`)
+ break
+ }
- while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) {
if (leftIndex >= startIndex) {
const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n")
const similarity = getSimilarity(originalChunk, searchChunk)
@@ -55,6 +73,11 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e
bestScore = similarity
bestMatchIndex = leftIndex
bestMatchContent = originalChunk
+
+ // Early exit for perfect matches to improve performance
+ if (similarity >= 1.0) {
+ break
+ }
}
leftIndex--
}
@@ -66,9 +89,23 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e
bestScore = similarity
bestMatchIndex = rightIndex
bestMatchContent = originalChunk
+
+ // Early exit for perfect matches to improve performance
+ if (similarity >= 1.0) {
+ break
+ }
}
rightIndex++
}
+
+ iterations++
+ }
+
+ const cutShort = iterations >= MAX_ITERATIONS && (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) // cap hit with windows still unvisited
+ if (cutShort || Date.now() - startTime > 1000) {
+ console.warn(
+ `[fuzzySearch] Performance warning: ${iterations} iterations, ${Date.now() - startTime}ms, range: ${searchRange} lines`,
+ )
}
return { bestScore, bestMatchIndex, bestMatchContent }