Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 183 additions & 0 deletions src/core/diff/strategies/__tests__/fuzzy-search-performance.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace"
import { MultiFileSearchReplaceDiffStrategy } from "../multi-file-search-replace"

describe("FuzzySearch Performance Tests", () => {
describe("MultiSearchReplaceDiffStrategy", () => {
it("should not hang on large XML files", async () => {
	// Assemble 1000 five-line <item> entries, then wrap them in a <root> element
	// so the document is several thousand lines long.
	const itemBlocks: string[] = []
	for (let i = 0; i < 1000; i++) {
		itemBlocks.push(` <item id="${i}">
<name>Item ${i}</name>
<description>This is a description for item ${i}</description>
<value>${i * 10}</value>
</item>`)
	}
	const largeXmlContent = itemBlocks.join("\n")

	const xmlDocument = `<?xml version="1.0" encoding="UTF-8"?>
<root>
${largeXmlContent}
</root>`

	// The SEARCH section targets an item id that is never generated above, so the
	// strategy has to scan for a fuzzy match and come up empty. This is the
	// scenario that used to make fuzzySearch hang.
	const searchReplaceBlock = `<<<<<<< SEARCH
:start_line:500
-------
<item id="999999">
<name>Non-existent Item</name>
<description>This item does not exist</description>
<value>999999</value>
</item>
=======
<item id="999999">
<name>Updated Non-existent Item</name>
<description>This item still does not exist</description>
<value>999999</value>
</item>
>>>>>>> REPLACE`

	const diffStrategy = new MultiSearchReplaceDiffStrategy()

	// Measure wall-clock time spent inside applyDiff only.
	const startedAt = Date.now()
	const outcome = await diffStrategy.applyDiff(xmlDocument, searchReplaceBlock)
	const elapsedMs = Date.now() - startedAt

	// Must finish within 10 seconds (previously it never returned).
	expect(elapsedMs).toBeLessThan(10000)

	// No match exists, so the diff is expected to be rejected.
	expect(outcome.success).toBe(false)
	if (outcome.success) {
		return
	}

	// The failure message is either reported directly on the result or on the
	// first entry of failParts, when that entry is itself a failure.
	const firstFailure = outcome.failParts?.[0]
	const nestedError = firstFailure && !firstFailure.success ? firstFailure.error : undefined
	expect(outcome.error || nestedError).toContain("No sufficiently similar match found")
}, 15000) // the test itself is allowed 15 seconds

it("should handle complex XML structure efficiently", async () => {
	// Renders one nested <section> element for the given index.
	const renderSection = (i: number): string =>
		` <section id="section-${i}">
<header>
<title>Section ${i}</title>
<metadata>
<created>2024-01-${(i % 28) + 1}</created>
<author>Author ${i % 10}</author>
</metadata>
</header>
<content>
<paragraph>This is paragraph 1 of section ${i}</paragraph>
<paragraph>This is paragraph 2 of section ${i}</paragraph>
<list>
<item>Item 1</item>
<item>Item 2</item>
<item>Item 3</item>
</list>
</content>
</section>`

	// 500 sections, embedded in a <document>/<body> envelope.
	const complexXml = Array.from({ length: 500 }, (_, i) => renderSection(i)).join("\n")

	const xmlDocument = `<?xml version="1.0" encoding="UTF-8"?>
<document>
<header>
<title>Large Document</title>
</header>
<body>
${complexXml}
</body>
</document>`

	// The SEARCH section reproduces the header of section 1, which is present in
	// the generated document (for i = 1 the created date renders as 2024-01-2).
	const searchReplaceBlock = `<<<<<<< SEARCH
:start_line:10
-------
<section id="section-1">
<header>
<title>Section 1</title>
<metadata>
<created>2024-01-2</created>
<author>Author 1</author>
</metadata>
</header>
=======
<section id="section-1">
<header>
<title>Updated Section 1</title>
<metadata>
<created>2024-01-2</created>
<author>Author 1</author>
<updated>2024-12-18</updated>
</metadata>
</header>
>>>>>>> REPLACE`

	const diffStrategy = new MultiSearchReplaceDiffStrategy()

	// Measure wall-clock time spent inside applyDiff only.
	const startedAt = Date.now()
	const outcome = await diffStrategy.applyDiff(xmlDocument, searchReplaceBlock)
	const elapsedMs = Date.now() - startedAt

	// A hit on existing content is expected to return well inside 5 seconds.
	expect(elapsedMs).toBeLessThan(5000)

	// The replacement must have been applied to the returned content.
	expect(outcome.success).toBe(true)
	if (!outcome.success) {
		return
	}
	expect(outcome.content).toContain("Updated Section 1")
	expect(outcome.content).toContain("<updated>2024-12-18</updated>")
}, 10000)
})

describe("MultiFileSearchReplaceDiffStrategy", () => {
it("should not hang on large files with array-based diff input", async () => {
	// Build a 2000-line plain-text file.
	const fileLines: string[] = []
	for (let i = 0; i < 2000; i++) {
		fileLines.push(
			`Line ${i}: This is a long line with some content that might be searched for in a large file.`,
		)
	}
	const largeContent = fileLines.join("\n")

	// A single array-style diff entry whose SEARCH text matches none of the
	// generated lines.
	const missingLineDiff = `<<<<<<< SEARCH
Line 99999: This line does not exist
=======
Line 99999: This line has been updated
>>>>>>> REPLACE`
	const diffItems = [{ content: missingLineDiff, startLine: 1000 }]

	const diffStrategy = new MultiFileSearchReplaceDiffStrategy()

	// Measure wall-clock time spent inside applyDiff only.
	const startedAt = Date.now()
	const outcome = await diffStrategy.applyDiff(largeContent, diffItems)
	const elapsedMs = Date.now() - startedAt

	// Must finish within 10 seconds rather than hanging.
	expect(elapsedMs).toBeLessThan(10000)

	// Nothing matches, so the diff is expected to be rejected.
	expect(outcome.success).toBe(false)
}, 15000)
})
})
39 changes: 38 additions & 1 deletion src/core/diff/strategies/multi-file-search-replace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ function getSimilarity(original: string, search: string): number {
/**
* Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find
* the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text.
*
* Performance safeguards:
* - Maximum iteration limit to prevent hanging on large files
* - Early exit when perfect match is found
* - Timeout mechanism for very large files
*/
function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number) {
let bestScore = 0
Expand All @@ -40,12 +45,25 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e

const searchLen = searchChunk.split(/\r?\n/).length

// Performance safeguards for large files
const searchRange = endIndex - startIndex
const MAX_ITERATIONS = Math.min(searchRange, 10000) // Limit iterations to prevent hanging
const TIMEOUT_MS = 5000 // 5 second timeout for very large files
const startTime = Date.now()

// Middle-out from the midpoint
const midPoint = Math.floor((startIndex + endIndex) / 2)
let leftIndex = midPoint
let rightIndex = midPoint + 1
let iterations = 0

while ((leftIndex >= startIndex || rightIndex <= endIndex - searchLen) && iterations < MAX_ITERATIONS) {
// Check for timeout on large files to prevent hanging
if (iterations % 100 === 0 && Date.now() - startTime > TIMEOUT_MS) {
console.warn(`[fuzzySearch] Timeout reached after ${iterations} iterations on large file search`)
break
}

while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) {
if (leftIndex >= startIndex) {
const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n")
const similarity = getSimilarity(originalChunk, searchChunk)
Expand All @@ -54,6 +72,11 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e
bestScore = similarity
bestMatchIndex = leftIndex
bestMatchContent = originalChunk

// Early exit for perfect matches to improve performance
if (similarity >= 1.0) {
break
}
}
leftIndex--
}
Expand All @@ -66,9 +89,23 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e
bestScore = similarity
bestMatchIndex = rightIndex
bestMatchContent = originalChunk

// Early exit for perfect matches to improve performance
if (similarity >= 1.0) {
break
}
}
rightIndex++
}

iterations++
}

// Log performance metrics for debugging large file issues
if (iterations >= MAX_ITERATIONS || Date.now() - startTime > 1000) {
console.warn(
`[fuzzySearch] Performance warning: ${iterations} iterations, ${Date.now() - startTime}ms, range: ${searchRange} lines`,
)
}

return { bestScore, bestMatchIndex, bestMatchContent }
Expand Down
39 changes: 38 additions & 1 deletion src/core/diff/strategies/multi-search-replace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,26 +35,49 @@ function getSimilarity(original: string, search: string): number {
/**
* Performs a "middle-out" search of `lines` (between [startIndex, endIndex]) to find
* the slice that is most similar to `searchChunk`. Returns the best score, index, and matched text.
*
* Performance safeguards:
* - Maximum iteration limit to prevent hanging on large files
* - Early exit when perfect match is found
* - Timeout mechanism for very large files
*/
function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, endIndex: number) {
let bestScore = 0
let bestMatchIndex = -1
let bestMatchContent = ""
const searchLen = searchChunk.split(/\r?\n/).length

// Performance safeguards for large files
const searchRange = endIndex - startIndex
const MAX_ITERATIONS = Math.min(searchRange, 10000) // Limit iterations to prevent hanging
const TIMEOUT_MS = 5000 // 5 second timeout for very large files
const startTime = Date.now()

// Middle-out from the midpoint
const midPoint = Math.floor((startIndex + endIndex) / 2)
let leftIndex = midPoint
let rightIndex = midPoint + 1
let iterations = 0

while ((leftIndex >= startIndex || rightIndex <= endIndex - searchLen) && iterations < MAX_ITERATIONS) {
// Check for timeout on large files to prevent hanging
if (iterations % 100 === 0 && Date.now() - startTime > TIMEOUT_MS) {
console.warn(`[fuzzySearch] Timeout reached after ${iterations} iterations on large file search`)
break
}

while (leftIndex >= startIndex || rightIndex <= endIndex - searchLen) {
if (leftIndex >= startIndex) {
const originalChunk = lines.slice(leftIndex, leftIndex + searchLen).join("\n")
const similarity = getSimilarity(originalChunk, searchChunk)
if (similarity > bestScore) {
bestScore = similarity
bestMatchIndex = leftIndex
bestMatchContent = originalChunk

// Early exit for perfect matches to improve performance
if (similarity >= 1.0) {
break
}
}
leftIndex--
}
Expand All @@ -66,9 +89,23 @@ function fuzzySearch(lines: string[], searchChunk: string, startIndex: number, e
bestScore = similarity
bestMatchIndex = rightIndex
bestMatchContent = originalChunk

// Early exit for perfect matches to improve performance
if (similarity >= 1.0) {
break
}
}
rightIndex++
}

iterations++
}

// Log performance metrics for debugging large file issues
if (iterations >= MAX_ITERATIONS || Date.now() - startTime > 1000) {
console.warn(
`[fuzzySearch] Performance warning: ${iterations} iterations, ${Date.now() - startTime}ms, range: ${searchRange} lines`,
)
}

return { bestScore, bestMatchIndex, bestMatchContent }
Expand Down
Loading