Skip to content

Commit 2410003

Browse files
committed
fix: prevent apply_diff from hanging on large/complex XML files
- Add performance optimizations to fuzzySearch function with early termination for exact matches - Add bounds checking and maximum iteration limit to prevent infinite loops - Add 30-second timeout mechanism to prevent indefinite hanging - Add integration tests to verify performance on large XML files Fixes #4852
1 parent 2eb586b commit 2410003

File tree

4 files changed

+716
-6
lines changed

4 files changed

+716
-6
lines changed
Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace"
2+
import { MultiFileSearchReplaceDiffStrategy } from "../multi-file-search-replace"
3+
import { DiffResult } from "../../../../shared/tools"
4+
5+
describe("MultiSearchReplaceDiffStrategy Hanging Issue #4852", () => {
6+
describe("reproduce exact issue scenario", () => {
7+
let strategy: MultiSearchReplaceDiffStrategy
8+
let multiFileStrategy: MultiFileSearchReplaceDiffStrategy
9+
10+
beforeEach(() => {
11+
// Use exact settings that might cause the issue
12+
strategy = new MultiSearchReplaceDiffStrategy(1.0, 40) // Exact match, 40 line buffer
13+
multiFileStrategy = new MultiFileSearchReplaceDiffStrategy(1.0, 40)
14+
})
15+
16+
it("should handle the exact XML from issue #4852 without hanging", async () => {
17+
// This is the exact XML content from the issue
18+
const issueXmlContent = `<?xml version="1.0" encoding="UTF-8"?>
19+
<root>
20+
<level1>
21+
<level2>
22+
<level3>
23+
<level4>
24+
<level5>
25+
<level6>
26+
<level7>
27+
<level8>
28+
<level9>
29+
<level10>
30+
<data>
31+
<item>Value 1</item>
32+
<item>Value 2</item>
33+
<item>Value 3</item>
34+
<nested>
35+
<subnested>
36+
<subsubnested>
37+
<deep>This is deeply nested content</deep>
38+
<deep>More content here</deep>
39+
<deep>Even more content</deep>
40+
</subsubnested>
41+
<subsubnested>
42+
<deep>Another deep element</deep>
43+
<deep>And another one</deep>
44+
</subsubnested>
45+
</subnested>
46+
<subnested>
47+
<subsubnested>
48+
<deep>More deeply nested</deep>
49+
<deep>Content continues</deep>
50+
</subsubnested>
51+
</subnested>
52+
</nested>
53+
<item>Value 4</item>
54+
<item>Value 5</item>
55+
<complexPattern>
56+
<!-- This pattern is designed to cause backtracking -->
57+
<a><b><c><d><e><f><g><h><i><j>
58+
<content>Complex nested structure</content>
59+
</j></i></h></g></f></e></d></c></b></a>
60+
<a><b><c><d><e><f><g><h><i><j>
61+
<content>Another complex structure</content>
62+
</j></i></h></g></f></e></d></c></b></a>
63+
</complexPattern>
64+
</data>
65+
<moreData>
66+
<repeatingPattern>Pattern A</repeatingPattern>
67+
<repeatingPattern>Pattern A</repeatingPattern>
68+
<repeatingPattern>Pattern A</repeatingPattern>
69+
<repeatingPattern>Pattern B</repeatingPattern>
70+
<repeatingPattern>Pattern B</repeatingPattern>
71+
<repeatingPattern>Pattern B</repeatingPattern>
72+
<ambiguousContent>
73+
This content has multiple possible matches
74+
and can cause the regex to try many combinations
75+
especially when looking for specific patterns
76+
=======
77+
This looks like a separator but it's not
78+
>>>>>>>
79+
These patterns can confuse the regex
80+
<<<<<<<
81+
Causing it to backtrack extensively
82+
</ambiguousContent>
83+
</moreData>
84+
</level10>
85+
</level9>
86+
</level8>
87+
</level7>
88+
</level6>
89+
</level5>
90+
</level4>
91+
</level3>
92+
</level2>
93+
</level1>
94+
</root>`
95+
96+
// Test multiple concurrent edits as described in the issue
97+
const diffItems = [
98+
{
99+
content: `<<<<<<< SEARCH
100+
<repeatingPattern>Pattern A</repeatingPattern>
101+
=======
102+
<repeatingPattern>Pattern X</repeatingPattern>
103+
>>>>>>> REPLACE`,
104+
startLine: undefined,
105+
},
106+
{
107+
content: `<<<<<<< SEARCH
108+
<repeatingPattern>Pattern B</repeatingPattern>
109+
=======
110+
<repeatingPattern>Pattern Y</repeatingPattern>
111+
>>>>>>> REPLACE`,
112+
startLine: undefined,
113+
},
114+
{
115+
content: `<<<<<<< SEARCH
116+
<content>Complex nested structure</content>
117+
=======
118+
<content>Updated nested structure</content>
119+
>>>>>>> REPLACE`,
120+
startLine: undefined,
121+
},
122+
]
123+
124+
console.log("Starting multi-file diff application...")
125+
const startTime = Date.now()
126+
127+
// Apply all diffs using the multi-file strategy
128+
const result = await multiFileStrategy.applyDiff(issueXmlContent, diffItems)
129+
130+
const endTime = Date.now()
131+
const duration = endTime - startTime
132+
console.log(`Multi-file diff completed in ${duration}ms`)
133+
134+
// Check that it completed in reasonable time
135+
expect(duration).toBeLessThan(2000) // 2 seconds max
136+
137+
// Verify the result
138+
expect(result.success).toBe(true)
139+
if (result.success && result.content) {
140+
// Count occurrences
141+
const patternACount = (result.content.match(/Pattern A/g) || []).length
142+
const patternBCount = (result.content.match(/Pattern B/g) || []).length
143+
const patternXCount = (result.content.match(/Pattern X/g) || []).length
144+
const patternYCount = (result.content.match(/Pattern Y/g) || []).length
145+
146+
// Should have replaced all occurrences
147+
expect(patternACount).toBe(0)
148+
expect(patternBCount).toBe(0)
149+
expect(patternXCount).toBe(3)
150+
expect(patternYCount).toBe(3)
151+
expect(result.content).toContain("Updated nested structure")
152+
}
153+
}, 10000) // 10 second timeout
154+
155+
it("should handle worst-case scenario with ambiguous patterns", async () => {
156+
// Create a pathological case with many ambiguous patterns
157+
const lines = []
158+
159+
// Add many similar lines that could match
160+
for (let i = 0; i < 200; i++) {
161+
lines.push(` <pattern>Similar content with slight variation ${i % 5}</pattern>`)
162+
}
163+
164+
// Add the target in the middle
165+
lines.splice(100, 0, ` <pattern>Target pattern to replace</pattern>`)
166+
167+
// Add more similar lines
168+
for (let i = 0; i < 200; i++) {
169+
lines.push(` <pattern>More similar content ${i % 5}</pattern>`)
170+
}
171+
172+
const pathologicalContent = lines.join("\n")
173+
174+
// Try to replace without line number hint (worst case)
175+
const diffContent = `<<<<<<< SEARCH
176+
<pattern>Target pattern to replace</pattern>
177+
=======
178+
<pattern>Successfully replaced target</pattern>
179+
>>>>>>> REPLACE`
180+
181+
console.log("Starting pathological case test...")
182+
const startTime = Date.now()
183+
184+
const result = await strategy.applyDiff(pathologicalContent, diffContent)
185+
186+
const endTime = Date.now()
187+
const duration = endTime - startTime
188+
console.log(`Pathological case completed in ${duration}ms`)
189+
190+
// Should complete even in worst case
191+
expect(duration).toBeLessThan(5000) // 5 seconds max
192+
expect(result.success).toBe(true)
193+
if (result.success && result.content) {
194+
expect(result.content).toContain("Successfully replaced target")
195+
expect(result.content).not.toContain("Target pattern to replace")
196+
}
197+
}, 10000)
198+
199+
it("should handle extremely deep nesting efficiently", async () => {
200+
// Create extremely deep nesting that could cause stack issues
201+
const depth = 100
202+
let content = '<?xml version="1.0" encoding="UTF-8"?>\n'
203+
204+
// Open tags
205+
for (let i = 0; i < depth; i++) {
206+
content += `${" ".repeat(i)}<level${i}>\n`
207+
}
208+
209+
// Add content at deepest level
210+
content += `${" ".repeat(depth)}<data>Deep content to replace</data>\n`
211+
212+
// Close tags
213+
for (let i = depth - 1; i >= 0; i--) {
214+
content += `${" ".repeat(i)}</level${i}>\n`
215+
}
216+
217+
const diffContent = `<<<<<<< SEARCH
218+
${" ".repeat(depth)}<data>Deep content to replace</data>
219+
=======
220+
${" ".repeat(depth)}<data>Replaced deep content</data>
221+
>>>>>>> REPLACE`
222+
223+
console.log("Starting deep nesting test...")
224+
const startTime = Date.now()
225+
226+
const result = await strategy.applyDiff(content, diffContent)
227+
228+
const endTime = Date.now()
229+
const duration = endTime - startTime
230+
console.log(`Deep nesting test completed in ${duration}ms`)
231+
232+
expect(duration).toBeLessThan(1000) // Should be fast
233+
expect(result.success).toBe(true)
234+
if (result.success && result.content) {
235+
expect(result.content).toContain("Replaced deep content")
236+
}
237+
}, 10000)
238+
239+
it("should handle the ambiguous content markers that look like diff markers", async () => {
240+
const contentWithFakeMarkers = `<root>
241+
<data>
242+
<item>Normal content</item>
243+
<ambiguous>
244+
This has fake markers
245+
=======
246+
Not a real separator
247+
>>>>>>>
248+
Also not real
249+
<<<<<<<
250+
Just content
251+
</ambiguous>
252+
<target>Replace this content</target>
253+
</data>
254+
</root>`
255+
256+
const diffContent = `<<<<<<< SEARCH
257+
<target>Replace this content</target>
258+
=======
259+
<target>Successfully replaced</target>
260+
>>>>>>> REPLACE`
261+
262+
const result = await strategy.applyDiff(contentWithFakeMarkers, diffContent)
263+
264+
expect(result.success).toBe(true)
265+
if (result.success && result.content) {
266+
expect(result.content).toContain("Successfully replaced")
267+
// Should not have affected the fake markers
268+
expect(result.content).toContain("=======")
269+
expect(result.content).toContain(">>>>>>>")
270+
expect(result.content).toContain("<<<<<<<")
271+
}
272+
})
273+
})
274+
})

0 commit comments

Comments
 (0)