Skip to content

Commit d7f9a5e

Browse files
committed
Fixes #4851: Prevent apply_diff tool hangs when modifying XML files
- Added timeout protection to XML parsing in multiApplyDiffTool to prevent hangs on complex XML content - Implemented safer HTML entity unescaping for XML files to avoid corruption - Added timeout mechanisms to regex operations in diff strategies to prevent infinite loops - Enhanced error messages with specific guidance for XML file modifications - Improved detection of problematic XML content patterns The fix addresses the root causes: 1. XML parsing timeouts that could hang indefinitely 2. Double-escaping/unescaping issues with XML entities 3. Complex regex patterns hanging on large XML content 4. Lack of proper error handling for XML-specific edge cases
1 parent 2e2f83b commit d7f9a5e

File tree

4 files changed

+186
-20
lines changed

4 files changed

+186
-20
lines changed

src/core/diff/strategies/multi-file-search-replace.ts

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -463,11 +463,39 @@ Each file requires its own path, start_line, and diff elements.
463463
8. ([\s\S]*?)(?:\n)? Non‐greedy match for the "replace content" (group 7).
464464
9. (?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$) Matches the final ">>>>>>> REPLACE" marker on its own line (and requires a following newline or the end of file).
465465
*/
466-
let matches = [
467-
...diffContent.matchAll(
468-
/(?:^|\n)(?<!\\)<<<<<<< SEARCH\s*\n((?:\:start_line:\s*(\d+)\s*\n))?((?:\:end_line:\s*(\d+)\s*\n))?((?<!\\)-------\s*\n)?([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)=======\s*\n)([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)/g,
469-
),
470-
]
466+
let matches: RegExpMatchArray[] = []
467+
468+
try {
469+
// Use a timeout to prevent regex hangs on complex XML content
470+
const timeoutMs = 10000 // 10 seconds timeout
471+
const startTime = Date.now()
472+
473+
const regex =
474+
/(?:^|\n)(?<!\\)<<<<<<< SEARCH\s*\n((?:\:start_line:\s*(\d+)\s*\n))?((?:\:end_line:\s*(\d+)\s*\n))?((?<!\\)-------\s*\n)?([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)=======\s*\n)([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)/g
475+
476+
let match: RegExpMatchArray | null
477+
while ((match = regex.exec(diffContent)) !== null) {
478+
// Check for timeout to prevent infinite loops on complex content
479+
if (Date.now() - startTime > timeoutMs) {
480+
throw new Error(
481+
`Regex matching timed out after ${timeoutMs}ms - this may indicate overly complex diff content`,
482+
)
483+
}
484+
485+
matches.push(match)
486+
487+
// Prevent infinite loops on zero-length matches
488+
if (match.index === regex.lastIndex) {
489+
regex.lastIndex++
490+
}
491+
}
492+
} catch (error) {
493+
const errorMessage = error instanceof Error ? error.message : String(error)
494+
return {
495+
success: false,
496+
error: `Failed to parse diff content: ${errorMessage}\n\nThis often happens with complex XML content. Try:\n1. Breaking down large changes into smaller diffs\n2. Simplifying the diff content\n3. Using write_to_file for extensive changes`,
497+
}
498+
}
471499

472500
if (matches.length === 0) {
473501
return {

src/core/diff/strategies/multi-search-replace.ts

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -376,11 +376,39 @@ Only use a single line of '=======' between search and replacement content, beca
376376
  Matches the final “>>>>>>> REPLACE” marker on its own line (and requires a following newline or the end of file).
377377
*/
378378

379-
let matches = [
380-
...diffContent.matchAll(
381-
/(?:^|\n)(?<!\\)<<<<<<< SEARCH\s*\n((?:\:start_line:\s*(\d+)\s*\n))?((?:\:end_line:\s*(\d+)\s*\n))?((?<!\\)-------\s*\n)?([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)=======\s*\n)([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)/g,
382-
),
383-
]
379+
let matches: RegExpMatchArray[] = []
380+
381+
try {
382+
// Use a timeout to prevent regex hangs on complex XML content
383+
const timeoutMs = 10000 // 10 seconds timeout
384+
const startTime = Date.now()
385+
386+
const regex =
387+
/(?:^|\n)(?<!\\)<<<<<<< SEARCH\s*\n((?:\:start_line:\s*(\d+)\s*\n))?((?:\:end_line:\s*(\d+)\s*\n))?((?<!\\)-------\s*\n)?([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)=======\s*\n)([\s\S]*?)(?:\n)?(?:(?<=\n)(?<!\\)>>>>>>> REPLACE)(?=\n|$)/g
388+
389+
let match: RegExpMatchArray | null
390+
while ((match = regex.exec(diffContent)) !== null) {
391+
// Check for timeout to prevent infinite loops on complex content
392+
if (Date.now() - startTime > timeoutMs) {
393+
throw new Error(
394+
`Regex matching timed out after ${timeoutMs}ms - this may indicate overly complex diff content`,
395+
)
396+
}
397+
398+
matches.push(match)
399+
400+
// Prevent infinite loops on zero-length matches
401+
if (match.index === regex.lastIndex) {
402+
regex.lastIndex++
403+
}
404+
}
405+
} catch (error) {
406+
const errorMessage = error instanceof Error ? error.message : String(error)
407+
return {
408+
success: false,
409+
error: `Failed to parse diff content: ${errorMessage}\n\nThis often happens with complex XML content. Try:\n1. Breaking down large changes into smaller diffs\n2. Simplifying the diff content\n3. Using write_to_file for extensive changes`,
410+
}
411+
}
384412

385413
if (matches.length === 0) {
386414
return {

src/core/tools/applyDiffTool.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,22 @@ export async function applyDiffToolLegacy(
2424
let diffContent: string | undefined = block.params.diff
2525

2626
if (diffContent && !cline.api.getModel().id.includes("claude")) {
27-
diffContent = unescapeHtmlEntities(diffContent)
27+
// Be more careful with XML content - only unescape for non-Claude models
28+
// and add additional safety checks
29+
const hasXmlStructure = diffContent.includes("<") && diffContent.includes(">")
30+
const hasXmlEntities = /&[a-zA-Z0-9#]+;/.test(diffContent)
31+
32+
// If it looks like XML content, be more conservative about unescaping
33+
if (hasXmlStructure && hasXmlEntities) {
34+
// Only unescape basic HTML entities, not XML-specific ones
35+
diffContent = diffContent
36+
.replace(/&lt;/g, "<")
37+
.replace(/&gt;/g, ">")
38+
.replace(/&quot;/g, '"')
39+
.replace(/&amp;/g, "&") // Do this last to avoid double-unescaping
40+
} else {
41+
diffContent = unescapeHtmlEntities(diffContent)
42+
}
2843
}
2944

3045
const sharedMessageProps: ClineSayTool = {

src/core/tools/multiApplyDiffTool.ts

Lines changed: 104 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,54 @@ interface ParsedXmlResult {
4949
file: ParsedFile | ParsedFile[]
5050
}
5151

52+
/**
53+
* Safely parse XML with timeout and error handling to prevent hangs
54+
*/
55+
function safeParseXml(xmlString: string, stopNodes?: string[], timeoutMs: number = 5000): Promise<unknown> {
56+
return new Promise((resolve, reject) => {
57+
const timeoutId = setTimeout(() => {
58+
reject(
59+
new Error(
60+
`XML parsing timed out after ${timeoutMs}ms - this may indicate malformed or overly complex XML content`,
61+
),
62+
)
63+
}, timeoutMs)
64+
65+
try {
66+
const result = parseXml(xmlString, stopNodes)
67+
clearTimeout(timeoutId)
68+
resolve(result)
69+
} catch (error) {
70+
clearTimeout(timeoutId)
71+
reject(error)
72+
}
73+
})
74+
}
75+
76+
/**
77+
* Check if content appears to be XML-like and might cause parsing issues
78+
*/
79+
function isProblematicXmlContent(content: string): boolean {
80+
// Check for XML-like patterns that might cause issues
81+
const xmlPatterns = [
82+
/<[^>]+>/g, // XML tags
83+
/&[a-zA-Z0-9#]+;/g, // HTML/XML entities
84+
/<!DOCTYPE/i, // DOCTYPE declarations
85+
/<\?xml/i, // XML declarations
86+
]
87+
88+
let xmlElementCount = 0
89+
for (const pattern of xmlPatterns) {
90+
const matches = content.match(pattern)
91+
if (matches) {
92+
xmlElementCount += matches.length
93+
}
94+
}
95+
96+
// If we have many XML-like elements, it might be problematic
97+
return xmlElementCount > 10
98+
}
99+
52100
export async function applyDiffTool(
53101
cline: Task,
54102
block: ToolUse,
@@ -105,9 +153,15 @@ export async function applyDiffTool(
105153
}
106154

107155
if (argsXmlTag) {
108-
// Parse file entries from XML (new way)
156+
// Parse file entries from XML (new way) with safety checks
109157
try {
110-
const parsed = parseXml(argsXmlTag, ["file.diff.content"]) as ParsedXmlResult
158+
// Check if the XML content might be problematic before parsing
159+
if (isProblematicXmlContent(argsXmlTag)) {
160+
console.warn("Detected potentially problematic XML content, using safer parsing approach")
161+
}
162+
163+
// Use safe XML parsing with timeout to prevent hangs
164+
const parsed = (await safeParseXml(argsXmlTag, ["file.diff.content"])) as ParsedXmlResult
111165
const files = Array.isArray(parsed.file) ? parsed.file : [parsed.file].filter(Boolean)
112166

113167
for (const file of files) {
@@ -142,10 +196,30 @@ export async function applyDiffTool(
142196
}
143197
} catch (error) {
144198
const errorMessage = error instanceof Error ? error.message : String(error)
145-
const detailedError = `Failed to parse apply_diff XML. This usually means:
146-
1. The XML structure is malformed or incomplete
199+
200+
// Check if this is a timeout error specifically
201+
const isTimeoutError = errorMessage.includes("timed out")
202+
203+
const detailedError = `Failed to parse apply_diff XML. ${
204+
isTimeoutError
205+
? "The operation timed out, which often happens when processing XML files with complex content."
206+
: "This usually means:"
207+
}
208+
209+
${
210+
isTimeoutError
211+
? `
212+
Possible solutions:
213+
1. Break down large XML changes into smaller, more targeted diffs
214+
2. Use the legacy apply_diff format (path/diff parameters) instead of XML args
215+
3. Simplify the diff content to avoid complex nested XML structures
216+
4. Consider using write_to_file for complete file rewrites if the changes are extensive
217+
218+
`
219+
: `1. The XML structure is malformed or incomplete
147220
2. Missing required <file>, <path>, or <diff> tags
148221
3. Invalid characters or encoding in the XML
222+
4. Complex XML content within diff content causing parsing conflicts
149223
150224
Expected structure:
151225
<args>
@@ -158,7 +232,8 @@ Expected structure:
158232
</file>
159233
</args>
160234
161-
Original error: ${errorMessage}`
235+
`
236+
}Original error: ${errorMessage}`
162237
throw new Error(detailedError)
163238
}
164239
} else if (legacyPath && typeof legacyDiffContent === "string") {
@@ -406,11 +481,29 @@ Original error: ${errorMessage}`
406481
let formattedError = ""
407482

408483
// Pre-process all diff items for HTML entity unescaping if needed
484+
// Be more careful with XML content - only unescape for non-Claude models
485+
// and add additional safety checks
409486
const processedDiffItems = !cline.api.getModel().id.includes("claude")
410-
? diffItems.map((item) => ({
411-
...item,
412-
content: item.content ? unescapeHtmlEntities(item.content) : item.content,
413-
}))
487+
? diffItems.map((item) => {
488+
if (!item.content) return item
489+
490+
// Check if content looks like it might be XML that shouldn't be unescaped
491+
const hasXmlStructure = item.content.includes("<") && item.content.includes(">")
492+
const hasXmlEntities = /&[a-zA-Z0-9#]+;/.test(item.content)
493+
494+
// If it looks like XML content, be more conservative about unescaping
495+
if (hasXmlStructure && hasXmlEntities) {
496+
// Only unescape basic HTML entities, not XML-specific ones
497+
const safeUnescaped = item.content
498+
.replace(/&lt;/g, "<")
499+
.replace(/&gt;/g, ">")
500+
.replace(/&quot;/g, '"')
501+
.replace(/&amp;/g, "&") // Do this last to avoid double-unescaping
502+
return { ...item, content: safeUnescaped }
503+
} else {
504+
return { ...item, content: unescapeHtmlEntities(item.content) }
505+
}
506+
})
414507
: diffItems
415508

416509
// Apply all diffs at once with the array-based method
@@ -450,6 +543,7 @@ Suggested fixes:
450543
3. Use <read_file> to see the current file content
451544
4. Consider breaking complex changes into smaller diffs
452545
5. Ensure start_line parameter matches the actual content location
546+
6. For XML files, be careful with special characters and entities
453547
${errorDetails ? `\nDetailed error information:\n${errorDetails}\n` : ""}
454548
</error_details>\n\n`
455549
}
@@ -465,6 +559,7 @@ Recovery suggestions:
465559
3. Check that the search content exactly matches what's in the file
466560
4. Consider using line numbers with start_line parameter
467561
5. Break large changes into smaller, more specific diffs
562+
6. For XML files, ensure proper escaping of special characters
468563
${errorDetails ? `\nTechnical details:\n${errorDetails}\n` : ""}
469564
</error_details>\n\n`
470565
}

0 commit comments

Comments
 (0)