Skip to content

Commit 6540f2b

Browse files
roomote[bot]roomote-agentdaniel-lxs
authored
fix: prevent XML entity decoding in diff tools (#7107) (#7108)
* fix: prevent XML entity decoding in diff tools - Add parseXmlForDiff function with processEntities: false to preserve exact content - Update multiApplyDiffTool to use parseXmlForDiff instead of parseXml - Add comprehensive tests for entity handling in parseXmlForDiff This fixes the issue where fast-xml-parser was decoding HTML entities like & causing mismatches in diff tools when comparing against original file content. Fixes #7107 * refactor: eliminate code duplication between parseXml and parseXmlForDiff - Refactored parseXml to accept optional ParseXmlOptions parameter - parseXmlForDiff now delegates to parseXml with processEntities: false - Added explanatory comment in multiApplyDiffTool.ts about why parseXmlForDiff is used - Improved JSDoc documentation with specific use cases for parseXmlForDiff This maintains backward compatibility while eliminating code duplication. parseXml continues to be used for general XML parsing (file reads, follow-up questions), while parseXmlForDiff is specifically for diff operations where entity processing must be disabled. --------- Co-authored-by: Roo Code <[email protected]> Co-authored-by: daniel-lxs <[email protected]>
1 parent 342123d commit 6540f2b

File tree

3 files changed

+166
-4
lines changed

3 files changed

+166
-4
lines changed

src/core/tools/multiApplyDiffTool.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import { formatResponse } from "../prompts/responses"
1212
import { fileExistsAtPath } from "../../utils/fs"
1313
import { RecordSource } from "../context-tracking/FileContextTrackerTypes"
1414
import { unescapeHtmlEntities } from "../../utils/text-normalization"
15-
import { parseXml } from "../../utils/xml"
15+
import { parseXmlForDiff } from "../../utils/xml"
1616
import { EXPERIMENT_IDS, experiments } from "../../shared/experiments"
1717
import { applyDiffToolLegacy } from "./applyDiffTool"
1818

@@ -108,7 +108,10 @@ export async function applyDiffTool(
108108
if (argsXmlTag) {
109109
// Parse file entries from XML (new way)
110110
try {
111-
const parsed = parseXml(argsXmlTag, ["file.diff.content"]) as ParsedXmlResult
111+
// IMPORTANT: We use parseXmlForDiff here instead of parseXml to prevent HTML entity decoding
112+
// This ensures exact character matching when comparing parsed content against original file content
113+
// Without this, special characters like & would be decoded to &amp; causing diff mismatches
114+
const parsed = parseXmlForDiff(argsXmlTag, ["file.diff.content"]) as ParsedXmlResult
112115
const files = Array.isArray(parsed.file) ? parsed.file : [parsed.file].filter(Boolean)
113116

114117
for (const file of files) {

src/utils/__tests__/xml.spec.ts

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { parseXml } from "../xml"
1+
import { parseXml, parseXmlForDiff } from "../xml"
22

33
describe("parseXml", () => {
44
describe("type conversion", () => {
@@ -115,3 +115,126 @@ describe("parseXml", () => {
115115
})
116116
})
117117
})
118+
119+
describe("parseXmlForDiff", () => {
120+
describe("HTML entity handling", () => {
121+
it("should NOT decode HTML entities like &amp;", () => {
122+
const xml = `
123+
<root>
124+
<content>Team Identity &amp; Project Positioning</content>
125+
</root>
126+
`
127+
128+
const result = parseXmlForDiff(xml) as any
129+
130+
// The &amp; should remain as-is, not be decoded to &
131+
expect(result.root.content).toBe("Team Identity &amp; Project Positioning")
132+
})
133+
134+
it("should preserve & character without encoding", () => {
135+
const xml = `
136+
<root>
137+
<content>Team Identity & Project Positioning</content>
138+
</root>
139+
`
140+
141+
const result = parseXmlForDiff(xml) as any
142+
143+
// The & should remain as-is
144+
expect(result.root.content).toBe("Team Identity & Project Positioning")
145+
})
146+
147+
it("should NOT decode other HTML entities", () => {
148+
const xml = `
149+
<root>
150+
<content>&lt;div&gt; &quot;Hello&quot; &apos;World&apos;</content>
151+
</root>
152+
`
153+
154+
const result = parseXmlForDiff(xml) as any
155+
156+
// All HTML entities should remain as-is
157+
expect(result.root.content).toBe("&lt;div&gt; &quot;Hello&quot; &apos;World&apos;")
158+
})
159+
160+
it("should handle mixed content with entities correctly", () => {
161+
const xml = `
162+
<root>
163+
<code>if (a &lt; b &amp;&amp; c &gt; d) { return &quot;test&quot;; }</code>
164+
</root>
165+
`
166+
167+
const result = parseXmlForDiff(xml) as any
168+
169+
// All entities should remain unchanged
170+
expect(result.root.code).toBe("if (a &lt; b &amp;&amp; c &gt; d) { return &quot;test&quot;; }")
171+
})
172+
})
173+
174+
describe("basic functionality (same as parseXml)", () => {
175+
it("should correctly parse a simple XML string", () => {
176+
const xml = `
177+
<root>
178+
<name>Test Name</name>
179+
<description>Some description</description>
180+
</root>
181+
`
182+
183+
const result = parseXmlForDiff(xml) as any
184+
185+
expect(result).toHaveProperty("root")
186+
expect(result.root).toHaveProperty("name", "Test Name")
187+
expect(result.root).toHaveProperty("description", "Some description")
188+
})
189+
190+
it("should handle attributes correctly", () => {
191+
const xml = `
192+
<root>
193+
<item id="1" category="test">Item content</item>
194+
</root>
195+
`
196+
197+
const result = parseXmlForDiff(xml) as any
198+
199+
expect(result.root.item).toHaveProperty("@_id", "1")
200+
expect(result.root.item).toHaveProperty("@_category", "test")
201+
expect(result.root.item).toHaveProperty("#text", "Item content")
202+
})
203+
204+
it("should support stopNodes parameter", () => {
205+
const xml = `
206+
<root>
207+
<data>
208+
<nestedXml><item>Should not parse this</item></nestedXml>
209+
</data>
210+
</root>
211+
`
212+
213+
const result = parseXmlForDiff(xml, ["nestedXml"]) as any
214+
215+
expect(result.root.data.nestedXml).toBeTruthy()
216+
expect(result.root.data.nestedXml).toHaveProperty("item", "Should not parse this")
217+
})
218+
})
219+
220+
describe("diff-specific use case", () => {
221+
it("should preserve exact content for diff matching", () => {
222+
// This simulates the actual use case from the issue
223+
const xml = `
224+
<args>
225+
<file>
226+
<path>./doc.md</path>
227+
<diff>
228+
<content>Team Identity & Project Positioning</content>
229+
</diff>
230+
</file>
231+
</args>
232+
`
233+
234+
const result = parseXmlForDiff(xml, ["file.diff.content"]) as any
235+
236+
// The & should remain as-is for exact matching with file content
237+
expect(result.args.file.diff.content).toBe("Team Identity & Project Positioning")
238+
})
239+
})
240+
})

src/utils/xml.ts

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,36 @@
11
import { XMLParser } from "fast-xml-parser"
22

3+
/**
4+
* Options for XML parsing
5+
*/
6+
interface ParseXmlOptions {
7+
/**
8+
* Whether to process HTML entities (e.g., &amp; to &).
9+
* Default: true for general parsing, false for diff operations
10+
*/
11+
processEntities?: boolean
12+
}
13+
314
/**
415
* Parses an XML string into a JavaScript object
516
* @param xmlString The XML string to parse
17+
* @param stopNodes Optional array of node names to stop parsing at
18+
* @param options Optional parsing options
619
* @returns Parsed JavaScript object representation of the XML
720
* @throws Error if the XML is invalid or parsing fails
821
*/
9-
export function parseXml(xmlString: string, stopNodes?: string[]): unknown {
22+
export function parseXml(xmlString: string, stopNodes?: string[], options?: ParseXmlOptions): unknown {
1023
const _stopNodes = stopNodes ?? []
24+
const processEntities = options?.processEntities ?? true
25+
1126
try {
1227
const parser = new XMLParser({
1328
ignoreAttributes: false,
1429
attributeNamePrefix: "@_",
1530
parseAttributeValue: false,
1631
parseTagValue: false,
1732
trimValues: true,
33+
processEntities,
1834
stopNodes: _stopNodes,
1935
})
2036

@@ -25,3 +41,23 @@ export function parseXml(xmlString: string, stopNodes?: string[]): unknown {
2541
throw new Error(`Failed to parse XML: ${errorMessage}`)
2642
}
2743
}
44+
45+
/**
46+
* Parses an XML string for diffing purposes, ensuring no HTML entities are decoded.
47+
* This is a specialized version of parseXml to be used exclusively by diffing tools
48+
* to prevent mismatches caused by entity processing.
49+
*
50+
* Use this instead of parseXml when:
51+
* - Comparing parsed content against original file content
52+
* - Performing diff operations where exact character matching is required
53+
* - Processing XML that will be used in search/replace operations
54+
*
55+
* @param xmlString The XML string to parse
56+
* @param stopNodes Optional array of node names to stop parsing at
57+
* @returns Parsed JavaScript object representation of the XML
58+
* @throws Error if the XML is invalid or parsing fails
59+
*/
60+
export function parseXmlForDiff(xmlString: string, stopNodes?: string[]): unknown {
61+
// Delegate to parseXml with processEntities disabled
62+
return parseXml(xmlString, stopNodes, { processEntities: false })
63+
}

0 commit comments

Comments
 (0)