Skip to content

Commit 3b5167b

Browse files
committed
fix: prevent XML entity decoding in diff tools
- Add parseXmlForDiff function with processEntities: false to preserve exact content - Update multiApplyDiffTool to use parseXmlForDiff instead of parseXml - Add comprehensive tests for entity handling in parseXmlForDiff This fixes the issue where fast-xml-parser was decoding HTML entities like & causing mismatches in diff tools when comparing against original file content. Fixes #7107
1 parent 2a105a5 commit 3b5167b

File tree

3 files changed

+155
-3
lines changed

3 files changed

+155
-3
lines changed

src/core/tools/multiApplyDiffTool.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import { formatResponse } from "../prompts/responses"
1212
import { fileExistsAtPath } from "../../utils/fs"
1313
import { RecordSource } from "../context-tracking/FileContextTrackerTypes"
1414
import { unescapeHtmlEntities } from "../../utils/text-normalization"
15-
import { parseXml } from "../../utils/xml"
15+
import { parseXmlForDiff } from "../../utils/xml"
1616
import { EXPERIMENT_IDS, experiments } from "../../shared/experiments"
1717
import { applyDiffToolLegacy } from "./applyDiffTool"
1818

@@ -108,7 +108,7 @@ export async function applyDiffTool(
108108
if (argsXmlTag) {
109109
// Parse file entries from XML (new way)
110110
try {
111-
const parsed = parseXml(argsXmlTag, ["file.diff.content"]) as ParsedXmlResult
111+
const parsed = parseXmlForDiff(argsXmlTag, ["file.diff.content"]) as ParsedXmlResult
112112
const files = Array.isArray(parsed.file) ? parsed.file : [parsed.file].filter(Boolean)
113113

114114
for (const file of files) {

src/utils/__tests__/xml.spec.ts

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { parseXml } from "../xml"
1+
import { parseXml, parseXmlForDiff } from "../xml"
22

33
describe("parseXml", () => {
44
describe("type conversion", () => {
@@ -115,3 +115,126 @@ describe("parseXml", () => {
115115
})
116116
})
117117
})
118+
119+
describe("parseXmlForDiff", () => {
120+
describe("HTML entity handling", () => {
121+
it("should NOT decode HTML entities like &", () => {
122+
const xml = `
123+
<root>
124+
<content>Team Identity &amp; Project Positioning</content>
125+
</root>
126+
`
127+
128+
const result = parseXmlForDiff(xml) as any
129+
130+
// The &amp; should remain as-is, not be decoded to &
131+
expect(result.root.content).toBe("Team Identity &amp; Project Positioning")
132+
})
133+
134+
it("should preserve & character without encoding", () => {
135+
const xml = `
136+
<root>
137+
<content>Team Identity & Project Positioning</content>
138+
</root>
139+
`
140+
141+
const result = parseXmlForDiff(xml) as any
142+
143+
// The & should remain as-is
144+
expect(result.root.content).toBe("Team Identity & Project Positioning")
145+
})
146+
147+
it("should NOT decode other HTML entities", () => {
148+
const xml = `
149+
<root>
150+
<content>&lt;div&gt; &quot;Hello&quot; &apos;World&apos;</content>
151+
</root>
152+
`
153+
154+
const result = parseXmlForDiff(xml) as any
155+
156+
// All HTML entities should remain as-is
157+
expect(result.root.content).toBe("&lt;div&gt; &quot;Hello&quot; &apos;World&apos;")
158+
})
159+
160+
it("should handle mixed content with entities correctly", () => {
161+
const xml = `
162+
<root>
163+
<code>if (a &lt; b &amp;&amp; c &gt; d) { return &quot;test&quot;; }</code>
164+
</root>
165+
`
166+
167+
const result = parseXmlForDiff(xml) as any
168+
169+
// All entities should remain unchanged
170+
expect(result.root.code).toBe("if (a &lt; b &amp;&amp; c &gt; d) { return &quot;test&quot;; }")
171+
})
172+
})
173+
174+
describe("basic functionality (same as parseXml)", () => {
175+
it("should correctly parse a simple XML string", () => {
176+
const xml = `
177+
<root>
178+
<name>Test Name</name>
179+
<description>Some description</description>
180+
</root>
181+
`
182+
183+
const result = parseXmlForDiff(xml) as any
184+
185+
expect(result).toHaveProperty("root")
186+
expect(result.root).toHaveProperty("name", "Test Name")
187+
expect(result.root).toHaveProperty("description", "Some description")
188+
})
189+
190+
it("should handle attributes correctly", () => {
191+
const xml = `
192+
<root>
193+
<item id="1" category="test">Item content</item>
194+
</root>
195+
`
196+
197+
const result = parseXmlForDiff(xml) as any
198+
199+
expect(result.root.item).toHaveProperty("@_id", "1")
200+
expect(result.root.item).toHaveProperty("@_category", "test")
201+
expect(result.root.item).toHaveProperty("#text", "Item content")
202+
})
203+
204+
it("should support stopNodes parameter", () => {
205+
const xml = `
206+
<root>
207+
<data>
208+
<nestedXml><item>Should not parse this</item></nestedXml>
209+
</data>
210+
</root>
211+
`
212+
213+
const result = parseXmlForDiff(xml, ["nestedXml"]) as any
214+
215+
expect(result.root.data.nestedXml).toBeTruthy()
216+
expect(result.root.data.nestedXml).toHaveProperty("item", "Should not parse this")
217+
})
218+
})
219+
220+
describe("diff-specific use case", () => {
221+
it("should preserve exact content for diff matching", () => {
222+
// This simulates the actual use case from the issue
223+
const xml = `
224+
<args>
225+
<file>
226+
<path>./doc.md</path>
227+
<diff>
228+
<content>Team Identity & Project Positioning</content>
229+
</diff>
230+
</file>
231+
</args>
232+
`
233+
234+
const result = parseXmlForDiff(xml, ["file.diff.content"]) as any
235+
236+
// The & should remain as-is for exact matching with file content
237+
expect(result.args.file.diff.content).toBe("Team Identity & Project Positioning")
238+
})
239+
})
240+
})

src/utils/xml.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,32 @@ export function parseXml(xmlString: string, stopNodes?: string[]): unknown {
2525
throw new Error(`Failed to parse XML: ${errorMessage}`)
2626
}
2727
}
28+
29+
/**
30+
* Parses an XML string for diffing purposes, ensuring no HTML entities are decoded.
31+
* This is a specialized version of parseXml to be used exclusively by diffing tools
32+
* to prevent mismatches caused by entity processing.
33+
* @param xmlString The XML string to parse
34+
* @returns Parsed JavaScript object representation of the XML
35+
* @throws Error if the XML is invalid or parsing fails
36+
*/
37+
export function parseXmlForDiff(xmlString: string, stopNodes?: string[]): unknown {
38+
const _stopNodes = stopNodes ?? []
39+
try {
40+
const parser = new XMLParser({
41+
ignoreAttributes: false,
42+
attributeNamePrefix: "@_",
43+
parseAttributeValue: false,
44+
parseTagValue: false,
45+
trimValues: true,
46+
processEntities: false, // Do not process HTML entities, keep them as is
47+
stopNodes: _stopNodes,
48+
})
49+
50+
return parser.parse(xmlString)
51+
} catch (error) {
52+
// Enhance error message for better debugging
53+
const errorMessage = error instanceof Error ? error.message : "Unknown error"
54+
throw new Error(`Failed to parse XML: ${errorMessage}`)
55+
}
56+
}

0 commit comments

Comments
 (0)