Skip to content

Commit 73de887

Browse files
committed
fix: distinguish HTML entities from literal characters in diff comparison
- Store original search/replace content before unescaping markers - Compare original content to preserve HTML entity distinction - Add comprehensive tests for HTML entity handling - Fixes issue where < and < were incorrectly treated as identical Fixes #7964
1 parent 72bc790 commit 73de887

File tree

3 files changed

+184
-2
lines changed

3 files changed

+184
-2
lines changed
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace"
2+
3+
describe("HTML entity handling", () => {
4+
let strategy: MultiSearchReplaceDiffStrategy
5+
6+
beforeEach(() => {
7+
strategy = new MultiSearchReplaceDiffStrategy()
8+
})
9+
10+
it("should distinguish between HTML entities and their literal characters", async () => {
11+
const originalContent = `.FilterBatch&lt;int&gt;(batch =&gt; batch.Count == 3)
12+
.MapBatch&lt;int, int&gt;(batch =&gt; batch.Sum())`
13+
14+
const diffContent = `
15+
<<<<<<< SEARCH
16+
.FilterBatch&lt;int&gt;(batch =&gt; batch.Count == 3)
17+
=======
18+
.FilterBatch<int>(batch => batch.Count == 3)
19+
>>>>>>> REPLACE
20+
21+
<<<<<<< SEARCH
22+
.MapBatch&lt;int, int&gt;(batch =&gt; batch.Sum())
23+
=======
24+
.MapBatch<int, int>(batch => batch.Sum())
25+
>>>>>>> REPLACE`
26+
27+
const result = await strategy.applyDiff(originalContent, diffContent)
28+
expect(result.success).toBe(true)
29+
if (result.success) {
30+
expect(result.content).toBe(`.FilterBatch<int>(batch => batch.Count == 3)
31+
.MapBatch<int, int>(batch => batch.Sum())`)
32+
}
33+
})
34+
35+
it("should not treat &lt; and < as identical in search/replace comparison", async () => {
36+
const originalContent = `public List&lt;string&gt; GetItems() {
37+
return new List&lt;string&gt;();
38+
}`
39+
40+
const diffContent = `
41+
<<<<<<< SEARCH
42+
public List&lt;string&gt; GetItems() {
43+
return new List&lt;string&gt;();
44+
}
45+
=======
46+
public List<string> GetItems() {
47+
return new List<string>();
48+
}
49+
>>>>>>> REPLACE`
50+
51+
const result = await strategy.applyDiff(originalContent, diffContent)
52+
expect(result.success).toBe(true)
53+
if (result.success) {
54+
expect(result.content).toBe(`public List<string> GetItems() {
55+
return new List<string>();
56+
}`)
57+
}
58+
})
59+
60+
it("should handle mixed HTML entities correctly", async () => {
61+
const originalContent = `&lt;div class=&quot;container&quot;&gt;
62+
&lt;p&gt;Hello &amp; welcome&lt;/p&gt;
63+
&lt;/div&gt;`
64+
65+
const diffContent = `
66+
<<<<<<< SEARCH
67+
&lt;div class=&quot;container&quot;&gt;
68+
&lt;p&gt;Hello &amp; welcome&lt;/p&gt;
69+
&lt;/div&gt;
70+
=======
71+
<div class="container">
72+
<p>Hello & welcome</p>
73+
</div>
74+
>>>>>>> REPLACE`
75+
76+
const result = await strategy.applyDiff(originalContent, diffContent)
77+
expect(result.success).toBe(true)
78+
if (result.success) {
79+
expect(result.content).toBe(`<div class="container">
80+
<p>Hello & welcome</p>
81+
</div>`)
82+
}
83+
})
84+
85+
it("should reject when search and replace are identical (including HTML entities)", async () => {
86+
const originalContent = `function test<T>() {
87+
return value;
88+
}`
89+
90+
// Both search and replace have the same content (literal angle brackets)
91+
const diffContent = `
92+
<<<<<<< SEARCH
93+
function test<T>() {
94+
return value;
95+
}
96+
=======
97+
function test<T>() {
98+
return value;
99+
}
100+
>>>>>>> REPLACE`
101+
102+
const result = await strategy.applyDiff(originalContent, diffContent)
103+
expect(result.success).toBe(false)
104+
if (!result.success && result.error) {
105+
expect(result.error).toContain("Search and replace content are identical")
106+
}
107+
})
108+
109+
it("should handle apostrophes and quotes with HTML entities", async () => {
110+
const originalContent = `const message = &apos;It&apos;s a &quot;test&quot; message&apos;;`
111+
112+
const diffContent = `
113+
<<<<<<< SEARCH
114+
const message = &apos;It&apos;s a &quot;test&quot; message&apos;;
115+
=======
116+
const message = 'It\'s a "test" message';
117+
>>>>>>> REPLACE`
118+
119+
const result = await strategy.applyDiff(originalContent, diffContent)
120+
expect(result.success).toBe(true)
121+
if (result.success) {
122+
expect(result.content).toBe(`const message = 'It\'s a "test" message';`)
123+
}
124+
})
125+
126+
it("should handle C# generics with escaped HTML entities", async () => {
127+
const originalContent = `var dict = new Dictionary&lt;string, List&lt;int&gt;&gt;();
128+
dict.Add(&quot;key&quot;, new List&lt;int&gt; { 1, 2, 3 });`
129+
130+
const diffContent = `
131+
<<<<<<< SEARCH
132+
var dict = new Dictionary&lt;string, List&lt;int&gt;&gt;();
133+
dict.Add(&quot;key&quot;, new List&lt;int&gt; { 1, 2, 3 });
134+
=======
135+
var dict = new Dictionary<string, List<int>>();
136+
dict.Add("key", new List<int> { 1, 2, 3 });
137+
>>>>>>> REPLACE`
138+
139+
const result = await strategy.applyDiff(originalContent, diffContent)
140+
expect(result.success).toBe(true)
141+
if (result.success) {
142+
expect(result.content).toBe(`var dict = new Dictionary<string, List<int>>();
143+
dict.Add("key", new List<int> { 1, 2, 3 });`)
144+
}
145+
})
146+
147+
it("should handle the exact issue from bug report", async () => {
148+
const originalContent = ` .FilterBatch&lt;int&gt;(batch =&gt; batch.Count == 3)
149+
.MapBatch&lt;int, int&gt;(batch =&gt; batch.Sum())`
150+
151+
// This is the exact diff that was failing before the fix
152+
const diffContent = `
153+
<<<<<<< SEARCH
154+
.FilterBatch&lt;int&gt;(batch =&gt; batch.Count == 3)
155+
=======
156+
.FilterBatch<int>(batch => batch.Count == 3)
157+
>>>>>>> REPLACE
158+
159+
<<<<<<< SEARCH
160+
.MapBatch&lt;int, int&gt;(batch =&gt; batch.Sum())
161+
=======
162+
.MapBatch<int, int>(batch => batch.Sum())
163+
>>>>>>> REPLACE`
164+
165+
const result = await strategy.applyDiff(originalContent, diffContent)
166+
expect(result.success).toBe(true)
167+
if (result.success) {
168+
expect(result.content).toBe(` .FilterBatch<int>(batch => batch.Count == 3)
169+
.MapBatch<int, int>(batch => batch.Sum())`)
170+
}
171+
})
172+
})

src/core/diff/strategies/multi-file-search-replace.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,10 @@ Each file requires its own path, start_line, and diff elements.
492492
let { searchContent, replaceContent } = replacement
493493
let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta)
494494

495+
// Store original content for comparison before any transformations
496+
const originalSearchContent = searchContent
497+
const originalReplaceContent = replaceContent
498+
495499
// First unescape any escaped markers in the content
496500
searchContent = this.unescapeMarkers(searchContent)
497501
replaceContent = this.unescapeMarkers(replaceContent)
@@ -511,7 +515,8 @@ Each file requires its own path, start_line, and diff elements.
511515
}
512516

513517
// Validate that search and replace content are not identical
514-
if (searchContent === replaceContent) {
518+
// Compare the original content to preserve HTML entities distinction
519+
if (originalSearchContent === originalReplaceContent) {
515520
diffResults.push({
516521
success: false,
517522
error:

src/core/diff/strategies/multi-search-replace.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,10 @@ Only use a single line of '=======' between search and replacement content, beca
409409
let { searchContent, replaceContent } = replacement
410410
let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta)
411411

412+
// Store original content for comparison before any transformations
413+
const originalSearchContent = searchContent
414+
const originalReplaceContent = replaceContent
415+
412416
// First unescape any escaped markers in the content
413417
searchContent = this.unescapeMarkers(searchContent)
414418
replaceContent = this.unescapeMarkers(replaceContent)
@@ -428,7 +432,8 @@ Only use a single line of '=======' between search and replacement content, beca
428432
}
429433

430434
// Validate that search and replace content are not identical
431-
if (searchContent === replaceContent) {
435+
// Compare the original content to preserve HTML entities distinction
436+
if (originalSearchContent === originalReplaceContent) {
432437
diffResults.push({
433438
success: false,
434439
error:

0 commit comments

Comments
 (0)