Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 172 additions & 0 deletions src/core/diff/strategies/__tests__/html-entity-handling.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace"

describe("HTML entity handling", () => {
let strategy: MultiSearchReplaceDiffStrategy

beforeEach(() => {
strategy = new MultiSearchReplaceDiffStrategy()
})

it("should distinguish between HTML entities and their literal characters", async () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great test coverage! Have we considered adding a test for nested/double-encoded HTML entities such as `&amp;lt;` (which decodes to `&lt;`, not to `<`)? This edge case would help ensure robustness with malformed or multiply-escaped content.

const originalContent = `.FilterBatch&lt;int&gt;(batch =&gt; batch.Count == 3)
.MapBatch&lt;int, int&gt;(batch =&gt; batch.Sum())`

const diffContent = `
<<<<<<< SEARCH
.FilterBatch&lt;int&gt;(batch =&gt; batch.Count == 3)
=======
.FilterBatch<int>(batch => batch.Count == 3)
>>>>>>> REPLACE
<<<<<<< SEARCH
.MapBatch&lt;int, int&gt;(batch =&gt; batch.Sum())
=======
.MapBatch<int, int>(batch => batch.Sum())
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(`.FilterBatch<int>(batch => batch.Count == 3)
.MapBatch<int, int>(batch => batch.Sum())`)
}
})

it("should not treat &lt; and < as identical in search/replace comparison", async () => {
	// Input text spells the generic type with entity-escaped angle brackets.
	const escapedSource = `public List&lt;string&gt; GetItems() {
return new List&lt;string&gt;();
}`

	// Text expected after the diff is applied: same code, literal angle brackets.
	const literalSource = `public List<string> GetItems() {
return new List<string>();
}`

	// SEARCH carries the escaped spelling and REPLACE the literal one, so the two
	// sides differ only by entity encoding.
	const patch = `
<<<<<<< SEARCH
public List&lt;string&gt; GetItems() {
return new List&lt;string&gt;();
}
=======
public List<string> GetItems() {
return new List<string>();
}
>>>>>>> REPLACE`

	const outcome = await strategy.applyDiff(escapedSource, patch)

	expect(outcome.success).toBe(true)
	if (!outcome.success) {
		// Unreachable when the assertion above passes; present to narrow the result type.
		return
	}
	expect(outcome.content).toBe(literalSource)
})

it("should handle mixed HTML entities correctly", async () => {
	// Markup whose angle brackets, double quotes and ampersand are all written as entities.
	const escapedMarkup = `&lt;div class=&quot;container&quot;&gt;
&lt;p&gt;Hello &amp; welcome&lt;/p&gt;
&lt;/div&gt;`

	// The same markup with each entity replaced by its literal character.
	const literalMarkup = `<div class="container">
<p>Hello & welcome</p>
</div>`

	// One SEARCH/REPLACE block that swaps the escaped markup for the literal markup.
	const patch = `
<<<<<<< SEARCH
&lt;div class=&quot;container&quot;&gt;
&lt;p&gt;Hello &amp; welcome&lt;/p&gt;
&lt;/div&gt;
=======
<div class="container">
<p>Hello & welcome</p>
</div>
>>>>>>> REPLACE`

	const outcome = await strategy.applyDiff(escapedMarkup, patch)

	expect(outcome.success).toBe(true)
	if (!outcome.success) {
		// Unreachable when the assertion above passes; present to narrow the result type.
		return
	}
	expect(outcome.content).toBe(literalMarkup)
})

it("should reject when search and replace are identical (including HTML entities)", async () => {
	// Source already uses literal angle brackets; the patch below changes nothing.
	const unchangedSource = `function test<T>() {
return value;
}`

	// SEARCH and REPLACE sections are the same text, character for character.
	const noOpPatch = `
<<<<<<< SEARCH
function test<T>() {
return value;
}
=======
function test<T>() {
return value;
}
>>>>>>> REPLACE`

	const outcome = await strategy.applyDiff(unchangedSource, noOpPatch)

	expect(outcome.success).toBe(false)

	// The message is only checked when the failed result carries a top-level
	// `error`; when it does not, the test ends here without inspecting the text.
	if (outcome.success || !outcome.error) {
		return
	}
	expect(outcome.error).toContain("Search and replace content are identical")
})

it("should handle apostrophes and quotes with HTML entities", async () => {
	// Statement whose single and double quotes are written as &apos; / &quot; entities.
	const escapedStatement = `const message = &apos;It&apos;s a &quot;test&quot; message&apos;;`

	// Expected text after the patch. Inside a template literal `\'` evaluates to a
	// plain apostrophe, so no backslash is present in the resulting string.
	const literalStatement = `const message = 'It\'s a "test" message';`

	// Replaces the entity-escaped statement with the literal-quote statement.
	const patch = `
<<<<<<< SEARCH
const message = &apos;It&apos;s a &quot;test&quot; message&apos;;
=======
const message = 'It\'s a "test" message';
>>>>>>> REPLACE`

	const outcome = await strategy.applyDiff(escapedStatement, patch)

	expect(outcome.success).toBe(true)
	if (!outcome.success) {
		// Unreachable when the assertion above passes; present to narrow the result type.
		return
	}
	expect(outcome.content).toBe(literalStatement)
})

it("should handle C# generics with escaped HTML entities", async () => {
	// Two C# statements with nested generics and string quotes written as entities.
	const escapedSnippet = `var dict = new Dictionary&lt;string, List&lt;int&gt;&gt;();
dict.Add(&quot;key&quot;, new List&lt;int&gt; { 1, 2, 3 });`

	// The same two statements using literal angle brackets and double quotes.
	const literalSnippet = `var dict = new Dictionary<string, List<int>>();
dict.Add("key", new List<int> { 1, 2, 3 });`

	// A single SEARCH/REPLACE block covering both lines.
	const patch = `
<<<<<<< SEARCH
var dict = new Dictionary&lt;string, List&lt;int&gt;&gt;();
dict.Add(&quot;key&quot;, new List&lt;int&gt; { 1, 2, 3 });
=======
var dict = new Dictionary<string, List<int>>();
dict.Add("key", new List<int> { 1, 2, 3 });
>>>>>>> REPLACE`

	const outcome = await strategy.applyDiff(escapedSnippet, patch)

	expect(outcome.success).toBe(true)
	if (!outcome.success) {
		// Unreachable when the assertion above passes; present to narrow the result type.
		return
	}
	expect(outcome.content).toBe(literalSnippet)
})

it("should handle the exact issue from bug report", async () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we add one more test case for mixed escaped and unescaped content in the same diff block? For example, a file in which some lines contain `&lt;` and others contain a literal `<`, to ensure the comparison handles mixed scenarios correctly.

const originalContent = ` .FilterBatch&lt;int&gt;(batch =&gt; batch.Count == 3)
.MapBatch&lt;int, int&gt;(batch =&gt; batch.Sum())`

// This is the exact diff that was failing before the fix
const diffContent = `
<<<<<<< SEARCH
.FilterBatch&lt;int&gt;(batch =&gt; batch.Count == 3)
=======
.FilterBatch<int>(batch => batch.Count == 3)
>>>>>>> REPLACE
<<<<<<< SEARCH
.MapBatch&lt;int, int&gt;(batch =&gt; batch.Sum())
=======
.MapBatch<int, int>(batch => batch.Sum())
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(` .FilterBatch<int>(batch => batch.Count == 3)
.MapBatch<int, int>(batch => batch.Sum())`)
}
})
})
7 changes: 6 additions & 1 deletion src/core/diff/strategies/multi-file-search-replace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,10 @@ Each file requires its own path, start_line, and diff elements.
let { searchContent, replaceContent } = replacement
let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta)

// Store original content for comparison before any transformations
const originalSearchContent = searchContent
const originalReplaceContent = replaceContent

// First unescape any escaped markers in the content
searchContent = this.unescapeMarkers(searchContent)
replaceContent = this.unescapeMarkers(replaceContent)
Expand All @@ -511,7 +515,8 @@ Each file requires its own path, start_line, and diff elements.
}

// Validate that search and replace content are not identical
if (searchContent === replaceContent) {
// Compare the original content to preserve HTML entities distinction
if (originalSearchContent === originalReplaceContent) {
diffResults.push({
success: false,
error:
Expand Down
7 changes: 6 additions & 1 deletion src/core/diff/strategies/multi-search-replace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,10 @@ Only use a single line of '=======' between search and replacement content, beca
let { searchContent, replaceContent } = replacement
let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta)

// Store original content for comparison before any transformations
const originalSearchContent = searchContent
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this approach intentional? We're storing both original and transformed content for every replacement, which doubles memory usage temporarily. For files with many replacements, could this become a performance concern, or is the trade-off acceptable for correctness?

const originalReplaceContent = replaceContent
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we make this comment more specific? Something like:

Suggested change
- const originalReplaceContent = replaceContent
+ // Store original content to preserve HTML entity distinction for identity comparison
+ const originalSearchContent = searchContent
+ const originalReplaceContent = replaceContent

This would clarify why we need the original values.


// First unescape any escaped markers in the content
searchContent = this.unescapeMarkers(searchContent)
replaceContent = this.unescapeMarkers(replaceContent)
Expand All @@ -428,7 +432,8 @@ Only use a single line of '=======' between search and replacement content, beca
}

// Validate that search and replace content are not identical
if (searchContent === replaceContent) {
// Compare the original content to preserve HTML entities distinction
if (originalSearchContent === originalReplaceContent) {
diffResults.push({
success: false,
error:
Expand Down
Loading