-
Notifications
You must be signed in to change notification settings - Fork 2.5k
fix: distinguish HTML entities from literal characters in diff comparison #7965
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,172 @@ | ||
| import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace" | ||
|
|
||
| describe("HTML entity handling", () => { | ||
| let strategy: MultiSearchReplaceDiffStrategy | ||
|
|
||
| beforeEach(() => { | ||
| strategy = new MultiSearchReplaceDiffStrategy() | ||
| }) | ||
|
|
||
| it("should distinguish between HTML entities and their literal characters", async () => { | ||
| const originalContent = `.FilterBatch<int>(batch => batch.Count == 3) | ||
| .MapBatch<int, int>(batch => batch.Sum())` | ||
|
|
||
| const diffContent = ` | ||
| <<<<<<< SEARCH | ||
| .FilterBatch<int>(batch => batch.Count == 3) | ||
| ======= | ||
| .FilterBatch<int>(batch => batch.Count == 3) | ||
| >>>>>>> REPLACE | ||
| <<<<<<< SEARCH | ||
| .MapBatch<int, int>(batch => batch.Sum()) | ||
| ======= | ||
| .MapBatch<int, int>(batch => batch.Sum()) | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`.FilterBatch<int>(batch => batch.Count == 3) | ||
| .MapBatch<int, int>(batch => batch.Sum())`) | ||
| } | ||
| }) | ||
|
|
||
| it("should not treat < and < as identical in search/replace comparison", async () => { | ||
| const originalContent = `public List<string> GetItems() { | ||
| return new List<string>(); | ||
| }` | ||
|
|
||
| const diffContent = ` | ||
| <<<<<<< SEARCH | ||
| public List<string> GetItems() { | ||
| return new List<string>(); | ||
| } | ||
| ======= | ||
| public List<string> GetItems() { | ||
| return new List<string>(); | ||
| } | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`public List<string> GetItems() { | ||
| return new List<string>(); | ||
| }`) | ||
| } | ||
| }) | ||
|
|
||
| it("should handle mixed HTML entities correctly", async () => { | ||
| const originalContent = `<div class="container"> | ||
| <p>Hello & welcome</p> | ||
| </div>` | ||
|
|
||
| const diffContent = ` | ||
| <<<<<<< SEARCH | ||
| <div class="container"> | ||
| <p>Hello & welcome</p> | ||
| </div> | ||
| ======= | ||
| <div class="container"> | ||
| <p>Hello & welcome</p> | ||
| </div> | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`<div class="container"> | ||
| <p>Hello & welcome</p> | ||
| </div>`) | ||
| } | ||
| }) | ||
|
|
||
| it("should reject when search and replace are identical (including HTML entities)", async () => { | ||
| const originalContent = `function test<T>() { | ||
| return value; | ||
| }` | ||
|
|
||
| // Both search and replace have the same content (literal angle brackets) | ||
| const diffContent = ` | ||
| <<<<<<< SEARCH | ||
| function test<T>() { | ||
| return value; | ||
| } | ||
| ======= | ||
| function test<T>() { | ||
| return value; | ||
| } | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(false) | ||
| if (!result.success && result.error) { | ||
| expect(result.error).toContain("Search and replace content are identical") | ||
| } | ||
| }) | ||
|
|
||
| it("should handle apostrophes and quotes with HTML entities", async () => { | ||
| const originalContent = `const message = 'It's a "test" message';` | ||
|
|
||
| const diffContent = ` | ||
| <<<<<<< SEARCH | ||
| const message = 'It's a "test" message'; | ||
| ======= | ||
| const message = 'It\'s a "test" message'; | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`const message = 'It\'s a "test" message';`) | ||
| } | ||
| }) | ||
|
|
||
| it("should handle C# generics with escaped HTML entities", async () => { | ||
| const originalContent = `var dict = new Dictionary<string, List<int>>(); | ||
| dict.Add("key", new List<int> { 1, 2, 3 });` | ||
|
|
||
| const diffContent = ` | ||
| <<<<<<< SEARCH | ||
| var dict = new Dictionary<string, List<int>>(); | ||
| dict.Add("key", new List<int> { 1, 2, 3 }); | ||
| ======= | ||
| var dict = new Dictionary<string, List<int>>(); | ||
| dict.Add("key", new List<int> { 1, 2, 3 }); | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`var dict = new Dictionary<string, List<int>>(); | ||
| dict.Add("key", new List<int> { 1, 2, 3 });`) | ||
| } | ||
| }) | ||
|
|
||
| it("should handle the exact issue from bug report", async () => { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we add one more test case for mixed escaped and unescaped content in the same diff block? For example, a file that has some lines with |
||
| const originalContent = ` .FilterBatch<int>(batch => batch.Count == 3) | ||
| .MapBatch<int, int>(batch => batch.Sum())` | ||
|
|
||
| // This is the exact diff that was failing before the fix | ||
| const diffContent = ` | ||
| <<<<<<< SEARCH | ||
| .FilterBatch<int>(batch => batch.Count == 3) | ||
| ======= | ||
| .FilterBatch<int>(batch => batch.Count == 3) | ||
| >>>>>>> REPLACE | ||
| <<<<<<< SEARCH | ||
| .MapBatch<int, int>(batch => batch.Sum()) | ||
| ======= | ||
| .MapBatch<int, int>(batch => batch.Sum()) | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(` .FilterBatch<int>(batch => batch.Count == 3) | ||
| .MapBatch<int, int>(batch => batch.Sum())`) | ||
| } | ||
| }) | ||
| }) | ||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -409,6 +409,10 @@ Only use a single line of '=======' between search and replacement content, beca | |||||||||
| let { searchContent, replaceContent } = replacement | ||||||||||
| let startLine = replacement.startLine + (replacement.startLine === 0 ? 0 : delta) | ||||||||||
|
|
||||||||||
| // Store original content for comparison before any transformations | ||||||||||
| const originalSearchContent = searchContent | ||||||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this approach intentional? We're storing both original and transformed content for every replacement, which doubles memory usage temporarily. For files with many replacements, could this become a performance concern, or is the trade-off acceptable for correctness? |
||||||||||
| const originalReplaceContent = replaceContent | ||||||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we make this comment more specific? Something like:
Suggested change
This would clarify why we need the original values. |
||||||||||
|
|
||||||||||
| // First unescape any escaped markers in the content | ||||||||||
| searchContent = this.unescapeMarkers(searchContent) | ||||||||||
| replaceContent = this.unescapeMarkers(replaceContent) | ||||||||||
|
|
@@ -428,7 +432,8 @@ Only use a single line of '=======' between search and replacement content, beca | |||||||||
| } | ||||||||||
|
|
||||||||||
| // Validate that search and replace content are not identical | ||||||||||
| if (searchContent === replaceContent) { | ||||||||||
| // Compare the original content to preserve HTML entities distinction | ||||||||||
| if (originalSearchContent === originalReplaceContent) { | ||||||||||
| diffResults.push({ | ||||||||||
| success: false, | ||||||||||
| error: | ||||||||||
|
|
||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great test coverage! Have we considered adding a test for nested/double-encoded HTML entities like
<(which represents<)? This edge case could help ensure robustness with malformed or multiply-escaped content.