-
Notifications
You must be signed in to change notification settings - Fork 2.6k
fix: ensure Unicode emoji characters work correctly in apply_diff tool #6873
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace" | ||
|
|
||
| describe("Issue #6872 - apply_diff Tool Fails with Unicode Emoji Characters", () => { | ||
| let strategy: MultiSearchReplaceDiffStrategy | ||
|
|
||
| beforeEach(() => { | ||
| strategy = new MultiSearchReplaceDiffStrategy(1.0) // Exact matching (100% threshold) | ||
| }) | ||
|
|
||
| it("should handle the exact scenario from issue #6872", async () => { | ||
| // This is the exact test case from the issue report | ||
| const originalContent = `# Test File | ||
|
|
||
| **✔ This is a test line.** | ||
|
|
||
| Some other content.` | ||
|
|
||
| const diffContent = `<<<<<<< SEARCH | ||
| **✔ This is a test line.** | ||
| ======= | ||
| **This line has been successfully modified.** | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
|
|
||
| // The issue reports this should fail with 99% match, but we expect it to work | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment mentions expecting it to work, but could we add a more specific assertion? For example, we could verify that the similarity score is exactly 100% to prove the normalization isn't affecting emoji matching. This would make the test's intent clearer. |
||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`# Test File | ||
|
|
||
| **This line has been successfully modified.** | ||
|
|
||
| Some other content.`) | ||
| } | ||
| }) | ||
|
|
||
| it("should handle the exact scenario with start_line from issue #6872", async () => { | ||
| const originalContent = `# Test File | ||
|
|
||
| **✔ This is a test line.** | ||
|
|
||
| Some other content.` | ||
|
|
||
| const diffContent = `<<<<<<< SEARCH | ||
| :start_line:3 | ||
| ------- | ||
| **✔ This is a test line.** | ||
| ======= | ||
| **This line has been successfully modified.** | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
|
|
||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`# Test File | ||
|
|
||
| **This line has been successfully modified.** | ||
|
|
||
| Some other content.`) | ||
| } | ||
| }) | ||
|
|
||
| it("should handle checkmark emoji with different normalization settings", async () => { | ||
| // Test with 90% threshold to see if normalization affects matching | ||
| const fuzzyStrategy = new MultiSearchReplaceDiffStrategy(0.9) | ||
|
|
||
| const originalContent = `# Test File | ||
|
|
||
| **✔ This is a test line.** | ||
|
|
||
| Some other content.` | ||
|
|
||
| const diffContent = `<<<<<<< SEARCH | ||
| **✔ This is a test line.** | ||
| ======= | ||
| **This line has been successfully modified.** | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await fuzzyStrategy.applyDiff(originalContent, diffContent) | ||
|
|
||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`# Test File | ||
|
|
||
| **This line has been successfully modified.** | ||
|
|
||
| Some other content.`) | ||
| } | ||
| }) | ||
|
|
||
| it("should provide helpful error message if emoji causes mismatch", async () => { | ||
| const originalContent = `# Test File | ||
|
|
||
| **✔ This is a test line.** | ||
|
|
||
| Some other content.` | ||
|
|
||
| // Intentionally use a different emoji to test error handling | ||
| const diffContent = `<<<<<<< SEARCH | ||
| **✅ This is a test line.** | ||
| ======= | ||
| **This line has been successfully modified.** | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
|
|
||
| // This should fail because the emojis don't match | ||
| expect(result.success).toBe(false) | ||
| if (!result.success && result.error) { | ||
| expect(result.error).toContain("No sufficiently similar match found") | ||
| expect(result.error).toContain("100%") // Should mention the threshold | ||
| } | ||
| }) | ||
| }) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,181 @@ | ||
| import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace" | ||
|
|
||
| describe("MultiSearchReplaceDiffStrategy - Unicode Emoji Handling", () => { | ||
| let strategy: MultiSearchReplaceDiffStrategy | ||
|
|
||
| beforeEach(() => { | ||
| strategy = new MultiSearchReplaceDiffStrategy(1.0) // Exact matching | ||
| }) | ||
|
|
||
| describe("Unicode emoji character handling", () => { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great comprehensive test coverage! Have you considered adding edge cases like:
These edge cases could help ensure robustness. |
||
| it("should correctly match and replace content containing checkmark emoji (✔)", async () => { | ||
| const originalContent = `# Test File | ||
|
|
||
| **✔ This is a test line.** | ||
|
|
||
| Some other content.` | ||
|
|
||
| const diffContent = `<<<<<<< SEARCH | ||
| **✔ This is a test line.** | ||
| ======= | ||
| **This line has been successfully modified.** | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`# Test File | ||
|
|
||
| **This line has been successfully modified.** | ||
|
|
||
| Some other content.`) | ||
| } | ||
| }) | ||
|
|
||
| it("should handle multiple different emoji characters", async () => { | ||
| const originalContent = `# Task List | ||
|
|
||
| ✅ Completed task | ||
| ⚠️ Warning task | ||
| ❌ Failed task | ||
| 🚀 Rocket task` | ||
|
|
||
| const diffContent = `<<<<<<< SEARCH | ||
| ✅ Completed task | ||
| ⚠️ Warning task | ||
| ❌ Failed task | ||
| 🚀 Rocket task | ||
| ======= | ||
| ✅ Completed task | ||
| ⚠️ Warning task | ||
| ❌ Failed task | ||
| 🚀 Rocket task | ||
| 🎉 Celebration task | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`# Task List | ||
|
|
||
| ✅ Completed task | ||
| ⚠️ Warning task | ||
| ❌ Failed task | ||
| 🚀 Rocket task | ||
| 🎉 Celebration task`) | ||
| } | ||
| }) | ||
|
|
||
| it("should handle emoji in code comments", async () => { | ||
| const originalContent = `function celebrate() { | ||
| // 🎉 This function celebrates success | ||
| console.log("Success!"); | ||
| }` | ||
|
|
||
| const diffContent = `<<<<<<< SEARCH | ||
| // 🎉 This function celebrates success | ||
| ======= | ||
| // 🎉 This function celebrates success | ||
| // 🚀 And launches rockets! | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`function celebrate() { | ||
| // 🎉 This function celebrates success | ||
| // 🚀 And launches rockets! | ||
| console.log("Success!"); | ||
| }`) | ||
| } | ||
| }) | ||
|
|
||
| it("should handle mixed emoji and regular text", async () => { | ||
| const originalContent = `## Status Report | ||
|
|
||
| Current status: ✔ All systems operational | ||
| Performance: 🚀 Blazing fast | ||
| Issues: ❌ None found` | ||
|
|
||
| const diffContent = `<<<<<<< SEARCH | ||
| Current status: ✔ All systems operational | ||
| Performance: 🚀 Blazing fast | ||
| Issues: ❌ None found | ||
| ======= | ||
| Current status: ✅ All systems operational | ||
| Performance: 🚀 Blazing fast | ||
| Issues: ⚠️ Minor warnings detected | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`## Status Report | ||
|
|
||
| Current status: ✅ All systems operational | ||
| Performance: 🚀 Blazing fast | ||
| Issues: ⚠️ Minor warnings detected`) | ||
| } | ||
| }) | ||
|
|
||
| it("should handle emoji with line numbers", async () => { | ||
| const originalContent = `# Test File | ||
|
|
||
| **✔ This is a test line.** | ||
|
|
||
| Some other content.` | ||
|
|
||
| const diffContent = `<<<<<<< SEARCH | ||
| :start_line:3 | ||
| ------- | ||
| **✔ This is a test line.** | ||
| ======= | ||
| **✅ This line has been successfully modified.** | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`# Test File | ||
|
|
||
| **✅ This line has been successfully modified.** | ||
|
|
||
| Some other content.`) | ||
| } | ||
| }) | ||
|
|
||
| it("should handle complex Unicode characters beyond basic emoji", async () => { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Excellent test for international characters beyond emoji! This ensures the fix works for all Unicode, not just emoji. 🌍 |
||
| const originalContent = `# International Characters | ||
|
|
||
| Chinese: 你好世界 | ||
| Japanese: こんにちは世界 | ||
| Korean: 안녕하세요 | ||
| Arabic: مرحبا بالعالم | ||
| Hebrew: שלום עולם | ||
| Emoji: 🌍🌎🌏` | ||
|
|
||
| const diffContent = `<<<<<<< SEARCH | ||
| Chinese: 你好世界 | ||
| Japanese: こんにちは世界 | ||
| Korean: 안녕하세요 | ||
| ======= | ||
| Chinese: 你好世界 (Hello World) | ||
| Japanese: こんにちは世界 (Hello World) | ||
| Korean: 안녕하세요 (Hello) | ||
| >>>>>>> REPLACE` | ||
|
|
||
| const result = await strategy.applyDiff(originalContent, diffContent) | ||
| expect(result.success).toBe(true) | ||
| if (result.success) { | ||
| expect(result.content).toBe(`# International Characters | ||
|
|
||
| Chinese: 你好世界 (Hello World) | ||
| Japanese: こんにちは世界 (Hello World) | ||
| Korean: 안녕하세요 (Hello) | ||
| Arabic: مرحبا بالعالم | ||
| Hebrew: שלום עולם | ||
| Emoji: 🌍🌎🌏`) | ||
| } | ||
| }) | ||
| }) | ||
| }) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we consider consolidating these tests with the unicode-emoji.spec.ts file? Both files test Unicode emoji handling, and unicode-emoji.spec.ts already covers the issue comprehensively. Having them in one file would reduce duplication and make the test suite easier to maintain.