Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions src/core/diff/strategies/__tests__/issue-6872-reproduction.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace"

describe("Issue #6872 - apply_diff Tool Fails with Unicode Emoji Characters", () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we consider consolidating these tests with the unicode-emoji.spec.ts file? Both files test Unicode emoji handling, and unicode-emoji.spec.ts already covers the issue comprehensively. Having them in one file would reduce duplication and make the test suite easier to maintain.

let strategy: MultiSearchReplaceDiffStrategy

beforeEach(() => {
strategy = new MultiSearchReplaceDiffStrategy(1.0) // Exact matching (100% threshold)
})

it("should handle the exact scenario from issue #6872", async () => {
// This is the exact test case from the issue report
const originalContent = `# Test File

**✔ This is a test line.**

Some other content.`

const diffContent = `<<<<<<< SEARCH
**✔ This is a test line.**
=======
**This line has been successfully modified.**
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)

// The issue reports this should fail with 99% match, but we expect it to work
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment mentions expecting it to work, but could we add a more specific assertion? For example, we could verify that the similarity score is exactly 100% to prove the normalization isn't affecting emoji matching. This would make the test's intent clearer.

expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(`# Test File

**This line has been successfully modified.**

Some other content.`)
}
})

it("should handle the exact scenario with start_line from issue #6872", async () => {
const originalContent = `# Test File

**✔ This is a test line.**

Some other content.`

const diffContent = `<<<<<<< SEARCH
:start_line:3
-------
**✔ This is a test line.**
=======
**This line has been successfully modified.**
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)

expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(`# Test File

**This line has been successfully modified.**

Some other content.`)
}
})

it("should handle checkmark emoji with different normalization settings", async () => {
// Test with 90% threshold to see if normalization affects matching
const fuzzyStrategy = new MultiSearchReplaceDiffStrategy(0.9)

const originalContent = `# Test File

**✔ This is a test line.**

Some other content.`

const diffContent = `<<<<<<< SEARCH
**✔ This is a test line.**
=======
**This line has been successfully modified.**
>>>>>>> REPLACE`

const result = await fuzzyStrategy.applyDiff(originalContent, diffContent)

expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(`# Test File

**This line has been successfully modified.**

Some other content.`)
}
})

it("should provide helpful error message if emoji causes mismatch", async () => {
const originalContent = `# Test File

**✔ This is a test line.**

Some other content.`

// Intentionally use a different emoji to test error handling
const diffContent = `<<<<<<< SEARCH
**✅ This is a test line.**
=======
**This line has been successfully modified.**
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)

// This should fail because the emojis don't match
expect(result.success).toBe(false)
if (!result.success && result.error) {
expect(result.error).toContain("No sufficiently similar match found")
expect(result.error).toContain("100%") // Should mention the threshold
}
})
})
181 changes: 181 additions & 0 deletions src/core/diff/strategies/__tests__/unicode-emoji.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
import { MultiSearchReplaceDiffStrategy } from "../multi-search-replace"

describe("MultiSearchReplaceDiffStrategy - Unicode Emoji Handling", () => {
let strategy: MultiSearchReplaceDiffStrategy

beforeEach(() => {
strategy = new MultiSearchReplaceDiffStrategy(1.0) // Exact matching
})

describe("Unicode emoji character handling", () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great comprehensive test coverage! Have you considered adding edge cases like:

  • Emoji at the very start or end of a file
  • Files containing only emoji characters
  • Zero-width joiners and emoji sequences (like 👨‍👩‍👧‍👦)

These edge cases could help ensure robustness.

it("should correctly match and replace content containing checkmark emoji (✔)", async () => {
const originalContent = `# Test File

**✔ This is a test line.**

Some other content.`

const diffContent = `<<<<<<< SEARCH
**✔ This is a test line.**
=======
**This line has been successfully modified.**
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(`# Test File

**This line has been successfully modified.**

Some other content.`)
}
})

it("should handle multiple different emoji characters", async () => {
const originalContent = `# Task List

✅ Completed task
⚠️ Warning task
❌ Failed task
🚀 Rocket task`

const diffContent = `<<<<<<< SEARCH
✅ Completed task
⚠️ Warning task
❌ Failed task
🚀 Rocket task
=======
✅ Completed task
⚠️ Warning task
❌ Failed task
🚀 Rocket task
🎉 Celebration task
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(`# Task List

✅ Completed task
⚠️ Warning task
❌ Failed task
🚀 Rocket task
🎉 Celebration task`)
}
})

it("should handle emoji in code comments", async () => {
const originalContent = `function celebrate() {
// 🎉 This function celebrates success
console.log("Success!");
}`

const diffContent = `<<<<<<< SEARCH
// 🎉 This function celebrates success
=======
// 🎉 This function celebrates success
// 🚀 And launches rockets!
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(`function celebrate() {
// 🎉 This function celebrates success
// 🚀 And launches rockets!
console.log("Success!");
}`)
}
})

it("should handle mixed emoji and regular text", async () => {
const originalContent = `## Status Report

Current status: ✔ All systems operational
Performance: 🚀 Blazing fast
Issues: ❌ None found`

const diffContent = `<<<<<<< SEARCH
Current status: ✔ All systems operational
Performance: 🚀 Blazing fast
Issues: ❌ None found
=======
Current status: ✅ All systems operational
Performance: 🚀 Blazing fast
Issues: ⚠️ Minor warnings detected
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(`## Status Report

Current status: ✅ All systems operational
Performance: 🚀 Blazing fast
Issues: ⚠️ Minor warnings detected`)
}
})

it("should handle emoji with line numbers", async () => {
const originalContent = `# Test File

**✔ This is a test line.**

Some other content.`

const diffContent = `<<<<<<< SEARCH
:start_line:3
-------
**✔ This is a test line.**
=======
**✅ This line has been successfully modified.**
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(`# Test File

**✅ This line has been successfully modified.**

Some other content.`)
}
})

it("should handle complex Unicode characters beyond basic emoji", async () => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excellent test for international characters beyond emoji! This ensures the fix works for all Unicode, not just emoji. 🌍

const originalContent = `# International Characters

Chinese: 你好世界
Japanese: こんにちは世界
Korean: 안녕하세요
Arabic: مرحبا بالعالم
Hebrew: שלום עולם
Emoji: 🌍🌎🌏`

const diffContent = `<<<<<<< SEARCH
Chinese: 你好世界
Japanese: こんにちは世界
Korean: 안녕하세요
=======
Chinese: 你好世界 (Hello World)
Japanese: こんにちは世界 (Hello World)
Korean: 안녕하세요 (Hello)
>>>>>>> REPLACE`

const result = await strategy.applyDiff(originalContent, diffContent)
expect(result.success).toBe(true)
if (result.success) {
expect(result.content).toBe(`# International Characters

Chinese: 你好世界 (Hello World)
Japanese: こんにちは世界 (Hello World)
Korean: 안녕하세요 (Hello)
Arabic: مرحبا بالعالم
Hebrew: שלום עולם
Emoji: 🌍🌎🌏`)
}
})
})
})
Loading