diff --git a/src/core/diff/strategies/__tests__/multi-search-replace.test.ts b/src/core/diff/strategies/__tests__/multi-search-replace.test.ts
index e7dc128f433..63111ba9aae 100644
--- a/src/core/diff/strategies/__tests__/multi-search-replace.test.ts
+++ b/src/core/diff/strategies/__tests__/multi-search-replace.test.ts
@@ -1711,6 +1711,29 @@ function sum(a, b) {
 			}
 		})
 
+		it("should match content with smart quotes", async () => {
+			// The file contains a typographic apostrophe (U+2019) while SEARCH uses a
+			// straight one, so this only passes when matching normalizes smart quotes.
+			const originalContent =
+				"**Enjoy Roo Code!** Whether you keep it on a short leash or let it roam autonomously, we can’t wait to see what you build. If you have questions or feature ideas, drop by our [Reddit community](https://www.reddit.com/r/RooCode/) or [Discord](https://discord.gg/roocode). Happy coding!"
+			const diffContent = `test.ts
+<<<<<<< SEARCH
+**Enjoy Roo Code!** Whether you keep it on a short leash or let it roam autonomously, we can't wait to see what you build. If you have questions or feature ideas, drop by our [Reddit community](https://www.reddit.com/r/RooCode/) or [Discord](https://discord.gg/roocode). Happy coding!
+=======
+**Enjoy Roo Code!** Whether you keep it on a short leash or let it roam autonomously, we can't wait to see what you build. If you have questions or feature ideas, drop by our [Reddit community](https://www.reddit.com/r/RooCode/) or [Discord](https://discord.gg/roocode). Happy coding!
+
+You're still here?
+>>>>>>> REPLACE`
+
+			const result = await strategy.applyDiff(originalContent, diffContent)
+			expect(result.success).toBe(true)
+			if (result.success) {
+				expect(result.content).toBe(
+					"**Enjoy Roo Code!** Whether you keep it on a short leash or let it roam autonomously, we can't wait to see what you build. If you have questions or feature ideas, drop by our [Reddit community](https://www.reddit.com/r/RooCode/) or [Discord](https://discord.gg/roocode). Happy coding!\n\nYou're still here?",
+				)
+			}
+		})
+
 		it("should not exact match empty lines", async () => {
 			const originalContent = "function sum(a, b) {\n\n return a + b;\n}"
 			const diffContent = `test.ts
diff --git a/src/core/diff/strategies/multi-search-replace.ts b/src/core/diff/strategies/multi-search-replace.ts
index 67928f45341..5ba1825bac8 100644
--- a/src/core/diff/strategies/multi-search-replace.ts
+++ b/src/core/diff/strategies/multi-search-replace.ts
@@ -3,6 +3,7 @@ import { addLineNumbers, everyLineHasLineNumbers, stripLineNumbers } from "../..
 import { distance } from "fastest-levenshtein"
 import { ToolProgressStatus } from "../../../shared/ExtensionMessage"
 import { ToolUse } from "../../assistant-message"
+import { normalizeString } from "../../../utils/text-normalization"
 
 const BUFFER_LINES = 40 // Number of extra context lines to show before and after matches
 
@@ -12,11 +13,9 @@ function getSimilarity(original: string, search: string): number {
 		return 0
 	}
 
-	// Normalize strings by removing extra whitespace but preserve case
-	const normalizeStr = (str: string) => str.replace(/\s+/g, " ").trim()
-
-	const normalizedOriginal = normalizeStr(original)
-	const normalizedSearch = normalizeStr(search)
+	// Use the normalizeString utility to handle smart quotes and other special characters
+	const normalizedOriginal = normalizeString(original)
+	const normalizedSearch = normalizeString(search)
 
 	if (normalizedOriginal === normalizedSearch) {
 		return 1
diff --git a/src/utils/__tests__/text-normalization.test.ts b/src/utils/__tests__/text-normalization.test.ts
new file mode 100644
index 00000000000..da7184d889f
--- /dev/null
+++ b/src/utils/__tests__/text-normalization.test.ts
@@ -0,0 +1,37 @@
+import { normalizeString } from "../text-normalization"
+
+describe("Text normalization utilities", () => {
+	describe("normalizeString", () => {
+		test("normalizes smart quotes by default", () => {
+			expect(normalizeString("These are \u201Csmart quotes\u201D and \u2018single quotes\u2019")).toBe(
+				"These are \"smart quotes\" and 'single quotes'",
+			)
+		})
+
+		test("normalizes typographic characters by default", () => {
+			expect(normalizeString("This has an em dash \u2014 and ellipsis\u2026")).toBe(
+				"This has an em dash - and ellipsis...",
+			)
+		})
+
+		test("normalizes whitespace by default", () => {
+			expect(normalizeString("Multiple   spaces and\t\ttabs")).toBe("Multiple spaces and tabs")
+		})
+
+		test("can be configured to skip certain normalizations", () => {
+			const input = "Keep \u201Csmart quotes\u201D but   normalize whitespace"
+			expect(normalizeString(input, { smartQuotes: false })).toBe(
+				"Keep \u201Csmart quotes\u201D but normalize whitespace",
+			)
+		})
+
+		test("falls back to defaults for explicitly undefined options", () => {
+			expect(normalizeString("\u201Cquoted\u201D", { smartQuotes: undefined })).toBe('"quoted"')
+		})
+
+		test("real-world example with mixed characters", () => {
+			const input = "Let\u2019s test this\u2014with some \u201Cfancy\u201D punctuation\u2026 and   spaces"
+			expect(normalizeString(input)).toBe('Let\'s test this-with some "fancy" punctuation... and spaces')
+		})
+	})
+})
diff --git a/src/utils/text-normalization.ts b/src/utils/text-normalization.ts
new file mode 100644
index 00000000000..b6e4e8da58d
--- /dev/null
+++ b/src/utils/text-normalization.ts
@@ -0,0 +1,97 @@
+/**
+ * Common character mappings for normalization
+ */
+export const NORMALIZATION_MAPS = {
+	// Smart quotes to regular quotes
+	SMART_QUOTES: {
+		"\u201C": '"', // Left double quote (U+201C)
+		"\u201D": '"', // Right double quote (U+201D)
+		"\u2018": "'", // Left single quote (U+2018)
+		"\u2019": "'", // Right single quote (U+2019)
+	},
+	// Other typographic characters
+	TYPOGRAPHIC: {
+		"\u2026": "...", // Ellipsis
+		"\u2014": "-", // Em dash
+		"\u2013": "-", // En dash
+		"\u00A0": " ", // Non-breaking space
+	},
+}
+
+/**
+ * Options for string normalization
+ */
+export interface NormalizeOptions {
+	smartQuotes?: boolean // Replace smart quotes with straight quotes
+	typographicChars?: boolean // Replace typographic characters
+	extraWhitespace?: boolean // Collapse multiple whitespace to single space
+	trim?: boolean // Trim whitespace from start and end
+}
+
+/**
+ * Default options for normalization
+ */
+const DEFAULT_OPTIONS: Required<NormalizeOptions> = {
+	smartQuotes: true,
+	typographicChars: true,
+	extraWhitespace: true,
+	trim: true,
+}
+
+/**
+ * Compiles a single global regex that matches any key of `map` literally.
+ * Keys are escaped so an entry such as "." or "(" cannot alter the pattern.
+ */
+function buildMatcher(map: Record<string, string>): RegExp {
+	const alternatives = Object.keys(map).map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
+	return new RegExp(alternatives.join("|"), "g")
+}
+
+// Compiled once at module load: getSimilarity calls normalizeString twice per
+// call, so rebuilding eight regexes on every invocation is wasted work.
+const SMART_QUOTES_MATCHER = buildMatcher(NORMALIZATION_MAPS.SMART_QUOTES)
+const TYPOGRAPHIC_MATCHER = buildMatcher(NORMALIZATION_MAPS.TYPOGRAPHIC)
+
+/**
+ * Replaces every match of `matcher` with its mapped value in one pass.
+ */
+function replaceFromMap(str: string, matcher: RegExp, map: Record<string, string>): string {
+	return str.replace(matcher, (match) => map[match] ?? match)
+}
+
+/**
+ * Normalizes a string based on the specified options
+ *
+ * Steps run in a fixed order: smart quotes, typographic characters,
+ * whitespace collapsing, then trimming. Options that are omitted or
+ * explicitly `undefined` fall back to the defaults (all enabled).
+ *
+ * @param str The string to normalize
+ * @param options Normalization options
+ * @returns The normalized string
+ */
+export function normalizeString(str: string, options: NormalizeOptions = {}): string {
+	let normalized = str
+
+	// Replace smart quotes
+	if (options.smartQuotes ?? DEFAULT_OPTIONS.smartQuotes) {
+		normalized = replaceFromMap(normalized, SMART_QUOTES_MATCHER, NORMALIZATION_MAPS.SMART_QUOTES)
+	}
+
+	// Replace typographic characters
+	if (options.typographicChars ?? DEFAULT_OPTIONS.typographicChars) {
+		normalized = replaceFromMap(normalized, TYPOGRAPHIC_MATCHER, NORMALIZATION_MAPS.TYPOGRAPHIC)
+	}
+
+	// Normalize whitespace
+	if (options.extraWhitespace ?? DEFAULT_OPTIONS.extraWhitespace) {
+		normalized = normalized.replace(/\s+/g, " ")
+	}
+
+	// Trim whitespace
+	if (options.trim ?? DEFAULT_OPTIONS.trim) {
+		normalized = normalized.trim()
+	}
+
+	return normalized
+}