From 6477cad44319016c3d57530ff05a304a3ea97bd0 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Thu, 11 Sep 2025 07:35:13 +0000 Subject: [PATCH] fix: improve HTML entity unescaping for Gemini and other models - Add HTML entity unescaping to applyDiffTool for non-Claude models - Enhance unescapeHtmlEntities function to handle more entity types - Add support for alternative encodings like ', /, \, ` - Add comprehensive tests for new entity types - Improve comments to clarify the purpose of unescaping Fixes #7890 --- src/core/tools/applyDiffTool.ts | 2 + src/core/tools/writeToFileTool.ts | 8 ++- .../text-normalization-extended.spec.ts | 57 +++++++++++++++++++ src/utils/text-normalization.ts | 7 ++- 4 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 src/utils/__tests__/text-normalization-extended.spec.ts diff --git a/src/core/tools/applyDiffTool.ts b/src/core/tools/applyDiffTool.ts index 903e3c846e..39ca4eeaaa 100644 --- a/src/core/tools/applyDiffTool.ts +++ b/src/core/tools/applyDiffTool.ts @@ -25,6 +25,8 @@ export async function applyDiffToolLegacy( const relPath: string | undefined = block.params.path let diffContent: string | undefined = block.params.diff + // Unescape HTML entities for non-Claude models (e.g., Gemini, DeepSeek, Llama) + // These models may return content with escaped characters that need to be unescaped if (diffContent && !cline.api.getModel().id.includes("claude")) { diffContent = unescapeHtmlEntities(diffContent) } diff --git a/src/core/tools/writeToFileTool.ts b/src/core/tools/writeToFileTool.ts index e82eab92bc..43701d6f95 100644 --- a/src/core/tools/writeToFileTool.ts +++ b/src/core/tools/writeToFileTool.ts @@ -73,9 +73,11 @@ export async function writeToFileTool( cline.diffViewProvider.editType = fileExists ? 
"modify" : "create" } - // pre-processing newContent for cases where weaker models might add artifacts like markdown codeblock markers (deepseek/llama) or extra escape characters (gemini) + // Pre-processing newContent for cases where models might add artifacts + // Some models (DeepSeek/Llama) add markdown codeblock markers + // Others (Gemini) return content with HTML-escaped characters if (newContent.startsWith("```")) { - // cline handles cases where it includes language specifiers like ```python ```js + // Handle cases where it includes language specifiers like ```python ```js newContent = newContent.split("\n").slice(1).join("\n") } @@ -83,6 +85,8 @@ export async function writeToFileTool( newContent = newContent.split("\n").slice(0, -1).join("\n") } + // Unescape HTML entities for non-Claude models (e.g., Gemini, DeepSeek, Llama) + // These models may return content with escaped characters that need to be unescaped if (!cline.api.getModel().id.includes("claude")) { newContent = unescapeHtmlEntities(newContent) } diff --git a/src/utils/__tests__/text-normalization-extended.spec.ts b/src/utils/__tests__/text-normalization-extended.spec.ts new file mode 100644 index 0000000000..06f6328a2c --- /dev/null +++ b/src/utils/__tests__/text-normalization-extended.spec.ts @@ -0,0 +1,57 @@ +import { describe, it, expect } from "vitest" +import { unescapeHtmlEntities } from "../text-normalization" + +describe("Extended HTML entity unescaping", () => { + describe("unescapeHtmlEntities", () => { + it("unescapes alternative apostrophe encoding", () => { + const input = "It&#x27;s working" + const expected = "It's working" + expect(unescapeHtmlEntities(input)).toBe(expected) + }) + + it("unescapes forward slash", () => { + const input = "path&#x2F;to&#x2F;file" + const expected = "path/to/file" + expect(unescapeHtmlEntities(input)).toBe(expected) + }) + + it("unescapes backslash", () => { + const input = "C:&#x5C;Users&#x5C;file" + const expected = "C:\\Users\\file" + 
expect(unescapeHtmlEntities(input)).toBe(expected) + }) + + it("unescapes backtick", () => { + const input = "&#x60;code&#x60;" + const expected = "`code`" + expect(unescapeHtmlEntities(input)).toBe(expected) + }) + + it("unescapes non-breaking space", () => { + const input = "Hello&nbsp;World" + const expected = "Hello World" + expect(unescapeHtmlEntities(input)).toBe(expected) + }) + + it("handles complex mixed content with all entity types", () => { + const input = + "&lt;div class=&quot;test&quot;&gt;It&#x27;s a &nbsp;test&#x2F;path&#x5C;file with &#x60;code&#x60; &amp; more&lt;/div&gt;" + const expected = '
<div class="test">It\'s a  test/path\\file with `code` & more</div>
' + expect(unescapeHtmlEntities(input)).toBe(expected) + }) + + it("handles Gemini-style escaped markdown content", () => { + const input = + "```python\n&lt;search&gt;\ndef old_function():\n return 'old'\n&lt;/search&gt;\n&lt;replace&gt;\ndef new_function():\n return 'new'\n&lt;/replace&gt;\n```" + const expected = + "```python\n<search>\ndef old_function():\n return 'old'\n</search>\n<replace>\ndef new_function():\n return 'new'\n</replace>\n```" + expect(unescapeHtmlEntities(input)).toBe(expected) + }) + + it("correctly orders ampersand unescaping to avoid double-unescaping", () => { + const input = "&amp;lt;&amp;gt;&amp;amp;" + const expected = "&lt;&gt;&amp;" + expect(unescapeHtmlEntities(input)).toBe(expected) + }) + }) +}) diff --git a/src/utils/text-normalization.ts b/src/utils/text-normalization.ts index 15f35c8437..099c121848 100644 --- a/src/utils/text-normalization.ts +++ b/src/utils/text-normalization.ts @@ -91,5 +91,10 @@ export function unescapeHtmlEntities(text: string): string { .replace(/&quot;/g, '"') .replace(/&#39;/g, "'") .replace(/&apos;/g, "'") - .replace(/&amp;/g, "&") + .replace(/&#x27;/g, "'") // Alternative apostrophe encoding + .replace(/&#x2F;/g, "/") // Forward slash + .replace(/&#x5C;/g, "\\") // Backslash + .replace(/&#x60;/g, "`") // Backtick + .replace(/&nbsp;/g, " ") // Non-breaking space + .replace(/&amp;/g, "&") // Must be last to avoid double-unescaping }