From 5df2ce0882967a9406ee290bc38113ded463b46c Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Mon, 25 Aug 2025 07:35:28 +0000
Subject: [PATCH 1/2] =?UTF-8?q?fix:=20sanitize=20unwanted=20"=E6=9E=81?=
 =?UTF-8?q?=E9=80=9F=E6=A8=A1=E5=BC=8F"=20characters=20from=20DeepSeek=20V?=
 =?UTF-8?q?3.1=20responses?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add sanitization logic that removes "极速模式" ("speed mode") and fragments of it from DeepSeek responses
- These stray characters were showing up in generated file paths and file content
- Add unit tests covering plain text, mixed Chinese text, and reasoning content
- Preserve legitimate Chinese text while removing artifacts

Fixes #7382
---
 src/api/providers/__tests__/deepseek.spec.ts | 174 +++++++++++++++++++
 src/api/providers/deepseek.ts                |  59 ++++++-
 2 files changed, 232 insertions(+), 1 deletion(-)

diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts
index 50cabfa922..3ecd0df882 100644
--- a/src/api/providers/__tests__/deepseek.spec.ts
+++ b/src/api/providers/__tests__/deepseek.spec.ts
@@ -260,6 +260,180 @@ describe("DeepSeekHandler", () => {
 			expect(usageChunks[0].cacheWriteTokens).toBe(8)
 			expect(usageChunks[0].cacheReadTokens).toBe(2)
 		})
+
+		it("should sanitize unwanted '极速模式' characters from response", async () => {
+			// One-shot mock: a non-streaming call gets a plain completion, a streaming call gets the async iterator below
+			mockCreate.mockImplementationOnce(async (options) => {
+				if (!options.stream) {
+					return {
+						id: "test-completion",
+						choices: [
+							{
+								message: {
+									role: "assistant",
+									content: "Test response with 极速模式 unwanted characters",
+									refusal: null,
+								},
+								finish_reason: "stop",
+								index: 0,
+							},
+						],
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 5,
+							total_tokens: 15,
+						},
+					}
+				}
+
+				// Streaming path: one content chunk holding the full phrase plus lone 极 / 速 / 模式 fragments, then a usage-only chunk
+				return {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							choices: [
+								{
+									delta: {
+										content: "Here is 极速模式 some text with 极 unwanted 速 characters 模式",
+									},
+									index: 0,
+								},
+							],
+							usage: null,
+						}
+						yield {
+							choices: [
+								{
+									delta: {},
+									index: 0,
+								},
+							],
+							usage: {
+								prompt_tokens: 10,
+								completion_tokens: 5,
+								total_tokens: 15,
+							},
+						}
+					},
+				}
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const textChunks = chunks.filter((chunk) => chunk.type === "text")
+			expect(textChunks).toHaveLength(1)
+			// The exact match also pins that whitespace left behind by the removals is collapsed to single spaces and trimmed
+			expect(textChunks[0].text).toBe("Here is some text with unwanted characters")
+			expect(textChunks[0].text).not.toContain("极速模式")
+			expect(textChunks[0].text).not.toContain("极")
+			expect(textChunks[0].text).not.toContain("速")
+			expect(textChunks[0].text).not.toContain("模")
+			expect(textChunks[0].text).not.toContain("式")
+		})
+
+		it("should preserve legitimate Chinese text while removing artifacts", async () => {
+			// The content mixes ordinary Chinese prose, the full "极速模式" phrase, and a space-delimited "极" before a file name
+			mockCreate.mockImplementationOnce(async (options) => {
+				// Streaming only: one content chunk followed by a usage-only chunk
+				return {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							choices: [
+								{
+									delta: {
+										content: "这是正常的中文文本极速模式，不应该被删除。File path: 极 test.txt",
+									},
+									index: 0,
+								},
+							],
+							usage: null,
+						}
+						yield {
+							choices: [
+								{
+									delta: {},
+									index: 0,
+								},
+							],
+							usage: {
+								prompt_tokens: 10,
+								completion_tokens: 5,
+								total_tokens: 15,
+							},
+						}
+					},
+				}
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const textChunks = chunks.filter((chunk) => chunk.type === "text")
+			expect(textChunks).toHaveLength(1)
+			// The phrase is removed even inside Chinese prose, and the lone "极" before "test.txt" is removed as well
+			expect(textChunks[0].text).toBe("这是正常的中文文本，不应该被删除。File path: test.txt")
+			expect(textChunks[0].text).toContain("这是正常的中文文本")
+			expect(textChunks[0].text).not.toContain("极速模式")
+			// Already implied by the exact match above; kept to state the intent explicitly
+			expect(textChunks[0].text).not.toContain(" 极 ")
+		})
+
+		it("should handle reasoning content with unwanted characters", async () => {
+			// The content embeds a <think>...</think> section containing the phrase, followed by plain text
+			mockCreate.mockImplementationOnce(async (options) => {
+				return {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							choices: [
+								{
+									delta: {
+										content: "<think>Reasoning with 极速模式 artifacts</think>Regular text",
+									},
+									index: 0,
+								},
+							],
+							usage: null,
+						}
+						yield {
+							choices: [
+								{
+									delta: {},
+									index: 0,
+								},
+							],
+							usage: {
+								prompt_tokens: 10,
+								completion_tokens: 5,
+								total_tokens: 15,
+							},
+						}
+					},
+				}
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// NOTE(review): both assertions below are guarded by length checks, so the test passes vacuously if a chunk type is absent
+			const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+			const textChunks = chunks.filter((chunk) => chunk.type === "text")
+
+			if (reasoningChunks.length > 0) {
+				expect(reasoningChunks[0].text).not.toContain("极速模式")
+			}
+			if (textChunks.length > 0) {
+				expect(textChunks[0].text).not.toContain("极速模式")
+			}
+		})
 	})
 
 	describe("processUsageMetrics", () => {
diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts
index de119de6db..98610bd7ac 100644
--- a/src/api/providers/deepseek.ts
+++ b/src/api/providers/deepseek.ts
@@ -1,13 +1,18 @@
 import { deepSeekModels, deepSeekDefaultModelId } from "@roo-code/types"
+import { Anthropic } from "@anthropic-ai/sdk"
 
 import type { ApiHandlerOptions } from "../../shared/api"
 
-import type { ApiStreamUsageChunk } from "../transform/stream"
+import type { ApiStreamUsageChunk, ApiStream } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
+import type { ApiHandlerCreateMessageMetadata } from "../index"
 
 import { OpenAiHandler } from "./openai"
 
 export class DeepSeekHandler extends OpenAiHandler {
+	// Pattern to match unwanted "极速模式" characters and its variations
+	private readonly UNWANTED_PATTERN = /[极極][速][模][式]|[极極]|[速]?[模]?[式]?/g
+
 	constructor(options: ApiHandlerOptions) {
 		super({
 			...options,
@@ -26,6 +31,58 @@ export class DeepSeekHandler extends OpenAiHandler {
 		return { id, info, ...params }
 	}
 
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		// Obtain the chunk stream from the parent implementation, forwarding all arguments unchanged
+		const stream = super.createMessage(systemPrompt, messages, metadata)
+
+		// Each chunk is sanitized on its own; NOTE(review): a phrase split across two chunks is therefore not matched
+		for await (const chunk of stream) {
+			if (chunk.type === "text" && chunk.text) {
+				// Non-empty text chunks are rewritten in place before being re-yielded
+				chunk.text = this.sanitizeContent(chunk.text)
+			} else if (chunk.type === "reasoning" && chunk.text) {
+				// Non-empty reasoning chunks get the same treatment
+				chunk.text = this.sanitizeContent(chunk.text)
+			}
+			yield chunk
+		}
+	}
+
+	/**
+	 * Removes unwanted "极速模式" characters from the content.
+	 * These characters appear to be injected by some DeepSeek V3.1 configurations.
+	 */
+	private sanitizeContent(content: string): string {
+		// First, try to remove the complete phrase "极速模式"
+		let sanitized = content.replace(/极速模式/g, "")
+
+		// Drop a leftover "模式" unless a [一-龿] character follows; NOTE(review): this also strips a legitimate "模式" before punctuation or at the end
+		sanitized = sanitized.replace(/模式(?![一-龿])/g, "")
+
+		// Remove a lone 极 / 速 / 模 / 式 whose neighbours on both sides are outside
+		// the [一-龿] range (the start and end of the string count as outside).
+		// Inside a run of Chinese text the character is left alone by these four rules.
+		sanitized = sanitized.replace(/(?<![一-龿])极(?![一-龿])/g, "")
+		sanitized = sanitized.replace(/(?<![一-龿])速(?![一-龿])/g, "")
+		sanitized = sanitized.replace(/(?<![一-龿])模(?![一-龿])/g, "")
+		sanitized = sanitized.replace(/(?<![一-龿])式(?![一-龿])/g, "")
+
+		// Replace whitespace + character (+ optional whitespace) with one space; NOTE(review): no lookahead, so Chinese text may follow
+		sanitized = sanitized.replace(/\s+极\s*/g, " ")
+		sanitized = sanitized.replace(/\s+速\s*/g, " ")
+		sanitized = sanitized.replace(/\s+模\s*/g, " ")
+		sanitized = sanitized.replace(/\s+式\s*/g, " ")
+
+		// Collapse every whitespace run (newlines too) to one space and trim; NOTE(review): run per chunk, this flattens line breaks and drops boundary spaces
+		sanitized = sanitized.replace(/\s+/g, " ").trim()
+
+		return sanitized
+	}
+
 	// Override to handle DeepSeek's usage metrics, including caching.
 	protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
 		return {

From 37ee6779b1aa3934f78a9e69612b897a5b5f0b25 Mon Sep 17 00:00:00 2001
From: Roo Code <roomote@roocode.com>
Date: Mon, 25 Aug 2025 07:39:27 +0000
Subject: [PATCH 2/2] refactor: address review feedback

- Remove unused UNWANTED_PATTERN property
- Add more detailed comment explaining the issue origin
- Clarify that sanitization preserves legitimate Chinese text
---
 src/api/providers/deepseek.ts | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts
index 98610bd7ac..4cf6ebc22c 100644
--- a/src/api/providers/deepseek.ts
+++ b/src/api/providers/deepseek.ts
@@ -10,9 +10,6 @@ import type { ApiHandlerCreateMessageMetadata } from "../index"
 import { OpenAiHandler } from "./openai"
 
 export class DeepSeekHandler extends OpenAiHandler {
-	// Pattern to match unwanted "极速模式" characters and its variations
-	private readonly UNWANTED_PATTERN = /[极極][速][模][式]|[极極]|[速]?[模]?[式]?/g
-
 	constructor(options: ApiHandlerOptions) {
 		super({
 			...options,
@@ -53,8 +50,10 @@ export class DeepSeekHandler extends OpenAiHandler {
 	}
 
 	/**
-	 * Removes unwanted "极速模式" characters from the content.
-	 * These characters appear to be injected by some DeepSeek V3.1 configurations.
+	 * Removes unwanted "极速模式" (speed mode) characters from the content.
+	 * These characters appear to be injected by some DeepSeek V3.1 configurations,
+	 * possibly from a Chinese language interface or prompt template.
+	 * The sanitization preserves legitimate Chinese text while removing these artifacts.
 	 */
 	private sanitizeContent(content: string): string {
 		// First, try to remove the complete phrase "极速模式"