From 5df2ce0882967a9406ee290bc38113ded463b46c Mon Sep 17 00:00:00 2001 From: Roo Code Date: Mon, 25 Aug 2025 07:35:28 +0000 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20sanitize=20unwanted=20"=E6=9E=81?= =?UTF-8?q?=E9=80=9F=E6=A8=A1=E5=BC=8F"=20characters=20from=20DeepSeek=20V?= =?UTF-8?q?3.1=20responses?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add sanitization logic to remove "极速模式" and its variations from DeepSeek responses - These unwanted characters were being injected into file paths and content - Add comprehensive unit tests to verify the sanitization works correctly - Preserve legitimate Chinese text while removing artifacts Fixes #7382 --- src/api/providers/__tests__/deepseek.spec.ts | 174 +++++++++++++++++++ src/api/providers/deepseek.ts | 59 ++++++- 2 files changed, 232 insertions(+), 1 deletion(-) diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts index 50cabfa922..3ecd0df882 100644 --- a/src/api/providers/__tests__/deepseek.spec.ts +++ b/src/api/providers/__tests__/deepseek.spec.ts @@ -260,6 +260,180 @@ describe("DeepSeekHandler", () => { expect(usageChunks[0].cacheWriteTokens).toBe(8) expect(usageChunks[0].cacheReadTokens).toBe(2) }) + + it("should sanitize unwanted '极速模式' characters from response", async () => { + // Mock a response with unwanted characters + mockCreate.mockImplementationOnce(async (options) => { + if (!options.stream) { + return { + id: "test-completion", + choices: [ + { + message: { + role: "assistant", + content: "Test response with 极速模式 unwanted characters", + refusal: null, + }, + finish_reason: "stop", + index: 0, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + } + } + + // Return async iterator for streaming with unwanted characters + return { + [Symbol.asyncIterator]: async function* () { + yield { + choices: [ + { + delta: { + content: "Here is 极速模式 some text with 极 unwanted 速 characters 模式", + }, + index: 0, + }, + ], + usage: null, + } + yield { + choices: [ + { + delta: {}, + index: 0, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + } + }, + } + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const textChunks = chunks.filter((chunk) => chunk.type === "text") + expect(textChunks).toHaveLength(1) + // The unwanted characters should be removed + expect(textChunks[0].text).toBe("Here is some text with unwanted characters") + expect(textChunks[0].text).not.toContain("极速模式") + expect(textChunks[0].text).not.toContain("极") + expect(textChunks[0].text).not.toContain("速") + expect(textChunks[0].text).not.toContain("模") + expect(textChunks[0].text).not.toContain("式") + }) + + it("should preserve legitimate Chinese text while removing artifacts", async () => { + // Mock a response with both legitimate Chinese text and unwanted artifacts + mockCreate.mockImplementationOnce(async (options) => { + // Return async iterator for streaming + return { + [Symbol.asyncIterator]: async function* () { + yield { + choices: [ + { + delta: { + content: "这是正常的中文文本极速模式,不应该被删除。File path: 极 test.txt", + }, + index: 0, + }, + ], + usage: null, + } + yield { + choices: [ + { + delta: {}, + index: 0, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + } + }, + } + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const textChunks = chunks.filter((chunk) => chunk.type === "text") + expect(textChunks).toHaveLength(1) + // Should remove "极速模式" phrase and isolated "极" between spaces + expect(textChunks[0].text).toBe("这是正常的中文文本,不应该被删除。File path: test.txt") + expect(textChunks[0].text).toContain("这是正常的中文文本") + expect(textChunks[0].text).not.toContain("极速模式") + // The isolated "极" between spaces should be removed + expect(textChunks[0].text).not.toContain(" 极 ") + }) + + it("should handle reasoning content with unwanted characters", async () => { + // Mock a response with reasoning content containing unwanted characters + mockCreate.mockImplementationOnce(async (options) => { + return { + [Symbol.asyncIterator]: async function* () { + yield { + choices: [ + { + delta: { + content: "Reasoning with 极速模式 artifactsRegular text", + }, + index: 0, + }, + ], + usage: null, + } + yield { + choices: [ + { + delta: {}, + index: 0, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + } + }, + } + }) + + const stream = handler.createMessage(systemPrompt, messages) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + // Check both reasoning and text chunks + const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning") + const textChunks = chunks.filter((chunk) => chunk.type === "text") + + if (reasoningChunks.length > 0) { + expect(reasoningChunks[0].text).not.toContain("极速模式") + } + if (textChunks.length > 0) { + expect(textChunks[0].text).not.toContain("极速模式") + } + }) }) describe("processUsageMetrics", () => { diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index de119de6db..98610bd7ac 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -1,13 +1,18 @@ import { deepSeekModels, deepSeekDefaultModelId } from "@roo-code/types" +import { Anthropic } from "@anthropic-ai/sdk" import type { ApiHandlerOptions } from "../../shared/api" -import type { ApiStreamUsageChunk } from "../transform/stream" +import type { ApiStreamUsageChunk, ApiStream } from "../transform/stream" import { getModelParams } from "../transform/model-params" +import type { ApiHandlerCreateMessageMetadata } from "../index" import { OpenAiHandler } from "./openai" export class DeepSeekHandler extends OpenAiHandler { + // Pattern to match unwanted "极速模式" characters and its variations + private readonly UNWANTED_PATTERN = /[极極][速][模][式]|[极極]|[速]?[模]?[式]?/g + constructor(options: ApiHandlerOptions) { super({ ...options, @@ -26,6 +31,58 @@ export class DeepSeekHandler extends OpenAiHandler { return { id, info, ...params } } + override async *createMessage( + systemPrompt: string, + messages: Anthropic.Messages.MessageParam[], + metadata?: ApiHandlerCreateMessageMetadata, + ): ApiStream { + // Get the stream from the parent class + const stream = super.createMessage(systemPrompt, messages, metadata) + + // Process each chunk to remove unwanted characters + for await (const chunk of stream) { + if (chunk.type === "text" && chunk.text) { + // Sanitize the text content + chunk.text = this.sanitizeContent(chunk.text) + } else if (chunk.type === "reasoning" && chunk.text) { + // Also sanitize reasoning content + chunk.text = this.sanitizeContent(chunk.text) + } + yield chunk + } + } + + /** + * Removes unwanted "极速模式" characters from the content. + * These characters appear to be injected by some DeepSeek V3.1 configurations. + */ + private sanitizeContent(content: string): string { + // First, try to remove the complete phrase "极速模式" + let sanitized = content.replace(/极速模式/g, "") + + // Remove partial sequences like "模式" that might remain + sanitized = sanitized.replace(/模式(?![一-龿])/g, "") + + // Remove isolated occurrences of these characters when they appear + // between non-Chinese characters or at boundaries + // Using more specific patterns to avoid removing legitimate Chinese text + sanitized = sanitized.replace(/(? Date: Mon, 25 Aug 2025 07:39:27 +0000 Subject: [PATCH 2/2] refactor: address review feedback - Remove unused UNWANTED_PATTERN property - Add more detailed comment explaining the issue origin - Clarify that sanitization preserves legitimate Chinese text --- src/api/providers/deepseek.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index 98610bd7ac..4cf6ebc22c 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -10,9 +10,6 @@ import type { ApiHandlerCreateMessageMetadata } from "../index" import { OpenAiHandler } from "./openai" export class DeepSeekHandler extends OpenAiHandler { - // Pattern to match unwanted "极速模式" characters and its variations - private readonly UNWANTED_PATTERN = /[极極][速][模][式]|[极極]|[速]?[模]?[式]?/g - constructor(options: ApiHandlerOptions) { super({ ...options, @@ -53,8 +50,10 @@ export class DeepSeekHandler extends OpenAiHandler { } /** - * Removes unwanted "极速模式" characters from the content. - * These characters appear to be injected by some DeepSeek V3.1 configurations. + * Removes unwanted "极速模式" (speed mode) characters from the content. + * These characters appear to be injected by some DeepSeek V3.1 configurations, + * possibly from a Chinese language interface or prompt template. + * The sanitization preserves legitimate Chinese text while removing these artifacts. */ private sanitizeContent(content: string): string { // First, try to remove the complete phrase "极速模式"