diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts
index 50cabfa922..3ecd0df882 100644
--- a/src/api/providers/__tests__/deepseek.spec.ts
+++ b/src/api/providers/__tests__/deepseek.spec.ts
@@ -260,6 +260,180 @@ describe("DeepSeekHandler", () => {
 			expect(usageChunks[0].cacheWriteTokens).toBe(8)
 			expect(usageChunks[0].cacheReadTokens).toBe(2)
 		})
+
+		it("should sanitize unwanted '极速模式' characters from response", async () => {
+			// Mock both the non-streaming and the streaming response, each containing the unwanted characters
+			mockCreate.mockImplementationOnce(async (options) => {
+				if (!options.stream) {
+					return {
+						id: "test-completion",
+						choices: [
+							{
+								message: {
+									role: "assistant",
+									content: "Test response with 极速模式 unwanted characters",
+									refusal: null,
+								},
+								finish_reason: "stop",
+								index: 0,
+							},
+						],
+						usage: {
+							prompt_tokens: 10,
+							completion_tokens: 5,
+							total_tokens: 15,
+						},
+					}
+				}
+
+				// Streaming path: a content delta containing the artifacts, followed by a usage-only delta
+				return {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							choices: [
+								{
+									delta: {
+										content: "Here is 极速模式 some text with 极 unwanted 速 characters 模式",
+									},
+									index: 0,
+								},
+							],
+							usage: null,
+						}
+						yield {
+							choices: [
+								{
+									delta: {},
+									index: 0,
+								},
+							],
+							usage: {
+								prompt_tokens: 10,
+								completion_tokens: 5,
+								total_tokens: 15,
+							},
+						}
+					},
+				}
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const textChunks = chunks.filter((chunk) => chunk.type === "text")
+			expect(textChunks).toHaveLength(1)
+			// The full phrase and every stray fragment of it should be removed
+			expect(textChunks[0].text).toBe("Here is some text with unwanted characters")
+			expect(textChunks[0].text).not.toContain("极速模式")
+			expect(textChunks[0].text).not.toContain("极")
+			expect(textChunks[0].text).not.toContain("速")
+			expect(textChunks[0].text).not.toContain("模")
+			expect(textChunks[0].text).not.toContain("式")
+		})
+
+		it("should preserve legitimate Chinese text while removing artifacts", async () => {
+			// Mock a streamed response that mixes legitimate Chinese text with the unwanted artifacts
+			mockCreate.mockImplementationOnce(async (options) => {
+				// Streaming path: one content delta, followed by a usage-only delta
+				return {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							choices: [
+								{
+									delta: {
+										content: "这是正常的中文文本极速模式,不应该被删除。File path: 极 test.txt",
+									},
+									index: 0,
+								},
+							],
+							usage: null,
+						}
+						yield {
+							choices: [
+								{
+									delta: {},
+									index: 0,
+								},
+							],
+							usage: {
+								prompt_tokens: 10,
+								completion_tokens: 5,
+								total_tokens: 15,
+							},
+						}
+					},
+				}
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			const textChunks = chunks.filter((chunk) => chunk.type === "text")
+			expect(textChunks).toHaveLength(1)
+			// Should remove "极速模式" phrase and isolated "极" between spaces
+			expect(textChunks[0].text).toBe("这是正常的中文文本,不应该被删除。File path: test.txt")
+			expect(textChunks[0].text).toContain("这是正常的中文文本")
+			expect(textChunks[0].text).not.toContain("极速模式")
+			// The isolated "极" between spaces should be removed
+			expect(textChunks[0].text).not.toContain(" 极 ")
+		})
+
+		it("should handle reasoning content with unwanted characters", async () => {
+			// Mock a response with reasoning content containing unwanted characters
+			mockCreate.mockImplementationOnce(async (options) => {
+				return {
+					[Symbol.asyncIterator]: async function* () {
+						yield {
+							choices: [
+								{
+									delta: {
+										content: "Reasoning with 极速模式 artifactsRegular text",
+									},
+									index: 0,
+								},
+							],
+							usage: null,
+						}
+						yield {
+							choices: [
+								{
+									delta: {},
+									index: 0,
+								},
+							],
+							usage: {
+								prompt_tokens: 10,
+								completion_tokens: 5,
+								total_tokens: 15,
+							},
+						}
+					},
+				}
+			})
+
+			const stream = handler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			// Collect reasoning and text chunks; either kind may carry the sanitized output
+			const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning")
+			const textChunks = chunks.filter((chunk) => chunk.type === "text")
+
+			// Require at least one chunk so the checks below cannot pass vacuously
+			const sanitizedChunks = [...reasoningChunks, ...textChunks]
+			expect(sanitizedChunks.length).toBeGreaterThan(0)
+			for (const chunk of sanitizedChunks) {
+				expect(chunk.text).not.toContain("极速模式")
+			}
+		})
 	})
 
 	describe("processUsageMetrics", () => {
diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts
index de119de6db..4cf6ebc22c 100644
--- a/src/api/providers/deepseek.ts
+++ b/src/api/providers/deepseek.ts
@@ -1,9 +1,11 @@
 import { deepSeekModels, deepSeekDefaultModelId } from "@roo-code/types"
+import { Anthropic } from "@anthropic-ai/sdk"
 
 import type { ApiHandlerOptions } from "../../shared/api"
 
-import type { ApiStreamUsageChunk } from "../transform/stream"
+import type { ApiStreamUsageChunk, ApiStream } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
+import type { ApiHandlerCreateMessageMetadata } from "../index"
 
 import { OpenAiHandler } from "./openai"
 
@@ -26,6 +28,60 @@ export class DeepSeekHandler extends OpenAiHandler {
 		return { id, info, ...params }
 	}
 
+	/**
+	 * Streams the parent handler's response, passing the `text` of every
+	 * "text" and "reasoning" chunk through `sanitizeContent` before yielding it.
+	 * Chunks of any other type, and chunks whose text is empty, are yielded unchanged.
+	 */
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const stream = super.createMessage(systemPrompt, messages, metadata)
+
+		for await (const chunk of stream) {
+			// NOTE(review): each chunk is sanitized on its own, so an artifact split across two chunks is not seen as one phrase.
+			if ((chunk.type === "text" || chunk.type === "reasoning") && chunk.text) {
+				chunk.text = this.sanitizeContent(chunk.text)
+			}
+			yield chunk
+		}
+	}
+
+	/**
+	 * Removes unwanted "极速模式" (speed mode) characters from the content.
+ * These characters appear to be injected by some DeepSeek V3.1 configurations, + * possibly from a Chinese language interface or prompt template. + * The sanitization preserves legitimate Chinese text while removing these artifacts. + */ + private sanitizeContent(content: string): string { + // First, try to remove the complete phrase "极速模式" + let sanitized = content.replace(/极速模式/g, "") + + // Remove partial sequences like "模式" that might remain + sanitized = sanitized.replace(/模式(?![一-龿])/g, "") + + // Remove isolated occurrences of these characters when they appear + // between non-Chinese characters or at boundaries + // Using more specific patterns to avoid removing legitimate Chinese text + sanitized = sanitized.replace(/(?