Skip to content

Commit 52be5e5

Browse files
committed
fix: use chat_template_kwargs for DeepSeek V3.1 Terminus reasoning control
- Add chat_template_kwargs support to OpenRouterChatCompletionParams type
- Convert reasoning configuration to chat_template_kwargs for DeepSeek V3.1 Terminus models
- Set thinking parameter based on reasoning enabled state (not excluded)
- Add comprehensive tests for the new behavior
- Ensures reasoning can be properly disabled (default OFF) for DeepSeek V3.1 Terminus

Fixes #8270
1 parent 63b4a78 commit 52be5e5

File tree

2 files changed

+181
-2
lines changed

2 files changed

+181
-2
lines changed

src/api/providers/__tests__/openrouter.spec.ts

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,17 @@ vitest.mock("../fetchers/modelCache", () => ({
5151
cacheReadsPrice: 0.3,
5252
description: "Claude 3.7 Sonnet with thinking",
5353
},
54+
"deepseek/deepseek-v3.1-terminus": {
55+
maxTokens: 8192,
56+
contextWindow: 128000,
57+
supportsImages: false,
58+
supportsPromptCache: false,
59+
inputPrice: 0.3,
60+
outputPrice: 1.2,
61+
description: "DeepSeek V3.1 Terminus",
62+
supportsReasoningEffort: true,
63+
supportedReasoningEfforts: ["low", "medium", "high"],
64+
},
5465
})
5566
}),
5667
}))
@@ -330,4 +341,144 @@ describe("OpenRouterHandler", () => {
330341
await expect(handler.completePrompt("test prompt")).rejects.toThrow("Unexpected error")
331342
})
332343
})
344+
345+
describe("DeepSeek V3.1 Terminus handling", () => {
346+
it("should use chat_template_kwargs with thinking:true when reasoning is enabled for V3.1 Terminus", async () => {
347+
const handler = new OpenRouterHandler({
348+
openRouterApiKey: "test-key",
349+
openRouterModelId: "deepseek/deepseek-v3.1-terminus",
350+
reasoningEffort: "medium",
351+
})
352+
353+
const mockStream = {
354+
async *[Symbol.asyncIterator]() {
355+
yield {
356+
id: "test-id",
357+
choices: [{ delta: { content: "test response" } }],
358+
}
359+
},
360+
}
361+
362+
const mockCreate = vitest.fn().mockResolvedValue(mockStream)
363+
;(OpenAI as any).prototype.chat = {
364+
completions: { create: mockCreate },
365+
} as any
366+
367+
await handler.createMessage("test", []).next()
368+
369+
// Should include chat_template_kwargs with thinking:true and NOT include reasoning parameter
370+
expect(mockCreate).toHaveBeenCalledWith(
371+
expect.objectContaining({
372+
model: "deepseek/deepseek-v3.1-terminus",
373+
chat_template_kwargs: { thinking: true },
374+
}),
375+
)
376+
// Ensure reasoning parameter is NOT included
377+
expect(mockCreate).not.toHaveBeenCalledWith(
378+
expect.objectContaining({
379+
reasoning: expect.anything(),
380+
}),
381+
)
382+
})
383+
384+
it("should use chat_template_kwargs with thinking:false when reasoning is disabled for V3.1 Terminus", async () => {
385+
const handler = new OpenRouterHandler({
386+
openRouterApiKey: "test-key",
387+
openRouterModelId: "deepseek/deepseek-v3.1-terminus",
388+
// No reasoning effort specified
389+
})
390+
391+
const mockStream = {
392+
async *[Symbol.asyncIterator]() {
393+
yield {
394+
id: "test-id",
395+
choices: [{ delta: { content: "test response" } }],
396+
}
397+
},
398+
}
399+
400+
const mockCreate = vitest.fn().mockResolvedValue(mockStream)
401+
;(OpenAI as any).prototype.chat = {
402+
completions: { create: mockCreate },
403+
} as any
404+
405+
await handler.createMessage("test", []).next()
406+
407+
// Should include chat_template_kwargs with thinking:false
408+
expect(mockCreate).toHaveBeenCalledWith(
409+
expect.objectContaining({
410+
model: "deepseek/deepseek-v3.1-terminus",
411+
chat_template_kwargs: { thinking: false },
412+
}),
413+
)
414+
// Ensure reasoning parameter is NOT included
415+
expect(mockCreate).not.toHaveBeenCalledWith(
416+
expect.objectContaining({
417+
reasoning: expect.anything(),
418+
}),
419+
)
420+
})
421+
422+
it("should not use chat_template_kwargs for non-Terminus models", async () => {
423+
const handler = new OpenRouterHandler({
424+
openRouterApiKey: "test-key",
425+
openRouterModelId: "anthropic/claude-sonnet-4",
426+
reasoningEffort: "medium",
427+
})
428+
429+
const mockStream = {
430+
async *[Symbol.asyncIterator]() {
431+
yield {
432+
id: "test-id",
433+
choices: [{ delta: { content: "test response" } }],
434+
}
435+
},
436+
}
437+
438+
const mockCreate = vitest.fn().mockResolvedValue(mockStream)
439+
;(OpenAI as any).prototype.chat = {
440+
completions: { create: mockCreate },
441+
} as any
442+
443+
await handler.createMessage("test", []).next()
444+
445+
// Should NOT include chat_template_kwargs for non-Terminus models
446+
expect(mockCreate).not.toHaveBeenCalledWith(
447+
expect.objectContaining({
448+
chat_template_kwargs: expect.anything(),
449+
}),
450+
)
451+
})
452+
453+
it("should handle chat_template_kwargs in completePrompt for V3.1 Terminus", async () => {
454+
const handler = new OpenRouterHandler({
455+
openRouterApiKey: "test-key",
456+
openRouterModelId: "deepseek/deepseek-v3.1-terminus",
457+
reasoningEffort: "high",
458+
})
459+
460+
const mockResponse = { choices: [{ message: { content: "test completion" } }] }
461+
const mockCreate = vitest.fn().mockResolvedValue(mockResponse)
462+
;(OpenAI as any).prototype.chat = {
463+
completions: { create: mockCreate },
464+
} as any
465+
466+
await handler.completePrompt("test prompt")
467+
468+
// Should include chat_template_kwargs with thinking:true for non-streaming as well
469+
expect(mockCreate).toHaveBeenCalledWith(
470+
expect.objectContaining({
471+
model: "deepseek/deepseek-v3.1-terminus",
472+
chat_template_kwargs: { thinking: true },
473+
stream: false,
474+
}),
475+
)
476+
// Ensure reasoning parameter is NOT included
477+
expect(mockCreate).not.toHaveBeenCalledWith(
478+
expect.objectContaining({
479+
reasoning: expect.anything(),
480+
}),
481+
)
482+
})
483+
})
333484
})

src/api/providers/openrouter.ts

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ type OpenRouterChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & {
6060
include_reasoning?: boolean
6161
// https://openrouter.ai/docs/use-cases/reasoning-tokens
6262
reasoning?: OpenRouterReasoningParams
63+
// For DeepSeek V3.1 Terminus models that require chat_template_kwargs
64+
chat_template_kwargs?: { thinking?: boolean }
6365
}
6466

6567
// See `OpenAI.Chat.Completions.ChatCompletionChunk["usage"]`
@@ -141,6 +143,20 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
141143

142144
const transforms = (this.options.openRouterUseMiddleOutTransform ?? true) ? ["middle-out"] : undefined
143145

146+
// Special handling for DeepSeek V3.1 Terminus models
147+
// These models use chat_template_kwargs with thinking parameter instead of reasoning
148+
let chatTemplateKwargs: { thinking?: boolean } | undefined
149+
let finalReasoning = reasoning
150+
151+
if (modelId.startsWith("deepseek/deepseek-v3.1-terminus")) {
152+
// For DeepSeek V3.1 Terminus, convert reasoning to chat_template_kwargs
153+
// The reasoning object will be present if reasoning is enabled
154+
const hasReasoningEnabled = Boolean(reasoning && !reasoning.exclude)
155+
chatTemplateKwargs = { thinking: hasReasoningEnabled }
156+
// Don't pass reasoning parameter for this model
157+
finalReasoning = undefined
158+
}
159+
144160
// https://openrouter.ai/docs/transforms
145161
const completionParams: OpenRouterChatCompletionParams = {
146162
model: modelId,
@@ -160,7 +176,8 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
160176
},
161177
}),
162178
...(transforms && { transforms }),
163-
...(reasoning && { reasoning }),
179+
...(finalReasoning && { reasoning: finalReasoning }),
180+
...(chatTemplateKwargs && { chat_template_kwargs: chatTemplateKwargs }),
164181
}
165182

166183
let stream
@@ -248,6 +265,16 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
248265
async completePrompt(prompt: string) {
249266
let { id: modelId, maxTokens, temperature, reasoning } = await this.fetchModel()
250267

268+
// Handle DeepSeek V3.1 Terminus for non-streaming as well
269+
let chatTemplateKwargs: { thinking?: boolean } | undefined
270+
let finalReasoning = reasoning
271+
272+
if (modelId.startsWith("deepseek/deepseek-v3.1-terminus")) {
273+
const hasReasoningEnabled = Boolean(reasoning && !reasoning.exclude)
274+
chatTemplateKwargs = { thinking: hasReasoningEnabled }
275+
finalReasoning = undefined
276+
}
277+
251278
const completionParams: OpenRouterChatCompletionParams = {
252279
model: modelId,
253280
max_tokens: maxTokens,
@@ -263,7 +290,8 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH
263290
allow_fallbacks: false,
264291
},
265292
}),
266-
...(reasoning && { reasoning }),
293+
...(finalReasoning && { reasoning: finalReasoning }),
294+
...(chatTemplateKwargs && { chat_template_kwargs: chatTemplateKwargs }),
267295
}
268296

269297
let response

0 commit comments

Comments (0)