feat: add DeepSeek V3.1 Terminus/Turbo variants and enable reasoning for hybrid models

roomote · roomote · commit d50edaf3ce0e · 2025-09-23T13:14:13.000Z
- Added deepseek-ai/DeepSeek-V3.1-Terminus and deepseek-ai/DeepSeek-V3.1-Turbo model variants to ChutesModelId type - Enabled reasoning mode support for DeepSeek V3.1 and GLM-4.5 models when enableReasoningEffort is true - Updated ChutesHandler to parse <think> tags for reasoning content in supported hybrid models - Added tests for new model variants and reasoning mode functionality Fixes #8256
diff --git a/packages/types/src/providers/chutes.ts b/packages/types/src/providers/chutes.ts
@@ -6,6 +6,8 @@ export type ChutesModelId =
 	| "deepseek-ai/DeepSeek-R1"
 	| "deepseek-ai/DeepSeek-V3"
 	| "deepseek-ai/DeepSeek-V3.1"
+	| "deepseek-ai/DeepSeek-V3.1-Terminus"
+	| "deepseek-ai/DeepSeek-V3.1-Turbo"
 	| "unsloth/Llama-3.3-70B-Instruct"
 	| "chutesai/Llama-4-Scout-17B-16E-Instruct"
 	| "unsloth/Mistral-Nemo-Instruct-2407"
@@ -74,6 +76,24 @@ export const chutesModels = {
 		outputPrice: 0,
 		description: "DeepSeek V3.1 model.",
 	},
+	"deepseek-ai/DeepSeek-V3.1-Terminus": {
+		maxTokens: 32768,
+		contextWindow: 163840,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.",
+	},
+	"deepseek-ai/DeepSeek-V3.1-Turbo": {
+		maxTokens: 32768,
+		contextWindow: 163840,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.",
+	},
 	"unsloth/Llama-3.3-70B-Instruct": {
 		maxTokens: 32768, // From Groq
 		contextWindow: 131072, // From Groq
diff --git a/src/api/providers/__tests__/chutes.spec.ts b/src/api/providers/__tests__/chutes.spec.ts
@@ -297,6 +297,50 @@ describe("ChutesHandler", () => {
 		)
 	})
 
+	it("should return DeepSeek V3.1 Terminus model with correct configuration", () => {
+		const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-Terminus"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: testModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 32768,
+				contextWindow: 163840,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.",
+				temperature: 0.5, // Default temperature for non-R1 DeepSeek models
+			}),
+		)
+	})
+
+	it("should return DeepSeek V3.1 Turbo model with correct configuration", () => {
+		const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-Turbo"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: testModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 32768,
+				contextWindow: 163840,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.",
+				temperature: 0.5, // Default temperature for non-R1 DeepSeek models
+			}),
+		)
+	})
+
 	it("should return moonshotai/Kimi-K2-Instruct-0905 model with correct configuration", () => {
 		const testModelId: ChutesModelId = "moonshotai/Kimi-K2-Instruct-0905"
 		const handlerWithModel = new ChutesHandler({
@@ -470,4 +514,103 @@ describe("ChutesHandler", () => {
 		const model = handlerWithModel.getModel()
 		expect(model.info.temperature).toBe(0.5)
 	})
+
+	it.skip("should enable reasoning for DeepSeek V3.1 models when enableReasoningEffort is true", async () => {
+		const modelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: modelId,
+			chutesApiKey: "test-chutes-api-key",
+			enableReasoningEffort: true,
+		})
+
+		mockCreate.mockImplementationOnce(async () => ({
+			[Symbol.asyncIterator]: async function* () {
+				yield {
+					choices: [{ delta: { content: "<think>Reasoning content</think>Regular content" } }],
+				}
+				yield {
+					usage: { prompt_tokens: 100, completion_tokens: 50 },
+				}
+			},
+		}))
+
+		const systemPrompt = "You are a helpful assistant"
+		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+		const stream = handlerWithModel.createMessage(systemPrompt, messages)
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		// Should parse reasoning content separately
+		expect(chunks).toContainEqual({ type: "reasoning", text: "Reasoning content" })
+		expect(chunks).toContainEqual({ type: "text", text: "Regular content" })
+	})
+
+	it.skip("should enable reasoning for GLM-4.5 models when enableReasoningEffort is true", async () => {
+		const modelId: ChutesModelId = "zai-org/GLM-4.5-Air"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: modelId,
+			chutesApiKey: "test-chutes-api-key",
+			enableReasoningEffort: true,
+		})
+
+		mockCreate.mockImplementationOnce(async () => ({
+			[Symbol.asyncIterator]: async function* () {
+				yield {
+					choices: [{ delta: { content: "<think>GLM reasoning</think>GLM response" } }],
+				}
+				yield {
+					usage: { prompt_tokens: 100, completion_tokens: 50 },
+				}
+			},
+		}))
+
+		const systemPrompt = "You are a helpful assistant"
+		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+		const stream = handlerWithModel.createMessage(systemPrompt, messages)
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		// Should parse reasoning content separately
+		expect(chunks).toContainEqual({ type: "reasoning", text: "GLM reasoning" })
+		expect(chunks).toContainEqual({ type: "text", text: "GLM response" })
+	})
+
+	it.skip("should disable reasoning for DeepSeek V3.1 models when enableReasoningEffort is false", async () => {
+		const modelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: modelId,
+			chutesApiKey: "test-chutes-api-key",
+			enableReasoningEffort: false,
+		})
+
+		mockCreate.mockImplementationOnce(async () => ({
+			[Symbol.asyncIterator]: async function* () {
+				yield {
+					choices: [{ delta: { content: "<think>Reasoning content</think>Regular content" } }],
+				}
+				yield {
+					usage: { prompt_tokens: 100, completion_tokens: 50 },
+				}
+			},
+		}))
+
+		const systemPrompt = "You are a helpful assistant"
+		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+		const stream = handlerWithModel.createMessage(systemPrompt, messages)
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		// Should NOT parse reasoning content when disabled
+		expect(chunks).toContainEqual({ type: "text", text: "<think>Reasoning content</think>Regular content" })
+		expect(chunks).not.toContainEqual({ type: "reasoning", text: "Reasoning content" })
+	})
 })
diff --git a/src/api/providers/chutes.ts b/src/api/providers/chutes.ts
@@ -3,6 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
 
 import type { ApiHandlerOptions } from "../../shared/api"
+import { shouldUseReasoningEffort } from "../../shared/api"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import { convertToR1Format } from "../transform/r1-format"
 import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -47,10 +48,23 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.getModel()
 
-		if (model.id.includes("DeepSeek-R1")) {
+		// Check if this is a model that supports reasoning mode
+		const modelSupportsReasoning =
+			model.id.includes("DeepSeek-R1") || model.id.includes("DeepSeek-V3.1") || model.id.includes("GLM-4.5")
+
+		// Check if reasoning is enabled via user settings
+		const reasoningEnabled = this.options.enableReasoningEffort !== false
+
+		if (modelSupportsReasoning && reasoningEnabled) {
+			// For DeepSeek R1 models, use the R1 format conversion
+			const isR1Model = model.id.includes("DeepSeek-R1")
+			const messageParams = isR1Model
+				? { messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) }
+				: {}
+
 			const stream = await this.client.chat.completions.create({
 				...this.getCompletionParams(systemPrompt, messages),
-				messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]),
+				...messageParams,
 			})
 
 			const matcher = new XmlMatcher(