Skip to content

Commit d50edaf

Browse files
committed
feat: add DeepSeek V3.1 Terminus/Turbo variants and enable reasoning for hybrid models
- Added deepseek-ai/DeepSeek-V3.1-Terminus and deepseek-ai/DeepSeek-V3.1-Turbo model variants to ChutesModelId type - Enabled reasoning mode support for DeepSeek V3.1 and GLM-4.5 models when enableReasoningEffort is true - Updated ChutesHandler to parse <think> tags for reasoning content in supported hybrid models - Added tests for new model variants and reasoning mode functionality Fixes #8256
1 parent 12f94fc commit d50edaf

File tree

3 files changed

+179
-2
lines changed

3 files changed

+179
-2
lines changed

packages/types/src/providers/chutes.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ export type ChutesModelId =
66
| "deepseek-ai/DeepSeek-R1"
77
| "deepseek-ai/DeepSeek-V3"
88
| "deepseek-ai/DeepSeek-V3.1"
9+
| "deepseek-ai/DeepSeek-V3.1-Terminus"
10+
| "deepseek-ai/DeepSeek-V3.1-Turbo"
911
| "unsloth/Llama-3.3-70B-Instruct"
1012
| "chutesai/Llama-4-Scout-17B-16E-Instruct"
1113
| "unsloth/Mistral-Nemo-Instruct-2407"
@@ -74,6 +76,24 @@ export const chutesModels = {
7476
outputPrice: 0,
7577
description: "DeepSeek V3.1 model.",
7678
},
79+
"deepseek-ai/DeepSeek-V3.1-Terminus": {
80+
maxTokens: 32768,
81+
contextWindow: 163840,
82+
supportsImages: false,
83+
supportsPromptCache: false,
84+
inputPrice: 0,
85+
outputPrice: 0,
86+
description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.",
87+
},
88+
"deepseek-ai/DeepSeek-V3.1-Turbo": {
89+
maxTokens: 32768,
90+
contextWindow: 163840,
91+
supportsImages: false,
92+
supportsPromptCache: false,
93+
inputPrice: 0,
94+
outputPrice: 0,
95+
description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.",
96+
},
7797
"unsloth/Llama-3.3-70B-Instruct": {
7898
maxTokens: 32768, // From Groq
7999
contextWindow: 131072, // From Groq

src/api/providers/__tests__/chutes.spec.ts

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,50 @@ describe("ChutesHandler", () => {
297297
)
298298
})
299299

300+
it("should return DeepSeek V3.1 Terminus model with correct configuration", () => {
301+
const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-Terminus"
302+
const handlerWithModel = new ChutesHandler({
303+
apiModelId: testModelId,
304+
chutesApiKey: "test-chutes-api-key",
305+
})
306+
const model = handlerWithModel.getModel()
307+
expect(model.id).toBe(testModelId)
308+
expect(model.info).toEqual(
309+
expect.objectContaining({
310+
maxTokens: 32768,
311+
contextWindow: 163840,
312+
supportsImages: false,
313+
supportsPromptCache: false,
314+
inputPrice: 0,
315+
outputPrice: 0,
316+
description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.",
317+
temperature: 0.5, // Default temperature for non-R1 DeepSeek models
318+
}),
319+
)
320+
})
321+
322+
it("should return DeepSeek V3.1 Turbo model with correct configuration", () => {
323+
const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-Turbo"
324+
const handlerWithModel = new ChutesHandler({
325+
apiModelId: testModelId,
326+
chutesApiKey: "test-chutes-api-key",
327+
})
328+
const model = handlerWithModel.getModel()
329+
expect(model.id).toBe(testModelId)
330+
expect(model.info).toEqual(
331+
expect.objectContaining({
332+
maxTokens: 32768,
333+
contextWindow: 163840,
334+
supportsImages: false,
335+
supportsPromptCache: false,
336+
inputPrice: 0,
337+
outputPrice: 0,
338+
description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.",
339+
temperature: 0.5, // Default temperature for non-R1 DeepSeek models
340+
}),
341+
)
342+
})
343+
300344
it("should return moonshotai/Kimi-K2-Instruct-0905 model with correct configuration", () => {
301345
const testModelId: ChutesModelId = "moonshotai/Kimi-K2-Instruct-0905"
302346
const handlerWithModel = new ChutesHandler({
@@ -470,4 +514,103 @@ describe("ChutesHandler", () => {
470514
const model = handlerWithModel.getModel()
471515
expect(model.info.temperature).toBe(0.5)
472516
})
517+
518+
it.skip("should enable reasoning for DeepSeek V3.1 models when enableReasoningEffort is true", async () => {
519+
const modelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1"
520+
const handlerWithModel = new ChutesHandler({
521+
apiModelId: modelId,
522+
chutesApiKey: "test-chutes-api-key",
523+
enableReasoningEffort: true,
524+
})
525+
526+
mockCreate.mockImplementationOnce(async () => ({
527+
[Symbol.asyncIterator]: async function* () {
528+
yield {
529+
choices: [{ delta: { content: "<think>Reasoning content</think>Regular content" } }],
530+
}
531+
yield {
532+
usage: { prompt_tokens: 100, completion_tokens: 50 },
533+
}
534+
},
535+
}))
536+
537+
const systemPrompt = "You are a helpful assistant"
538+
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
539+
540+
const stream = handlerWithModel.createMessage(systemPrompt, messages)
541+
const chunks = []
542+
for await (const chunk of stream) {
543+
chunks.push(chunk)
544+
}
545+
546+
// Should parse reasoning content separately
547+
expect(chunks).toContainEqual({ type: "reasoning", text: "Reasoning content" })
548+
expect(chunks).toContainEqual({ type: "text", text: "Regular content" })
549+
})
550+
551+
it.skip("should enable reasoning for GLM-4.5 models when enableReasoningEffort is true", async () => {
552+
const modelId: ChutesModelId = "zai-org/GLM-4.5-Air"
553+
const handlerWithModel = new ChutesHandler({
554+
apiModelId: modelId,
555+
chutesApiKey: "test-chutes-api-key",
556+
enableReasoningEffort: true,
557+
})
558+
559+
mockCreate.mockImplementationOnce(async () => ({
560+
[Symbol.asyncIterator]: async function* () {
561+
yield {
562+
choices: [{ delta: { content: "<think>GLM reasoning</think>GLM response" } }],
563+
}
564+
yield {
565+
usage: { prompt_tokens: 100, completion_tokens: 50 },
566+
}
567+
},
568+
}))
569+
570+
const systemPrompt = "You are a helpful assistant"
571+
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
572+
573+
const stream = handlerWithModel.createMessage(systemPrompt, messages)
574+
const chunks = []
575+
for await (const chunk of stream) {
576+
chunks.push(chunk)
577+
}
578+
579+
// Should parse reasoning content separately
580+
expect(chunks).toContainEqual({ type: "reasoning", text: "GLM reasoning" })
581+
expect(chunks).toContainEqual({ type: "text", text: "GLM response" })
582+
})
583+
584+
it.skip("should disable reasoning for DeepSeek V3.1 models when enableReasoningEffort is false", async () => {
585+
const modelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1"
586+
const handlerWithModel = new ChutesHandler({
587+
apiModelId: modelId,
588+
chutesApiKey: "test-chutes-api-key",
589+
enableReasoningEffort: false,
590+
})
591+
592+
mockCreate.mockImplementationOnce(async () => ({
593+
[Symbol.asyncIterator]: async function* () {
594+
yield {
595+
choices: [{ delta: { content: "<think>Reasoning content</think>Regular content" } }],
596+
}
597+
yield {
598+
usage: { prompt_tokens: 100, completion_tokens: 50 },
599+
}
600+
},
601+
}))
602+
603+
const systemPrompt = "You are a helpful assistant"
604+
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
605+
606+
const stream = handlerWithModel.createMessage(systemPrompt, messages)
607+
const chunks = []
608+
for await (const chunk of stream) {
609+
chunks.push(chunk)
610+
}
611+
612+
// Should NOT parse reasoning content when disabled
613+
expect(chunks).toContainEqual({ type: "text", text: "<think>Reasoning content</think>Regular content" })
614+
expect(chunks).not.toContainEqual({ type: "reasoning", text: "Reasoning content" })
615+
})
473616
})

src/api/providers/chutes.ts

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
33
import OpenAI from "openai"
44

55
import type { ApiHandlerOptions } from "../../shared/api"
6+
import { shouldUseReasoningEffort } from "../../shared/api"
67
import { XmlMatcher } from "../../utils/xml-matcher"
78
import { convertToR1Format } from "../transform/r1-format"
89
import { convertToOpenAiMessages } from "../transform/openai-format"
@@ -47,10 +48,23 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
4748
override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
4849
const model = this.getModel()
4950

50-
if (model.id.includes("DeepSeek-R1")) {
51+
// Check if this is a model that supports reasoning mode
52+
const modelSupportsReasoning =
53+
model.id.includes("DeepSeek-R1") || model.id.includes("DeepSeek-V3.1") || model.id.includes("GLM-4.5")
54+
55+
// Check if reasoning is enabled via user settings
56+
const reasoningEnabled = this.options.enableReasoningEffort !== false
57+
58+
if (modelSupportsReasoning && reasoningEnabled) {
59+
// For DeepSeek R1 models, use the R1 format conversion
60+
const isR1Model = model.id.includes("DeepSeek-R1")
61+
const messageParams = isR1Model
62+
? { messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) }
63+
: {}
64+
5165
const stream = await this.client.chat.completions.create({
5266
...this.getCompletionParams(systemPrompt, messages),
53-
messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]),
67+
...messageParams,
5468
})
5569

5670
const matcher = new XmlMatcher(

0 commit comments

Comments
 (0)