Commit be2ad23

feat: Add DeepSeek V3.1 variants and GLM-4.6 with reasoning support (#8256)
- Add DeepSeek-V3.1-Terminus and DeepSeek-V3.1-turbo models
- Add GLM-4.6-FP8 model with 200K context window
- Fix reasoning implementation to use chat_template_kwargs with thinking parameter
- Parse reasoning_content field for hybrid reasoning models (DeepSeek V3.1, GLM-4.5, GLM-4.6)
- Update tests to verify reasoning mode functionality
- Fix capitalization: DeepSeek-V3.1-Turbo -> DeepSeek-V3.1-turbo

Fixes #8256
1 parent d50edaf commit be2ad23
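For context on the chat_template_kwargs and reasoning_content bullets above, the exchange looks roughly like the sketch below. Only chat_template_kwargs, thinking, and reasoning_content come from this commit; the model id, prompt, and remaining fields are ordinary OpenAI-compatible boilerplate shown for illustration.

// Sketch of the request body once reasoning is enabled (assumptions noted above).
const requestBody = {
	model: "deepseek-ai/DeepSeek-V3.1", // illustrative choice of hybrid reasoning model
	messages: [{ role: "user", content: "Explain the change." }],
	stream: true,
	stream_options: { include_usage: true },
	chat_template_kwargs: { thinking: true }, // the flag this commit threads through
}

// Streamed chunks then keep the model's thinking separate from its answer:
//   { choices: [{ delta: { reasoning_content: "Let me think about this..." } }] }
//   { choices: [{ delta: { content: "Here's my response." } }] }
//   { choices: [], usage: { prompt_tokens: 100, completion_tokens: 50 } }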

File tree

3 files changed: +144 −28

- packages/types/src/providers/chutes.ts
- src/api/providers/__tests__/chutes.spec.ts
- src/api/providers/chutes.ts


packages/types/src/providers/chutes.ts

Lines changed: 19 additions & 2 deletions
@@ -7,7 +7,7 @@ export type ChutesModelId =
 	| "deepseek-ai/DeepSeek-V3"
 	| "deepseek-ai/DeepSeek-V3.1"
 	| "deepseek-ai/DeepSeek-V3.1-Terminus"
-	| "deepseek-ai/DeepSeek-V3.1-Turbo"
+	| "deepseek-ai/DeepSeek-V3.1-turbo"
 	| "unsloth/Llama-3.3-70B-Instruct"
 	| "chutesai/Llama-4-Scout-17B-16E-Instruct"
 	| "unsloth/Mistral-Nemo-Instruct-2407"
@@ -31,6 +31,7 @@ export type ChutesModelId =
 	| "tngtech/DeepSeek-R1T-Chimera"
 	| "zai-org/GLM-4.5-Air"
 	| "zai-org/GLM-4.5-FP8"
+	| "zai-org/GLM-4.6-FP8"
 	| "moonshotai/Kimi-K2-Instruct-75k"
 	| "moonshotai/Kimi-K2-Instruct-0905"
 	| "Qwen/Qwen3-235B-A22B-Thinking-2507"
@@ -72,6 +73,7 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek V3.1 model.",
@@ -81,15 +83,17 @@ export const chutesModels = {
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.",
 	},
-	"deepseek-ai/DeepSeek-V3.1-Turbo": {
+	"deepseek-ai/DeepSeek-V3.1-turbo": {
 		maxTokens: 32768,
 		contextWindow: 163840,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
 		inputPrice: 0,
 		outputPrice: 0,
 		description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.",
@@ -279,6 +283,7 @@ export const chutesModels = {
 		contextWindow: 151329,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
 		inputPrice: 0,
 		outputPrice: 0,
 		description:
@@ -289,11 +294,23 @@ export const chutesModels = {
 		contextWindow: 131072,
 		supportsImages: false,
 		supportsPromptCache: false,
+		supportsReasoningEffort: true,
 		inputPrice: 0,
 		outputPrice: 0,
 		description:
 			"GLM-4.5-FP8 model with 128k token context window, optimized for agent-based applications with MoE architecture.",
 	},
+	"zai-org/GLM-4.6-FP8": {
+		maxTokens: 32768,
+		contextWindow: 204800,
+		supportsImages: false,
+		supportsPromptCache: false,
+		supportsReasoningEffort: true,
+		inputPrice: 0,
+		outputPrice: 0,
+		description:
+			"GLM-4.6-FP8 model with 200K context window, FP8 precision for efficient inference. Improved reasoning, coding, and agent capabilities.",
+	},
 	"Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": {
 		maxTokens: 32768,
 		contextWindow: 262144,
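A caller can consult the new supportsReasoningEffort flag straight off the exported table; a minimal sketch, assuming a relative import path (chutesModels, ChutesModelId, and the field names are from the file above):

// Import path is an assumption; resolve it to wherever chutes.ts lives in your build.
import { chutesModels, type ChutesModelId } from "./chutes"

const id: ChutesModelId = "zai-org/GLM-4.6-FP8"
const info = chutesModels[id]

// GLM-4.6-FP8 declares a 204800-token (200K) context window and the new flag.
if (info.supportsReasoningEffort) {
	console.log(`${id} supports reasoning mode (context window: ${info.contextWindow} tokens)`)
}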

src/api/providers/__tests__/chutes.spec.ts

Lines changed: 70 additions & 10 deletions
@@ -253,6 +253,30 @@ describe("ChutesHandler", () => {
 		)
 	})
 
+	it("should return zai-org/GLM-4.6-FP8 model with correct configuration", () => {
+		const testModelId: ChutesModelId = "zai-org/GLM-4.6-FP8"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: testModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 32768,
+				contextWindow: 204800,
+				supportsImages: false,
+				supportsPromptCache: false,
+				supportsReasoningEffort: true,
+				inputPrice: 0,
+				outputPrice: 0,
+				description:
+					"GLM-4.6-FP8 model with 200K context window, FP8 precision for efficient inference. Improved reasoning, coding, and agent capabilities.",
+				temperature: 0.5, // Default temperature for non-DeepSeek models
+			}),
+		)
+	})
+
 	it("should return Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 model with correct configuration", () => {
 		const testModelId: ChutesModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"
 		const handlerWithModel = new ChutesHandler({
@@ -311,6 +335,7 @@ describe("ChutesHandler", () => {
 			contextWindow: 163840,
 			supportsImages: false,
 			supportsPromptCache: false,
+			supportsReasoningEffort: true,
 			inputPrice: 0,
 			outputPrice: 0,
 			description: "DeepSeek V3.1 Terminus variant - optimized for complex reasoning and extended context.",
@@ -319,8 +344,8 @@ describe("ChutesHandler", () => {
 		)
 	})
 
-	it("should return DeepSeek V3.1 Turbo model with correct configuration", () => {
-		const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-Turbo"
+	it("should return DeepSeek V3.1 turbo model with correct configuration", () => {
+		const testModelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1-turbo"
 		const handlerWithModel = new ChutesHandler({
 			apiModelId: testModelId,
 			chutesApiKey: "test-chutes-api-key",
@@ -333,6 +358,7 @@ describe("ChutesHandler", () => {
 			contextWindow: 163840,
 			supportsImages: false,
 			supportsPromptCache: false,
+			supportsReasoningEffort: true,
 			inputPrice: 0,
 			outputPrice: 0,
 			description: "DeepSeek V3.1 Turbo variant - faster inference with maintained quality.",
@@ -515,7 +541,7 @@ describe("ChutesHandler", () => {
 		expect(model.info.temperature).toBe(0.5)
 	})
 
-	it.skip("should enable reasoning for DeepSeek V3.1 models when enableReasoningEffort is true", async () => {
+	it("should enable reasoning for DeepSeek V3.1 models when enableReasoningEffort is true", async () => {
 		const modelId: ChutesModelId = "deepseek-ai/DeepSeek-V3.1"
 		const handlerWithModel = new ChutesHandler({
 			apiModelId: modelId,
@@ -525,10 +551,17 @@ describe("ChutesHandler", () => {
 
 		mockCreate.mockImplementationOnce(async () => ({
 			[Symbol.asyncIterator]: async function* () {
+				// First yield reasoning content
 				yield {
-					choices: [{ delta: { content: "<think>Reasoning content</think>Regular content" } }],
+					choices: [{ delta: { reasoning_content: "Let me think about this..." } }],
 				}
+				// Then yield regular content
 				yield {
+					choices: [{ delta: { content: "Here's my response." } }],
+				}
+				// Finally yield usage
+				yield {
+					choices: [],
 					usage: { prompt_tokens: 100, completion_tokens: 50 },
 				}
 			},
@@ -543,12 +576,22 @@ describe("ChutesHandler", () => {
 			chunks.push(chunk)
 		}
 
-		// Should parse reasoning content separately
-		expect(chunks).toContainEqual({ type: "reasoning", text: "Reasoning content" })
-		expect(chunks).toContainEqual({ type: "text", text: "Regular content" })
+		// Should parse reasoning content and regular content separately
+		expect(chunks).toContainEqual({ type: "reasoning", text: "Let me think about this..." })
+		expect(chunks).toContainEqual({ type: "text", text: "Here's my response." })
+		expect(chunks).toContainEqual({ type: "usage", inputTokens: 100, outputTokens: 50 })
+
+		// Verify that the API was called with reasoning enabled
+		expect(mockCreate).toHaveBeenCalledWith(
+			expect.objectContaining({
+				chat_template_kwargs: {
+					thinking: true,
+				},
+			}),
+		)
 	})
 
-	it.skip("should enable reasoning for GLM-4.5 models when enableReasoningEffort is true", async () => {
+	it("should enable reasoning for GLM-4.5 models when enableReasoningEffort is true", async () => {
 		const modelId: ChutesModelId = "zai-org/GLM-4.5-Air"
 		const handlerWithModel = new ChutesHandler({
 			apiModelId: modelId,
@@ -558,10 +601,17 @@ describe("ChutesHandler", () => {
 
 		mockCreate.mockImplementationOnce(async () => ({
 			[Symbol.asyncIterator]: async function* () {
+				// First yield reasoning content
+				yield {
+					choices: [{ delta: { reasoning_content: "GLM reasoning process..." } }],
+				}
+				// Then yield regular content
 				yield {
-					choices: [{ delta: { content: "<think>GLM reasoning</think>GLM response" } }],
+					choices: [{ delta: { content: "GLM response" } }],
 				}
+				// Finally yield usage
 				yield {
+					choices: [],
 					usage: { prompt_tokens: 100, completion_tokens: 50 },
 				}
 			},
@@ -577,8 +627,17 @@ describe("ChutesHandler", () => {
 		}
 
 		// Should parse reasoning content separately
-		expect(chunks).toContainEqual({ type: "reasoning", text: "GLM reasoning" })
+		expect(chunks).toContainEqual({ type: "reasoning", text: "GLM reasoning process..." })
 		expect(chunks).toContainEqual({ type: "text", text: "GLM response" })
+
+		// Verify that the API was called with reasoning enabled
+		expect(mockCreate).toHaveBeenCalledWith(
+			expect.objectContaining({
+				chat_template_kwargs: {
+					thinking: true,
+				},
+			}),
+		)
 	})
 
 	it.skip("should disable reasoning for DeepSeek V3.1 models when enableReasoningEffort is false", async () => {
@@ -595,6 +654,7 @@ describe("ChutesHandler", () => {
 					choices: [{ delta: { content: "<think>Reasoning content</think>Regular content" } }],
 				}
 				yield {
+					choices: [],
 					usage: { prompt_tokens: 100, completion_tokens: 50 },
 				}
 			},
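One detail the updated mocks encode: when stream_options.include_usage is set, OpenAI-compatible streams report usage in a terminal chunk whose choices array is empty, hence the choices: [] added next to each usage payload. A sketch of the shape being mirrored:

// Terminal usage-only chunk (values illustrative, shape per the mocks above).
const finalChunk = {
	choices: [],
	usage: { prompt_tokens: 100, completion_tokens: 50 },
}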

src/api/providers/chutes.ts

Lines changed: 55 additions & 16 deletions
@@ -27,6 +27,7 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 	private getCompletionParams(
 		systemPrompt: string,
 		messages: Anthropic.Messages.MessageParam[],
+		enableReasoning: boolean = false,
 	): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming {
 		const {
 			id: model,
@@ -35,36 +36,33 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 
 		const temperature = this.options.modelTemperature ?? this.getModel().info.temperature
 
-		return {
+		const params: any = {
 			model,
 			max_tokens,
 			temperature,
 			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
 			stream: true,
 			stream_options: { include_usage: true },
 		}
+
+		// Add reasoning support for DeepSeek V3.1, GLM-4.5, and GLM-4.6 models
+		if (enableReasoning) {
+			params.chat_template_kwargs = {
+				thinking: true,
+			}
+		}
+
+		return params
 	}
 
 	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
 		const model = this.getModel()
 
-		// Check if this is a model that supports reasoning mode
-		const modelSupportsReasoning =
-			model.id.includes("DeepSeek-R1") || model.id.includes("DeepSeek-V3.1") || model.id.includes("GLM-4.5")
-
-		// Check if reasoning is enabled via user settings
-		const reasoningEnabled = this.options.enableReasoningEffort !== false
-
-		if (modelSupportsReasoning && reasoningEnabled) {
-			// For DeepSeek R1 models, use the R1 format conversion
-			const isR1Model = model.id.includes("DeepSeek-R1")
-			const messageParams = isR1Model
-				? { messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) }
-				: {}
-
+		// Handle DeepSeek R1 models with XML tag parsing
+		if (model.id.includes("DeepSeek-R1")) {
 			const stream = await this.client.chat.completions.create({
 				...this.getCompletionParams(systemPrompt, messages),
-				...messageParams,
+				messages: convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]),
 			})
@@ -98,7 +96,48 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 			for (const processedChunk of matcher.final()) {
 				yield processedChunk
 			}
+			return
+		}
+
+		// Handle DeepSeek V3.1, GLM-4.5, and GLM-4.6 models with reasoning_content parsing
+		const isHybridReasoningModel =
+			model.id.includes("DeepSeek-V3.1") || model.id.includes("GLM-4.5") || model.id.includes("GLM-4.6")
+		const reasoningEnabled = this.options.enableReasoningEffort === true
+
+		if (isHybridReasoningModel && reasoningEnabled) {
+			const stream = await this.client.chat.completions.create(
+				this.getCompletionParams(systemPrompt, messages, true),
+			)
+
+			for await (const chunk of stream) {
+				const delta = chunk.choices[0]?.delta
+
+				// Handle reasoning content from the response
+				if ((delta as any)?.reasoning_content) {
+					yield {
+						type: "reasoning",
+						text: (delta as any).reasoning_content,
+					}
+				}
+
+				// Handle regular text content
+				if (delta?.content) {
+					yield {
+						type: "text",
+						text: delta.content,
+					}
+				}
+
+				if (chunk.usage) {
+					yield {
+						type: "usage",
+						inputTokens: chunk.usage.prompt_tokens || 0,
+						outputTokens: chunk.usage.completion_tokens || 0,
+					}
+				}
+			}
 		} else {
+			// For non-reasoning models or when reasoning is disabled, use the base implementation
 			yield* super.createMessage(systemPrompt, messages)
 		}
 	}
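Putting the new path together, a hedged end-to-end sketch: ChutesHandler, the option names, and the chunk types appear in the code and tests above, while the prompt, key handling, and logging are illustrative.

// Illustrative driver, not part of the commit. Assumes a module context (top-level await).
const handler = new ChutesHandler({
	apiModelId: "zai-org/GLM-4.6-FP8",
	chutesApiKey: process.env.CHUTES_API_KEY ?? "",
	enableReasoningEffort: true, // must be strictly true; see the === check above
})

const stream = handler.createMessage("You are a concise assistant.", [
	{ role: "user", content: "Summarize this diff." },
])

for await (const chunk of stream) {
	if (chunk.type === "reasoning") {
		process.stdout.write(`[thinking] ${chunk.text}`)
	} else if (chunk.type === "text") {
		process.stdout.write(chunk.text)
	} else if (chunk.type === "usage") {
		console.log(`\n${chunk.inputTokens} in / ${chunk.outputTokens} out tokens`)
	}
}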
