Commit dfa2dea

unixsysdev committed
feat: add Qwen models with optimized settings and reasoning support
- Add Qwen/Qwen3-Next-80B-A3B-Instruct and Qwen/Qwen3-Next-80B-A3B-Thinking models - Implement optimized temperature settings: 0.7/0.8 for Instruct, 0.6/0.95 for Thinking - Add reasoning support for Qwen Thinking models (similar to DeepSeek-R1) - Fix alphabetical ordering of all Qwen models - Add comprehensive unit tests for both models and reasoning functionality - Keep pricing at 0 as requested (to be verified on chutes.ai) Addresses roomote bot review feedback: - Alphabetical ordering corrected - Reasoning support added for Qwen Thinking models - Pricing maintained at 0 for verification
1 parent 03709fd commit dfa2dea
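The sampling defaults described in the commit message can be summarized with a small standalone sketch. This helper is hypothetical and not part of the commit; the actual logic lives in ChutesHandler.getModel() in the src/api/providers/chutes.ts diff below.

// Hypothetical sketch of the sampling defaults this commit applies to Qwen models.
interface QwenSamplingDefaults {
	temperature: number
	topP: number
}

function qwenSamplingDefaults(modelId: string): QwenSamplingDefaults | undefined {
	if (!modelId.includes("Qwen")) return undefined
	if (modelId.includes("Thinking")) {
		// Thinking variants: lower temperature, wider nucleus sampling.
		return { temperature: 0.6, topP: 0.95 }
	}
	if (modelId.includes("Instruct")) {
		// Instruct variants: slightly higher temperature, tighter top-p.
		return { temperature: 0.7, topP: 0.8 }
	}
	return undefined
}

// Example:
// qwenSamplingDefaults("Qwen/Qwen3-Next-80B-A3B-Thinking") -> { temperature: 0.6, topP: 0.95 }
// qwenSamplingDefaults("Qwen/Qwen3-Next-80B-A3B-Instruct") -> { temperature: 0.7, topP: 0.8 }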

3 files changed: 187 additions, 6 deletions

packages/types/src/providers/chutes.ts

Lines changed: 23 additions & 3 deletions
@@ -18,20 +18,22 @@ export type ChutesModelId =
 	| "deepseek-ai/DeepSeek-V3-Base"
 	| "deepseek-ai/DeepSeek-R1-Zero"
 	| "deepseek-ai/DeepSeek-V3-0324"
+	| "Qwen/Qwen3-14B"
 	| "Qwen/Qwen3-235B-A22B"
 	| "Qwen/Qwen3-235B-A22B-Instruct-2507"
-	| "Qwen/Qwen3-32B"
+	| "Qwen/Qwen3-235B-A22B-Thinking-2507"
 	| "Qwen/Qwen3-30B-A3B"
-	| "Qwen/Qwen3-14B"
+	| "Qwen/Qwen3-32B"
 	| "Qwen/Qwen3-8B"
 	| "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"
+	| "Qwen/Qwen3-Next-80B-A3B-Instruct"
+	| "Qwen/Qwen3-Next-80B-A3B-Thinking"
 	| "microsoft/MAI-DS-R1-FP8"
 	| "tngtech/DeepSeek-R1T-Chimera"
 	| "zai-org/GLM-4.5-Air"
 	| "zai-org/GLM-4.5-FP8"
 	| "moonshotai/Kimi-K2-Instruct-75k"
 	| "moonshotai/Kimi-K2-Instruct-0905"
-	| "Qwen/Qwen3-235B-A22B-Thinking-2507"

 export const chutesDefaultModelId: ChutesModelId = "deepseek-ai/DeepSeek-R1-0528"

@@ -308,4 +310,22 @@ export const chutesModels = {
 		outputPrice: 0.31202496,
 		description: "Qwen3 235B A22B Thinking 2507 model with 262K context window.",
 	},
+	"Qwen/Qwen3-Next-80B-A3B-Instruct": {
+		maxTokens: 32768,
+		contextWindow: 262144,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		description: "Qwen3 Next 80B A3B Instruct model with 262K context window.",
+	},
+	"Qwen/Qwen3-Next-80B-A3B-Thinking": {
+		maxTokens: 32768,
+		contextWindow: 262144,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		description: "Qwen3 Next 80B A3B Thinking model with 262K context window.",
+	},
 } as const satisfies Record<string, ModelInfo>

src/api/providers/__tests__/chutes.spec.ts

Lines changed: 100 additions & 0 deletions
@@ -319,6 +319,106 @@ describe("ChutesHandler", () => {
 		)
 	})

+	it("should return Qwen/Qwen3-Next-80B-A3B-Instruct model with correct configuration", () => {
+		const testModelId: ChutesModelId = "Qwen/Qwen3-Next-80B-A3B-Instruct"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: testModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 32768,
+				contextWindow: 262144,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "Qwen3 Next 80B A3B Instruct model with 262K context window.",
+				temperature: 0.7, // Optimized temperature for Qwen Instruct models
+				topP: 0.8, // Optimized top-p for Qwen Instruct models
+			}),
+		)
+	})
+
+	it("should return Qwen/Qwen3-Next-80B-A3B-Thinking model with correct configuration", () => {
+		const testModelId: ChutesModelId = "Qwen/Qwen3-Next-80B-A3B-Thinking"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: testModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 32768,
+				contextWindow: 262144,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "Qwen3 Next 80B A3B Thinking model with 262K context window.",
+				temperature: 0.6, // Optimized temperature for Qwen Thinking models
+				topP: 0.95, // Optimized top-p for Qwen Thinking models
+			}),
+		)
+	})
+
+	it("should handle Qwen Thinking model reasoning format", async () => {
+		// Override the mock for this specific test
+		mockCreate.mockImplementationOnce(async () => ({
+			[Symbol.asyncIterator]: async function* () {
+				yield {
+					choices: [
+						{
+							delta: { content: "<think>Analyzing the problem..." },
+							index: 0,
+						},
+					],
+					usage: null,
+				}
+				yield {
+					choices: [
+						{
+							delta: { content: "</think>Solution: Use dynamic programming" },
+							index: 0,
+						},
+					],
+					usage: null,
+				}
+				yield {
+					choices: [
+						{
+							delta: {},
+							index: 0,
+						},
+					],
+					usage: { prompt_tokens: 15, completion_tokens: 8 },
+				}
+			},
+		}))
+
+		const systemPrompt = "You are a helpful assistant."
+		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Solve this algorithm problem" }]
+		vi.spyOn(handler, "getModel").mockReturnValue({
+			id: "Qwen/Qwen3-Next-80B-A3B-Thinking",
+			info: { maxTokens: 1024, temperature: 0.6 },
+		} as any)
+
+		const stream = handler.createMessage(systemPrompt, messages)
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks).toEqual([
+			{ type: "reasoning", text: "Analyzing the problem..." },
+			{ type: "text", text: "Solution: Use dynamic programming" },
+			{ type: "usage", inputTokens: 15, outputTokens: 8 },
+		])
+	})
+
 	it("completePrompt method should return text from Chutes API", async () => {
 		const expectedResponse = "This is a test response from Chutes"
 		mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: expectedResponse } }] })

src/api/providers/chutes.ts

Lines changed: 64 additions & 3 deletions
@@ -29,19 +29,25 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 	): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming {
 		const {
 			id: model,
-			info: { maxTokens: max_tokens },
+			info: { maxTokens: max_tokens, topP },
 		} = this.getModel()

 		const temperature = this.options.modelTemperature ?? this.getModel().info.temperature

-		return {
+		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model,
 			max_tokens,
 			temperature,
 			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
 			stream: true,
 			stream_options: { include_usage: true },
 		}
+
+		if (topP !== undefined) {
+			params.top_p = topP
+		}
+
+		return params
 	}

 	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
@@ -80,6 +86,44 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 				}
 			}

+			// Process any remaining content
+			for (const processedChunk of matcher.final()) {
+				yield processedChunk
+			}
+		} else if (model.id.includes("Qwen") && model.id.includes("Thinking")) {
+			// Add reasoning support for Qwen Thinking models
+			const stream = await this.client.chat.completions.create({
+				...this.getCompletionParams(systemPrompt, messages),
+				messages: [{ role: "user", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+			})
+
+			const matcher = new XmlMatcher(
+				"think",
+				(chunk) =>
+					({
+						type: chunk.matched ? "reasoning" : "text",
+						text: chunk.data,
+					}) as const,
+			)
+
+			for await (const chunk of stream) {
+				const delta = chunk.choices[0]?.delta
+
+				if (delta?.content) {
+					for (const processedChunk of matcher.update(delta.content)) {
+						yield processedChunk
+					}
+				}
+
+				if (chunk.usage) {
+					yield {
+						type: "usage",
+						inputTokens: chunk.usage.prompt_tokens || 0,
+						outputTokens: chunk.usage.completion_tokens || 0,
+					}
+				}
+			}
+
 			// Process any remaining content
 			for (const processedChunk of matcher.final()) {
 				yield processedChunk
@@ -92,11 +136,28 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 	override getModel() {
 		const model = super.getModel()
 		const isDeepSeekR1 = model.id.includes("DeepSeek-R1")
+		const isQwenThinking = model.id.includes("Qwen") && model.id.includes("Thinking")
+		const isQwenInstruct = model.id.includes("Qwen") && model.id.includes("Instruct")
+
+		let temperature = this.defaultTemperature
+		let topP: number | undefined
+
+		if (isDeepSeekR1) {
+			temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
+		} else if (isQwenThinking) {
+			temperature = 0.6
+			topP = 0.95
+		} else if (isQwenInstruct) {
+			temperature = 0.7
+			topP = 0.8
+		}
+
 		return {
 			...model,
 			info: {
 				...model.info,
-				temperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : this.defaultTemperature,
+				temperature,
+				...(topP !== undefined && { topP }),
 			},
 		}
 	}

0 commit comments
