Commit dfa2dea

unixsysdev committed
feat: add Qwen models with optimized settings and reasoning support
- Add Qwen/Qwen3-Next-80B-A3B-Instruct and Qwen/Qwen3-Next-80B-A3B-Thinking models - Implement optimized temperature settings: 0.7/0.8 for Instruct, 0.6/0.95 for Thinking - Add reasoning support for Qwen Thinking models (similar to DeepSeek-R1) - Fix alphabetical ordering of all Qwen models - Add comprehensive unit tests for both models and reasoning functionality - Keep pricing at 0 as requested (to be verified on chutes.ai) Addresses roomote bot review feedback: - Alphabetical ordering corrected - Reasoning support added for Qwen Thinking models - Pricing maintained at 0 for verification
1 parent 03709fd commit dfa2dea
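The sampling defaults described in the commit message can be summarized with a small standalone sketch. This helper is hypothetical and not part of the commit; the actual logic lives in ChutesHandler.getModel() in the src/api/providers/chutes.ts diff below.

// Hypothetical sketch of the sampling defaults this commit applies to Qwen models.
interface QwenSamplingDefaults {
	temperature: number
	topP: number
}

function qwenSamplingDefaults(modelId: string): QwenSamplingDefaults | undefined {
	if (!modelId.includes("Qwen")) return undefined
	if (modelId.includes("Thinking")) {
		// Thinking variants: lower temperature, wider nucleus sampling.
		return { temperature: 0.6, topP: 0.95 }
	}
	if (modelId.includes("Instruct")) {
		// Instruct variants: slightly higher temperature, tighter top-p.
		return { temperature: 0.7, topP: 0.8 }
	}
	return undefined
}

// Example:
// qwenSamplingDefaults("Qwen/Qwen3-Next-80B-A3B-Thinking") -> { temperature: 0.6, topP: 0.95 }
// qwenSamplingDefaults("Qwen/Qwen3-Next-80B-A3B-Instruct") -> { temperature: 0.7, topP: 0.8 }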

3 files changed: 187 additions, 6 deletions

packages/types/src/providers/chutes.ts

Lines changed: 23 additions & 3 deletions
@@ -18,20 +18,22 @@ export type ChutesModelId =
 	| "deepseek-ai/DeepSeek-V3-Base"
 	| "deepseek-ai/DeepSeek-R1-Zero"
 	| "deepseek-ai/DeepSeek-V3-0324"
+	| "Qwen/Qwen3-14B"
 	| "Qwen/Qwen3-235B-A22B"
 	| "Qwen/Qwen3-235B-A22B-Instruct-2507"
-	| "Qwen/Qwen3-32B"
+	| "Qwen/Qwen3-235B-A22B-Thinking-2507"
 	| "Qwen/Qwen3-30B-A3B"
-	| "Qwen/Qwen3-14B"
+	| "Qwen/Qwen3-32B"
 	| "Qwen/Qwen3-8B"
 	| "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"
+	| "Qwen/Qwen3-Next-80B-A3B-Instruct"
+	| "Qwen/Qwen3-Next-80B-A3B-Thinking"
 	| "microsoft/MAI-DS-R1-FP8"
 	| "tngtech/DeepSeek-R1T-Chimera"
 	| "zai-org/GLM-4.5-Air"
 	| "zai-org/GLM-4.5-FP8"
 	| "moonshotai/Kimi-K2-Instruct-75k"
 	| "moonshotai/Kimi-K2-Instruct-0905"
-	| "Qwen/Qwen3-235B-A22B-Thinking-2507"

 export const chutesDefaultModelId: ChutesModelId = "deepseek-ai/DeepSeek-R1-0528"

@@ -308,4 +310,22 @@ export const chutesModels = {
 		outputPrice: 0.31202496,
 		description: "Qwen3 235B A22B Thinking 2507 model with 262K context window.",
 	},
+	"Qwen/Qwen3-Next-80B-A3B-Instruct": {
+		maxTokens: 32768,
+		contextWindow: 262144,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		description: "Qwen3 Next 80B A3B Instruct model with 262K context window.",
+	},
+	"Qwen/Qwen3-Next-80B-A3B-Thinking": {
+		maxTokens: 32768,
+		contextWindow: 262144,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		description: "Qwen3 Next 80B A3B Thinking model with 262K context window.",
+	},
 } as const satisfies Record<string, ModelInfo>

src/api/providers/__tests__/chutes.spec.ts

Lines changed: 100 additions & 0 deletions
@@ -319,6 +319,106 @@ describe("ChutesHandler", () => {
 		)
 	})

+	it("should return Qwen/Qwen3-Next-80B-A3B-Instruct model with correct configuration", () => {
+		const testModelId: ChutesModelId = "Qwen/Qwen3-Next-80B-A3B-Instruct"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: testModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 32768,
+				contextWindow: 262144,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "Qwen3 Next 80B A3B Instruct model with 262K context window.",
+				temperature: 0.7, // Optimized temperature for Qwen Instruct models
+				topP: 0.8, // Optimized top-p for Qwen Instruct models
+			}),
+		)
+	})
+
+	it("should return Qwen/Qwen3-Next-80B-A3B-Thinking model with correct configuration", () => {
+		const testModelId: ChutesModelId = "Qwen/Qwen3-Next-80B-A3B-Thinking"
+		const handlerWithModel = new ChutesHandler({
+			apiModelId: testModelId,
+			chutesApiKey: "test-chutes-api-key",
+		})
+		const model = handlerWithModel.getModel()
+		expect(model.id).toBe(testModelId)
+		expect(model.info).toEqual(
+			expect.objectContaining({
+				maxTokens: 32768,
+				contextWindow: 262144,
+				supportsImages: false,
+				supportsPromptCache: false,
+				inputPrice: 0,
+				outputPrice: 0,
+				description: "Qwen3 Next 80B A3B Thinking model with 262K context window.",
+				temperature: 0.6, // Optimized temperature for Qwen Thinking models
+				topP: 0.95, // Optimized top-p for Qwen Thinking models
+			}),
+		)
+	})
+
+	it("should handle Qwen Thinking model reasoning format", async () => {
+		// Override the mock for this specific test
+		mockCreate.mockImplementationOnce(async () => ({
+			[Symbol.asyncIterator]: async function* () {
+				yield {
+					choices: [
+						{
+							delta: { content: "<think>Analyzing the problem..." },
+							index: 0,
+						},
+					],
+					usage: null,
+				}
+				yield {
+					choices: [
+						{
+							delta: { content: "</think>Solution: Use dynamic programming" },
+							index: 0,
+						},
+					],
+					usage: null,
+				}
+				yield {
+					choices: [
+						{
+							delta: {},
+							index: 0,
+						},
+					],
+					usage: { prompt_tokens: 15, completion_tokens: 8 },
+				}
+			},
+		}))
+
+		const systemPrompt = "You are a helpful assistant."
+		const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Solve this algorithm problem" }]
+		vi.spyOn(handler, "getModel").mockReturnValue({
+			id: "Qwen/Qwen3-Next-80B-A3B-Thinking",
+			info: { maxTokens: 1024, temperature: 0.6 },
+		} as any)
+
+		const stream = handler.createMessage(systemPrompt, messages)
+		const chunks = []
+		for await (const chunk of stream) {
+			chunks.push(chunk)
+		}
+
+		expect(chunks).toEqual([
+			{ type: "reasoning", text: "Analyzing the problem..." },
+			{ type: "text", text: "Solution: Use dynamic programming" },
+			{ type: "usage", inputTokens: 15, outputTokens: 8 },
+		])
+	})
+
 	it("completePrompt method should return text from Chutes API", async () => {
 		const expectedResponse = "This is a test response from Chutes"
 		mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: expectedResponse } }] })

src/api/providers/chutes.ts

Lines changed: 64 additions & 3 deletions
@@ -29,19 +29,25 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 	): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming {
 		const {
 			id: model,
-			info: { maxTokens: max_tokens },
+			info: { maxTokens: max_tokens, topP },
 		} = this.getModel()

 		const temperature = this.options.modelTemperature ?? this.getModel().info.temperature

-		return {
+		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model,
 			max_tokens,
 			temperature,
 			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
 			stream: true,
 			stream_options: { include_usage: true },
 		}
+
+		if (topP !== undefined) {
+			params.top_p = topP
+		}
+
+		return params
 	}

 	override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
@@ -80,6 +86,44 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 				}
 			}

+			// Process any remaining content
+			for (const processedChunk of matcher.final()) {
+				yield processedChunk
+			}
+		} else if (model.id.includes("Qwen") && model.id.includes("Thinking")) {
+			// Add reasoning support for Qwen Thinking models
+			const stream = await this.client.chat.completions.create({
+				...this.getCompletionParams(systemPrompt, messages),
+				messages: [{ role: "user", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+			})
+
+			const matcher = new XmlMatcher(
+				"think",
+				(chunk) =>
+					({
+						type: chunk.matched ? "reasoning" : "text",
+						text: chunk.data,
+					}) as const,
+			)
+
+			for await (const chunk of stream) {
+				const delta = chunk.choices[0]?.delta
+
+				if (delta?.content) {
+					for (const processedChunk of matcher.update(delta.content)) {
+						yield processedChunk
+					}
+				}
+
+				if (chunk.usage) {
+					yield {
+						type: "usage",
+						inputTokens: chunk.usage.prompt_tokens || 0,
+						outputTokens: chunk.usage.completion_tokens || 0,
+					}
+				}
+			}
+
 			// Process any remaining content
 			for (const processedChunk of matcher.final()) {
 				yield processedChunk
@@ -92,11 +136,28 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
 	override getModel() {
 		const model = super.getModel()
 		const isDeepSeekR1 = model.id.includes("DeepSeek-R1")
+		const isQwenThinking = model.id.includes("Qwen") && model.id.includes("Thinking")
+		const isQwenInstruct = model.id.includes("Qwen") && model.id.includes("Instruct")
+
+		let temperature = this.defaultTemperature
+		let topP: number | undefined
+
+		if (isDeepSeekR1) {
+			temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
+		} else if (isQwenThinking) {
+			temperature = 0.6
+			topP = 0.95
+		} else if (isQwenInstruct) {
+			temperature = 0.7
+			topP = 0.8
+		}
+
 		return {
 			...model,
 			info: {
 				...model.info,
-				temperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : this.defaultTemperature,
+				temperature,
+				...(topP !== undefined && { topP }),
 			},
 		}
 	}

0 commit comments
