26 changes: 23 additions & 3 deletions packages/types/src/providers/chutes.ts
@@ -18,20 +18,22 @@ export type ChutesModelId =
| "deepseek-ai/DeepSeek-V3-Base"
| "deepseek-ai/DeepSeek-R1-Zero"
| "deepseek-ai/DeepSeek-V3-0324"
| "Qwen/Qwen3-14B"
| "Qwen/Qwen3-235B-A22B"
| "Qwen/Qwen3-235B-A22B-Instruct-2507"
| "Qwen/Qwen3-32B"
| "Qwen/Qwen3-235B-A22B-Thinking-2507"
| "Qwen/Qwen3-30B-A3B"
| "Qwen/Qwen3-14B"
| "Qwen/Qwen3-32B"
| "Qwen/Qwen3-8B"
| "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"
| "Qwen/Qwen3-Next-80B-A3B-Instruct"
| "Qwen/Qwen3-Next-80B-A3B-Thinking"
Contributor
I noticed the alphabetical ordering isn't quite right here. The Qwen models should be ordered alphabetically by their full names; currently the entries are interleaved rather than sorted.

Could we reorder them as:

  • Qwen/Qwen3-14B
  • Qwen/Qwen3-235B-A22B
  • Qwen/Qwen3-235B-A22B-Instruct-2507
  • Qwen/Qwen3-235B-A22B-Thinking-2507
  • Qwen/Qwen3-30B-A3B
  • Qwen/Qwen3-32B
  • Qwen/Qwen3-8B
  • Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8
  • Qwen/Qwen3-Next-80B-A3B-Instruct
  • Qwen/Qwen3-Next-80B-A3B-Thinking
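
For reference, a minimal sketch of how the Qwen portion of the union would read once reordered (only these entries move; the surrounding members are elided here):

```typescript
// Sketch of the proposed ordering for the Qwen entries only;
// the DeepSeek, GLM, Kimi, and other members stay where they are.
export type ChutesModelId =
	// ...other entries...
	| "Qwen/Qwen3-14B"
	| "Qwen/Qwen3-235B-A22B"
	| "Qwen/Qwen3-235B-A22B-Instruct-2507"
	| "Qwen/Qwen3-235B-A22B-Thinking-2507"
	| "Qwen/Qwen3-30B-A3B"
	| "Qwen/Qwen3-32B"
	| "Qwen/Qwen3-8B"
	| "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"
	| "Qwen/Qwen3-Next-80B-A3B-Instruct"
	| "Qwen/Qwen3-Next-80B-A3B-Thinking"
	// ...remaining entries...
```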

| "microsoft/MAI-DS-R1-FP8"
| "tngtech/DeepSeek-R1T-Chimera"
| "zai-org/GLM-4.5-Air"
| "zai-org/GLM-4.5-FP8"
| "moonshotai/Kimi-K2-Instruct-75k"
| "moonshotai/Kimi-K2-Instruct-0905"
| "Qwen/Qwen3-235B-A22B-Thinking-2507"

export const chutesDefaultModelId: ChutesModelId = "deepseek-ai/DeepSeek-R1-0528"

@@ -308,4 +310,22 @@ export const chutesModels = {
outputPrice: 0.31202496,
description: "Qwen3 235B A22B Thinking 2507 model with 262K context window.",
},
"Qwen/Qwen3-Next-80B-A3B-Instruct": {
maxTokens: 32768,
contextWindow: 262144,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "Qwen3 Next 80B A3B Instruct model with 262K context window.",
},
"Qwen/Qwen3-Next-80B-A3B-Thinking": {
maxTokens: 32768,
contextWindow: 262144,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
Contributor
Is it intentional that both new models have inputPrice: 0 and outputPrice: 0? I noticed that Qwen3-235B-A22B-Thinking-2507 has actual pricing values. Should we add pricing information for these models when it becomes available?

outputPrice: 0,
description: "Qwen3 Next 80B A3B Thinking model with 262K context window.",
},
} as const satisfies Record<string, ModelInfo>
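
Following up on the pricing question above: if Chutes does publish rates for these models later, the fix is just to swap in the real numbers. A hypothetical sketch, with placeholder values only (not actual Chutes pricing):

```typescript
// Hypothetical sketch: the inputPrice/outputPrice values below are placeholders,
// to be replaced with the real Chutes rates once they are published.
"Qwen/Qwen3-Next-80B-A3B-Instruct": {
	maxTokens: 32768,
	contextWindow: 262144,
	supportsImages: false,
	supportsPromptCache: false,
	inputPrice: 0.1, // placeholder, not a published rate
	outputPrice: 0.3, // placeholder, not a published rate
	description: "Qwen3 Next 80B A3B Instruct model with 262K context window.",
},
```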
104 changes: 102 additions & 2 deletions src/api/providers/__tests__/chutes.spec.ts
@@ -202,7 +202,7 @@ describe("ChutesHandler", () => {
inputPrice: 0,
outputPrice: 0,
description: "Qwen3 235B A22B Instruct 2507 model with 262K context window.",
temperature: 0.5, // Default temperature for non-DeepSeek models
temperature: 0.5, // Default temperature for existing Qwen models
}),
)
})
@@ -270,7 +270,7 @@ describe("ChutesHandler", () => {
inputPrice: 0,
outputPrice: 0,
description: "Qwen3 Coder 480B A35B Instruct FP8 model, optimized for coding tasks.",
temperature: 0.5, // Default temperature for non-DeepSeek models
temperature: 0.5, // Default temperature for existing Qwen models
}),
)
})
@@ -319,6 +319,106 @@ describe("ChutesHandler", () => {
)
})

it("should return Qwen/Qwen3-Next-80B-A3B-Instruct model with correct configuration", () => {
const testModelId: ChutesModelId = "Qwen/Qwen3-Next-80B-A3B-Instruct"
const handlerWithModel = new ChutesHandler({
apiModelId: testModelId,
chutesApiKey: "test-chutes-api-key",
})
const model = handlerWithModel.getModel()
expect(model.id).toBe(testModelId)
expect(model.info).toEqual(
expect.objectContaining({
maxTokens: 32768,
contextWindow: 262144,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "Qwen3 Next 80B A3B Instruct model with 262K context window.",
temperature: 0.7, // Optimized temperature for Qwen Instruct models
topP: 0.8, // Optimized top-p for Qwen Instruct models
}),
)
})

it("should return Qwen/Qwen3-Next-80B-A3B-Thinking model with correct configuration", () => {
const testModelId: ChutesModelId = "Qwen/Qwen3-Next-80B-A3B-Thinking"
const handlerWithModel = new ChutesHandler({
apiModelId: testModelId,
chutesApiKey: "test-chutes-api-key",
})
const model = handlerWithModel.getModel()
expect(model.id).toBe(testModelId)
expect(model.info).toEqual(
expect.objectContaining({
maxTokens: 32768,
contextWindow: 262144,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
description: "Qwen3 Next 80B A3B Thinking model with 262K context window.",
temperature: 0.6, // Optimized temperature for Qwen Thinking models
topP: 0.95, // Optimized top-p for Qwen Thinking models
}),
)
})

it("should handle Qwen Thinking model reasoning format", async () => {
// Override the mock for this specific test
mockCreate.mockImplementationOnce(async () => ({
[Symbol.asyncIterator]: async function* () {
yield {
choices: [
{
delta: { content: "<think>Analyzing the problem..." },
index: 0,
},
],
usage: null,
}
yield {
choices: [
{
delta: { content: "</think>Solution: Use dynamic programming" },
index: 0,
},
],
usage: null,
}
yield {
choices: [
{
delta: {},
index: 0,
},
],
usage: { prompt_tokens: 15, completion_tokens: 8 },
}
},
}))

const systemPrompt = "You are a helpful assistant."
const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Solve this algorithm problem" }]
vi.spyOn(handler, "getModel").mockReturnValue({
id: "Qwen/Qwen3-Next-80B-A3B-Thinking",
info: { maxTokens: 1024, temperature: 0.6 },
} as any)

const stream = handler.createMessage(systemPrompt, messages)
const chunks = []
for await (const chunk of stream) {
chunks.push(chunk)
}

expect(chunks).toEqual([
{ type: "reasoning", text: "Analyzing the problem..." },
{ type: "text", text: "Solution: Use dynamic programming" },
{ type: "usage", inputTokens: 15, outputTokens: 8 },
])
})

it("completePrompt method should return text from Chutes API", async () => {
const expectedResponse = "This is a test response from Chutes"
mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: expectedResponse } }] })
67 changes: 64 additions & 3 deletions src/api/providers/chutes.ts
@@ -29,19 +29,25 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
): OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming {
const {
id: model,
info: { maxTokens: max_tokens },
info: { maxTokens: max_tokens, topP },
} = this.getModel()

const temperature = this.options.modelTemperature ?? this.getModel().info.temperature

return {
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model,
max_tokens,
temperature,
messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
stream: true,
stream_options: { include_usage: true },
}

if (topP !== undefined) {
params.top_p = topP
}

return params
}

override async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
@@ -80,6 +86,44 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
}
}

// Process any remaining content
for (const processedChunk of matcher.final()) {
yield processedChunk
}
} else if (model.id === "Qwen/Qwen3-Next-80B-A3B-Thinking") {
// Add reasoning support for the new Qwen3-Next-80B-A3B-Thinking model
const stream = await this.client.chat.completions.create({
...this.getCompletionParams(systemPrompt, messages),
messages: [{ role: "user", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
})

const matcher = new XmlMatcher(
"think",
(chunk) =>
({
type: chunk.matched ? "reasoning" : "text",
text: chunk.data,
}) as const,
)

for await (const chunk of stream) {
const delta = chunk.choices[0]?.delta

if (delta?.content) {
for (const processedChunk of matcher.update(delta.content)) {
yield processedChunk
}
}

if (chunk.usage) {
yield {
type: "usage",
inputTokens: chunk.usage.prompt_tokens || 0,
outputTokens: chunk.usage.completion_tokens || 0,
}
}
}

// Process any remaining content
for (const processedChunk of matcher.final()) {
yield processedChunk
@@ -92,11 +136,28 @@ export class ChutesHandler extends BaseOpenAiCompatibleProvider<ChutesModelId> {
override getModel() {
const model = super.getModel()
const isDeepSeekR1 = model.id.includes("DeepSeek-R1")
const isQwenNextThinking = model.id === "Qwen/Qwen3-Next-80B-A3B-Thinking"
const isQwenNextInstruct = model.id === "Qwen/Qwen3-Next-80B-A3B-Instruct"

let temperature = this.defaultTemperature
let topP: number | undefined

if (isDeepSeekR1) {
temperature = DEEP_SEEK_DEFAULT_TEMPERATURE
} else if (isQwenNextThinking) {
temperature = 0.6
topP = 0.95
} else if (isQwenNextInstruct) {
temperature = 0.7
topP = 0.8
}

return {
...model,
info: {
...model.info,
temperature: isDeepSeekR1 ? DEEP_SEEK_DEFAULT_TEMPERATURE : this.defaultTemperature,
temperature,
...(topP !== undefined && { topP }),
},
}
}