diff --git a/packages/types/src/providers/cerebras.ts b/packages/types/src/providers/cerebras.ts index c5ad100123..2bec81562b 100644 --- a/packages/types/src/providers/cerebras.ts +++ b/packages/types/src/providers/cerebras.ts @@ -3,44 +3,64 @@ import type { ModelInfo } from "../model.js" // https://inference-docs.cerebras.ai/api-reference/chat-completions export type CerebrasModelId = keyof typeof cerebrasModels -export const cerebrasDefaultModelId: CerebrasModelId = "qwen-3-235b-a22b-instruct-2507" +export const cerebrasDefaultModelId: CerebrasModelId = "qwen-3-coder-480b-free" export const cerebrasModels = { - "llama-3.3-70b": { - maxTokens: 64000, + "qwen-3-coder-480b-free": { + maxTokens: 40000, contextWindow: 64000, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - description: "Smart model with ~2600 tokens/s", + description: + "SOTA coding model with ~2000 tokens/s ($0 free tier)\n\n• Use this if you don't have a Cerebras subscription\n• 64K context window\n• Rate limits: 150K TPM, 1M TPH/TPD, 10 RPM, 100 RPH/RPD\n\nUpgrade for higher limits: [https://cloud.cerebras.ai/?utm=roocode](https://cloud.cerebras.ai/?utm=roocode)", }, - "qwen-3-32b": { + "qwen-3-coder-480b": { + maxTokens: 40000, + contextWindow: 128000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: + "SOTA coding model with ~2000 tokens/s ($50/$250 paid tiers)\n\n• Use this if you have a Cerebras subscription\n• 131K context window with higher rate limits", + }, + "qwen-3-235b-a22b-instruct-2507": { maxTokens: 64000, contextWindow: 64000, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - description: "SOTA coding performance with ~2500 tokens/s", + description: "Intelligent model with ~1400 tokens/s", }, - "qwen-3-235b-a22b": { - maxTokens: 40000, - contextWindow: 40000, + "llama-3.3-70b": { + maxTokens: 64000, + contextWindow: 64000, supportsImages: false, 
supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - description: "SOTA performance with ~1400 tokens/s", + description: "Powerful model with ~2600 tokens/s", }, - "qwen-3-235b-a22b-instruct-2507": { + "qwen-3-32b": { maxTokens: 64000, contextWindow: 64000, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - description: "SOTA performance with ~1400 tokens/s", + description: "SOTA coding performance with ~2500 tokens/s", + }, + "qwen-3-235b-a22b-thinking-2507": { + maxTokens: 40000, + contextWindow: 65000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "SOTA performance with ~1500 tokens/s", supportsReasoningEffort: true, }, } as const satisfies Record<string, ModelInfo> diff --git a/src/api/providers/__tests__/cerebras.spec.ts b/src/api/providers/__tests__/cerebras.spec.ts index 1ab319ef26..2b7668435f 100644 --- a/src/api/providers/__tests__/cerebras.spec.ts +++ b/src/api/providers/__tests__/cerebras.spec.ts @@ -58,7 +58,7 @@ describe("CerebrasHandler", () => { it("should fallback to default model when apiModelId is not provided", () => { const handlerWithoutModel = new CerebrasHandler({ cerebrasApiKey: "test" }) const { id } = handlerWithoutModel.getModel() - expect(id).toBe("qwen-3-235b-a22b-instruct-2507") // cerebrasDefaultModelId + expect(id).toBe("qwen-3-coder-480b") // cerebrasDefaultModelId (routed) }) }) diff --git a/src/api/providers/cerebras.ts b/src/api/providers/cerebras.ts index 364477866b..a0421844e8 100644 --- a/src/api/providers/cerebras.ts +++ b/src/api/providers/cerebras.ts @@ -98,10 +98,19 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan } getModel(): { id: CerebrasModelId; info: (typeof cerebrasModels)[CerebrasModelId] } { - const modelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId + const originalModelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId + + // Route 
both qwen coder models to the same actual model ID for API calls + // This allows them to have different rate limits/descriptions in the UI + // while using the same underlying model + let apiModelId = originalModelId + if (originalModelId === "qwen-3-coder-480b-free") { + apiModelId = "qwen-3-coder-480b" + } + return { - id: modelId, - info: this.providerModels[modelId], + id: apiModelId, + info: this.providerModels[originalModelId], // Use original model info for rate limits/descriptions } }