packages/types/src/providers/cerebras.ts (32 additions, 12 deletions)
@@ -3,44 +3,64 @@ import type { ModelInfo } from "../model.js"
 // https://inference-docs.cerebras.ai/api-reference/chat-completions
 export type CerebrasModelId = keyof typeof cerebrasModels
 
-export const cerebrasDefaultModelId: CerebrasModelId = "qwen-3-235b-a22b-instruct-2507"
+export const cerebrasDefaultModelId: CerebrasModelId = "qwen-3-coder-480b-free"
 
 export const cerebrasModels = {
-	"llama-3.3-70b": {
-		maxTokens: 64000,
+	"qwen-3-coder-480b-free": {
+		maxTokens: 40000,
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		description: "Smart model with ~2600 tokens/s",
+		description:
+			"SOTA coding model with ~2000 tokens/s ($0 free tier)\n\n• Use this if you don't have a Cerebras subscription\n• 64K context window\n• Rate limits: 150K TPM, 1M TPH/TPD, 10 RPM, 100 RPH/RPD\n\nUpgrade for higher limits: [https://cloud.cerebras.ai/?utm=roocode](https://cloud.cerebras.ai/?utm=roocode)",
 	},
-	"qwen-3-32b": {
+	"qwen-3-coder-480b": {
+		maxTokens: 40000,
+		contextWindow: 128000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		description:
+			"SOTA coding model with ~2000 tokens/s ($50/$250 paid tiers)\n\n• Use this if you have a Cerebras subscription\n• 131K context window with higher rate limits",
+	},
+	"qwen-3-235b-a22b-instruct-2507": {
 		maxTokens: 64000,
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		description: "SOTA coding performance with ~2500 tokens/s",
+		description: "Intelligent model with ~1400 tokens/s",
 	},
-	"qwen-3-235b-a22b": {
-		maxTokens: 40000,
-		contextWindow: 40000,
+	"llama-3.3-70b": {
+		maxTokens: 64000,
+		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		description: "SOTA performance with ~1400 tokens/s",
+		description: "Powerful model with ~2600 tokens/s",
 	},
-	"qwen-3-235b-a22b-instruct-2507": {
+	"qwen-3-32b": {
 		maxTokens: 64000,
 		contextWindow: 64000,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		description: "SOTA performance with ~1400 tokens/s",
+		description: "SOTA coding performance with ~2500 tokens/s",
 	},
+	"qwen-3-235b-a22b-thinking-2507": {
+		maxTokens: 40000,
+		contextWindow: 65000,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		description: "SOTA performance with ~1500 tokens/s",
+		supportsReasoningEffort: true,
+	},
 } as const satisfies Record<string, ModelInfo>
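
The `as const satisfies Record<string, ModelInfo>` pattern above is what lets `CerebrasModelId` stay a union of literal model IDs while every entry is still type-checked against `ModelInfo`. A minimal sketch of the same pattern, with `ModelInfo` reduced to a stand-in containing only the fields that appear in this diff (the real type in `../model.js` is richer):

```ts
// Reduced stand-in for the real ModelInfo type (assumption, for illustration only).
interface ModelInfo {
	maxTokens: number
	contextWindow: number
	supportsImages: boolean
	supportsPromptCache: boolean
	inputPrice: number
	outputPrice: number
	description?: string
	supportsReasoningEffort?: boolean
}

// `as const` keeps the literal keys; `satisfies` checks every entry against ModelInfo
// without widening the object's type to Record<string, ModelInfo>.
const models = {
	"qwen-3-coder-480b-free": {
		maxTokens: 40000,
		contextWindow: 64000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
	"qwen-3-coder-480b": {
		maxTokens: 40000,
		contextWindow: 128000,
		supportsImages: false,
		supportsPromptCache: false,
		inputPrice: 0,
		outputPrice: 0,
	},
} as const satisfies Record<string, ModelInfo>

// Literal union "qwen-3-coder-480b-free" | "qwen-3-coder-480b", not just string.
type ModelId = keyof typeof models

const defaultId: ModelId = "qwen-3-coder-480b-free"
console.log(models[defaultId].contextWindow) // 64000
```

This is why the free-tier entry can exist purely as catalog metadata: it gets its own literal key, description, and rate-limit notes without the compiler treating it as a separate shape.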
src/api/providers/cerebras.ts (12 additions, 3 deletions)
@@ -98,10 +98,19 @@ export class CerebrasHandler extends BaseProvider implements SingleCompletionHan
 	}
 
 	getModel(): { id: CerebrasModelId; info: (typeof cerebrasModels)[CerebrasModelId] } {
-		const modelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId
+		const originalModelId = (this.options.apiModelId as CerebrasModelId) || this.defaultProviderModelId
+
+		// Route both qwen coder models to the same actual model ID for API calls
+		// This allows them to have different rate limits/descriptions in the UI
+		// while using the same underlying model
+		let apiModelId = originalModelId
+		if (originalModelId === "qwen-3-coder-480b-free") {
+			apiModelId = "qwen-3-coder-480b"
+		}
 
 		return {
-			id: modelId,
-			info: this.providerModels[modelId],
+			id: apiModelId,
+			info: this.providerModels[originalModelId], // Use original model info for rate limits/descriptions
 		}
 	}
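
The effect of the `getModel()` change is that the free catalog entry behaves as an alias: the UI keeps the free entry's metadata (description, rate-limit notes), while the request sent to Cerebras uses the real `qwen-3-coder-480b` model ID. A standalone sketch of that routing, using a hypothetical `resolveCerebrasModel` helper rather than the actual handler class:

```ts
type CerebrasModelId = "qwen-3-coder-480b-free" | "qwen-3-coder-480b" | "llama-3.3-70b"

// Hypothetical helper mirroring the getModel() logic above (not part of the real handler).
function resolveCerebrasModel(selectedId: CerebrasModelId): { apiModelId: string; infoKey: CerebrasModelId } {
	// The "-free" entry exists only for UI metadata (pricing tier, rate limits);
	// the Cerebras API itself only knows the underlying "qwen-3-coder-480b" model.
	const apiModelId = selectedId === "qwen-3-coder-480b-free" ? "qwen-3-coder-480b" : selectedId
	return { apiModelId, infoKey: selectedId }
}

// The request uses the routed ID, while UI lookups keep the originally selected key.
console.log(resolveCerebrasModel("qwen-3-coder-480b-free"))
// -> { apiModelId: "qwen-3-coder-480b", infoKey: "qwen-3-coder-480b-free" }
```

Keeping `info` keyed by the original ID is the design choice that makes the free/paid split work: both entries hit the same model, but each surfaces its own context window and rate-limit description in the picker.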