diff --git a/packages/types/src/providers/cerebras.ts b/packages/types/src/providers/cerebras.ts index 4765302a4e62..d578db0ddc20 100644 --- a/packages/types/src/providers/cerebras.ts +++ b/packages/types/src/providers/cerebras.ts @@ -3,9 +3,18 @@ import type { ModelInfo } from "../model.js" // https://inference-docs.cerebras.ai/api-reference/chat-completions export type CerebrasModelId = keyof typeof cerebrasModels -export const cerebrasDefaultModelId: CerebrasModelId = "qwen-3-coder-480b-free" +export const cerebrasDefaultModelId: CerebrasModelId = "gpt-oss-120b" export const cerebrasModels = { + "zai-glm-4.6": { + maxTokens: 16_384, + contextWindow: 128000, + supportsImages: false, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + description: "Highly intelligent general-purpose model with ~2000 tokens/s", + }, "qwen-3-coder-480b-free": { maxTokens: 40000, contextWindow: 64000, @@ -14,7 +23,7 @@ export const cerebrasModels = { inputPrice: 0, outputPrice: 0, description: - "SOTA coding model with ~2000 tokens/s ($0 free tier)\n\n• Use this if you don't have a Cerebras subscription\n• 64K context window\n• Rate limits: 150K TPM, 1M TPH/TPD, 10 RPM, 100 RPH/RPD\n\nUpgrade for higher limits: [https://cloud.cerebras.ai/?utm=roocode](https://cloud.cerebras.ai/?utm=roocode)", + "[SOON TO BE DEPRECATED] SOTA coding model with ~2000 tokens/s ($0 free tier)\n\n• Use this if you don't have a Cerebras subscription\n• 64K context window\n• Rate limits: 150K TPM, 1M TPH/TPD, 10 RPM, 100 RPH/RPD\n\nUpgrade for higher limits: [https://cloud.cerebras.ai/?utm=roocode](https://cloud.cerebras.ai/?utm=roocode)", }, "qwen-3-coder-480b": { maxTokens: 40000, @@ -24,7 +33,7 @@ export const cerebrasModels = { inputPrice: 0, outputPrice: 0, description: - "SOTA coding model with ~2000 tokens/s ($50/$250 paid tiers)\n\n• Use this if you have a Cerebras subscription\n• 131K context window with higher rate limits", + "[SOON TO BE DEPRECATED] SOTA coding model with ~2000 tokens/s ($50/$250 paid tiers)\n\n• Use this if you have a Cerebras subscription\n• 131K context window with higher rate limits", }, "qwen-3-235b-a22b-instruct-2507": { maxTokens: 64000, diff --git a/src/api/providers/__tests__/cerebras.spec.ts b/src/api/providers/__tests__/cerebras.spec.ts index ff4d50e6417c..936bd4fd3f49 100644 --- a/src/api/providers/__tests__/cerebras.spec.ts +++ b/src/api/providers/__tests__/cerebras.spec.ts @@ -56,7 +56,7 @@ describe("CerebrasHandler", () => { it("should fallback to default model when apiModelId is not provided", () => { const handlerWithoutModel = new CerebrasHandler({ cerebrasApiKey: "test" }) const { id } = handlerWithoutModel.getModel() - expect(id).toBe("qwen-3-coder-480b") // cerebrasDefaultModelId (routed) + expect(id).toBe("gpt-oss-120b") // cerebrasDefaultModelId }) })