import { Anthropic } from "@anthropic-ai/sdk"
import Cerebras from "@cerebras/cerebras_cloud_sdk"
import { withRetry } from "../retry"
import { ApiHandlerOptions, ModelInfo, CerebrasModelId, cerebrasDefaultModelId, cerebrasModels } from "@shared/api"
import { ApiHandler } from "../index"
import { ApiStream } from "@api/transform/stream"

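// Streams chat completions from Cerebras's OpenAI-style inference API.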
export class CerebrasHandler implements ApiHandler {
	private options: ApiHandlerOptions
	private client: Cerebras

	constructor(options: ApiHandlerOptions) {
		this.options = options

		// Clean and validate the API key
		const cleanApiKey = this.options.cerebrasApiKey?.trim()

		if (!cleanApiKey) {
			throw new Error("Cerebras API key is required")
		}

		this.client = new Cerebras({
			apiKey: cleanApiKey,
			timeout: 30000, // 30 second timeout
		})
	}

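	// Streams the completion, yielding "reasoning" and "text" chunks plus a final "usage" record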
	@withRetry()
	async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
		// Cerebras takes OpenAI-style chat messages; seed the list with the system prompt
		const cerebrasMessages: Array<{
			role: "system" | "user" | "assistant"
			content: string
		}> = [{ role: "system", content: systemPrompt }]

		// Flatten Anthropic content blocks into plain strings
		for (const message of messages) {
			if (message.role === "user") {
				const content = Array.isArray(message.content)
					? message.content
							.map((block) => {
								if (block.type === "text") {
									return block.text
								} else if (block.type === "image") {
									return "[Image content not supported in Cerebras]"
								}
								return ""
							})
							.join("\n")
					: message.content
				cerebrasMessages.push({ role: "user", content })
			} else if (message.role === "assistant") {
				const content = Array.isArray(message.content)
					? message.content
							.map((block) => {
								if (block.type === "text") {
									return block.text
								}
								return ""
							})
							.join("\n")
					: message.content || ""
				cerebrasMessages.push({ role: "assistant", content })
			}
		}

		const stream = await this.client.chat.completions.create({
			model: this.getModel().id,
			messages: cerebrasMessages,
			temperature: 0,
			stream: true,
		})

		// Handle streaming response
		let reasoning: string | null = null // Track reasoning content for models that support thinking
		const modelId = this.getModel().id
		const isReasoningModel = modelId.includes("qwen") || modelId.includes("deepseek-r1-distill")

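		// Note: tag handling below is chunk-granular; a chunk that mixes plain
		// text with <think> markup is treated entirely as reasoning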
		for await (const chunk of stream as any) {
			// The SDK's chunk typing doesn't expose OpenAI-style delta fields, hence the cast above
			if (chunk.choices?.[0]?.delta?.content) {
				const content = chunk.choices[0].delta.content

				// Handle reasoning models (Qwen and DeepSeek R1 Distill) that use <think> tags
				if (isReasoningModel) {
					// Check if we're entering or continuing reasoning mode
					if (reasoning || content.includes("<think>")) {
						reasoning = (reasoning || "") + content

						// Strip the think tags so only displayable text is yielded
						const cleanContent = content.replace(/<think>/g, "").replace(/<\/think>/g, "")

						// Only yield reasoning content if something remains after cleaning
						if (cleanContent.trim()) {
							yield {
								type: "reasoning",
								reasoning: cleanContent,
							}
						}

						// The closing tag marks the end of the reasoning block
						if (reasoning.includes("</think>")) {
							reasoning = null
						}
					} else {
						// Regular content outside of thinking tags
						yield {
							type: "text",
							text: content,
						}
					}
				} else {
					// Non-reasoning models just yield text content
					yield {
						type: "text",
						text: content,
					}
				}
			}

			// Usage information is typically only present on the final chunk
			if (chunk.usage) {
				const totalCost = this.calculateCost({
					inputTokens: chunk.usage.prompt_tokens || 0,
					outputTokens: chunk.usage.completion_tokens || 0,
				})

				yield {
					type: "usage",
					inputTokens: chunk.usage.prompt_tokens || 0,
					outputTokens: chunk.usage.completion_tokens || 0,
					cacheReadTokens: 0, // prompt caching is not reported by this API
					cacheWriteTokens: 0,
					totalCost,
				}
			}
		}
	}

	getModel(): { id: string; info: ModelInfo } {
		const modelId = this.options.apiModelId
		if (modelId && modelId in cerebrasModels) {
			const id = modelId as CerebrasModelId
			return { id, info: cerebrasModels[id] }
		}
		return {
			id: cerebrasDefaultModelId,
			info: cerebrasModels[cerebrasDefaultModelId],
		}
	}

	private calculateCost({ inputTokens, outputTokens }: { inputTokens: number; outputTokens: number }): number {
		const model = this.getModel()
		// ModelInfo prices are quoted per million tokens
		const inputPrice = model.info.inputPrice || 0
		const outputPrice = model.info.outputPrice || 0

		const inputCost = (inputPrice / 1_000_000) * inputTokens
		const outputCost = (outputPrice / 1_000_000) * outputTokens

		return inputCost + outputCost
	}
}
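
// Usage sketch (illustrative; not part of the original commit). Assumes an
// ApiHandlerOptions value carrying cerebrasApiKey and apiModelId, as the
// constructor and getModel() above expect. The model id shown is hypothetical;
// any key of cerebrasModels works.
async function demoCerebrasStream(): Promise<void> {
	const handler = new CerebrasHandler({
		cerebrasApiKey: process.env.CEREBRAS_API_KEY,
		apiModelId: "llama3.1-8b", // hypothetical id; substitute a key of cerebrasModels
	} as ApiHandlerOptions)

	for await (const chunk of handler.createMessage("You are a helpful assistant.", [
		{ role: "user", content: "Hello!" },
	])) {
		if (chunk.type === "text") {
			process.stdout.write(chunk.text)
		}
	}
}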