Skip to content

Commit 6fa819a

Browse files
kevint-cerebras and sam authored
Added Cerebras as a Provider (RooCodeInc#3810)
* Added Cerebras as a Provider * prettier fix * prettier --------- Co-authored-by: sam <[email protected]>
1 parent 2ca3e9a commit 6fa819a

File tree

11 files changed

+339
-3
lines changed

11 files changed

+339
-3
lines changed

.changeset/cerebras-provider.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
---
2+
"claude-dev": minor
3+
---
4+
5+
Add Cerebras as a new API provider with comprehensive model support. Features include:
6+
7+
- **5 Cerebras models**: llama3.1-8b, llama-4-scout-17b-16e-instruct, llama-3.3-70b, qwen-3-32b, and deepseek-r1-distill-llama-70b
8+
- **Native Cerebras SDK integration** using @cerebras/cerebras_cloud_sdk
9+
- **Reasoning support** for Qwen and DeepSeek R1 Distill models with `<think>` tag handling
10+
- **Streaming responses** with proper error handling and usage tracking
11+
- **Cost calculation** and token counting
12+
- **UI integration** with API key configuration and model selection
13+
- **Free pricing** for all models (set to $0 input/output costs)
14+
15+
Users can now connect to Cerebras's high-performance inference API using their API key and access fast, efficient LLM services directly from within Cline.

.clinerules/cline-overview.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ Key providers include:
164164
- **OpenRouter**: Meta-provider supporting multiple model providers
165165
- **AWS Bedrock**: Integration with Amazon's AI services
166166
- **Gemini**: Google's AI models
167+
- **Cerebras**: High-performance inference with Llama, Qwen, and DeepSeek models
167168
- **Ollama**: Local model hosting
168169
- **LM Studio**: Local model hosting
169170
- **VSCode LM**: VSCode's built-in language models

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ Thanks to [Claude 3.7 Sonnet's agentic coding capabilities](https://www.anthrop
5151

5252
### Use any API and Model
5353

54-
Cline supports API providers like OpenRouter, Anthropic, OpenAI, Google Gemini, AWS Bedrock, Azure, and GCP Vertex. You can also configure any OpenAI compatible API, or use a local model through LM Studio/Ollama. If you're using OpenRouter, the extension fetches their latest model list, allowing you to use the newest models as soon as they're available.
54+
Cline supports API providers like OpenRouter, Anthropic, OpenAI, Google Gemini, AWS Bedrock, Azure, GCP Vertex, and Cerebras. You can also configure any OpenAI compatible API, or use a local model through LM Studio/Ollama. If you're using OpenRouter, the extension fetches their latest model list, allowing you to use the newest models as soon as they're available.
5555

5656
The extension also keeps track of total tokens and API usage cost for the entire task loop and individual requests, keeping you informed of spend every step of the way.
5757

package-lock.json

Lines changed: 51 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@
349349
"@anthropic-ai/vertex-sdk": "^0.6.4",
350350
"@aws-sdk/client-bedrock-runtime": "^3.758.0",
351351
"@bufbuild/protobuf": "^2.2.5",
352+
"@cerebras/cerebras_cloud_sdk": "^1.35.0",
352353
"@google-cloud/vertexai": "^1.9.3",
353354
"@google/genai": "^0.13.0",
354355
"@grpc/grpc-js": "^1.9.15",

src/api/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import { FireworksHandler } from "./providers/fireworks"
2424
import { AskSageHandler } from "./providers/asksage"
2525
import { XAIHandler } from "./providers/xai"
2626
import { SambanovaHandler } from "./providers/sambanova"
27+
import { CerebrasHandler } from "./providers/cerebras"
2728

2829
export interface ApiHandler {
2930
createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
@@ -84,6 +85,8 @@ export function buildApiHandler(configuration: ApiConfiguration): ApiHandler {
8485
return new XAIHandler(options)
8586
case "sambanova":
8687
return new SambanovaHandler(options)
88+
case "cerebras":
89+
return new CerebrasHandler(options)
8790
default:
8891
return new AnthropicHandler(options)
8992
}

src/api/providers/cerebras.ts

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
import { Anthropic } from "@anthropic-ai/sdk"
2+
import Cerebras from "@cerebras/cerebras_cloud_sdk"
3+
import { withRetry } from "../retry"
4+
import { ApiHandlerOptions, ModelInfo, CerebrasModelId, cerebrasDefaultModelId, cerebrasModels } from "@shared/api"
5+
import { ApiHandler } from "../index"
6+
import { ApiStream } from "@api/transform/stream"
7+
8+
export class CerebrasHandler implements ApiHandler {
9+
private options: ApiHandlerOptions
10+
private client: Cerebras
11+
12+
constructor(options: ApiHandlerOptions) {
13+
this.options = options
14+
15+
// Clean and validate the API key
16+
const cleanApiKey = this.options.cerebrasApiKey?.trim()
17+
18+
if (!cleanApiKey) {
19+
throw new Error("Cerebras API key is required")
20+
}
21+
22+
this.client = new Cerebras({
23+
apiKey: cleanApiKey,
24+
timeout: 30000, // 30 second timeout
25+
})
26+
}
27+
28+
@withRetry()
29+
async *createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream {
30+
// Convert Anthropic messages to Cerebras format
31+
const cerebrasMessages: Array<{
32+
role: "system" | "user" | "assistant"
33+
content: string
34+
}> = [{ role: "system", content: systemPrompt }]
35+
36+
// Convert Anthropic messages to Cerebras format
37+
for (const message of messages) {
38+
if (message.role === "user") {
39+
const content = Array.isArray(message.content)
40+
? message.content
41+
.map((block) => {
42+
if (block.type === "text") {
43+
return block.text
44+
} else if (block.type === "image") {
45+
return "[Image content not supported in Cerebras]"
46+
}
47+
return ""
48+
})
49+
.join("\n")
50+
: message.content
51+
cerebrasMessages.push({ role: "user", content })
52+
} else if (message.role === "assistant") {
53+
const content = Array.isArray(message.content)
54+
? message.content
55+
.map((block) => {
56+
if (block.type === "text") {
57+
return block.text
58+
}
59+
return ""
60+
})
61+
.join("\n")
62+
: message.content || ""
63+
cerebrasMessages.push({ role: "assistant", content })
64+
}
65+
}
66+
67+
try {
68+
const stream = await this.client.chat.completions.create({
69+
model: this.getModel().id,
70+
messages: cerebrasMessages,
71+
temperature: 0,
72+
stream: true,
73+
})
74+
75+
// Handle streaming response
76+
let reasoning: string | null = null // Track reasoning content for models that support thinking
77+
const modelId = this.getModel().id
78+
const isReasoningModel = modelId.includes("qwen") || modelId.includes("deepseek-r1-distill")
79+
80+
for await (const chunk of stream as any) {
81+
// Type assertion for the streaming chunk
82+
const streamChunk = chunk as any
83+
84+
if (streamChunk.choices?.[0]?.delta?.content) {
85+
const content = streamChunk.choices[0].delta.content
86+
87+
// Handle reasoning models (Qwen and DeepSeek R1 Distill) that use <think> tags
88+
if (isReasoningModel) {
89+
// Check if we're entering or continuing reasoning mode
90+
if (reasoning || content.includes("<think>")) {
91+
reasoning = (reasoning || "") + content
92+
93+
// Clean the content by removing think tags for display
94+
let cleanContent = content.replace(/<think>/g, "").replace(/<\/think>/g, "")
95+
96+
// Only yield reasoning content if there's actual content after cleaning
97+
if (cleanContent.trim()) {
98+
yield {
99+
type: "reasoning",
100+
reasoning: cleanContent,
101+
}
102+
}
103+
104+
// Check if reasoning is complete
105+
if (reasoning.includes("</think>")) {
106+
reasoning = null
107+
}
108+
} else {
109+
// Regular content outside of thinking tags
110+
yield {
111+
type: "text",
112+
text: content,
113+
}
114+
}
115+
} else {
116+
// Non-reasoning models - just yield text content
117+
yield {
118+
type: "text",
119+
text: content,
120+
}
121+
}
122+
}
123+
124+
// Handle usage information from Cerebras API
125+
// Usage is typically only available in the final chunk
126+
if (streamChunk.usage) {
127+
const totalCost = this.calculateCost({
128+
inputTokens: streamChunk.usage.prompt_tokens || 0,
129+
outputTokens: streamChunk.usage.completion_tokens || 0,
130+
})
131+
132+
yield {
133+
type: "usage",
134+
inputTokens: streamChunk.usage.prompt_tokens || 0,
135+
outputTokens: streamChunk.usage.completion_tokens || 0,
136+
cacheReadTokens: 0,
137+
cacheWriteTokens: 0,
138+
totalCost,
139+
}
140+
}
141+
}
142+
} catch (error) {
143+
throw error
144+
}
145+
}
146+
147+
getModel(): { id: string; info: ModelInfo } {
148+
const modelId = this.options.apiModelId
149+
if (modelId && modelId in cerebrasModels) {
150+
const id = modelId as CerebrasModelId
151+
return { id, info: cerebrasModels[id] }
152+
}
153+
return {
154+
id: cerebrasDefaultModelId,
155+
info: cerebrasModels[cerebrasDefaultModelId],
156+
}
157+
}
158+
159+
private calculateCost({ inputTokens, outputTokens }: { inputTokens: number; outputTokens: number }): number {
160+
const model = this.getModel()
161+
const inputPrice = model.info.inputPrice || 0
162+
const outputPrice = model.info.outputPrice || 0
163+
164+
const inputCost = (inputPrice / 1_000_000) * inputTokens
165+
const outputCost = (outputPrice / 1_000_000) * outputTokens
166+
167+
return inputCost + outputCost
168+
}
169+
}

src/core/storage/state-keys.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ export type SecretKey =
2121
| "xaiApiKey"
2222
| "nebiusApiKey"
2323
| "sambanovaApiKey"
24+
| "cerebrasApiKey"
2425

2526
export type GlobalStateKey =
2627
| "apiProvider"

src/core/storage/state.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ export async function getAllExtensionState(context: vscode.ExtensionContext) {
155155
thinkingBudgetTokens,
156156
reasoningEffort,
157157
sambanovaApiKey,
158+
cerebrasApiKey,
158159
nebiusApiKey,
159160
planActSeparateModelsSettingRaw,
160161
favoritedModelIds,
@@ -244,6 +245,7 @@ export async function getAllExtensionState(context: vscode.ExtensionContext) {
244245
getGlobalState(context, "thinkingBudgetTokens") as Promise<number | undefined>,
245246
getGlobalState(context, "reasoningEffort") as Promise<string | undefined>,
246247
getSecret(context, "sambanovaApiKey") as Promise<string | undefined>,
248+
getSecret(context, "cerebrasApiKey") as Promise<string | undefined>,
247249
getSecret(context, "nebiusApiKey") as Promise<string | undefined>,
248250
getGlobalState(context, "planActSeparateModelsSetting") as Promise<boolean | undefined>,
249251
getGlobalState(context, "favoritedModelIds") as Promise<string[] | undefined>,
@@ -357,6 +359,7 @@ export async function getAllExtensionState(context: vscode.ExtensionContext) {
357359
asksageApiUrl,
358360
xaiApiKey,
359361
sambanovaApiKey,
362+
cerebrasApiKey,
360363
nebiusApiKey,
361364
favoritedModelIds,
362365
requestTimeoutMs,
@@ -451,6 +454,7 @@ export async function updateApiConfiguration(context: vscode.ExtensionContext, a
451454
reasoningEffort,
452455
clineApiKey,
453456
sambanovaApiKey,
457+
cerebrasApiKey,
454458
nebiusApiKey,
455459
favoritedModelIds,
456460
} = apiConfiguration
@@ -512,6 +516,7 @@ export async function updateApiConfiguration(context: vscode.ExtensionContext, a
512516
await updateGlobalState(context, "reasoningEffort", reasoningEffort)
513517
await storeSecret(context, "clineApiKey", clineApiKey)
514518
await storeSecret(context, "sambanovaApiKey", sambanovaApiKey)
519+
await storeSecret(context, "cerebrasApiKey", cerebrasApiKey)
515520
await storeSecret(context, "nebiusApiKey", nebiusApiKey)
516521
await updateGlobalState(context, "favoritedModelIds", favoritedModelIds)
517522
await updateGlobalState(context, "requestTimeoutMs", apiConfiguration.requestTimeoutMs)
@@ -542,6 +547,7 @@ export async function resetExtensionState(context: vscode.ExtensionContext) {
542547
"asksageApiKey",
543548
"xaiApiKey",
544549
"sambanovaApiKey",
550+
"cerebrasApiKey",
545551
"nebiusApiKey",
546552
]
547553
for (const key of secretKeys) {

0 commit comments

Comments
 (0)