Skip to content

Commit 376573e

Browse files
Add Cerebras model support with official SDK
Implement CerebrasModel and CerebrasProvider using cerebras-cloud-sdk to provide ultra-fast inference powered by the Cerebras Wafer-Scale Engine.

Key features:
- CerebrasModel extends OpenAIChatModel with web search disabled
- Custom _completions_create override to handle SDK compatibility:
  - Filters OMIT and NOT_GIVEN parameters
  - Removes unsupported web_search_options parameter
  - Converts Cerebras SDK response to OpenAI ChatCompletion format
- CerebrasProvider uses AsyncCerebras client from cerebras-cloud-sdk
- Updated KnownModelName with current Cerebras model list
1 parent ea6372a commit 376573e

File tree

5 files changed

+116
-7
lines changed

5 files changed

+116
-7
lines changed

docs/models/cerebras.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,16 @@
22

33
## Install
44

5-
To use `CerebrasModel`, you need to either install `pydantic-ai`, or install `pydantic-ai-slim` with the `cerebras` optional group:
5+
To use `CerebrasModel`, you need to either install `pydantic-ai`, or install `pydantic-ai-slim` with the `cerebras` optional group (which installs the `cerebras-cloud-sdk`):
66

77
```bash
8-
pip/uv-add "pydantic-ai-slim[cerebras]"
8+
pip install "pydantic-ai-slim[cerebras]"
9+
```
10+
11+
or
12+
13+
```bash
14+
uv add "pydantic-ai-slim[cerebras]"
915
```
1016

1117
## Configuration

pydantic_ai_slim/pydantic_ai/models/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@
134134
'cerebras:llama-3.3-70b',
135135
'cerebras:llama3.1-8b',
136136
'cerebras:qwen-3-235b-a22b-instruct-2507',
137-
'cerebras:qwen-3-235b-a22b-thinking-2507',
138137
'cerebras:qwen-3-32b',
139138
'cerebras:zai-glm-4.6',
140139
'cohere:c4ai-aya-expanse-32b',

pydantic_ai_slim/pydantic_ai/models/cerebras.py

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@
2424
__all__ = ('CerebrasModel', 'CerebrasModelName')
2525

2626
CerebrasModelName = Literal[
27+
'gpt-oss-120b',
2728
'llama-3.3-70b',
28-
'llama-4-scout-17b-16e-instruct',
29+
'llama3.1-8b',
2930
'qwen-3-235b-a22b-instruct-2507',
3031
'qwen-3-32b',
31-
'gpt-oss-120b',
3232
'zai-glm-4.6',
3333
]
3434

@@ -93,3 +93,54 @@ def _cerebras_model_profile(self, model_name: str) -> ModelProfile:
9393
return OpenAIModelProfile(
9494
openai_chat_supports_web_search=False,
9595
).update(base_profile)
96+
97+
async def _completions_create(
    self,
    messages: list[Any],
    stream: bool,
    model_settings: dict[str, Any],
    model_request_parameters: Any,
) -> Any:
    """Run the parent request through a Cerebras-compatible `create` wrapper.

    For the duration of the parent call, `self.client.chat.completions.create`
    is replaced by a wrapper that:

    * drops the `web_search_options` keyword argument,
    * drops every keyword argument whose value is `NOT_GIVEN` or an instance
      of a class named `Omit`,
    * re-validates a non-streaming result as an OpenAI `ChatCompletion`.

    Args:
        messages: Forwarded unchanged to the parent implementation.
        stream: Forwarded to the parent implementation. When true, the SDK
            result is returned without conversion.
        model_settings: Forwarded unchanged to the parent implementation.
        model_request_parameters: Forwarded unchanged to the parent
            implementation.

    Returns:
        Whatever the parent implementation returns.
    """
    from openai._types import NOT_GIVEN
    from openai.types.chat import ChatCompletion

    # Keep a handle on the real SDK method: the wrapper delegates to it and
    # the `finally` clause below puts it back.
    original_create = self.client.chat.completions.create

    async def create_without_web_search(**kwargs: Any) -> Any:
        """Call the Cerebras SDK with sanitized kwargs and adapt the result."""
        kwargs.pop('web_search_options', None)

        # "Omit" is matched by class name rather than identity so that any
        # SDK's own `Omit` sentinel class is caught, not only openai's.
        request_kwargs = {
            key: value
            for key, value in kwargs.items()
            if value is not NOT_GIVEN and type(value).__name__ != 'Omit'
        }

        cerebras_response = await original_create(**request_kwargs)

        if stream:
            # A streaming result is not a single completion object, so
            # validating it as `ChatCompletion` would raise. Hand it back
            # as-is (presumably an async iterator of chunks — TODO confirm
            # the parent consumes the Cerebras chunk type correctly).
            return cerebras_response

        # Round-trip through a plain dict to obtain the OpenAI response type.
        if hasattr(cerebras_response, 'model_dump'):
            response_dict = cerebras_response.model_dump()
        else:
            response_dict = cerebras_response
        return ChatCompletion.model_validate(response_dict)

    # NOTE(review): this patches state on the client object itself. If two
    # requests on the same client overlap, one may capture the other's
    # wrapper as its "original" and later restore it permanently, or have
    # its wrapper removed before the parent calls `create`. A fix needs a
    # design that does not mutate the shared client.
    self.client.chat.completions.create = create_without_web_search  # type: ignore

    try:
        return await super()._completions_create(messages, stream, model_settings, model_request_parameters)  # type: ignore
    finally:
        # Always restore the SDK method, even if the request raised.
        self.client.chat.completions.create = original_create  # type: ignore

pydantic_ai_slim/pydantic_ai/providers/cerebras.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def name(self) -> str:
3131

3232
@property
3333
def base_url(self) -> str:
34-
return 'https://api.cerebras.ai/v1'
34+
return 'https://api.cerebras.ai'
3535

3636
@property
3737
def client(self) -> AsyncCerebras:
@@ -83,7 +83,7 @@ def __init__(
8383
self._client = cerebras_client
8484
else:
8585
api_key = api_key or os.getenv('CEREBRAS_API_KEY')
86-
base_url = base_url or 'https://api.cerebras.ai/v1'
86+
base_url = base_url or 'https://api.cerebras.ai'
8787

8888
if not api_key:
8989
raise UserError(

tests/models/cassettes/test_model_names/test_known_model_names.yaml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,57 @@ interactions:
108108
status:
109109
code: 200
110110
message: OK
111+
- request:
112+
body: ''
113+
headers:
114+
accept:
115+
- application/json
116+
connection:
117+
- keep-alive
118+
host:
119+
- api.cerebras.ai
120+
method: GET
121+
uri: https://api.cerebras.ai/v1/models
122+
response:
123+
headers:
124+
alt-svc:
125+
- h3=":443"; ma=86400
126+
content-length:
127+
- '479'
128+
content-type:
129+
- application/json
130+
referrer-policy:
131+
- strict-origin-when-cross-origin
132+
strict-transport-security:
133+
- max-age=3600; includeSubDomains
134+
parsed_body:
135+
data:
136+
- created: 0
137+
id: llama-3.3-70b
138+
object: model
139+
owned_by: Cerebras
140+
- created: 0
141+
id: llama3.1-8b
142+
object: model
143+
owned_by: Cerebras
144+
- created: 0
145+
id: zai-glm-4.6
146+
object: model
147+
owned_by: Cerebras
148+
- created: 0
149+
id: qwen-3-32b
150+
object: model
151+
owned_by: Cerebras
152+
- created: 0
153+
id: gpt-oss-120b
154+
object: model
155+
owned_by: Cerebras
156+
- created: 0
157+
id: qwen-3-235b-a22b-instruct-2507
158+
object: model
159+
owned_by: Cerebras
160+
object: list
161+
status:
162+
code: 200
163+
message: OK
111164
version: 1

0 commit comments

Comments
 (0)