Skip to content

Commit 47a3db3

Browse files
authored
Merge branch 'main' into filepart-from-path
2 parents fa7aabf + eae558b commit 47a3db3

File tree

69 files changed

+3634
-495
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+3634
-495
lines changed

docs/.hooks/main.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@
1616

1717
def on_page_markdown(markdown: str, page: Page, config: Config, files: Files) -> str:
1818
"""Called on each file after it is read and before it is converted to HTML."""
19-
relative_path_root = (DOCS_ROOT / page.file.src_uri).parent
20-
markdown = inject_snippets(markdown, relative_path_root)
19+
relative_path = DOCS_ROOT / page.file.src_uri
20+
markdown = inject_snippets(markdown, relative_path.parent)
2121
markdown = replace_uv_python_run(markdown)
2222
markdown = render_examples(markdown)
2323
markdown = render_video(markdown)
24-
markdown = create_gateway_toggle(markdown, relative_path_root)
24+
markdown = create_gateway_toggle(markdown, relative_path)
2525
return markdown
2626

2727

@@ -120,13 +120,13 @@ def sub_cf_video(m: re.Match[str]) -> str:
120120
"""
121121

122122

123-
def create_gateway_toggle(markdown: str, relative_path_root: Path) -> str:
123+
def create_gateway_toggle(markdown: str, relative_path: Path) -> str:
124124
"""Transform Python code blocks with Agent() calls to show both Pydantic AI and Gateway versions."""
125125
# Pattern matches Python code blocks with or without attributes, and optional annotation definitions after
126126
# Annotation definitions are numbered list items like "1. Some text" that follow the code block
127127
return re.sub(
128128
r'```py(?:thon)?(?: *\{?([^}\n]*)\}?)?\n(.*?)\n```(\n\n(?:\d+\..+?\n)+?\n)?',
129-
lambda m: transform_gateway_code_block(m, relative_path_root),
129+
lambda m: transform_gateway_code_block(m, relative_path),
130130
markdown,
131131
flags=re.MULTILINE | re.DOTALL,
132132
)
@@ -136,7 +136,7 @@ def create_gateway_toggle(markdown: str, relative_path_root: Path) -> str:
136136
GATEWAY_MODELS = ('anthropic', 'openai', 'openai-responses', 'openai-chat', 'bedrock', 'google-vertex', 'groq')
137137

138138

139-
def transform_gateway_code_block(m: re.Match[str], relative_path_root: Path) -> str:
139+
def transform_gateway_code_block(m: re.Match[str], relative_path: Path) -> str:
140140
"""Transform a single code block to show both versions if it contains Agent() calls."""
141141
attrs = m.group(1) or ''
142142
code = m.group(2)
@@ -186,9 +186,9 @@ def replace_agent_model(match: re.Match[str]) -> str:
186186

187187
# Build attributes string
188188
docs_path = DOCS_ROOT / 'gateway'
189-
relative_path = docs_path.relative_to(relative_path_root, walk_up=True)
190-
link = f"<a href='{relative_path}' style='float: right;'>Learn about Gateway</a>"
191189

190+
relative_path_to_gateway = docs_path.relative_to(relative_path, walk_up=True)
191+
link = f"<a href='{relative_path_to_gateway}' style='float: right;'>Learn about Gateway</a>"
192192
attrs_str = f' {{{attrs}}}' if attrs else ''
193193

194194
if 'title="' in attrs:

docs/deferred-tools.md

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ PROTECTED_FILES = {'.env'}
4747
@agent.tool
4848
def update_file(ctx: RunContext, path: str, content: str) -> str:
4949
if path in PROTECTED_FILES and not ctx.tool_call_approved:
50-
raise ApprovalRequired
50+
raise ApprovalRequired(metadata={'reason': 'protected'}) # (1)!
5151
return f'File {path!r} updated: {content!r}'
5252

5353

@@ -77,6 +77,7 @@ DeferredToolRequests(
7777
tool_call_id='delete_file',
7878
),
7979
],
80+
metadata={'update_file_dotenv': {'reason': 'protected'}},
8081
)
8182
"""
8283

@@ -175,6 +176,8 @@ print(result.all_messages())
175176
"""
176177
```
177178

179+
1. The optional `metadata` parameter can attach arbitrary context to deferred tool calls, accessible in `DeferredToolRequests.metadata` keyed by `tool_call_id`.
180+
178181
_(This example is complete, it can be run "as is")_
179182

180183
## External Tool Execution
@@ -209,13 +212,13 @@ from pydantic_ai import (
209212

210213
@dataclass
211214
class TaskResult:
212-
tool_call_id: str
215+
task_id: str
213216
result: Any
214217

215218

216-
async def calculate_answer_task(tool_call_id: str, question: str) -> TaskResult:
219+
async def calculate_answer_task(task_id: str, question: str) -> TaskResult:
217220
await asyncio.sleep(1)
218-
return TaskResult(tool_call_id=tool_call_id, result=42)
221+
return TaskResult(task_id=task_id, result=42)
219222

220223

221224
agent = Agent('openai:gpt-5', output_type=[str, DeferredToolRequests])
@@ -225,12 +228,11 @@ tasks: list[asyncio.Task[TaskResult]] = []
225228

226229
@agent.tool
227230
async def calculate_answer(ctx: RunContext, question: str) -> str:
228-
assert ctx.tool_call_id is not None
229-
230-
task = asyncio.create_task(calculate_answer_task(ctx.tool_call_id, question)) # (1)!
231+
task_id = f'task_{len(tasks)}' # (1)!
232+
task = asyncio.create_task(calculate_answer_task(task_id, question))
231233
tasks.append(task)
232234

233-
raise CallDeferred
235+
raise CallDeferred(metadata={'task_id': task_id}) # (2)!
234236

235237

236238
async def main():
@@ -252,17 +254,19 @@ async def main():
252254
)
253255
],
254256
approvals=[],
257+
metadata={'pyd_ai_tool_call_id': {'task_id': 'task_0'}},
255258
)
256259
"""
257260

258-
done, _ = await asyncio.wait(tasks) # (2)!
261+
done, _ = await asyncio.wait(tasks) # (3)!
259262
task_results = [task.result() for task in done]
260-
task_results_by_tool_call_id = {result.tool_call_id: result.result for result in task_results}
263+
task_results_by_task_id = {result.task_id: result.result for result in task_results}
261264

262265
results = DeferredToolResults()
263266
for call in requests.calls:
264267
try:
265-
result = task_results_by_tool_call_id[call.tool_call_id]
268+
task_id = requests.metadata[call.tool_call_id]['task_id']
269+
result = task_results_by_task_id[task_id]
266270
except KeyError:
267271
result = ModelRetry('No result for this tool call was found.')
268272

@@ -324,8 +328,9 @@ async def main():
324328
"""
325329
```
326330

327-
1. In reality, you'd likely use Celery or a similar task queue to run the task in the background.
328-
2. In reality, this would typically happen in a separate process that polls for the task status or is notified when all pending tasks are complete.
331+
1. Generate a task ID that can be tracked independently of the tool call ID.
332+
2. The optional `metadata` parameter passes the `task_id` so it can be matched with results later, accessible in `DeferredToolRequests.metadata` keyed by `tool_call_id`.
333+
3. In reality, this would typically happen in a separate process that polls for the task status or is notified when all pending tasks are complete.
329334

330335
_(This example is complete, it can be run "as is" — you'll need to add `asyncio.run(main())` to run `main`)_
331336

docs/durable_execution/temporal.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ As workflows and activities run in separate processes, any values passed between
172172

173173
To account for these limitations, tool functions and the [event stream handler](#streaming) running inside activities receive a limited version of the agent's [`RunContext`][pydantic_ai.tools.RunContext], and it's your responsibility to make sure that the [dependencies](../dependencies.md) object provided to [`TemporalAgent.run()`][pydantic_ai.durable_exec.temporal.TemporalAgent.run] can be serialized using Pydantic.
174174

175-
Specifically, only the `deps`, `run_id`, `retries`, `tool_call_id`, `tool_name`, `tool_call_approved`, `retry`, `max_retries`, `run_step` and `partial_output` fields are available by default, and trying to access `model`, `usage`, `prompt`, `messages`, or `tracer` will raise an error.
175+
Specifically, only the `deps`, `run_id`, `retries`, `tool_call_id`, `tool_name`, `tool_call_approved`, `retry`, `max_retries`, `run_step`, `usage`, and `partial_output` fields are available by default, and trying to access `model`, `prompt`, `messages`, or `tracer` will raise an error.
176176
If you need one or more of these attributes to be available inside activities, you can create a [`TemporalRunContext`][pydantic_ai.durable_exec.temporal.TemporalRunContext] subclass with custom `serialize_run_context` and `deserialize_run_context` class methods and pass it to [`TemporalAgent`][pydantic_ai.durable_exec.temporal.TemporalAgent] as `run_context_type`.
177177

178178
### Streaming

docs/gateway.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ status: new
55

66
# Pydantic AI Gateway
77

8-
**[Pydantic AI Gateway](https://pydantic.dev/ai-gateway)** (PAIG) is a unified interface for accessing multiple AI providers with a single key. Features include built-in OpenTelemetry observability, real-time cost monitoring, failover management, and native integration with the other tools in the [Pydantic stack](https://pydantic.dev/).
8+
**[Pydantic AI Gateway](https://pydantic.dev/ai-gateway)** is a unified interface for accessing multiple AI providers with a single key. Features include built-in OpenTelemetry observability, real-time cost monitoring, failover management, and native integration with the other tools in the [Pydantic stack](https://pydantic.dev/).
99

1010
!!! note "Free while in Beta"
1111
The Pydantic AI Gateway is currently in Beta. You can bring your own key (BYOK) or buy inference through the Gateway (we will eat the card fee for now).
@@ -26,8 +26,8 @@ To help you get started with [Pydantic AI Gateway](https://gateway.pydantic.dev)
2626
- **BYOK and managed providers:** Bring your own API keys (BYOK) from LLM providers, or pay for inference directly through the platform.
2727
- **Multi-provider support:** Access models from OpenAI, Anthropic, Google Vertex, Groq, and AWS Bedrock. _More providers coming soon_.
2828
- **Backend observability:** Log every request through [Pydantic Logfire](https://pydantic.dev/logfire) or any OpenTelemetry backend (_coming soon_).
29-
- **Zero translation**: Unlike traditional AI gateways that translate everything to one common schema, PAIG allows requests to flow through directly in each provider's native format. This gives you immediate access to the new model features as soon as they are released.
30-
- **Open source with self-hosting**: PAIG's core is [open source](https://github.com/pydantic/pydantic-ai-gateway/) (under [AGPL-3.0](https://www.gnu.org/licenses/agpl-3.0.en.html)), allowing self-hosting with file-based configuration, instead of using the managed service.
29+
- **Zero translation**: Unlike traditional AI gateways that translate everything to one common schema, **Pydantic AI Gateway** allows requests to flow through directly in each provider's native format. This gives you immediate access to the new model features as soon as they are released.
30+
- **Open source with self-hosting**: Pydantic AI Gateway core is [open source](https://github.com/pydantic/pydantic-ai-gateway/) (under [AGPL-3.0](https://www.gnu.org/licenses/agpl-3.0.en.html)), allowing self-hosting with file-based configuration, instead of using the managed service.
3131
- **Enterprise ready**: Includes SSO (with OIDC support), granular permissions, and flexible deployment options. Deploy to your Cloudflare account, or run on-premises with our [consulting support](https://pydantic.dev/contact).
3232

3333
```python {title="hello_world.py"}
@@ -80,7 +80,7 @@ Users can only create personal keys, that will inherit spending caps from both U
8080
## Usage
8181

8282
After setting up your account with the instructions above, you will be able to make an AI model request with the Pydantic AI Gateway.
83-
The code snippets below show how you can use PAIG with different frameworks and SDKs.
83+
The code snippets below show how you can use Pydantic AI Gateway with different frameworks and SDKs.
8484
You can add `gateway/` as a prefix to every known provider.
8585

8686
To use different models, change the model string `gateway/<api_format>:<model_name>` to other models offered by the supported providers.
@@ -114,7 +114,7 @@ Before you start, make sure you are on version 1.16 or later of `pydantic-ai`. T
114114
Set the `PYDANTIC_AI_GATEWAY_API_KEY` environment variable to your Gateway API key:
115115

116116
```bash
117-
export PYDANTIC_AI_GATEWAY_API_KEY="YOUR_PAIG_TOKEN"
117+
export PYDANTIC_AI_GATEWAY_API_KEY="YOUR_PYDANTIC_AI_GATEWAY_API_KEY"
118118
```
119119

120120
You can access multiple models with the same API key, as shown in the code snippet below.
@@ -140,10 +140,10 @@ Set your gateway credentials as environment variables:
140140

141141
```bash
142142
export ANTHROPIC_BASE_URL="https://gateway.pydantic.dev/proxy/anthropic"
143-
export ANTHROPIC_AUTH_TOKEN="YOUR_PAIG_TOKEN"
143+
export ANTHROPIC_AUTH_TOKEN="YOUR_PYDANTIC_AI_GATEWAY_API_KEY"
144144
```
145145

146-
Replace `YOUR_PAIG_TOKEN` with the API key from the Keys page.
146+
Replace `YOUR_PYDANTIC_AI_GATEWAY_API_KEY` with the API key from the Keys page.
147147

148148
Launch Claude Code by typing `claude`. All requests will now route through the Pydantic AI Gateway.
149149

docs/logfire.md

Lines changed: 16 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -106,49 +106,30 @@ We can also query data with SQL in Logfire to monitor the performance of an appl
106106

107107
### Monitoring HTTP Requests
108108

109-
!!! tip "\"F**k you, show me the prompt.\""
110-
As per Hamel Husain's influential 2024 blog post ["Fuck You, Show Me The Prompt."](https://hamel.dev/blog/posts/prompt/)
111-
(bear with the capitalization, the point is valid), it's often useful to be able to view the raw HTTP requests and responses made to model providers.
109+
As per Hamel Husain's influential 2024 blog post ["Fuck You, Show Me The Prompt."](https://hamel.dev/blog/posts/prompt/)
110+
(bear with the capitalization, the point is valid), it's often useful to be able to view the raw HTTP requests and responses made to model providers.
112111

113-
To observe raw HTTP requests made to model providers, you can use Logfire's [HTTPX instrumentation](https://logfire.pydantic.dev/docs/integrations/http-clients/httpx/) since all provider SDKs use the [HTTPX](https://www.python-httpx.org/) library internally.
112+
To observe raw HTTP requests made to model providers, you can use Logfire's [HTTPX instrumentation](https://logfire.pydantic.dev/docs/integrations/http-clients/httpx/) since all provider SDKs (except for [Bedrock](models/bedrock.md)) use the [HTTPX](https://www.python-httpx.org/) library internally:
114113

115-
=== "With HTTP instrumentation"
116114

117-
```py {title="with_logfire_instrument_httpx.py" hl_lines="7"}
118-
import logfire
119-
120-
from pydantic_ai import Agent
121-
122-
logfire.configure()
123-
logfire.instrument_pydantic_ai()
124-
logfire.instrument_httpx(capture_all=True) # (1)!
125-
agent = Agent('openai:gpt-5')
126-
result = agent.run_sync('What is the capital of France?')
127-
print(result.output)
128-
#> The capital of France is Paris.
129-
```
130-
131-
1. See the [`logfire.instrument_httpx` docs][logfire.Logfire.instrument_httpx] for more details; `capture_all=True` means both headers and body are captured for both the request and response.
132-
133-
![Logfire with HTTPX instrumentation](img/logfire-with-httpx.png)
134-
135-
=== "Without HTTP instrumentation"
115+
```py {title="with_logfire_instrument_httpx.py" hl_lines="7"}
116+
import logfire
136117

137-
```py {title="without_logfire_instrument_httpx.py"}
138-
import logfire
118+
from pydantic_ai import Agent
139119

140-
from pydantic_ai import Agent
120+
logfire.configure()
121+
logfire.instrument_pydantic_ai()
122+
logfire.instrument_httpx(capture_all=True) # (1)!
141123

142-
logfire.configure()
143-
logfire.instrument_pydantic_ai()
124+
agent = Agent('openai:gpt-5')
125+
result = agent.run_sync('What is the capital of France?')
126+
print(result.output)
127+
#> The capital of France is Paris.
128+
```
144129

145-
agent = Agent('openai:gpt-5')
146-
result = agent.run_sync('What is the capital of France?')
147-
print(result.output)
148-
#> The capital of France is Paris.
149-
```
130+
1. See the [`logfire.instrument_httpx` docs][logfire.Logfire.instrument_httpx] for more details; `capture_all=True` means both headers and body are captured for both the request and response.
150131

151-
![Logfire without HTTPX instrumentation](img/logfire-without-httpx.png)
132+
![Logfire with HTTPX instrumentation](img/logfire-with-httpx.png)
152133

153134
## Using OpenTelemetry
154135

docs/mcp/client.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,29 @@ calculator_server = MCPServerSSE(
338338
agent = Agent('openai:gpt-5', toolsets=[weather_server, calculator_server])
339339
```
340340

341+
## Server Instructions
342+
343+
MCP servers can provide instructions during initialization that give context about how to best interact with the server's tools. These instructions are accessible via the [`instructions`][pydantic_ai.mcp.MCPServer.instructions] property after the server connection is established.
344+
345+
```python {title="mcp_server_instructions.py"}
346+
from pydantic_ai import Agent
347+
from pydantic_ai.mcp import MCPServerStreamableHTTP
348+
349+
server = MCPServerStreamableHTTP('http://localhost:8000/mcp')
350+
agent = Agent('openai:gpt-5', toolsets=[server])
351+
352+
@agent.instructions
353+
async def mcp_server_instructions():
354+
return server.instructions # (1)!
355+
356+
async def main():
357+
result = await agent.run('What is 7 plus 5?')
358+
print(result.output)
359+
#> The answer is 12.
360+
```
361+
362+
1. The server connection is guaranteed to be established by this point, so `server.instructions` is available.
363+
341364
## Tool metadata
342365

343366
MCP tools can include metadata that provides additional information about the tool's characteristics, which can be useful when [filtering tools][pydantic_ai.toolsets.FilteredToolset]. The `meta`, `annotations`, and `output_schema` fields can be found on the `metadata` dict on the [`ToolDefinition`][pydantic_ai.tools.ToolDefinition] object that's passed to filter functions.

docs/models/anthropic.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ agent = Agent(model)
8383
Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching:
8484

8585
1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it
86-
2. **Cache System Instructions**: Enable the [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] [model setting](../agents.md#model-run-settings) to cache your system prompt
87-
3. **Cache Tool Definitions**: Enable the [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] [model setting](../agents.md#model-run-settings) to cache your tool definitions
86+
2. **Cache System Instructions**: Set [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
87+
3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
8888

8989
You can combine all three strategies for maximum savings:
9090

@@ -96,8 +96,9 @@ agent = Agent(
9696
'anthropic:claude-sonnet-4-5',
9797
system_prompt='Detailed instructions...',
9898
model_settings=AnthropicModelSettings(
99+
# Use True for default 5m TTL, or specify '5m' / '1h' directly
99100
anthropic_cache_instructions=True,
100-
anthropic_cache_tool_definitions=True,
101+
anthropic_cache_tool_definitions='1h', # Longer cache for tool definitions
101102
),
102103
)
103104

@@ -134,7 +135,7 @@ agent = Agent(
134135
'anthropic:claude-sonnet-4-5',
135136
system_prompt='Instructions...',
136137
model_settings=AnthropicModelSettings(
137-
anthropic_cache_instructions=True
138+
anthropic_cache_instructions=True # Default 5m TTL
138139
),
139140
)
140141

docs/models/google.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -214,22 +214,22 @@ from pydantic_ai.models.google import GoogleModel, GoogleModelSettings
214214
settings = GoogleModelSettings(
215215
temperature=0.2,
216216
max_tokens=1024,
217-
google_thinking_config={'thinking_budget': 2048},
217+
google_thinking_config={'thinking_level': 'low'},
218218
google_safety_settings=[
219219
{
220220
'category': HarmCategory.HARM_CATEGORY_HATE_SPEECH,
221221
'threshold': HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
222222
}
223223
]
224224
)
225-
model = GoogleModel('gemini-2.5-flash')
225+
model = GoogleModel('gemini-2.5-pro')
226226
agent = Agent(model, model_settings=settings)
227227
...
228228
```
229229

230230
### Disable thinking
231231

232-
You can disable thinking by setting the `thinking_budget` to `0` on the `google_thinking_config`:
232+
On models older than Gemini 2.5 Pro, you can disable thinking by setting the `thinking_budget` to `0` on the `google_thinking_config`:
233233

234234
```python
235235
from pydantic_ai import Agent

docs/toolsets.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,7 @@ DeferredToolRequests(
362362
tool_call_id='pyd_ai_tool_call_id__temperature_fahrenheit',
363363
),
364364
],
365+
metadata={},
365366
)
366367
"""
367368

0 commit comments

Comments
 (0)