
Commit 266bdf4

Merge branch 'main' into pydantic-docs
2 parents 6eb98b5 + d578b42 commit 266bdf4

File tree

69 files changed: +4577 −494 lines changed


docs/deferred-tools.md

Lines changed: 18 additions & 13 deletions

````diff
@@ -47,7 +47,7 @@ PROTECTED_FILES = {'.env'}
 @agent.tool
 def update_file(ctx: RunContext, path: str, content: str) -> str:
     if path in PROTECTED_FILES and not ctx.tool_call_approved:
-        raise ApprovalRequired
+        raise ApprovalRequired(metadata={'reason': 'protected'})  # (1)!
     return f'File {path!r} updated: {content!r}'
 
 
@@ -77,6 +77,7 @@ DeferredToolRequests(
             tool_call_id='delete_file',
         ),
     ],
+    metadata={'update_file_dotenv': {'reason': 'protected'}},
 )
 """
 
@@ -175,6 +176,8 @@ print(result.all_messages())
 """
 ```
 
+1. The optional `metadata` parameter can attach arbitrary context to deferred tool calls, accessible in `DeferredToolRequests.metadata` keyed by `tool_call_id`.
+
 _(This example is complete, it can be run "as is")_
 
 ## External Tool Execution
@@ -209,13 +212,13 @@ from pydantic_ai import (
 
 @dataclass
 class TaskResult:
-    tool_call_id: str
+    task_id: str
     result: Any
 
 
-async def calculate_answer_task(tool_call_id: str, question: str) -> TaskResult:
+async def calculate_answer_task(task_id: str, question: str) -> TaskResult:
     await asyncio.sleep(1)
-    return TaskResult(tool_call_id=tool_call_id, result=42)
+    return TaskResult(task_id=task_id, result=42)
 
 
 agent = Agent('openai:gpt-5', output_type=[str, DeferredToolRequests])
@@ -225,12 +228,11 @@ tasks: list[asyncio.Task[TaskResult]] = []
 
 @agent.tool
 async def calculate_answer(ctx: RunContext, question: str) -> str:
-    assert ctx.tool_call_id is not None
-
-    task = asyncio.create_task(calculate_answer_task(ctx.tool_call_id, question))  # (1)!
+    task_id = f'task_{len(tasks)}'  # (1)!
+    task = asyncio.create_task(calculate_answer_task(task_id, question))
     tasks.append(task)
 
-    raise CallDeferred
+    raise CallDeferred(metadata={'task_id': task_id})  # (2)!
 
 
 async def main():
@@ -252,17 +254,19 @@ async def main():
             )
         ],
         approvals=[],
+        metadata={'pyd_ai_tool_call_id': {'task_id': 'task_0'}},
     )
     """
 
-    done, _ = await asyncio.wait(tasks)  # (2)!
+    done, _ = await asyncio.wait(tasks)  # (3)!
     task_results = [task.result() for task in done]
-    task_results_by_tool_call_id = {result.tool_call_id: result.result for result in task_results}
+    task_results_by_task_id = {result.task_id: result.result for result in task_results}
 
     results = DeferredToolResults()
     for call in requests.calls:
         try:
-            result = task_results_by_tool_call_id[call.tool_call_id]
+            task_id = requests.metadata[call.tool_call_id]['task_id']
+            result = task_results_by_task_id[task_id]
         except KeyError:
             result = ModelRetry('No result for this tool call was found.')
 
@@ -324,8 +328,9 @@ async def main():
     """
 ```
 
-1. In reality, you'd likely use Celery or a similar task queue to run the task in the background.
-2. In reality, this would typically happen in a separate process that polls for the task status or is notified when all pending tasks are complete.
+1. Generate a task ID that can be tracked independently of the tool call ID.
+2. The optional `metadata` parameter passes the `task_id` so it can be matched with results later, accessible in `DeferredToolRequests.metadata` keyed by `tool_call_id`.
+3. In reality, this would typically happen in a separate process that polls for the task status or is notified when all pending tasks are complete.
 
 _(This example is complete, it can be run "as is" — you'll need to add `asyncio.run(main())` to run `main`)_
 
````
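
The `metadata` added in this commit also flows through the approval path shown in the first hunk. A rough sketch of the consuming side, not part of the commit itself; it assumes the existing deferred-tools API, where `DeferredToolRequests.approvals` lists the calls awaiting approval and `DeferredToolResults.approvals` maps `tool_call_id` to a boolean:

```python
from pydantic_ai import DeferredToolRequests, DeferredToolResults


def review_approvals(requests: DeferredToolRequests) -> DeferredToolResults:
    """Hypothetical human-in-the-loop step using the new metadata field."""
    results = DeferredToolResults()
    for call in requests.approvals:
        # Metadata is keyed by tool_call_id; 'reason' was attached via
        # ApprovalRequired(metadata={'reason': 'protected'}) above.
        reason = requests.metadata.get(call.tool_call_id, {}).get('reason')
        results.approvals[call.tool_call_id] = reason != 'protected'
    return results
```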

docs/durable_execution/temporal.md

Lines changed: 1 addition & 1 deletion

````diff
@@ -172,7 +172,7 @@ As workflows and activities run in separate processes, any values passed between
 
 To account for these limitations, tool functions and the [event stream handler](#streaming) running inside activities receive a limited version of the agent's [`RunContext`][pydantic_ai.tools.RunContext], and it's your responsibility to make sure that the [dependencies](../dependencies.md) object provided to [`TemporalAgent.run()`][pydantic_ai.durable_exec.temporal.TemporalAgent.run] can be serialized using Pydantic.
 
-Specifically, only the `deps`, `run_id`, `retries`, `tool_call_id`, `tool_name`, `tool_call_approved`, `retry`, `max_retries`, `run_step` and `partial_output` fields are available by default, and trying to access `model`, `usage`, `prompt`, `messages`, or `tracer` will raise an error.
+Specifically, only the `deps`, `run_id`, `retries`, `tool_call_id`, `tool_name`, `tool_call_approved`, `retry`, `max_retries`, `run_step`, `usage`, and `partial_output` fields are available by default, and trying to access `model`, `prompt`, `messages`, or `tracer` will raise an error.
 If you need one or more of these attributes to be available inside activities, you can create a [`TemporalRunContext`][pydantic_ai.durable_exec.temporal.TemporalRunContext] subclass with custom `serialize_run_context` and `deserialize_run_context` class methods and pass it to [`TemporalAgent`][pydantic_ai.durable_exec.temporal.TemporalAgent] as `run_context_type`.
 
 ### Streaming
````
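
The subclass described in that last paragraph looks roughly like this. This is a sketch only: the exact signature of `serialize_run_context` and the base-class behavior are assumptions, and `prompt` stands in for whichever extra field you need.

```python
from typing import Any

from pydantic_ai import RunContext
from pydantic_ai.durable_exec.temporal import TemporalRunContext


class RunContextWithPrompt(TemporalRunContext):
    """Hypothetical subclass carrying `prompt` across the workflow/activity boundary."""

    @classmethod
    def serialize_run_context(cls, ctx: RunContext) -> dict[str, Any]:
        # Assumption: the base method returns a JSON-safe dict of the default fields
        return {**super().serialize_run_context(ctx), 'prompt': ctx.prompt}
```

You would then pass `run_context_type=RunContextWithPrompt` when constructing the `TemporalAgent`.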

docs/logfire.md

Lines changed: 16 additions & 35 deletions

````diff
@@ -106,49 +106,30 @@ We can also query data with SQL in Logfire to monitor the performance of an application
 
 ### Monitoring HTTP Requests
 
-!!! tip "\"F**k you, show me the prompt.\""
-    As per Hamel Husain's influential 2024 blog post ["Fuck You, Show Me The Prompt."](https://hamel.dev/blog/posts/prompt/)
-    (bear with the capitalization, the point is valid), it's often useful to be able to view the raw HTTP requests and responses made to model providers.
+As per Hamel Husain's influential 2024 blog post ["Fuck You, Show Me The Prompt."](https://hamel.dev/blog/posts/prompt/)
+(bear with the capitalization, the point is valid), it's often useful to be able to view the raw HTTP requests and responses made to model providers.
 
-To observe raw HTTP requests made to model providers, you can use Logfire's [HTTPX instrumentation](https://logfire.pydantic.dev/docs/integrations/http-clients/httpx/) since all provider SDKs use the [HTTPX](https://www.python-httpx.org/) library internally.
+To observe raw HTTP requests made to model providers, you can use Logfire's [HTTPX instrumentation](https://logfire.pydantic.dev/docs/integrations/http-clients/httpx/) since all provider SDKs (except for [Bedrock](models/bedrock.md)) use the [HTTPX](https://www.python-httpx.org/) library internally:
 
-=== "With HTTP instrumentation"
 
-    ```py {title="with_logfire_instrument_httpx.py" hl_lines="7"}
-    import logfire
-
-    from pydantic_ai import Agent
-
-    logfire.configure()
-    logfire.instrument_pydantic_ai()
-    logfire.instrument_httpx(capture_all=True)  # (1)!
-    agent = Agent('openai:gpt-5')
-    result = agent.run_sync('What is the capital of France?')
-    print(result.output)
-    #> The capital of France is Paris.
-    ```
-
-    1. See the [`logfire.instrument_httpx` docs][logfire.Logfire.instrument_httpx] more details, `capture_all=True` means both headers and body are captured for both the request and response.
-
-    ![Logfire with HTTPX instrumentation](img/logfire-with-httpx.png)
-
-=== "Without HTTP instrumentation"
+```py {title="with_logfire_instrument_httpx.py" hl_lines="7"}
+import logfire
 
-    ```py {title="without_logfire_instrument_httpx.py"}
-    import logfire
+from pydantic_ai import Agent
 
-    from pydantic_ai import Agent
+logfire.configure()
+logfire.instrument_pydantic_ai()
+logfire.instrument_httpx(capture_all=True)  # (1)!
 
-    logfire.configure()
-    logfire.instrument_pydantic_ai()
+agent = Agent('openai:gpt-5')
+result = agent.run_sync('What is the capital of France?')
+print(result.output)
+#> The capital of France is Paris.
+```
 
-    agent = Agent('openai:gpt-5')
-    result = agent.run_sync('What is the capital of France?')
-    print(result.output)
-    #> The capital of France is Paris.
-    ```
+1. See the [`logfire.instrument_httpx` docs][logfire.Logfire.instrument_httpx] for more details; `capture_all=True` means both headers and body are captured for both the request and response.
 
-    ![Logfire without HTTPX instrumentation](img/logfire-without-httpx.png)
+![Logfire with HTTPX instrumentation](img/logfire-with-httpx.png)
 
 ## Using OpenTelemetry
 
````

docs/mcp/client.md

Lines changed: 84 additions & 0 deletions

````diff
@@ -338,10 +338,94 @@ calculator_server = MCPServerSSE(
 agent = Agent('openai:gpt-5', toolsets=[weather_server, calculator_server])
 ```
 
+## Server Instructions
+
+MCP servers can provide instructions during initialization that give context about how to best interact with the server's tools. These instructions are accessible via the [`instructions`][pydantic_ai.mcp.MCPServer.instructions] property after the server connection is established.
+
+```python {title="mcp_server_instructions.py"}
+from pydantic_ai import Agent
+from pydantic_ai.mcp import MCPServerStreamableHTTP
+
+server = MCPServerStreamableHTTP('http://localhost:8000/mcp')
+agent = Agent('openai:gpt-5', toolsets=[server])
+
+
+@agent.instructions
+async def mcp_server_instructions():
+    return server.instructions  # (1)!
+
+
+async def main():
+    result = await agent.run('What is 7 plus 5?')
+    print(result.output)
+    #> The answer is 12.
+```
+
+1. The server connection is guaranteed to be established by this point, so `server.instructions` is available.
+
 ## Tool metadata
 
 MCP tools can include metadata that provides additional information about the tool's characteristics, which can be useful when [filtering tools][pydantic_ai.toolsets.FilteredToolset]. The `meta`, `annotations`, and `output_schema` fields can be found on the `metadata` dict on the [`ToolDefinition`][pydantic_ai.tools.ToolDefinition] object that's passed to filter functions.
 
+## Resources
+
+MCP servers can provide [resources](https://modelcontextprotocol.io/docs/concepts/resources) - files, data, or content that can be accessed by the client. Resources in MCP are application-driven, with host applications determining how to incorporate context manually, based on their needs. This means they will _not_ be exposed to the LLM automatically (unless a tool returns a `ResourceLink` or `EmbeddedResource`).
+
+Pydantic AI provides methods to discover and read resources from MCP servers:
+
+- [`list_resources()`][pydantic_ai.mcp.MCPServer.list_resources] - List all available resources on the server
+- [`list_resource_templates()`][pydantic_ai.mcp.MCPServer.list_resource_templates] - List resource templates with parameter placeholders
+- [`read_resource(uri)`][pydantic_ai.mcp.MCPServer.read_resource] - Read the contents of a specific resource by URI
+
+Resources are automatically converted: text content is returned as `str`, and binary content is returned as [`BinaryContent`][pydantic_ai.messages.BinaryContent].
+
+Before consuming resources, we need to run a server that exposes some:
+
+```python {title="mcp_resource_server.py"}
+from mcp.server.fastmcp import FastMCP
+
+mcp = FastMCP('Pydantic AI MCP Server')
+log_level = 'unset'
+
+
+@mcp.resource('resource://user_name.txt', mime_type='text/plain')
+async def user_name_resource() -> str:
+    return 'Alice'
+
+
+if __name__ == '__main__':
+    mcp.run()
+```
+
+Then we can create the client:
+
+```python {title="mcp_resources.py", requires="mcp_resource_server.py"}
+import asyncio
+
+from pydantic_ai.mcp import MCPServerStdio
+
+
+async def main():
+    server = MCPServerStdio('python', args=['-m', 'mcp_resource_server'])
+
+    async with server:
+        # List all available resources
+        resources = await server.list_resources()
+        for resource in resources:
+            print(f' - {resource.name}: {resource.uri} ({resource.mime_type})')
+        #> - user_name_resource: resource://user_name.txt (text/plain)
+
+        # Read a text resource
+        user_name = await server.read_resource('resource://user_name.txt')
+        print(f'Text content: {user_name}')
+        #> Text content: Alice
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
+```
+
+_(This example is complete, it can be run "as is")_
+
 ## Custom TLS / SSL configuration
 
 In some environments you need to tweak how HTTPS connections are established –
````
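
The "Tool metadata" paragraph above is easiest to see in code. A sketch, not from the commit: `filtered()` is the existing toolset API, while the exact shape of the `annotations` entry in `ToolDefinition.metadata` is an assumption here.

```python
from pydantic_ai.mcp import MCPServerStreamableHTTP

server = MCPServerStreamableHTTP('http://localhost:8000/mcp')

# Keep only tools the server marks as read-only. `readOnlyHint` is the standard
# MCP tool annotation; treating the metadata entry as a plain dict is an assumption.
read_only_server = server.filtered(
    lambda ctx, tool_def: bool(
        (tool_def.metadata or {}).get('annotations', {}).get('readOnlyHint')
    )
)
```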

docs/models/anthropic.md

Lines changed: 5 additions & 4 deletions

````diff
@@ -83,8 +83,8 @@ agent = Agent(model)
 Anthropic supports [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) to reduce costs by caching parts of your prompts. Pydantic AI provides three ways to use prompt caching:
 
 1. **Cache User Messages with [`CachePoint`][pydantic_ai.messages.CachePoint]**: Insert a `CachePoint` marker in your user messages to cache everything before it
-2. **Cache System Instructions**: Enable the [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] [model setting](../agents.md#model-run-settings) to cache your system prompt
-3. **Cache Tool Definitions**: Enable the [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] [model setting](../agents.md#model-run-settings) to cache your tool definitions
+2. **Cache System Instructions**: Set [`AnthropicModelSettings.anthropic_cache_instructions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_instructions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
+3. **Cache Tool Definitions**: Set [`AnthropicModelSettings.anthropic_cache_tool_definitions`][pydantic_ai.models.anthropic.AnthropicModelSettings.anthropic_cache_tool_definitions] to `True` (uses 5m TTL by default) or specify `'5m'` / `'1h'` directly
 
 You can combine all three strategies for maximum savings:
 
@@ -96,8 +96,9 @@ agent = Agent(
     'anthropic:claude-sonnet-4-5',
     system_prompt='Detailed instructions...',
     model_settings=AnthropicModelSettings(
+        # Use True for default 5m TTL, or specify '5m' / '1h' directly
         anthropic_cache_instructions=True,
-        anthropic_cache_tool_definitions=True,
+        anthropic_cache_tool_definitions='1h',  # Longer cache for tool definitions
     ),
 )
 
@@ -134,7 +135,7 @@ agent = Agent(
     'anthropic:claude-sonnet-4-5',
     system_prompt='Instructions...',
     model_settings=AnthropicModelSettings(
-        anthropic_cache_instructions=True
+        anthropic_cache_instructions=True  # Default 5m TTL
     ),
 )
 
````
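
For reference, the first strategy (`CachePoint`) isn't shown in these hunks. A sketch of its usage, assuming `run_sync` accepts a list of user content parts as elsewhere in the docs:

```python
from pydantic_ai import Agent
from pydantic_ai.messages import CachePoint

agent = Agent('anthropic:claude-sonnet-4-5')
result = agent.run_sync(
    [
        'Long, stable context worth caching...',
        CachePoint(),  # everything before this marker is cached
        'Now the actual question.',
    ]
)
```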

docs/models/google.md

Lines changed: 3 additions & 3 deletions

````diff
@@ -214,22 +214,22 @@ from pydantic_ai.models.google import GoogleModel, GoogleModelSettings
 settings = GoogleModelSettings(
     temperature=0.2,
     max_tokens=1024,
-    google_thinking_config={'thinking_budget': 2048},
+    google_thinking_config={'thinking_level': 'low'},
     google_safety_settings=[
         {
             'category': HarmCategory.HARM_CATEGORY_HATE_SPEECH,
             'threshold': HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
         }
     ]
 )
-model = GoogleModel('gemini-2.5-flash')
+model = GoogleModel('gemini-2.5-pro')
 agent = Agent(model, model_settings=settings)
 ...
 ```
 
 ### Disable thinking
 
-You can disable thinking by setting the `thinking_budget` to `0` on the `google_thinking_config`:
+On models older than Gemini 2.5 Pro, you can disable thinking by setting the `thinking_budget` to `0` on the `google_thinking_config`:
 
 ```python
 from pydantic_ai import Agent
````
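
The disable-thinking example is cut off by the diff viewer. Per the sentence above, it presumably continues along these lines (a sketch; the model name is illustrative):

```python
from pydantic_ai import Agent
from pydantic_ai.models.google import GoogleModel, GoogleModelSettings

settings = GoogleModelSettings(google_thinking_config={'thinking_budget': 0})
model = GoogleModel('gemini-2.5-flash')  # assumption: a model that accepts thinking_budget
agent = Agent(model, model_settings=settings)
```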

docs/toolsets.md

Lines changed: 1 addition & 0 deletions

````diff
@@ -362,6 +362,7 @@ DeferredToolRequests(
             tool_call_id='pyd_ai_tool_call_id__temperature_fahrenheit',
         ),
     ],
+    metadata={},
 )
 """
 
````
