pydantic
diff --git a/‎docs/durable_execution/prefect.md‎
Lines changed: 12 additions & 7 deletions b/‎docs/durable_execution/prefect.md‎
Lines changed: 12 additions & 7 deletions
diff --git a/‎docs/mcp/client.md‎
Lines changed: 10 additions & 20 deletions b/‎docs/mcp/client.md‎
Lines changed: 10 additions & 20 deletions
diff --git a/‎docs/models/cohere.md‎
Lines changed: 4 additions & 4 deletions b/‎docs/models/cohere.md‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎pydantic_ai_slim/pydantic_ai/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎pydantic_ai_slim/pydantic_ai/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pydantic_ai_slim/pydantic_ai/_agent_graph.py‎
Lines changed: 31 additions & 6 deletions b/‎pydantic_ai_slim/pydantic_ai/_agent_graph.py‎
Lines changed: 31 additions & 6 deletions
diff --git a/‎pydantic_ai_slim/pydantic_ai/agent/__init__.py‎
Lines changed: 8 additions & 8 deletions b/‎pydantic_ai_slim/pydantic_ai/agent/__init__.py‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎pydantic_ai_slim/pydantic_ai/builtin_tools.py‎
Lines changed: 35 additions & 4 deletions b/‎pydantic_ai_slim/pydantic_ai/builtin_tools.py‎
Lines changed: 35 additions & 4 deletions
diff --git a/‎pydantic_ai_slim/pydantic_ai/exceptions.py‎
Lines changed: 5 additions & 0 deletions b/‎pydantic_ai_slim/pydantic_ai/exceptions.py‎
Lines changed: 5 additions & 0 deletions
@@ -255,20 +255,23 @@ from prefect import flow
 from pydantic_ai import Agent
 from pydantic_ai.durable_exec.prefect import PrefectAgent
 
-agent = Agent(
-    'openai:gpt-4o',
-    name='daily_report_agent',
-    instructions='Generate a daily summary report.',
-)
-
-prefect_agent = PrefectAgent(agent)
 
 @flow
 async def daily_report_flow(user_prompt: str):
     """Generate a daily report using the agent."""
+    agent = Agent(  # (1)!
+        'openai:gpt-4o',
+        name='daily_report_agent',
+        instructions='Generate a daily summary report.',
+    )
+
+    prefect_agent = PrefectAgent(agent)
+
     result = await prefect_agent.run(user_prompt)
     return result.output
 
+
+
 # Serve the flow with a daily schedule
 if __name__ == '__main__':
     daily_report_flow.serve(
@@ -279,6 +282,8 @@ if __name__ == '__main__':
     )
 ```
 
+1. Each flow run executes in an isolated process, and all inputs and dependencies must be serializable. Because `Agent` instances cannot be serialized, instantiate the agent inside the flow rather than at module level.
+
 The `serve()` method accepts scheduling options:
 
 - **`cron`**: Cron schedule string (e.g., `'0 9 * * *'` for daily at 9am)
 
@@ -58,15 +58,13 @@ server = MCPServerStreamableHTTP('http://localhost:8000/mcp')  # (1)!
 agent = Agent('openai:gpt-4o', toolsets=[server])  # (2)!
 
 async def main():
-    async with agent:  # (3)!
-        result = await agent.run('What is 7 plus 5?')
+    result = await agent.run('What is 7 plus 5?')
     print(result.output)
     #> The answer is 12.
 ```
 
 1. Define the MCP server with the URL used to connect.
 2. Create an agent with the MCP server attached.
-3. Create a client session to connect to the server.
 
 _(This example is complete, it can be run "as is" — you'll need to add `asyncio.run(main())` to run `main`)_
 
@@ -122,15 +120,13 @@ agent = Agent('openai:gpt-4o', toolsets=[server])  # (2)!
 
 
 async def main():
-    async with agent:  # (3)!
-        result = await agent.run('What is 7 plus 5?')
+    result = await agent.run('What is 7 plus 5?')
     print(result.output)
     #> The answer is 12.
 ```
 
 1. Define the MCP server with the URL used to connect.
 2. Create an agent with the MCP server attached.
-3. Create a client session to connect to the server.
 
 _(This example is complete, it can be run "as is" — you'll need to add `asyncio.run(main())` to run `main`)_
 
@@ -151,8 +147,7 @@ agent = Agent('openai:gpt-4o', toolsets=[server])
 
 
 async def main():
-    async with agent:
-        result = await agent.run('How many days between 2000-01-01 and 2025-03-18?')
+    result = await agent.run('How many days between 2000-01-01 and 2025-03-18?')
     print(result.output)
     #> There are 9,208 days between January 1, 2000, and March 18, 2025.
 ```
@@ -205,8 +200,7 @@ servers = load_mcp_servers('mcp_config.json')
 agent = Agent('openai:gpt-5', toolsets=servers)
 
 async def main():
-    async with agent:
-        result = await agent.run('What is 7 plus 5?')
+    result = await agent.run('What is 7 plus 5?')
     print(result.output)
 ```
 
@@ -247,8 +241,7 @@ agent = Agent(
 
 
 async def main():
-    async with agent:
-        result = await agent.run('Echo with deps set to 42', deps=42)
+    result = await agent.run('Echo with deps set to 42', deps=42)
     print(result.output)
     #> {"echo_deps":{"echo":"This is an echo message","deps":42}}
 ```
@@ -356,8 +349,7 @@ server = MCPServerSSE(
 agent = Agent('openai:gpt-4o', toolsets=[server])
 
 async def main():
-    async with agent:
-        result = await agent.run('How many days between 2000-01-01 and 2025-03-18?')
+    result = await agent.run('How many days between 2000-01-01 and 2025-03-18?')
     print(result.output)
     #> There are 9,208 days between January 1, 2000, and March 18, 2025.
 ```
@@ -454,9 +446,8 @@ agent = Agent('openai:gpt-4o', toolsets=[server])
 
 
 async def main():
-    async with agent:
-        agent.set_mcp_sampling_model()
-        result = await agent.run('Create an image of a robot in a punk style.')
+    agent.set_mcp_sampling_model()
+    result = await agent.run('Create an image of a robot in a punk style.')
     print(result.output)
     #> Image file written to robot_punk.svg.
 ```
@@ -598,9 +589,8 @@ agent = Agent('openai:gpt-4o', toolsets=[restaurant_server])
 
 async def main():
     """Run the agent to book a restaurant table."""
-    async with agent:
-        result = await agent.run('Book me a table')
-        print(f'\nResult: {result.output}')
+    result = await agent.run('Book me a table')
+    print(f'\nResult: {result.output}')
 
 
 if __name__ == '__main__':
 
@@ -27,7 +27,7 @@ You can then use `CohereModel` by name:
 ```python
 from pydantic_ai import Agent
 
-agent = Agent('cohere:command')
+agent = Agent('cohere:command-r7b-12-2024')
 ...
 ```
 
@@ -37,7 +37,7 @@ Or initialise the model directly with just the model name:
 from pydantic_ai import Agent
 from pydantic_ai.models.cohere import CohereModel
 
-model = CohereModel('command')
+model = CohereModel('command-r7b-12-2024')
 agent = Agent(model)
 ...
 ```
@@ -51,7 +51,7 @@ from pydantic_ai import Agent
 from pydantic_ai.models.cohere import CohereModel
 from pydantic_ai.providers.cohere import CohereProvider
 
-model = CohereModel('command', provider=CohereProvider(api_key='your-api-key'))
+model = CohereModel('command-r7b-12-2024', provider=CohereProvider(api_key='your-api-key'))
 agent = Agent(model)
 ...
 ```
@@ -67,7 +67,7 @@ from pydantic_ai.providers.cohere import CohereProvider
 
 custom_http_client = AsyncClient(timeout=30)
 model = CohereModel(
-    'command',
+    'command-r7b-12-2024',
     provider=CohereProvider(api_key='your-api-key', http_client=custom_http_client),
 )
 agent = Agent(model)
 
@@ -22,6 +22,7 @@
     ApprovalRequired,
     CallDeferred,
     FallbackExceptionGroup,
+    IncompleteToolCall,
     ModelHTTPError,
     ModelRetry,
     UnexpectedModelBehavior,
@@ -124,6 +125,7 @@
     'ModelRetry',
     'ModelHTTPError',
     'FallbackExceptionGroup',
+    'IncompleteToolCall',
     'UnexpectedModelBehavior',
     'UsageLimitExceeded',
     'UserError',
 
@@ -92,9 +92,28 @@ class GraphAgentState:
     retries: int = 0
     run_step: int = 0
 
-    def increment_retries(self, max_result_retries: int, error: BaseException | None = None) -> None:
+    def increment_retries(
+        self,
+        max_result_retries: int,
+        error: BaseException | None = None,
+        model_settings: ModelSettings | None = None,
+    ) -> None:
         self.retries += 1
         if self.retries > max_result_retries:
+            if (
+                self.message_history
+                and isinstance(model_response := self.message_history[-1], _messages.ModelResponse)
+                and model_response.finish_reason == 'length'
+                and model_response.parts
+                and isinstance(tool_call := model_response.parts[-1], _messages.ToolCallPart)
+            ):
+                try:
+                    tool_call.args_as_dict()
+                except Exception:
+                    max_tokens = (model_settings or {}).get('max_tokens') if model_settings else None
+                    raise exceptions.IncompleteToolCall(
+                        f'Model token limit ({max_tokens if max_tokens is not None else "provider default"}) exceeded while emitting a tool call, resulting in incomplete arguments. Increase max tokens or simplify tool call arguments to fit within limit.'
+                    )
             message = f'Exceeded maximum retries ({max_result_retries}) for output validation'
             if error:
                 if isinstance(error, exceptions.UnexpectedModelBehavior) and error.__cause__ is not None:
@@ -568,7 +587,7 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:  # noqa
                     # resubmit the most recent request that resulted in an empty response,
                     # as the empty response and request will not create any items in the API payload,
                     # in the hope the model will return a non-empty response this time.
-                    ctx.state.increment_retries(ctx.deps.max_result_retries)
+                    ctx.state.increment_retries(ctx.deps.max_result_retries, model_settings=ctx.deps.model_settings)
                     self._next_node = ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[]))
                     return
 
@@ -630,7 +649,9 @@ async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:  # noqa
                     )
                     raise ToolRetryError(m)
                 except ToolRetryError as e:
-                    ctx.state.increment_retries(ctx.deps.max_result_retries, e)
+                    ctx.state.increment_retries(
+                        ctx.deps.max_result_retries, error=e, model_settings=ctx.deps.model_settings
+                    )
                     self._next_node = ModelRequestNode[DepsT, NodeRunEndT](_messages.ModelRequest(parts=[e.tool_retry]))
 
             self._events_iterator = _run_stream()
@@ -788,10 +809,14 @@ async def process_tool_calls(  # noqa: C901
             try:
                 result_data = await tool_manager.handle_call(call)
             except exceptions.UnexpectedModelBehavior as e:
-                ctx.state.increment_retries(ctx.deps.max_result_retries, e)
+                ctx.state.increment_retries(
+                    ctx.deps.max_result_retries, error=e, model_settings=ctx.deps.model_settings
+                )
                 raise e  # pragma: lax no cover
             except ToolRetryError as e:
-                ctx.state.increment_retries(ctx.deps.max_result_retries, e)
+                ctx.state.increment_retries(
+                    ctx.deps.max_result_retries, error=e, model_settings=ctx.deps.model_settings
+                )
                 yield _messages.FunctionToolCallEvent(call)
                 output_parts.append(e.tool_retry)
                 yield _messages.FunctionToolResultEvent(e.tool_retry)
@@ -820,7 +845,7 @@ async def process_tool_calls(  # noqa: C901
 
     # Then, we handle unknown tool calls
     if tool_calls_by_kind['unknown']:
-        ctx.state.increment_retries(ctx.deps.max_result_retries)
+        ctx.state.increment_retries(ctx.deps.max_result_retries, model_settings=ctx.deps.model_settings)
         calls_to_run.extend(tool_calls_by_kind['unknown'])
 
     calls_to_run_results: dict[str, DeferredToolResult] = {}
 
@@ -662,14 +662,14 @@ async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None:
         )
 
         try:
-            async with toolset:
-                async with graph.iter(
-                    start_node,
-                    state=state,
-                    deps=graph_deps,
-                    span=use_span(run_span) if run_span.is_recording() else None,
-                    infer_name=False,
-                ) as graph_run:
+            async with graph.iter(
+                start_node,
+                state=state,
+                deps=graph_deps,
+                span=use_span(run_span) if run_span.is_recording() else None,
+                infer_name=False,
+            ) as graph_run:
+                async with toolset:
                     agent_run = AgentRun(graph_run)
                     yield agent_run
                     if (final_result := agent_run.result) is not None and run_span.is_recording():
 
@@ -2,13 +2,12 @@
 
 from abc import ABC
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Literal
+from typing import Annotated, Any, Literal, Union
 
+import pydantic
+from pydantic_core import core_schema
 from typing_extensions import TypedDict
 
-if TYPE_CHECKING:
-    from .builtin_tools import AbstractBuiltinTool
-
 __all__ = (
     'AbstractBuiltinTool',
     'WebSearchTool',
@@ -19,6 +18,8 @@
     'MemoryTool',
 )
 
+_BUILTIN_TOOL_TYPES: dict[str, type[AbstractBuiltinTool]] = {}
+
 
 @dataclass(kw_only=True)
 class AbstractBuiltinTool(ABC):
@@ -32,6 +33,26 @@ class AbstractBuiltinTool(ABC):
     kind: str = 'unknown_builtin_tool'
     """Built-in tool identifier, this should be available on all built-in tools as a discriminator."""
 
+    def __init_subclass__(cls, **kwargs: Any) -> None:
+        super().__init_subclass__(**kwargs)
+        _BUILTIN_TOOL_TYPES[cls.kind] = cls
+
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls, _source_type: Any, handler: pydantic.GetCoreSchemaHandler
+    ) -> core_schema.CoreSchema:
+        if cls is not AbstractBuiltinTool:
+            return handler(cls)
+
+        tools = _BUILTIN_TOOL_TYPES.values()
+        if len(tools) == 1:  # pragma: no cover
+            tools_type = next(iter(tools))
+        else:
+            tools_annotated = [Annotated[tool, pydantic.Tag(tool.kind)] for tool in tools]
+            tools_type = Annotated[Union[tuple(tools_annotated)], pydantic.Discriminator(_tool_discriminator)]  # noqa: UP007
+
+        return handler(tools_type)
+
 
 @dataclass(kw_only=True)
 class WebSearchTool(AbstractBuiltinTool):
@@ -120,6 +141,7 @@ class WebSearchUserLocation(TypedDict, total=False):
     """The timezone of the user's location."""
 
 
+@dataclass(kw_only=True)
 class CodeExecutionTool(AbstractBuiltinTool):
     """A builtin tool that allows your agent to execute code.
 
@@ -134,6 +156,7 @@ class CodeExecutionTool(AbstractBuiltinTool):
     """The kind of tool."""
 
 
+@dataclass(kw_only=True)
 class UrlContextTool(AbstractBuiltinTool):
     """Allows your agent to access contents from URLs.
 
@@ -227,6 +250,7 @@ class ImageGenerationTool(AbstractBuiltinTool):
     """The kind of tool."""
 
 
+@dataclass(kw_only=True)
 class MemoryTool(AbstractBuiltinTool):
     """A builtin tool that allows your agent to use memory.
 
@@ -237,3 +261,10 @@ class MemoryTool(AbstractBuiltinTool):
 
     kind: str = 'memory'
     """The kind of tool."""
+
+
+def _tool_discriminator(tool_data: dict[str, Any] | AbstractBuiltinTool) -> str:
+    if isinstance(tool_data, dict):
+        return tool_data.get('kind', AbstractBuiltinTool.kind)
+    else:
+        return tool_data.kind
@@ -23,6 +23,7 @@
     'UnexpectedModelBehavior',
     'UsageLimitExceeded',
     'ModelHTTPError',
+    'IncompleteToolCall',
     'FallbackExceptionGroup',
 )
 
@@ -168,3 +169,7 @@ class ToolRetryError(Exception):
     def __init__(self, tool_retry: RetryPromptPart):
         self.tool_retry = tool_retry
         super().__init__()
+
+
+class IncompleteToolCall(UnexpectedModelBehavior):
+    """Error raised when a model stops due to token limit while emitting a tool call."""