pydantic
diff --git a/‎pydantic_ai_slim/pydantic_ai/_agent_graph.py‎
Lines changed: 151 additions & 144 deletions b/‎pydantic_ai_slim/pydantic_ai/_agent_graph.py‎
Lines changed: 151 additions & 144 deletions
@@ -562,116 +562,141 @@ async def stream(
     async def _run_stream(  # noqa: C901
         self, ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]]
     ) -> AsyncIterator[_messages.HandleResponseEvent]:
+        # Ensure that the stream is only run once
         if self._events_iterator is None:
-            # Ensure that the stream is only run once
+            run_context = build_run_context(ctx)
 
-            output_schema = ctx.deps.output_schema
+            # This will raise errors for any tool name conflicts
+            ctx.deps.tool_manager = await ctx.deps.tool_manager.for_run_step(run_context)
+            tool_manager = ctx.deps.tool_manager
 
             async def _run_stream() -> AsyncIterator[_messages.HandleResponseEvent]:  # noqa: C901
-                if not self.model_response.parts:
-                    # we got an empty response.
-                    # this sometimes happens with anthropic (and perhaps other models)
-                    # when the model has already returned text along side tool calls
-                    if text_processor := output_schema.text_processor:  # pragma: no branch
-                        # in this scenario, if text responses are allowed, we return text from the most recent model
-                        # response, if any
-                        for message in reversed(ctx.state.message_history):
-                            if isinstance(message, _messages.ModelResponse):
-                                text = ''
-                                for part in message.parts:
-                                    if isinstance(part, _messages.TextPart):
-                                        text += part.content
-                                    elif isinstance(part, _messages.BuiltinToolCallPart):
-                                        # Text parts before a built-in tool call are essentially thoughts,
-                                        # not part of the final result output, so we reset the accumulated text
-                                        text = ''  # pragma: no cover
-                                if text:
-                                    try:
-                                        self._next_node = await self._handle_text_response(ctx, text, text_processor)
-                                        return
-                                    except ToolRetryError:
-                                        # If the text from the preview response was invalid, ignore it.
-                                        pass
-
-                    # Go back to the model request node with an empty request, which means we'll essentially
-                    # resubmit the most recent request that resulted in an empty response,
-                    # as the empty response and request will not create any items in the API payload,
-                    # in the hope the model will return a non-empty response this time.
-                    ctx.state.increment_retries(ctx.deps.max_result_retries, model_settings=ctx.deps.model_settings)
-                    run_context = build_run_context(ctx)
-                    instructions = await ctx.deps.get_instructions(run_context)
-                    self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
-                        _messages.ModelRequest(parts=[], instructions=instructions)
-                    )
-                    return
-
-                text = ''
-                tool_calls: list[_messages.ToolCallPart] = []
-                files: list[_messages.BinaryContent] = []
-
-                for part in self.model_response.parts:
-                    if isinstance(part, _messages.TextPart):
-                        text += part.content
-                    elif isinstance(part, _messages.ToolCallPart):
-                        tool_calls.append(part)
-                    elif isinstance(part, _messages.FilePart):
-                        files.append(part.content)
-                    elif isinstance(part, _messages.BuiltinToolCallPart):
-                        # Text parts before a built-in tool call are essentially thoughts,
-                        # not part of the final result output, so we reset the accumulated text
-                        text = ''
-                        yield _messages.BuiltinToolCallEvent(part)  # pyright: ignore[reportDeprecated]
-                    elif isinstance(part, _messages.BuiltinToolReturnPart):
-                        yield _messages.BuiltinToolResultEvent(part)  # pyright: ignore[reportDeprecated]
-                    elif isinstance(part, _messages.ThinkingPart):
-                        pass
-                    else:
-                        assert_never(part)
-
-                try:
-                    # At the moment, we prioritize at least executing tool calls if they are present.
-                    # In the future, we'd consider making this configurable at the agent or run level.
-                    # This accounts for cases like anthropic returns that might contain a text response
-                    # and a tool call response, where the text response just indicates the tool call will happen.
-                    alternatives: list[str] = []
-                    if tool_calls:
-                        async for event in self._handle_tool_calls(ctx, tool_calls):
-                            yield event
-                        return
-                    elif output_schema.toolset:
-                        alternatives.append('include your response in a tool call')
-                    else:
-                        alternatives.append('call a tool')
-
-                    if output_schema.allows_image:
-                        if image := next((file for file in files if isinstance(file, _messages.BinaryImage)), None):
-                            self._next_node = await self._handle_image_response(ctx, image)
+                send_stream, receive_stream = anyio.create_memory_object_stream[_messages.HandleResponseEvent]()
+
+                async def _run():  # noqa: C901
+                    async with send_stream:
+                        assert tool_manager.ctx is not None, 'ToolManager.ctx needs to be set'
+                        tool_manager.ctx.event_stream = send_stream
+
+                        output_schema = ctx.deps.output_schema
+                        if not self.model_response.parts:
+                            # we got an empty response.
+                            # this sometimes happens with anthropic (and perhaps other models)
+                            # when the model has already returned text along side tool calls
+                            if text_processor := output_schema.text_processor:  # pragma: no branch
+                                # in this scenario, if text responses are allowed, we return text from the most recent model
+                                # response, if any
+                                for message in reversed(ctx.state.message_history):
+                                    if isinstance(message, _messages.ModelResponse):
+                                        text = ''
+                                        for part in message.parts:
+                                            if isinstance(part, _messages.TextPart):
+                                                text += part.content
+                                            elif isinstance(part, _messages.BuiltinToolCallPart):
+                                                # Text parts before a built-in tool call are essentially thoughts,
+                                                # not part of the final result output, so we reset the accumulated text
+                                                text = ''  # pragma: no cover
+                                        if text:
+                                            try:
+                                                self._next_node = await self._handle_text_response(
+                                                    ctx, text, text_processor
+                                                )
+                                                return
+                                            except ToolRetryError:
+                                                # If the text from the preview response was invalid, ignore it.
+                                                pass
+
+                            # Go back to the model request node with an empty request, which means we'll essentially
+                            # resubmit the most recent request that resulted in an empty response,
+                            # as the empty response and request will not create any items in the API payload,
+                            # in the hope the model will return a non-empty response this time.
+                            ctx.state.increment_retries(
+                                ctx.deps.max_result_retries, model_settings=ctx.deps.model_settings
+                            )
+                            run_context = build_run_context(ctx)
+                            instructions = await ctx.deps.get_instructions(run_context)
+                            self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
+                                _messages.ModelRequest(parts=[], instructions=instructions)
+                            )
                             return
-                        alternatives.append('return an image')
 
-                    if text_processor := output_schema.text_processor:
-                        if text:
-                            # TODO (DouweM): This could call an output function that yields custom events, but we're not in an event stream here?
-                            self._next_node = await self._handle_text_response(ctx, text, text_processor)
-                            return
-                        alternatives.insert(0, 'return text')
+                        text = ''
+                        tool_calls: list[_messages.ToolCallPart] = []
+                        files: list[_messages.BinaryContent] = []
+
+                        for part in self.model_response.parts:
+                            if isinstance(part, _messages.TextPart):
+                                text += part.content
+                            elif isinstance(part, _messages.ToolCallPart):
+                                tool_calls.append(part)
+                            elif isinstance(part, _messages.FilePart):
+                                files.append(part.content)
+                            elif isinstance(part, _messages.BuiltinToolCallPart):
+                                # Text parts before a built-in tool call are essentially thoughts,
+                                # not part of the final result output, so we reset the accumulated text
+                                text = ''
+                                await send_stream.send(_messages.BuiltinToolCallEvent(part))  # pyright: ignore[reportDeprecated]
+                            elif isinstance(part, _messages.BuiltinToolReturnPart):
+                                await send_stream.send(_messages.BuiltinToolResultEvent(part))  # pyright: ignore[reportDeprecated]
+                            elif isinstance(part, _messages.ThinkingPart):
+                                pass
+                            else:
+                                assert_never(part)
+
+                        try:
+                            # At the moment, we prioritize at least executing tool calls if they are present.
+                            # In the future, we'd consider making this configurable at the agent or run level.
+                            # This accounts for cases like anthropic returns that might contain a text response
+                            # and a tool call response, where the text response just indicates the tool call will happen.
+                            alternatives: list[str] = []
+                            if tool_calls:
+                                async for event in self._handle_tool_calls(ctx, tool_calls):
+                                    await send_stream.send(event)
+                                return
+                            elif output_schema.toolset:
+                                alternatives.append('include your response in a tool call')
+                            else:
+                                alternatives.append('call a tool')
 
-                    # handle responses with only parts that don't constitute output.
-                    # This can happen with models that support thinking mode when they don't provide
-                    # actionable output alongside their thinking content. so we tell the model to try again.
-                    m = _messages.RetryPromptPart(
-                        content=f'Please {" or ".join(alternatives)}.',
-                    )
-                    raise ToolRetryError(m)
-                except ToolRetryError as e:
-                    ctx.state.increment_retries(
-                        ctx.deps.max_result_retries, error=e, model_settings=ctx.deps.model_settings
-                    )
-                    run_context = build_run_context(ctx)
-                    instructions = await ctx.deps.get_instructions(run_context)
-                    self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
-                        _messages.ModelRequest(parts=[e.tool_retry], instructions=instructions)
-                    )
+                            if output_schema.allows_image:
+                                if image := next(
+                                    (file for file in files if isinstance(file, _messages.BinaryImage)), None
+                                ):
+                                    self._next_node = await self._handle_image_response(ctx, image)
+                                    return
+                                alternatives.append('return an image')
+
+                            if text_processor := output_schema.text_processor:
+                                if text:
+                                    # TODO (DouweM): This could call an output function that yields custom events, but we're not in an event stream here?
+                                    self._next_node = await self._handle_text_response(ctx, text, text_processor)
+                                    return
+                                alternatives.insert(0, 'return text')
+
+                            # handle responses with only parts that don't constitute output.
+                            # This can happen with models that support thinking mode when they don't provide
+                            # actionable output alongside their thinking content. so we tell the model to try again.
+                            m = _messages.RetryPromptPart(
+                                content=f'Please {" or ".join(alternatives)}.',
+                            )
+                            raise ToolRetryError(m)
+                        except ToolRetryError as e:
+                            ctx.state.increment_retries(
+                                ctx.deps.max_result_retries, error=e, model_settings=ctx.deps.model_settings
+                            )
+                            run_context = build_run_context(ctx)
+                            instructions = await ctx.deps.get_instructions(run_context)
+                            self._next_node = ModelRequestNode[DepsT, NodeRunEndT](
+                                _messages.ModelRequest(parts=[e.tool_retry], instructions=instructions)
+                            )
+
+                task = asyncio.create_task(_run())
+
+                async with receive_stream:
+                    async for message in receive_stream:
+                        yield message
+
+                await task
 
             self._events_iterator = _run_stream()
 
@@ -685,9 +710,6 @@ async def _handle_tool_calls(
     ) -> AsyncIterator[_messages.HandleResponseEvent]:
         run_context = build_run_context(ctx)
 
-        # This will raise errors for any tool name conflicts
-        ctx.deps.tool_manager = await ctx.deps.tool_manager.for_run_step(run_context)
-
         output_parts: list[_messages.ModelRequestPart] = []
         output_final_result: deque[result.FinalResult[NodeRunEndT]] = deque(maxlen=1)
 
@@ -937,7 +959,7 @@ async def process_tool_calls(  # noqa: C901
         output_final_result.append(final_result)
 
 
-async def _call_tools(  # noqa: C901
+async def _call_tools(
     tool_manager: ToolManager[DepsT],
     tool_calls: list[_messages.ToolCallPart],
     tool_call_results: dict[str, DeferredToolResult],
@@ -995,45 +1017,30 @@ async def handle_call_or_result(
 
                 return _messages.FunctionToolResultEvent(tool_part, content=tool_user_content)
 
-        send_stream, receive_stream = anyio.create_memory_object_stream[_messages.HandleResponseEvent]()
-
-        async def _run_tools():
-            async with send_stream:
-                assert tool_manager.ctx is not None, 'ToolManager.ctx needs to be set'
-                tool_manager.ctx.event_stream = send_stream
-
-                if tool_manager.should_call_sequentially(tool_calls):
-                    for index, call in enumerate(tool_calls):
-                        if event := await handle_call_or_result(
-                            _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id)),
-                            index,
-                        ):
-                            await send_stream.send(event)
-
-                else:
-                    tasks = [
-                        asyncio.create_task(
-                            _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id)),
-                            name=call.tool_name,
-                        )
-                        for call in tool_calls
-                    ]
-
-                    pending = tasks
-                    while pending:
-                        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
-                        for task in done:
-                            index = tasks.index(task)
-                            if event := await handle_call_or_result(coro_or_task=task, index=index):
-                                await send_stream.send(event)
-
-        task = asyncio.create_task(_run_tools())
+        if tool_manager.should_call_sequentially(tool_calls):
+            for index, call in enumerate(tool_calls):
+                if event := await handle_call_or_result(
+                    _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id)),
+                    index,
+                ):
+                    yield event
 
-        async with receive_stream:
-            async for message in receive_stream:
-                yield message
+        else:
+            tasks = [
+                asyncio.create_task(
+                    _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id)),
+                    name=call.tool_name,
+                )
+                for call in tool_calls
+            ]
 
-        await task
+            pending = tasks
+            while pending:
+                done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
+                for task in done:
+                    index = tasks.index(task)
+                    if event := await handle_call_or_result(coro_or_task=task, index=index):
+                        yield event
 
     # We append the results at the end, rather than as they are received, to retain a consistent ordering
     # This is mostly just to simplify testing