Address code review feedback

certainly-param · certainly-param · commit 6f8cfc7c91cb · 2025-10-13T14:28:41.000-04:00
- Remove unnecessary comments about request counting
- Move usage_lock to ToolManager as a `_usage_lock` cached_property for better encapsulation
- Simplify RunUsage.incr() to avoid code duplication
- Clean up _agent_graph.py by removing context var management

This localizes lock management in ToolManager, where usage.tool_calls is actually
incremented during parallel tool execution, improving code organization and maintainability.
diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -408,7 +408,6 @@ async def stream(
             message_history, model_settings, model_request_parameters, run_context
         ) as streamed_response:
             self._did_stream = True
-            # Request count is incremented in _finish_handling via response.usage
             agent_stream = result.AgentStream[DepsT, T](
                 _raw_stream_response=streamed_response,
                 _output_schema=ctx.deps.output_schema,
@@ -419,8 +418,6 @@ async def stream(
                 _tool_manager=ctx.deps.tool_manager,
             )
             yield agent_stream
-            # In case the user didn't manually consume the full stream, ensure it is fully consumed here,
-            # otherwise usage won't be properly counted:
             async for _ in agent_stream:
                 pass
 
@@ -437,7 +434,6 @@ async def _make_request(
 
         model_settings, model_request_parameters, message_history, _ = await self._prepare_request(ctx)
         model_response = await ctx.deps.model.request(message_history, model_settings, model_request_parameters)
-        # Request count is incremented in _finish_handling via response.usage
 
         return await self._finish_handling(ctx, model_response)
 
@@ -895,8 +891,6 @@ async def _call_tools(
     tool_parts_by_index: dict[int, _messages.ModelRequestPart] = {}
     user_parts_by_index: dict[int, _messages.UserPromptPart] = {}
     deferred_calls_by_index: dict[int, Literal['external', 'unapproved']] = {}
-    # Lock to prevent race conditions when incrementing usage.tool_calls from concurrent tool executions
-    usage_lock = asyncio.Lock()
 
     if usage_limits.tool_calls_limit is not None:
         projected_usage = deepcopy(usage)
@@ -906,85 +900,76 @@ async def _call_tools(
     for call in tool_calls:
         yield _messages.FunctionToolCallEvent(call)
 
-    # Import and set the usage lock context variable for parallel tool execution
-    from ._tool_manager import _usage_increment_lock_ctx_var  # pyright: ignore[reportPrivateUsage]
-
-    token = _usage_increment_lock_ctx_var.set(usage_lock)
-
-    try:
-        with tracer.start_as_current_span(
-            'running tools',
-            attributes={
-                'tools': [call.tool_name for call in tool_calls],
-                'logfire.msg': f'running {len(tool_calls)} tool{"" if len(tool_calls) == 1 else "s"}',
-            },
-        ):
+    with tracer.start_as_current_span(
+        'running tools',
+        attributes={
+            'tools': [call.tool_name for call in tool_calls],
+            'logfire.msg': f'running {len(tool_calls)} tool{"" if len(tool_calls) == 1 else "s"}',
+        },
+    ):
 
-            async def handle_call_or_result(
-                coro_or_task: Awaitable[
-                    tuple[
-                        _messages.ToolReturnPart | _messages.RetryPromptPart,
-                        str | Sequence[_messages.UserContent] | None,
-                    ]
+        async def handle_call_or_result(
+            coro_or_task: Awaitable[
+                tuple[
+                    _messages.ToolReturnPart | _messages.RetryPromptPart,
+                    str | Sequence[_messages.UserContent] | None,
                 ]
-                | Task[
-                    tuple[
-                        _messages.ToolReturnPart | _messages.RetryPromptPart,
-                        str | Sequence[_messages.UserContent] | None,
-                    ]
-                ],
-                index: int,
-            ) -> _messages.HandleResponseEvent | None:
-                try:
-                    tool_part, tool_user_content = (
-                        (await coro_or_task) if inspect.isawaitable(coro_or_task) else coro_or_task.result()
-                    )
-                except exceptions.CallDeferred:
-                    deferred_calls_by_index[index] = 'external'
-                except exceptions.ApprovalRequired:
-                    deferred_calls_by_index[index] = 'unapproved'
-                else:
-                    tool_parts_by_index[index] = tool_part
-                    if tool_user_content:
-                        user_parts_by_index[index] = _messages.UserPromptPart(content=tool_user_content)
-
-                    return _messages.FunctionToolResultEvent(tool_part, content=tool_user_content)
-
-            if tool_manager.should_call_sequentially(tool_calls):
-                for index, call in enumerate(tool_calls):
-                    if event := await handle_call_or_result(
-                        _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id)),
-                        index,
-                    ):
-                        yield event
-
-            else:
-                tasks = [
-                    asyncio.create_task(
-                        _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id)),
-                        name=call.tool_name,
-                    )
-                    for call in tool_calls
+            ]
+            | Task[
+                tuple[
+                    _messages.ToolReturnPart | _messages.RetryPromptPart,
+                    str | Sequence[_messages.UserContent] | None,
                 ]
+            ],
+            index: int,
+        ) -> _messages.HandleResponseEvent | None:
+            try:
+                tool_part, tool_user_content = (
+                    (await coro_or_task) if inspect.isawaitable(coro_or_task) else coro_or_task.result()
+                )
+            except exceptions.CallDeferred:
+                deferred_calls_by_index[index] = 'external'
+            except exceptions.ApprovalRequired:
+                deferred_calls_by_index[index] = 'unapproved'
+            else:
+                tool_parts_by_index[index] = tool_part
+                if tool_user_content:
+                    user_parts_by_index[index] = _messages.UserPromptPart(content=tool_user_content)
 
-                pending = tasks
-                while pending:
-                    done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
-                    for task in done:
-                        index = tasks.index(task)
-                        if event := await handle_call_or_result(coro_or_task=task, index=index):
-                            yield event
+                return _messages.FunctionToolResultEvent(tool_part, content=tool_user_content)
 
-        # We append the results at the end, rather than as they are received, to retain a consistent ordering
-        # This is mostly just to simplify testing
-        output_parts.extend([tool_parts_by_index[k] for k in sorted(tool_parts_by_index)])
-        output_parts.extend([user_parts_by_index[k] for k in sorted(user_parts_by_index)])
+        if tool_manager.should_call_sequentially(tool_calls):
+            for index, call in enumerate(tool_calls):
+                if event := await handle_call_or_result(
+                    _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id)),
+                    index,
+                ):
+                    yield event
 
-        for k in sorted(deferred_calls_by_index):
-            output_deferred_calls[deferred_calls_by_index[k]].append(tool_calls[k])
-    finally:
-        # Reset the context variable
-        _usage_increment_lock_ctx_var.reset(token)
+        else:
+            tasks = [
+                asyncio.create_task(
+                    _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id)),
+                    name=call.tool_name,
+                )
+                for call in tool_calls
+            ]
+
+            pending = tasks
+            while pending:
+                done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
+                for task in done:
+                    index = tasks.index(task)
+                    if event := await handle_call_or_result(coro_or_task=task, index=index):
+                        yield event
+
+    # We append the results at the end, rather than as they are received, to retain a consistent ordering
+    # This is mostly just to simplify testing
+    output_parts.extend([tool_parts_by_index[k] for k in sorted(tool_parts_by_index)])
+    output_parts.extend([user_parts_by_index[k] for k in sorted(user_parts_by_index)])
+
+    for k in sorted(deferred_calls_by_index):
+        output_deferred_calls[deferred_calls_by_index[k]].append(tool_calls[k])
 
 
 async def _call_tool(
diff --git a/pydantic_ai_slim/pydantic_ai/_tool_manager.py b/pydantic_ai_slim/pydantic_ai/_tool_manager.py
@@ -6,6 +6,7 @@
 from contextlib import contextmanager
 from contextvars import ContextVar
 from dataclasses import dataclass, field, replace
+from functools import cached_property
 from typing import Any, Generic
 
 from opentelemetry.trace import Tracer
@@ -22,7 +23,6 @@
 from .usage import RunUsage
 
 _sequential_tool_calls_ctx_var: ContextVar[bool] = ContextVar('sequential_tool_calls', default=False)
-_usage_increment_lock_ctx_var: ContextVar[asyncio.Lock | None] = ContextVar('usage_increment_lock', default=None)
 
 
 @dataclass
@@ -74,6 +74,11 @@ def tool_defs(self) -> list[ToolDefinition]:
 
         return [tool.tool_def for tool in self.tools.values()]
 
+    @cached_property
+    def _usage_lock(self) -> asyncio.Lock:
+        """Lock to prevent race conditions when incrementing usage.tool_calls from concurrent tool executions."""
+        return asyncio.Lock()
+
     def should_call_sequentially(self, calls: list[ToolCallPart]) -> bool:
         """Whether to require sequential tool calls for a list of tool calls."""
         return _sequential_tool_calls_ctx_var.get() or any(
@@ -236,12 +241,7 @@ async def _call_function_tool(
         ) as span:
             try:
                 tool_result = await self._call_tool(call, allow_partial, wrap_validation_errors)
-                # Use lock if available (for parallel tool execution) to prevent race conditions
-                lock = _usage_increment_lock_ctx_var.get()
-                if lock is not None:
-                    async with lock:
-                        usage.incr(RunUsage(tool_calls=1))
-                else:
+                async with self._usage_lock:
                     usage.incr(RunUsage(tool_calls=1))
 
             except ToolRetryError as e:
diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py
@@ -195,12 +195,9 @@ def incr(self, incr_usage: RunUsage | RequestUsage) -> None:
         Args:
             incr_usage: The usage to increment by.
         """
+        self.requests += incr_usage.requests
         if isinstance(incr_usage, RunUsage):
-            self.requests += incr_usage.requests
             self.tool_calls += incr_usage.tool_calls
-        else:
-            # RequestUsage: requests is a property that returns 1
-            self.requests += incr_usage.requests
         return _incr_usage_tokens(self, incr_usage)
 
     def __add__(self, other: RunUsage | RequestUsage) -> RunUsage: