Skip to content

Commit 8d2b4c9

Browse files
Fix race condition with threading.Lock (exclude lock from comparison)
- Use threading.Lock instead of asyncio.Lock for Python GIL safety
- Exclude lock from dataclass comparison and repr to fix serialization issues
- Keep incr() synchronous for backward compatibility
- Lock prevents race condition in concurrent tool calls
- Compatible with Python 3.10, 3.11, 3.12, and 3.13

Addresses issue #3120 where usage.tool_calls was undercounting when running tools in parallel due to non-atomic increment operations.
1 parent 8a11d28 commit 8d2b4c9

File tree

3 files changed

+14
-11
lines changed

3 files changed

+14
-11
lines changed

pydantic_ai_slim/pydantic_ai/_agent_graph.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,7 @@ async def stream(
408408
message_history, model_settings, model_request_parameters, run_context
409409
) as streamed_response:
410410
self._did_stream = True
411-
await ctx.state.usage.incr(_usage.RunUsage(requests=1))
411+
ctx.state.usage.incr(_usage.RunUsage(requests=1))
412412
agent_stream = result.AgentStream[DepsT, T](
413413
_raw_stream_response=streamed_response,
414414
_output_schema=ctx.deps.output_schema,
@@ -437,9 +437,9 @@ async def _make_request(
437437

438438
model_settings, model_request_parameters, message_history, _ = await self._prepare_request(ctx)
439439
model_response = await ctx.deps.model.request(message_history, model_settings, model_request_parameters)
440-
await ctx.state.usage.incr(_usage.RunUsage(requests=1))
440+
ctx.state.usage.incr(_usage.RunUsage(requests=1))
441441

442-
return await self._finish_handling(ctx, model_response)
442+
return self._finish_handling(ctx, model_response)
443443

444444
async def _prepare_request(
445445
self, ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]]
@@ -475,19 +475,19 @@ async def _prepare_request(
475475
usage = deepcopy(usage)
476476

477477
counted_usage = await ctx.deps.model.count_tokens(message_history, model_settings, model_request_parameters)
478-
await usage.incr(counted_usage)
478+
usage.incr(counted_usage)
479479

480480
ctx.deps.usage_limits.check_before_request(usage)
481481

482482
return model_settings, model_request_parameters, message_history, run_context
483483

484-
async def _finish_handling(
484+
def _finish_handling(
485485
self,
486486
ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, NodeRunEndT]],
487487
response: _messages.ModelResponse,
488488
) -> CallToolsNode[DepsT, NodeRunEndT]:
489489
# Update usage
490-
await ctx.state.usage.incr(response.usage)
490+
ctx.state.usage.incr(response.usage)
491491
if ctx.deps.usage_limits: # pragma: no branch
492492
ctx.deps.usage_limits.check_tokens(ctx.state.usage)
493493

pydantic_ai_slim/pydantic_ai/_tool_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ async def _call_function_tool(
234234
) as span:
235235
try:
236236
tool_result = await self._call_tool(call, allow_partial, wrap_validation_errors)
237-
await usage.incr(RunUsage(tool_calls=1))
237+
usage.incr(RunUsage(tool_calls=1))
238238

239239
except ToolRetryError as e:
240240
part = e.tool_retry

pydantic_ai_slim/pydantic_ai/usage.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations as _annotations
22

3-
import asyncio
3+
import threading
44
import dataclasses
55
from copy import copy
66
from dataclasses import dataclass, fields
@@ -190,16 +190,16 @@ class RunUsage(UsageBase):
190190
details: dict[str, int] = dataclasses.field(default_factory=dict)
191191
"""Any extra details returned by the model."""
192192

193-
_lock: asyncio.Lock = dataclasses.field(default_factory=asyncio.Lock)
193+
_lock: threading.Lock = dataclasses.field(default_factory=threading.Lock, compare=False, repr=False)
194194
"""Lock to prevent race conditions when incrementing usage from concurrent tool calls."""
195195

196-
async def incr(self, incr_usage: RunUsage | RequestUsage) -> None:
196+
def incr(self, incr_usage: RunUsage | RequestUsage) -> None:
197197
"""Increment the usage in place.
198198
199199
Args:
200200
incr_usage: The usage to increment by.
201201
"""
202-
async with self._lock:
202+
with self._lock:
203203
if isinstance(incr_usage, RunUsage):
204204
self.requests += incr_usage.requests
205205
self.tool_calls += incr_usage.tool_calls
@@ -214,6 +214,9 @@ def __add__(self, other: RunUsage | RequestUsage) -> RunUsage:
214214
# Note: We can't use await here since __add__ must be synchronous
215215
# But __add__ creates a new object, so there's no race condition
216216
# The race condition only happens when modifying the same object concurrently
217+
# Create a new lock for the new instance
218+
new_usage._lock = threading.Lock()
219+
217220
if isinstance(other, RunUsage):
218221
new_usage.requests += other.requests
219222
new_usage.tool_calls += other.tool_calls

0 commit comments

Comments (0)