Address Douwe's review feedback on deferred tool metadata

cjohnhanson · cjohnhanson · commit 42cf5b83a802 · 2025-11-07T15:53:17.000-06:00
Per Douwe's comments:
1. Store None instead of {} when no metadata provided
2. Don't add a tool_call_id entry to the metadata dict when that call's metadata is None
3. Update Temporal wrap/unwrap methods to handle metadata

- Updated test assertions to reflect None metadata behavior
- Updated doc example snapshots to include the empty `metadata={}` field
- Fixed codespell issue with table formatting
diff --git a/docs/deferred-tools.md b/docs/deferred-tools.md
@@ -77,6 +77,7 @@ DeferredToolRequests(
             tool_call_id='delete_file',
         ),
     ],
+    metadata={},
 )
 """
 
@@ -247,6 +248,7 @@ async def main():
             )
         ],
         approvals=[],
+        metadata={},
     )
     """
 
@@ -385,17 +387,24 @@ requests = result.output
 # Handle approvals with metadata
 for call in requests.approvals:
     metadata = requests.metadata.get(call.tool_call_id, {})
-    print(f"Approval needed for {call.tool_name}")
-    print(f"  Cost: ${metadata.get('estimated_cost_usd')}")
-    print(f"  Time: {metadata.get('estimated_time_minutes')} minutes")
-    print(f"  Reason: {metadata.get('reason')}")
+    print(f'Approval needed for {call.tool_name}')
+    #> Approval needed for expensive_compute
+    print(f'  Cost: ${metadata.get("estimated_cost_usd")}')
+    #>   Cost: $25.5
+    print(f'  Time: {metadata.get("estimated_time_minutes")} minutes')
+    #>   Time: 15 minutes
+    print(f'  Reason: {metadata.get("reason")}')
+    #>   Reason: High compute cost
 
 # Handle external calls with metadata
 for call in requests.calls:
     metadata = requests.metadata.get(call.tool_call_id, {})
-    print(f"External call to {call.tool_name}")
-    print(f"  Task ID: {metadata.get('task_id')}")
-    print(f"  Priority: {metadata.get('priority')}")
+    print(f'External call to {call.tool_name}')
+    #> External call to external_api_call
+    print(f'  Task ID: {metadata.get("task_id")}')
+    #>   Task ID: api_call_external_api_call
+    print(f'  Priority: {metadata.get("priority")}')
+    #>   Priority: high
 
 # Build results with approvals and external results
 results = DeferredToolResults()
@@ -416,9 +425,7 @@ for call in requests.calls:
 
 result = agent.run_sync(message_history=messages, deferred_tool_results=results)
 print(result.output)
-"""
-I completed task-123 and retrieved data from the /data endpoint.
-"""
+#> I completed task-123 and retrieved data from the /data endpoint.
 ```
 
 _(This example is complete, it can be run "as is")_
diff --git a/docs/toolsets.md b/docs/toolsets.md
@@ -362,6 +362,7 @@ DeferredToolRequests(
             tool_call_id='pyd_ai_tool_call_id__temperature_fahrenheit',
         ),
     ],
+    metadata={},
 )
 """
 
diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -952,7 +952,7 @@ async def _call_tools(
     tool_parts_by_index: dict[int, _messages.ModelRequestPart] = {}
     user_parts_by_index: dict[int, _messages.UserPromptPart] = {}
     deferred_calls_by_index: dict[int, Literal['external', 'unapproved']] = {}
-    deferred_metadata_by_index: dict[int, dict[str, Any]] = {}
+    deferred_metadata_by_index: dict[int, dict[str, Any] | None] = {}
 
     if usage_limits.tool_calls_limit is not None:
         projected_usage = deepcopy(usage)
@@ -1038,7 +1038,7 @@ async def handle_call_or_result(
 def _populate_deferred_calls(
     tool_calls: list[_messages.ToolCallPart],
     deferred_calls_by_index: dict[int, Literal['external', 'unapproved']],
-    deferred_metadata_by_index: dict[int, dict[str, Any]],
+    deferred_metadata_by_index: dict[int, dict[str, Any] | None],
     output_deferred_calls: dict[Literal['external', 'unapproved'], list[_messages.ToolCallPart]],
     output_deferred_metadata: dict[str, dict[str, Any]],
 ) -> None:
@@ -1047,7 +1047,9 @@ def _populate_deferred_calls(
         call = tool_calls[k]
         output_deferred_calls[deferred_calls_by_index[k]].append(call)
         if k in deferred_metadata_by_index:
-            output_deferred_metadata[call.tool_call_id] = deferred_metadata_by_index[k]
+            metadata = deferred_metadata_by_index[k]
+            if metadata is not None:
+                output_deferred_metadata[call.tool_call_id] = metadata
 
 
 async def _call_tool(
diff --git a/pydantic_ai_slim/pydantic_ai/durable_exec/temporal/_toolset.py b/pydantic_ai_slim/pydantic_ai/durable_exec/temporal/_toolset.py
@@ -27,11 +27,13 @@ class CallToolParams:
 
 @dataclass
 class _ApprovalRequired:
+    metadata: dict[str, Any] | None = None
     kind: Literal['approval_required'] = 'approval_required'
 
 
 @dataclass
 class _CallDeferred:
+    metadata: dict[str, Any] | None = None
     kind: Literal['call_deferred'] = 'call_deferred'
 
 
@@ -75,20 +77,20 @@ async def _wrap_call_tool_result(self, coro: Awaitable[Any]) -> CallToolResult:
         try:
             result = await coro
             return _ToolReturn(result=result)
-        except ApprovalRequired:
-            return _ApprovalRequired()
-        except CallDeferred:
-            return _CallDeferred()
+        except ApprovalRequired as e:
+            return _ApprovalRequired(metadata=e.metadata)
+        except CallDeferred as e:
+            return _CallDeferred(metadata=e.metadata)
         except ModelRetry as e:
             return _ModelRetry(message=e.message)
 
     def _unwrap_call_tool_result(self, result: CallToolResult) -> Any:
         if isinstance(result, _ToolReturn):
             return result.result
         elif isinstance(result, _ApprovalRequired):
-            raise ApprovalRequired()
+            raise ApprovalRequired(metadata=result.metadata)
         elif isinstance(result, _CallDeferred):
-            raise CallDeferred()
+            raise CallDeferred(metadata=result.metadata)
         elif isinstance(result, _ModelRetry):
             raise ModelRetry(result.message)
         else:
diff --git a/pydantic_ai_slim/pydantic_ai/exceptions.py b/pydantic_ai_slim/pydantic_ai/exceptions.py
@@ -74,7 +74,7 @@ class CallDeferred(Exception):
     """
 
     def __init__(self, metadata: dict[str, Any] | None = None):
-        self.metadata = metadata or {}
+        self.metadata = metadata
         super().__init__()
 
 
@@ -89,7 +89,7 @@ class ApprovalRequired(Exception):
     """
 
     def __init__(self, metadata: dict[str, Any] | None = None):
-        self.metadata = metadata or {}
+        self.metadata = metadata
         super().__init__()
 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -317,4 +317,4 @@ skip = '.git*,*.svg,*.lock,*.css,*.yaml'
 check-hidden = true
 # Ignore "formatting" like **L**anguage
 ignore-regex = '\*\*[A-Z]\*\*[a-z]+\b'
-ignore-words-list = 'asend,aci'
+ignore-words-list = 'asend,aci,Assertio'
diff --git a/tests/evals/test_reporting.py b/tests/evals/test_reporting.py
@@ -988,9 +988,9 @@ async def test_evaluation_renderer_with_experiment_metadata(sample_report_case:
 │ temperature: 0.7                  │
 │ prompt_version: v2                │
 ╰───────────────────────────────────╯
-┏━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┓
-┃          ┃          ┃           ┃          ┃           ┃ Assertions ┃          ┃
-┃ Case ID  ┃ Inputs   ┃ Scores    ┃ Labels   ┃ Metrics   ┃            ┃ Duration ┃
+┏━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┓
+┃          ┃          ┃           ┃          ┃           ┃ Assertio ┃          ┃
+┃ Case ID  ┃ Inputs   ┃ Scores    ┃ Labels   ┃ Metrics   ┃ ns       ┃ Duration ┃
 ┡━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━┩
 │ test_ca… │ {'query' │ score1:   │ label1:  │ accuracy: │ ✔        │  100.0ms │
 │          │ : 'What  │ 2.50      │ hello    │ 0.950     │          │          │
diff --git a/tests/test_agent.py b/tests/test_agent.py
@@ -4860,10 +4860,10 @@ def call_second():
     assert isinstance(result.output, DeferredToolRequests)
     assert len(result.output.approvals) == 1
     assert result.output.approvals[0].tool_name == 'requires_approval'
-    # Check metadata exists for this tool_call_id
+    # When no metadata is provided, the tool_call_id should not be in metadata dict
     tool_call_id = result.output.approvals[0].tool_call_id
-    assert tool_call_id in result.output.metadata
-    assert result.output.metadata[tool_call_id] == {}
+    assert tool_call_id not in result.output.metadata
+    assert result.output.metadata == {}
     assert integer_holder == 2
 
 
diff --git a/tests/test_examples.py b/tests/test_examples.py
@@ -523,6 +523,10 @@ async def call_tool(
     'Tell me about the pydantic/pydantic-ai repo.': 'The pydantic/pydantic-ai repo is a Python agent framework for building Generative AI applications.',
     'What do I have on my calendar today?': "You're going to spend all day playing with Pydantic AI.",
     'Write a long story about a cat': 'Once upon a time, there was a curious cat named Whiskers who loved to explore the world around him...',
+    'Run expensive task-123 and call the /data endpoint': [
+        ToolCallPart(tool_name='expensive_compute', args={'task_id': 'task-123'}, tool_call_id='expensive_compute'),
+        ToolCallPart(tool_name='external_api_call', args={'endpoint': '/data'}, tool_call_id='external_api_call'),
+    ],
 }
 
 tool_responses: dict[tuple[str, str], str] = {
@@ -871,10 +875,22 @@ async def model_logic(  # noqa: C901
         return ModelResponse(
             parts=[TextPart('The answer to the ultimate question of life, the universe, and everything is 42.')]
         )
-    else:
+    elif isinstance(m, ToolReturnPart) and m.tool_name in ('expensive_compute', 'external_api_call'):
+        # After deferred tools complete, check if we have all results to provide final response
+        tool_names = {part.tool_name for msg in messages for part in msg.parts if isinstance(part, ToolReturnPart)}
+        if 'expensive_compute' in tool_names and 'external_api_call' in tool_names:
+            return ModelResponse(parts=[TextPart('I completed task-123 and retrieved data from the /data endpoint.')])
+        # If we don't have both results yet, just acknowledge the tool result
+        return ModelResponse(parts=[TextPart(f'Received result from {m.tool_name}')])
+
+    if isinstance(m, ToolReturnPart):
         sys.stdout.write(str(debug.format(messages, info)))
         raise RuntimeError(f'Unexpected message: {m}')
 
+    # Fallback for any other message type
+    sys.stdout.write(str(debug.format(messages, info)))
+    raise RuntimeError(f'Unexpected message type: {type(m).__name__}')
+
 
 async def stream_model_logic(  # noqa C901
     messages: list[ModelMessage], info: AgentInfo
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
@@ -1152,10 +1152,10 @@ def regular_tool(x: int) -> int:
         assert isinstance(response, DeferredToolRequests)
         assert len(response.calls) == 1
         assert response.calls[0].tool_name == 'deferred_tool'
-        # Check metadata exists for this tool_call_id
+        # When no metadata is provided, the tool_call_id should not be in metadata dict
         tool_call_id = response.calls[0].tool_call_id
-        assert tool_call_id in response.metadata
-        assert response.metadata[tool_call_id] == {}
+        assert tool_call_id not in response.metadata
+        assert response.metadata == {}
         messages = result.all_messages()
 
     # Verify no tools were called
@@ -1639,18 +1639,10 @@ def my_tool(x: int) -> int:
     async with agent.run_stream('Hello') as result:
         assert not result.is_complete
         assert [c async for c in result.stream_output(debounce_by=None)] == snapshot(
-            [
-                DeferredToolRequests(
-                    calls=[ToolCallPart(tool_name='my_tool', args={'x': 0}, tool_call_id=IsStr())],
-                    metadata={'pyd_ai_tool_call_id__my_tool': {}},
-                )
-            ]
+            [DeferredToolRequests(calls=[ToolCallPart(tool_name='my_tool', args={'x': 0}, tool_call_id=IsStr())])]
         )
         assert await result.get_output() == snapshot(
-            DeferredToolRequests(
-                calls=[ToolCallPart(tool_name='my_tool', args={'x': 0}, tool_call_id=IsStr())],
-                metadata={'pyd_ai_tool_call_id__my_tool': {}},
-            )
+            DeferredToolRequests(calls=[ToolCallPart(tool_name='my_tool', args={'x': 0}, tool_call_id=IsStr())])
         )
         responses = [c async for c, _is_last in result.stream_responses(debounce_by=None)]
         assert responses == snapshot(
@@ -1665,10 +1657,7 @@ def my_tool(x: int) -> int:
             ]
         )
         assert await result.validate_response_output(responses[0]) == snapshot(
-            DeferredToolRequests(
-                calls=[ToolCallPart(tool_name='my_tool', args={'x': 0}, tool_call_id=IsStr())],
-                metadata={'pyd_ai_tool_call_id__my_tool': {}},
-            )
+            DeferredToolRequests(calls=[ToolCallPart(tool_name='my_tool', args={'x': 0}, tool_call_id=IsStr())])
         )
         assert result.usage() == snapshot(RunUsage(requests=1, input_tokens=51, output_tokens=0))
         assert result.timestamp() == IsNow(tz=timezone.utc)
@@ -1695,10 +1684,7 @@ def my_tool(ctx: RunContext[None], x: int) -> int:
         messages = result.all_messages()
         output = await result.get_output()
         assert output == snapshot(
-            DeferredToolRequests(
-                approvals=[ToolCallPart(tool_name='my_tool', args='{"x": 1}', tool_call_id=IsStr())],
-                metadata={'my_tool': {}},
-            )
+            DeferredToolRequests(approvals=[ToolCallPart(tool_name='my_tool', args='{"x": 1}', tool_call_id=IsStr())])
         )
         assert result.is_complete
 
@@ -1873,7 +1859,6 @@ def my_other_tool(x: int) -> int:
         DeferredToolRequests(
             calls=[ToolCallPart(tool_name='my_tool', args={'x': 0}, tool_call_id=IsStr())],
             approvals=[ToolCallPart(tool_name='my_other_tool', args={'x': 0}, tool_call_id=IsStr())],
-            metadata={'pyd_ai_tool_call_id__my_tool': {}, 'pyd_ai_tool_call_id__my_other_tool': {}},
         )
     )
 
diff --git a/tests/test_tools.py b/tests/test_tools.py
@@ -1318,10 +1318,7 @@ def my_tool(x: int) -> int:
 
     result = agent.run_sync('Hello')
     assert result.output == snapshot(
-        DeferredToolRequests(
-            calls=[ToolCallPart(tool_name='my_tool', args={'x': 0}, tool_call_id=IsStr())],
-            metadata={'pyd_ai_tool_call_id__my_tool': {}},
-        )
+        DeferredToolRequests(calls=[ToolCallPart(tool_name='my_tool', args={'x': 0}, tool_call_id=IsStr())])
     )
 
 
@@ -1351,10 +1348,7 @@ def my_tool(ctx: RunContext[None], x: int) -> int:
     result = agent.run_sync('Hello')
     messages = result.all_messages()
     assert result.output == snapshot(
-        DeferredToolRequests(
-            approvals=[ToolCallPart(tool_name='my_tool', args={'x': 1}, tool_call_id='my_tool')],
-            metadata={'my_tool': {}},
-        )
+        DeferredToolRequests(approvals=[ToolCallPart(tool_name='my_tool', args={'x': 1}, tool_call_id='my_tool')])
     )
 
     result = agent.run_sync(
@@ -1749,8 +1743,7 @@ def buy(fruit: str):
                 ToolCallPart(tool_name='buy', args={'fruit': 'apple'}, tool_call_id='buy_apple'),
                 ToolCallPart(tool_name='buy', args={'fruit': 'banana'}, tool_call_id='buy_banana'),
                 ToolCallPart(tool_name='buy', args={'fruit': 'pear'}, tool_call_id='buy_pear'),
-            ],
-            metadata={'buy_apple': {}, 'buy_banana': {}, 'buy_pear': {}},
+            ]
         )
     )
 
@@ -2098,8 +2091,7 @@ def bar(x: int) -> int:
             approvals=[
                 ToolCallPart(tool_name='foo', args={'x': 1}, tool_call_id='foo1'),
                 ToolCallPart(tool_name='foo', args={'x': 2}, tool_call_id='foo2'),
-            ],
-            metadata={'foo1': {}, 'foo2': {}},
+            ]
         )
     )
 

Original file line number	Diff line number	Diff line change
`@@ -362,6 +362,7 @@ DeferredToolRequests(`
`362`	`362`	`tool_call_id='pyd_ai_tool_call_id__temperature_fahrenheit',`
`363`	`363`	`),`
`364`	`364`	`],`
	`365`	`+ metadata={},`
`365`	`366`	`)`
`366`	`367`	`"""`
`367`	`368`