From a22fea65e8c6dd4aa901167f58dae98de353116a Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Wed, 26 Nov 2025 12:41:06 +0100
Subject: [PATCH] feat(integration): pydantic-ai: properly report token usage
 and response model for invoke_agent spans

---
 .../pydantic_ai/patches/agent_run.py          | 15 +++----
 .../pydantic_ai/spans/ai_client.py            | 17 +------
 .../pydantic_ai/spans/invoke_agent.py         | 32 ++++++++++++-
 .../integrations/pydantic_ai/spans/utils.py   | 34 ++++++++++++++
 .../pydantic_ai/test_pydantic_ai.py           | 45 +++++++++++++++++++
 5 files changed, 115 insertions(+), 28 deletions(-)
 create mode 100644 sentry_sdk/integrations/pydantic_ai/spans/utils.py

diff --git a/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py b/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py
index daa2da112c..cceb11fc90 100644
--- a/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py
+++ b/sentry_sdk/integrations/pydantic_ai/patches/agent_run.py
@@ -71,13 +71,9 @@ async def __aexit__(self, exc_type, exc_val, exc_tb):
             # Exit the original context manager first
             await self.original_ctx_manager.__aexit__(exc_type, exc_val, exc_tb)
 
-            # Update span with output if successful
-            if exc_type is None and self._result and hasattr(self._result, "output"):
-                output = (
-                    self._result.output if hasattr(self._result, "output") else None
-                )
-                if self._span is not None:
-                    update_invoke_agent_span(self._span, output)
+            # Update span with result if successful
+            if exc_type is None and self._result and self._span is not None:
+                update_invoke_agent_span(self._span, self._result)
         finally:
             # Pop agent from contextvar stack
             pop_agent()
@@ -123,9 +119,8 @@ async def wrapper(self, *args, **kwargs):
                 try:
                     result = await original_func(self, *args, **kwargs)
 
-                    # Update span with output
-                    output = result.output if hasattr(result, "output") else None
-                    update_invoke_agent_span(span, output)
+                    # Update span with result
+                    update_invoke_agent_span(span, result)
 
                     return result
                 except Exception as exc:
diff --git a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py
index a2bd0272d4..b3749b16c9 100644
--- a/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py
+++ b/sentry_sdk/integrations/pydantic_ai/spans/ai_client.py
@@ -13,6 +13,7 @@
     get_current_agent,
     get_is_streaming,
 )
+from .utils import _set_usage_data
 
 from typing import TYPE_CHECKING
 
@@ -39,22 +40,6 @@
     ThinkingPart = None
 
 
-def _set_usage_data(span, usage):
-    # type: (sentry_sdk.tracing.Span, RequestUsage) -> None
-    """Set token usage data on a span."""
-    if usage is None:
-        return
-
-    if hasattr(usage, "input_tokens") and usage.input_tokens is not None:
-        span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, usage.input_tokens)
-
-    if hasattr(usage, "output_tokens") and usage.output_tokens is not None:
-        span.set_data(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, usage.output_tokens)
-
-    if hasattr(usage, "total_tokens") and usage.total_tokens is not None:
-        span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, usage.total_tokens)
-
-
 def _set_input_messages(span, messages):
     # type: (sentry_sdk.tracing.Span, Any) -> None
     """Set input messages data on a span."""
diff --git a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py
index f5e22fb346..ee451b7e6b 100644
--- a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py
+++ b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py
@@ -9,6 +9,7 @@
     _set_model_data,
     _should_send_prompts,
 )
+from .utils import _set_usage_data
 
 from typing import TYPE_CHECKING
 
@@ -103,10 +104,37 @@ def invoke_agent_span(user_prompt, agent, model, model_settings, is_streaming=Fa
     return span
 
 
-def update_invoke_agent_span(span, output):
+def update_invoke_agent_span(span, result):
     # type: (sentry_sdk.tracing.Span, Any) -> None
     """Update and close the invoke agent span."""
-    if span and _should_send_prompts() and output:
+    if not span or not result:
+        return
+
+    # Extract output from result
+    output = getattr(result, "output", None)
+
+    # Set response text if prompts are enabled
+    if _should_send_prompts() and output:
         set_data_normalized(
             span, SPANDATA.GEN_AI_RESPONSE_TEXT, str(output), unpack=False
         )
+
+    # Set token usage data if available
+    if hasattr(result, "usage") and callable(result.usage):
+        try:
+            usage = result.usage()
+            if usage:
+                _set_usage_data(span, usage)
+        except Exception:
+            # If usage() call fails, continue without setting usage data
+            pass
+
+    # Set model name from response if available
+    try:
+        # Resolve inside the try: hasattr() only swallows AttributeError
+        response = getattr(result, "response", None)
+        model_name = getattr(response, "model_name", None)
+        if model_name:
+            span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model_name)
+    except Exception:
+        pass  # If response access fails, continue without setting model name
diff --git a/sentry_sdk/integrations/pydantic_ai/spans/utils.py b/sentry_sdk/integrations/pydantic_ai/spans/utils.py
new file mode 100644
index 0000000000..f5251622de
--- /dev/null
+++ b/sentry_sdk/integrations/pydantic_ai/spans/utils.py
@@ -0,0 +1,34 @@
+"""Utility functions for PydanticAI span instrumentation."""
+
+import sentry_sdk
+from sentry_sdk.consts import SPANDATA
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from typing import Union
+    from pydantic_ai.usage import RequestUsage, RunUsage  # type: ignore
+
+
+def _set_usage_data(span, usage):
+    # type: (sentry_sdk.tracing.Span, Union[RequestUsage, RunUsage]) -> None
+    """Copy token counts from a pydantic_ai usage object onto a span.
+
+    Accepts either a RequestUsage (single request) or a RunUsage
+    (agent run). Counts that are missing or None are skipped.
+
+    Args:
+        span: The Sentry span that receives the data.
+        usage: RequestUsage or RunUsage object; None makes this a no-op.
+    """
+    if usage is None:
+        return
+
+    for attr_name, data_key in (
+        ("input_tokens", SPANDATA.GEN_AI_USAGE_INPUT_TOKENS),
+        ("output_tokens", SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS),
+        ("total_tokens", SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS),
+    ):
+        count = getattr(usage, attr_name, None)
+        if count is not None:
+            span.set_data(data_key, count)
diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py
index 394979bb5e..7f81769407 100644
--- a/tests/integrations/pydantic_ai/test_pydantic_ai.py
+++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py
@@ -76,6 +76,51 @@ async def test_agent_run_async(sentry_init, capture_events, test_agent):
     assert "gen_ai.usage.output_tokens" in chat_span["data"]
 
 
+@pytest.mark.asyncio
+async def test_agent_run_async_usage_data(sentry_init, capture_events, test_agent):
+    """
+    Test that the invoke_agent span of an async run has token usage and model data.
+    """
+    sentry_init(
+        integrations=[PydanticAIIntegration()],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+
+    events = capture_events()
+
+    result = await test_agent.run("Test input")
+
+    assert result is not None
+    assert result.output is not None
+
+    (transaction,) = events
+
+    # Exactly one event was captured; the transaction IS the invoke_agent span
+    assert transaction["transaction"] == "invoke_agent test_agent"
+
+    # Span data is read from the trace context; usage and model keys must exist
+    trace_data = transaction["contexts"]["trace"].get("data", {})
+    assert "gen_ai.usage.input_tokens" in trace_data, (
+        "Missing input_tokens on invoke_agent span"
+    )
+    assert "gen_ai.usage.output_tokens" in trace_data, (
+        "Missing output_tokens on invoke_agent span"
+    )
+    assert "gen_ai.usage.total_tokens" in trace_data, (
+        "Missing total_tokens on invoke_agent span"
+    )
+    assert "gen_ai.response.model" in trace_data, (
+        "Missing response.model on invoke_agent span"
+    )
+
+    # Token counts must be positive and the model name must match exactly
+    assert trace_data["gen_ai.usage.input_tokens"] > 0
+    assert trace_data["gen_ai.usage.output_tokens"] > 0
+    assert trace_data["gen_ai.usage.total_tokens"] > 0
+    assert trace_data["gen_ai.response.model"] == "test"  # Test model name
+
+
 def test_agent_run_sync(sentry_init, capture_events, test_agent):
     """
     Test that the integration creates spans for sync agent runs.