From 25f64773278bd2db0707fb3448db47a4e19454e5 Mon Sep 17 00:00:00 2001
From: Simon Hellmayr
Date: Fri, 26 Sep 2025 14:20:47 +0200
Subject: [PATCH 1/2] add executed tools to invoke_agent spans in openai agents

---
 .../openai_agents/spans/invoke_agent.py       |   5 +-
 .../integrations/openai_agents/utils.py       |  64 +++-
 .../openai_agents/test_openai_agents.py       | 360 +++++++++++++++---
 3 files changed, 367 insertions(+), 62 deletions(-)

diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
index cf06120625..24d9a78629 100644
--- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
+++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py
@@ -5,7 +5,7 @@
 from sentry_sdk.utils import safe_serialize
 
 from ..consts import SPAN_ORIGIN
-from ..utils import _set_agent_data
+from ..utils import _set_agent_data, _set_output_data
 
 from typing import TYPE_CHECKING
 
@@ -75,4 +75,7 @@ def update_invoke_agent_span(context, agent, output):
             span, SPANDATA.GEN_AI_RESPONSE_TEXT, output, unpack=False
         )
 
+    # Capture tool calls from the output if available
+    _set_output_data(span, output)
+
     span.__exit__(None, None, None)
diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index a0487e0e3a..b2f1d117f5 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -3,7 +3,7 @@
 from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations import DidNotEnable
 from sentry_sdk.scope import should_send_default_pii
-from sentry_sdk.utils import event_from_exception, safe_serialize
+from sentry_sdk.utils import event_from_exception
 
 from typing import TYPE_CHECKING
 
@@ -28,6 +28,35 @@ def _capture_exception(exc):
     sentry_sdk.capture_event(event, hint=hint)
 
 
+def _simplify_openai_agent_tools(tools):
+    # type: (Any) -> list[dict[str, Any]] | None
+    """Parse and simplify OpenAI agent tools into a cleaner format."""
+    if not tools:
+        return None
+
+    if not isinstance(tools, (list, tuple)):
+        return None
+
+    simplified_tools = []
+    for tool in tools:
+        try:
+            simplified_tool = {
+                "name": getattr(tool, "name", None),
+                "description": getattr(tool, "description", None),
+            }
+
+            tool_type = getattr(tool, "__class__", None)
+            if tool_type:
+                simplified_tool["type"] = tool_type.__name__.lower().replace("tool", "")
+
+            if simplified_tool["name"]:
+                simplified_tools.append(simplified_tool)
+        except Exception:
+            continue
+
+    return simplified_tools if simplified_tools else None
+
+
 def _set_agent_data(span, agent):
     # type: (sentry_sdk.tracing.Span, agents.Agent) -> None
     span.set_data(
@@ -66,10 +95,10 @@ def _set_agent_data(span, agent):
         )
 
     if len(agent.tools) > 0:
-        span.set_data(
-            SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS,
-            safe_serialize([vars(tool) for tool in agent.tools]),
-        )
+        simplified_tools = _simplify_openai_agent_tools(agent.tools)
+        if simplified_tools:
+            # Use span.set_data directly to preserve list type instead of JSON string
+            span.set_data(SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, simplified_tools)
 
 
 def _set_usage_data(span, usage):
@@ -128,6 +157,14 @@ def _set_output_data(span, result):
     if not should_send_default_pii():
         return
 
+    # Handle case where result is a string directly
+    if isinstance(result, str):
+        return
+
+    # Handle case where result doesn't have an output attribute
+    if not hasattr(result, "output"):
+        return
+
     output_messages = {
         "response": [],
         "tool": [],
@@ -135,19 +172,26 @@ def _set_output_data(span, result):
 
     for output in result.output:
         if output.type == "function_call":
-            output_messages["tool"].append(output.dict())
+            # Use model_dump() if available (Pydantic v2), fall back to dict() for compatibility
+            if hasattr(output, "model_dump"):
+                output_messages["tool"].append(output.model_dump())
+            else:
+                output_messages["tool"].append(output.dict())
         elif output.type == "message":
             for output_message in output.content:
                 try:
                     output_messages["response"].append(output_message.text)
                 except AttributeError:
                     # Unknown output message type, just return the json
-                    output_messages["response"].append(output_message.dict())
+                    # Use model_dump() if available (Pydantic v2), fall back to dict() for compatibility
+                    if hasattr(output_message, "model_dump"):
+                        output_messages["response"].append(output_message.model_dump())
+                    else:
+                        output_messages["response"].append(output_message.dict())
 
     if len(output_messages["tool"]) > 0:
-        span.set_data(
-            SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(output_messages["tool"])
-        )
+        # Use span.set_data directly to preserve list type instead of JSON string
+        span.set_data(SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, output_messages["tool"])
 
     if len(output_messages["response"]) > 0:
         set_data_normalized(
diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 047b919213..e1f6e17ad9 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -437,24 +437,10 @@ def simple_test_tool(message: str) -> str:
         ai_client_span2,
     ) = spans
 
-    available_tools = safe_serialize(
-        [
-            {
-                "name": "simple_test_tool",
-                "description": "A simple tool",
-                "params_json_schema": {
-                    "properties": {"message": {"title": "Message", "type": "string"}},
-                    "required": ["message"],
-                    "title": "simple_test_tool_args",
-                    "type": "object",
-                    "additionalProperties": False,
-                },
-                "on_invoke_tool": "<function function_tool.<locals>._create_function_tool.<locals>._on_invoke_tool>",
-                "strict_json_schema": True,
-                "is_enabled": True,
-            }
-        ]
-    )
+    # Expect simplified tool format, not raw tool data
+    available_tools = [
+        {"name": "simple_test_tool", "description": "A simple tool", "type": "function"}
+    ]
 
     assert transaction["transaction"] == "test_agent workflow"
     assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
@@ -500,35 +486,22 @@ def simple_test_tool(message: str) -> str:
     assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5
     assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
     assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15
-    assert re.sub(
-        r"SerializationIterator\(.*\)",
-        "NOT_CHECKED",
-        ai_client_span1["data"]["gen_ai.response.tool_calls"],
-    ) == safe_serialize(
-        [
-            {
-                "arguments": '{"message": "hello"}',
-                "call_id": "call_123",
-                "name": "simple_test_tool",
-                "type": "function_call",
-                "id": "call_123",
-                "status": None,
-                "function": "NOT_CHECKED",
-            }
-        ]
-    )
+    # Tool calls are now stored as a list, not a JSON string
+    tool_calls = ai_client_span1["data"]["gen_ai.response.tool_calls"]
+    assert len(tool_calls) == 1
+    tool_call = tool_calls[0]
+    assert tool_call["arguments"] == '{"message": "hello"}'
+    assert tool_call["call_id"] == "call_123"
+    assert tool_call["name"] == "simple_test_tool"
+    assert tool_call["type"] == "function_call"
+    assert tool_call["id"] == "call_123"
+    assert tool_call["status"] is None
+    # Don't check the function field as it contains mock objects
 
     assert tool_span["description"] == "execute_tool simple_test_tool"
     assert tool_span["data"]["gen_ai.agent.name"] == "test_agent"
     assert tool_span["data"]["gen_ai.operation.name"] == "execute_tool"
-    assert (
-        re.sub(
-            "<.*>(,)",
-            r"'NOT_CHECKED'\1",
-            agent_span["data"]["gen_ai.request.available_tools"],
-        )
-        == available_tools
-    )
+    assert agent_span["data"]["gen_ai.request.available_tools"] == available_tools
     assert tool_span["data"]["gen_ai.request.max_tokens"] == 100
     assert tool_span["data"]["gen_ai.request.model"] == "gpt-4"
     assert tool_span["data"]["gen_ai.request.temperature"] == 0.7
@@ -543,14 +516,10 @@ def simple_test_tool(message: str) -> str:
     assert ai_client_span2["description"] == "chat gpt-4"
     assert ai_client_span2["data"]["gen_ai.agent.name"] == "test_agent"
    assert ai_client_span2["data"]["gen_ai.operation.name"] == "chat"
-    assert (
-        re.sub(
-            "<.*>(,)",
-            r"'NOT_CHECKED'\1",
-            agent_span["data"]["gen_ai.request.available_tools"],
-        )
-        == available_tools
-    )
+    # available_tools is now a list, not a JSON string, so we can compare directly
+    assert agent_span["data"]["gen_ai.request.available_tools"] == [
+        {"name": "simple_test_tool", "description": "A simple tool", "type": "function"}
+    ]
     assert ai_client_span2["data"]["gen_ai.request.max_tokens"] == 100
     assert re.sub(
         r"SerializationIterator\(.*\)",
@@ -697,3 +666,292 @@ async def run():
     assert txn2["transaction"] == "test_agent workflow"
     assert txn3["type"] == "transaction"
     assert txn3["transaction"] == "test_agent workflow"
+
+
+@pytest.mark.asyncio
+async def test_available_tools_simplified_format(
+    sentry_init, capture_events, test_agent, mock_model_response
+):
+    """
+    Test that available tools are recorded in simplified format on invoke_agent spans.
+    """
+
+    @agents.function_tool
+    def search_tool(query: str) -> str:
+        """Search for information using the given query."""
+        return f"Search results for: {query}"
+
+    @agents.function_tool
+    def calculator_tool(expression: str) -> str:
+        """Calculate mathematical expressions."""
+        return f"Result: {expression}"
+
+    # Create agent with multiple tools
+    agent_with_tools = test_agent.clone(tools=[search_tool, calculator_tool])
+
+    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
+        with patch(
+            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
+        ) as mock_get_response:
+            mock_get_response.return_value = mock_model_response
+
+            sentry_init(
+                integrations=[OpenAIAgentsIntegration()],
+                traces_sample_rate=1.0,
+                send_default_pii=True,
+            )
+
+            events = capture_events()
+
+            result = await agents.Runner.run(
+                agent_with_tools, "Test input", run_config=test_run_config
+            )
+
+            assert result is not None
+
+    (transaction,) = events
+    spans = transaction["spans"]
+    invoke_agent_span = spans[0]
+
+    # Verify simplified tools format
+    available_tools = invoke_agent_span["data"]["gen_ai.request.available_tools"]
+    assert isinstance(available_tools, list)
+    assert len(available_tools) == 2
+
+    # Check first tool
+    search_tool_data = next(
+        (t for t in available_tools if t["name"] == "search_tool"), None
+    )
+    assert search_tool_data is not None
+    assert search_tool_data["name"] == "search_tool"
+    assert (
+        search_tool_data["description"]
+        == "Search for information using the given query."
+    )
+    assert search_tool_data["type"] == "function"
+
+    # Check second tool
+    calculator_tool_data = next(
+        (t for t in available_tools if t["name"] == "calculator_tool"), None
+    )
+    assert calculator_tool_data is not None
+    assert calculator_tool_data["name"] == "calculator_tool"
+    assert calculator_tool_data["description"] == "Calculate mathematical expressions."
+    assert calculator_tool_data["type"] == "function"
+
+    # Verify no extra fields are included (simplified format)
+    for tool_data in available_tools:
+        expected_keys = {"name", "description", "type"}
+        assert set(tool_data.keys()) == expected_keys
+
+
+@pytest.mark.asyncio
+async def test_tool_calls_captured_in_invoke_agent_span(
+    sentry_init, capture_events, test_agent
+):
+    """
+    Test that available tools and executed tool calls are captured on the agent's spans.
+    """
+
+    @agents.function_tool
+    def test_function(input_text: str) -> str:
+        """A test function."""
+        return f"Processed: {input_text}"
+
+    agent_with_tool = test_agent.clone(tools=[test_function])
+
+    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
+        with patch(
+            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
+        ) as mock_get_response:
+
+            # Mock response that includes a tool call
+            tool_call_response = ModelResponse(
+                output=[
+                    ResponseFunctionToolCall(
+                        id="call_test_123",
+                        call_id="call_test_123",
+                        name="test_function",
+                        type="function_call",
+                        arguments='{"input_text": "hello world"}',
+                        function=MagicMock(
+                            name="test_function",
+                            arguments='{"input_text": "hello world"}',
+                        ),
+                    )
+                ],
+                usage=Usage(
+                    requests=1, input_tokens=10, output_tokens=5, total_tokens=15
+                ),
+                response_id="resp_tool_123",
+            )
+
+            # Final response after tool execution
+            final_response = ModelResponse(
+                output=[
+                    ResponseOutputMessage(
+                        id="msg_final",
+                        type="message",
+                        status="completed",
+                        content=[
+                            ResponseOutputText(
+                                text="Tool execution completed successfully",
+                                type="output_text",
+                                annotations=[],
+                            )
+                        ],
+                        role="assistant",
+                    )
+                ],
+                usage=Usage(
+                    requests=1, input_tokens=15, output_tokens=10, total_tokens=25
+                ),
+                response_id="resp_final_123",
+            )
+
+            mock_get_response.side_effect = [tool_call_response, final_response]
+
+            sentry_init(
+                integrations=[OpenAIAgentsIntegration()],
+                traces_sample_rate=1.0,
+                send_default_pii=True,
+            )
+
+            events = capture_events()
+
+            result = await agents.Runner.run(
+                agent_with_tool,
+                "Please use the test function",
+                run_config=test_run_config,
+            )
+
+            assert result is not None
+
+    (transaction,) = events
+    spans = transaction["spans"]
+    invoke_agent_span = spans[0]
+
+    # Verify that available tools are recorded
+    assert "gen_ai.request.available_tools" in invoke_agent_span["data"]
+    available_tools = invoke_agent_span["data"]["gen_ai.request.available_tools"]
+    assert len(available_tools) == 1
+    assert available_tools[0]["name"] == "test_function"
+    assert available_tools[0]["type"] == "function"
+
+    # Find the AI client span that contains the tool call (first response)
+    # The tool calls should be captured in the AI client span, not the invoke agent span
+    tool_call_span = None
+    for span in spans:
+        if span.get("description", "").startswith(
+            "chat"
+        ) and "gen_ai.response.tool_calls" in span.get("data", {}):
+            tool_call_span = span
+            break
+
+    assert tool_call_span is not None, "Tool call span not found"
+    tool_calls = tool_call_span["data"]["gen_ai.response.tool_calls"]
+    assert len(tool_calls) == 1
+
+    tool_call = tool_calls[0]
+    assert tool_call["name"] == "test_function"
+    assert tool_call["type"] == "function_call"
+    assert tool_call["call_id"] == "call_test_123"
+    assert tool_call["arguments"] == '{"input_text": "hello world"}'
+
+
+@pytest.mark.asyncio
+async def test_agent_without_tools(
+    sentry_init, capture_events, test_agent, mock_model_response
+):
+    """
+    Test that agents without tools don't cause issues and don't include tools data.
+    """
+
+    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
+        with patch(
+            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
+        ) as mock_get_response:
+            mock_get_response.return_value = mock_model_response
+
+            sentry_init(
+                integrations=[OpenAIAgentsIntegration()],
+                traces_sample_rate=1.0,
+                send_default_pii=True,
+            )
+
+            events = capture_events()
+
+            result = await agents.Runner.run(
+                test_agent, "Test input", run_config=test_run_config
+            )
+
+            assert result is not None
+
+    (transaction,) = events
+    spans = transaction["spans"]
+    invoke_agent_span = spans[0]
+
+    # Agent has no tools, so available_tools should not be present
+    assert "gen_ai.request.available_tools" not in invoke_agent_span["data"]
+
+    # And no tool calls should be present since no tools were used
+    assert "gen_ai.response.tool_calls" not in invoke_agent_span["data"]
+
+
+def test_simplify_openai_agent_tools_edge_cases():
+    """
+    Test edge cases for the _simplify_openai_agent_tools function.
+    """
+    from sentry_sdk.integrations.openai_agents.utils import _simplify_openai_agent_tools
+
+    # Test with None
+    assert _simplify_openai_agent_tools(None) is None
+
+    # Test with empty list
+    assert _simplify_openai_agent_tools([]) is None
+
+    # Test with non-list/tuple
+    assert _simplify_openai_agent_tools("invalid") is None
+    assert _simplify_openai_agent_tools(42) is None
+
+    # Test with mock tool objects
+    class FunctionTool:
+        def __init__(self, name, description):
+            self.name = name
+            self.description = description
+
+    class CustomTool:
+        def __init__(self, name, description):
+            self.name = name
+            self.description = description
+
+    # Test with valid tools
+    mock_tools = [
+        FunctionTool("tool1", "Description 1"),
+        CustomTool("tool2", "Description 2"),
+    ]
+
+    result = _simplify_openai_agent_tools(mock_tools)
+    assert result is not None
+    assert len(result) == 2
+    assert result[0]["name"] == "tool1"
+    assert result[0]["description"] == "Description 1"
+    assert result[0]["type"] == "function"
+    assert result[1]["name"] == "tool2"
+    assert result[1]["description"] == "Description 2"
+    assert result[1]["type"] == "custom"
+
+    # Test with tool missing name (should be filtered out)
+    class MockToolNoName:
+        def __init__(self):
+            self.description = "Has description but no name"
+
+    mock_tools_with_invalid = [
+        FunctionTool("valid_tool", "Valid description"),
+        MockToolNoName(),
+    ]
+
+    result = _simplify_openai_agent_tools(mock_tools_with_invalid)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0]["name"] == "valid_tool"

From dc6fc84dec764659a24c7dcde373804457417b80 Mon Sep 17 00:00:00 2001
From: Simon Hellmayr
Date: Tue, 30 Sep 2025 10:46:25 +0200
Subject: [PATCH 2/2] cleanup

---
 sentry_sdk/integrations/openai_agents/utils.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index b2f1d117f5..3427dbe8aa 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -97,7 +97,6 @@ def _set_agent_data(span, agent):
     if len(agent.tools) > 0:
         simplified_tools = _simplify_openai_agent_tools(agent.tools)
         if simplified_tools:
-            # Use span.set_data directly to preserve list type instead of JSON string
             span.set_data(SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, simplified_tools)
 
 
@@ -157,14 +156,6 @@ def _set_output_data(span, result):
     if not should_send_default_pii():
         return
 
-    # Handle case where result is a string directly
-    if isinstance(result, str):
-        return
-
-    # Handle case where result doesn't have an output attribute
-    if not hasattr(result, "output"):
-        return
-
     output_messages = {
         "response": [],
         "tool": [],
@@ -172,7 +163,6 @@ def _set_output_data(span, result):
 
     for output in result.output:
         if output.type == "function_call":
-            # Use model_dump() if available (Pydantic v2), fall back to dict() for compatibility
            if hasattr(output, "model_dump"):
                 output_messages["tool"].append(output.model_dump())
             else:
@@ -182,15 +172,12 @@ def _set_output_data(span, result):
             for output_message in output.content:
                 try:
                     output_messages["response"].append(output_message.text)
                 except AttributeError:
-                    # Unknown output message type, just return the json
-                    # Use model_dump() if available (Pydantic v2), fall back to dict() for compatibility
                     if hasattr(output_message, "model_dump"):
                         output_messages["response"].append(output_message.model_dump())
                     else:
                         output_messages["response"].append(output_message.dict())
 
     if len(output_messages["tool"]) > 0:
-        # Use span.set_data directly to preserve list type instead of JSON string
         span.set_data(SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, output_messages["tool"])
 
     if len(output_messages["response"]) > 0:
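
For reference, a minimal sketch of the simplified tool shape this series records on
spans (it assumes the module path from the diff; the SearchTool stand-in and the
printed result are illustrative, not part of the patch):

    # Illustrative stand-in: mimics an agents tool just enough for the helper,
    # which reads .name / .description and derives "type" from the class name,
    # lowercased with "tool" stripped (so SearchTool yields type "search").
    from sentry_sdk.integrations.openai_agents.utils import (
        _simplify_openai_agent_tools,
    )

    class SearchTool:
        name = "search_tool"
        description = "Search for information."

    print(_simplify_openai_agent_tools([SearchTool()]))
    # [{'name': 'search_tool', 'description': 'Search for information.', 'type': 'search'}]

After this series, gen_ai.request.available_tools and gen_ai.response.tool_calls carry
plain lists of dicts directly instead of safe_serialize'd JSON strings.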