Commit 3012e3a

test(integrations): add tests for MCP Tool call spans
1 parent b219cd8 commit 3012e3a

File tree

1 file changed: +293 -0 lines changed

tests/integrations/openai_agents/test_openai_agents.py

Lines changed: 293 additions & 0 deletions
@@ -683,6 +683,299 @@ async def test_span_status_error(sentry_init, capture_events, test_agent):
    assert transaction["contexts"]["trace"]["status"] == "error"


@pytest.mark.asyncio
async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent):
    """
    Test that MCP (Model Context Protocol) tool calls create execute_tool spans.
    This tests the functionality added in the PR for MCP tool execution tracking.
    """

    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
        with patch(
            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
        ) as mock_get_response:
            # Create a mock McpCall object
            mcp_call = MagicMock()
            mcp_call.__class__.__name__ = "McpCall"
            mcp_call.name = "test_mcp_tool"
            mcp_call.arguments = '{"query": "search term"}'
            mcp_call.output = "MCP tool executed successfully"
            mcp_call.error = None

            # Create a ModelResponse with an McpCall in the output
            mcp_response = ModelResponse(
                output=[mcp_call],
                usage=Usage(
                    requests=1,
                    input_tokens=10,
                    output_tokens=5,
                    total_tokens=15,
                ),
                response_id="resp_mcp_123",
            )

            # Final response after MCP tool execution
            final_response = ModelResponse(
                output=[
                    ResponseOutputMessage(
                        id="msg_final",
                        type="message",
                        status="completed",
                        content=[
                            ResponseOutputText(
                                text="Task completed using MCP tool",
                                type="output_text",
                                annotations=[],
                            )
                        ],
                        role="assistant",
                    )
                ],
                usage=Usage(
                    requests=1,
                    input_tokens=15,
                    output_tokens=10,
                    total_tokens=25,
                ),
                response_id="resp_final_123",
            )

            mock_get_response.side_effect = [mcp_response, final_response]

            sentry_init(
                integrations=[OpenAIAgentsIntegration()],
                traces_sample_rate=1.0,
                send_default_pii=True,
            )

            events = capture_events()

            await agents.Runner.run(
                test_agent,
                "Please use MCP tool",
                run_config=test_run_config,
            )

            (transaction,) = events
            spans = transaction["spans"]

            # Find the MCP execute_tool span
            mcp_tool_span = None
            for span in spans:
                if (
                    span.get("description") == "execute_tool test_mcp_tool"
                    and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
                ):
                    mcp_tool_span = span
                    break

            # Verify the MCP tool span was created
            assert mcp_tool_span is not None, "MCP execute_tool span was not created"
            assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
            assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
            assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"
            assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}'
            assert (
                mcp_tool_span["data"]["gen_ai.tool.output"]
                == "MCP tool executed successfully"
            )

            # Verify no error status since error was None
            assert mcp_tool_span.get("status") != "error"

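For orientation, the assertions in this happy-path test pin down a span that would serialize roughly as follows. The dict below is a sketch inferred from the assertions above, not output captured from the SDK:

    # Sketch of the expected execute_tool span, inferred from the test's
    # assertions (illustrative only, not actual SDK output):
    expected_mcp_span = {
        "description": "execute_tool test_mcp_tool",
        "data": {
            "gen_ai.tool.type": "mcp",
            "gen_ai.tool.name": "test_mcp_tool",
            "gen_ai.tool.input": '{"query": "search term"}',
            "gen_ai.tool.output": "MCP tool executed successfully",
        },
    }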
@pytest.mark.asyncio
async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_agent):
    """
    Test that MCP tool calls with errors are tracked with error status.
    """

    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
        with patch(
            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
        ) as mock_get_response:
            # Create a mock McpCall object with an error
            mcp_call_with_error = MagicMock()
            mcp_call_with_error.__class__.__name__ = "McpCall"
            mcp_call_with_error.name = "failing_mcp_tool"
            mcp_call_with_error.arguments = '{"query": "test"}'
            mcp_call_with_error.output = None
            mcp_call_with_error.error = "MCP tool execution failed"

            # Create a ModelResponse with a failing McpCall
            mcp_response = ModelResponse(
                output=[mcp_call_with_error],
                usage=Usage(
                    requests=1,
                    input_tokens=10,
                    output_tokens=5,
                    total_tokens=15,
                ),
                response_id="resp_mcp_error_123",
            )

            # Final response after error
            final_response = ModelResponse(
                output=[
                    ResponseOutputMessage(
                        id="msg_final",
                        type="message",
                        status="completed",
                        content=[
                            ResponseOutputText(
                                text="The MCP tool encountered an error",
                                type="output_text",
                                annotations=[],
                            )
                        ],
                        role="assistant",
                    )
                ],
                usage=Usage(
                    requests=1,
                    input_tokens=15,
                    output_tokens=10,
                    total_tokens=25,
                ),
                response_id="resp_final_error_123",
            )

            mock_get_response.side_effect = [mcp_response, final_response]

            sentry_init(
                integrations=[OpenAIAgentsIntegration()],
                traces_sample_rate=1.0,
                send_default_pii=True,
            )

            events = capture_events()

            await agents.Runner.run(
                test_agent,
                "Please use failing MCP tool",
                run_config=test_run_config,
            )

            (transaction,) = events
            spans = transaction["spans"]

            # Find the MCP execute_tool span with error
            mcp_tool_span = None
            for span in spans:
                if (
                    span.get("description") == "execute_tool failing_mcp_tool"
                    and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
                ):
                    mcp_tool_span = span
                    break

            # Verify the MCP tool span was created with error status
            assert mcp_tool_span is not None, "MCP execute_tool span was not created"
            assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool"
            assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
            assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool"
            assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}'
            assert mcp_tool_span["data"]["gen_ai.tool.output"] is None

            # Verify error status was set
            assert mcp_tool_span["status"] == "error"

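The error path differs from the happy path in two observable ways: `gen_ai.tool.output` is `None` and the span status is `"error"`. Again as a sketch inferred from the assertions rather than captured output:

    # Sketch of the failing-tool span the test above asserts (illustrative):
    expected_failing_span = {
        "description": "execute_tool failing_mcp_tool",
        "status": "error",
        "data": {
            "gen_ai.tool.type": "mcp",
            "gen_ai.tool.name": "failing_mcp_tool",
            "gen_ai.tool.input": '{"query": "test"}',
            "gen_ai.tool.output": None,
        },
    }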
@pytest.mark.asyncio
async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_agent):
    """
    Test that MCP tool input/output are not included when send_default_pii is False.
    """

    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
        with patch(
            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
        ) as mock_get_response:
            # Create a mock McpCall object
            mcp_call = MagicMock()
            mcp_call.__class__.__name__ = "McpCall"
            mcp_call.name = "test_mcp_tool"
            mcp_call.arguments = '{"query": "sensitive data"}'
            mcp_call.output = "Result with sensitive info"
            mcp_call.error = None

            # Create a ModelResponse with an McpCall
            mcp_response = ModelResponse(
                output=[mcp_call],
                usage=Usage(
                    requests=1,
                    input_tokens=10,
                    output_tokens=5,
                    total_tokens=15,
                ),
                response_id="resp_mcp_123",
            )

            # Final response
            final_response = ModelResponse(
                output=[
                    ResponseOutputMessage(
                        id="msg_final",
                        type="message",
                        status="completed",
                        content=[
                            ResponseOutputText(
                                text="Task completed",
                                type="output_text",
                                annotations=[],
                            )
                        ],
                        role="assistant",
                    )
                ],
                usage=Usage(
                    requests=1,
                    input_tokens=15,
                    output_tokens=10,
                    total_tokens=25,
                ),
                response_id="resp_final_123",
            )

            mock_get_response.side_effect = [mcp_response, final_response]

            sentry_init(
                integrations=[OpenAIAgentsIntegration()],
                traces_sample_rate=1.0,
                send_default_pii=False,  # PII disabled
            )

            events = capture_events()

            await agents.Runner.run(
                test_agent,
                "Please use MCP tool",
                run_config=test_run_config,
            )

            (transaction,) = events
            spans = transaction["spans"]

            # Find the MCP execute_tool span
            mcp_tool_span = None
            for span in spans:
                if (
                    span.get("description") == "execute_tool test_mcp_tool"
                    and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
                ):
                    mcp_tool_span = span
                    break

            # Verify the MCP tool span was created but without input/output
            assert mcp_tool_span is not None, "MCP execute_tool span was not created"
            assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
            assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
            assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"

            # Verify input and output are not included when send_default_pii is False
            assert "gen_ai.tool.input" not in mcp_tool_span["data"]
            assert "gen_ai.tool.output" not in mcp_tool_span["data"]

@pytest.mark.asyncio
async def test_multiple_agents_asyncio(
    sentry_init, capture_events, test_agent, mock_model_response