Commit 3012e3a

test(integrations): add tests for MCP Tool call spans
1 parent b219cd8 commit 3012e3a

File tree

1 file changed: +293 -0 lines changed

tests/integrations/openai_agents/test_openai_agents.py

Lines changed: 293 additions & 0 deletions
@@ -683,6 +683,299 @@ async def test_span_status_error(sentry_init, capture_events, test_agent):
    assert transaction["contexts"]["trace"]["status"] == "error"


@pytest.mark.asyncio
async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent):
    """
    Test that MCP (Model Context Protocol) tool calls create execute_tool spans.
    This tests the functionality added in the PR for MCP tool execution tracking.
    """

    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
        with patch(
            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
        ) as mock_get_response:
            # Create a mock McpCall object
            mcp_call = MagicMock()
            mcp_call.__class__.__name__ = "McpCall"
            mcp_call.name = "test_mcp_tool"
            mcp_call.arguments = '{"query": "search term"}'
            mcp_call.output = "MCP tool executed successfully"
            mcp_call.error = None

            # Create a ModelResponse with an McpCall in the output
            mcp_response = ModelResponse(
                output=[mcp_call],
                usage=Usage(
                    requests=1,
                    input_tokens=10,
                    output_tokens=5,
                    total_tokens=15,
                ),
                response_id="resp_mcp_123",
            )

            # Final response after MCP tool execution
            final_response = ModelResponse(
                output=[
                    ResponseOutputMessage(
                        id="msg_final",
                        type="message",
                        status="completed",
                        content=[
                            ResponseOutputText(
                                text="Task completed using MCP tool",
                                type="output_text",
                                annotations=[],
                            )
                        ],
                        role="assistant",
                    )
                ],
                usage=Usage(
                    requests=1,
                    input_tokens=15,
                    output_tokens=10,
                    total_tokens=25,
                ),
                response_id="resp_final_123",
            )

            mock_get_response.side_effect = [mcp_response, final_response]

            sentry_init(
                integrations=[OpenAIAgentsIntegration()],
                traces_sample_rate=1.0,
                send_default_pii=True,
            )

            events = capture_events()

            await agents.Runner.run(
                test_agent,
                "Please use MCP tool",
                run_config=test_run_config,
            )

            (transaction,) = events
            spans = transaction["spans"]

            # Find the MCP execute_tool span
            mcp_tool_span = None
            for span in spans:
                if (
                    span.get("description") == "execute_tool test_mcp_tool"
                    and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
                ):
                    mcp_tool_span = span
                    break

            # Verify the MCP tool span was created
            assert mcp_tool_span is not None, "MCP execute_tool span was not created"
            assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
            assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
            assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"
            assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}'
            assert (
                mcp_tool_span["data"]["gen_ai.tool.output"]
                == "MCP tool executed successfully"
            )

            # Verify no error status since error was None
            assert mcp_tool_span.get("status") != "error"

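For orientation, the assertions in this happy-path test pin down a span that would serialize roughly as follows. The dict below is a sketch inferred from the assertions above, not output captured from the SDK:

    # Sketch of the expected execute_tool span, inferred from the test's
    # assertions (illustrative only, not actual SDK output):
    expected_mcp_span = {
        "description": "execute_tool test_mcp_tool",
        "data": {
            "gen_ai.tool.type": "mcp",
            "gen_ai.tool.name": "test_mcp_tool",
            "gen_ai.tool.input": '{"query": "search term"}',
            "gen_ai.tool.output": "MCP tool executed successfully",
        },
    }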
@pytest.mark.asyncio
async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_agent):
    """
    Test that MCP tool calls with errors are tracked with error status.
    """

    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
        with patch(
            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
        ) as mock_get_response:
            # Create a mock McpCall object with an error
            mcp_call_with_error = MagicMock()
            mcp_call_with_error.__class__.__name__ = "McpCall"
            mcp_call_with_error.name = "failing_mcp_tool"
            mcp_call_with_error.arguments = '{"query": "test"}'
            mcp_call_with_error.output = None
            mcp_call_with_error.error = "MCP tool execution failed"

            # Create a ModelResponse with a failing McpCall
            mcp_response = ModelResponse(
                output=[mcp_call_with_error],
                usage=Usage(
                    requests=1,
                    input_tokens=10,
                    output_tokens=5,
                    total_tokens=15,
                ),
                response_id="resp_mcp_error_123",
            )

            # Final response after error
            final_response = ModelResponse(
                output=[
                    ResponseOutputMessage(
                        id="msg_final",
                        type="message",
                        status="completed",
                        content=[
                            ResponseOutputText(
                                text="The MCP tool encountered an error",
                                type="output_text",
                                annotations=[],
                            )
                        ],
                        role="assistant",
                    )
                ],
                usage=Usage(
                    requests=1,
                    input_tokens=15,
                    output_tokens=10,
                    total_tokens=25,
                ),
                response_id="resp_final_error_123",
            )

            mock_get_response.side_effect = [mcp_response, final_response]

            sentry_init(
                integrations=[OpenAIAgentsIntegration()],
                traces_sample_rate=1.0,
                send_default_pii=True,
            )

            events = capture_events()

            await agents.Runner.run(
                test_agent,
                "Please use failing MCP tool",
                run_config=test_run_config,
            )

            (transaction,) = events
            spans = transaction["spans"]

            # Find the MCP execute_tool span with error
            mcp_tool_span = None
            for span in spans:
                if (
                    span.get("description") == "execute_tool failing_mcp_tool"
                    and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
                ):
                    mcp_tool_span = span
                    break

            # Verify the MCP tool span was created with error status
            assert mcp_tool_span is not None, "MCP execute_tool span was not created"
            assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool"
            assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
            assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool"
            assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}'
            assert mcp_tool_span["data"]["gen_ai.tool.output"] is None

            # Verify error status was set
            assert mcp_tool_span["status"] == "error"

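The error path differs from the happy path in two observable ways: `gen_ai.tool.output` is `None` and the span status is `"error"`. Again as a sketch inferred from the assertions rather than captured output:

    # Sketch of the failing-tool span the test above asserts (illustrative):
    expected_failing_span = {
        "description": "execute_tool failing_mcp_tool",
        "status": "error",
        "data": {
            "gen_ai.tool.type": "mcp",
            "gen_ai.tool.name": "failing_mcp_tool",
            "gen_ai.tool.input": '{"query": "test"}',
            "gen_ai.tool.output": None,
        },
    }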
@pytest.mark.asyncio
async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_agent):
    """
    Test that MCP tool input/output are not included when send_default_pii is False.
    """

    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
        with patch(
            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
        ) as mock_get_response:
            # Create a mock McpCall object
            mcp_call = MagicMock()
            mcp_call.__class__.__name__ = "McpCall"
            mcp_call.name = "test_mcp_tool"
            mcp_call.arguments = '{"query": "sensitive data"}'
            mcp_call.output = "Result with sensitive info"
            mcp_call.error = None

            # Create a ModelResponse with an McpCall
            mcp_response = ModelResponse(
                output=[mcp_call],
                usage=Usage(
                    requests=1,
                    input_tokens=10,
                    output_tokens=5,
                    total_tokens=15,
                ),
                response_id="resp_mcp_123",
            )

            # Final response
            final_response = ModelResponse(
                output=[
                    ResponseOutputMessage(
                        id="msg_final",
                        type="message",
                        status="completed",
                        content=[
                            ResponseOutputText(
                                text="Task completed",
                                type="output_text",
                                annotations=[],
                            )
                        ],
                        role="assistant",
                    )
                ],
                usage=Usage(
                    requests=1,
                    input_tokens=15,
                    output_tokens=10,
                    total_tokens=25,
                ),
                response_id="resp_final_123",
            )

            mock_get_response.side_effect = [mcp_response, final_response]

            sentry_init(
                integrations=[OpenAIAgentsIntegration()],
                traces_sample_rate=1.0,
                send_default_pii=False,  # PII disabled
            )

            events = capture_events()

            await agents.Runner.run(
                test_agent,
                "Please use MCP tool",
                run_config=test_run_config,
            )

            (transaction,) = events
            spans = transaction["spans"]

            # Find the MCP execute_tool span
            mcp_tool_span = None
            for span in spans:
                if (
                    span.get("description") == "execute_tool test_mcp_tool"
                    and span.get("data", {}).get("gen_ai.tool.type") == "mcp"
                ):
                    mcp_tool_span = span
                    break

            # Verify the MCP tool span was created but without input/output
            assert mcp_tool_span is not None, "MCP execute_tool span was not created"
            assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
            assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
            assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"

            # Verify input and output are not included when send_default_pii is False
            assert "gen_ai.tool.input" not in mcp_tool_span["data"]
            assert "gen_ai.tool.output" not in mcp_tool_span["data"]

@pytest.mark.asyncio
async def test_multiple_agents_asyncio(
    sentry_init, capture_events, test_agent, mock_model_response