@@ -683,6 +683,299 @@ async def test_span_status_error(sentry_init, capture_events, test_agent):
683683 assert transaction ["contexts" ]["trace" ]["status" ] == "error"
684684
685685
@pytest.mark.asyncio
async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent):
    """
    Test that MCP (Model Context Protocol) tool calls create execute_tool spans.
    This tests the functionality added in the PR for MCP tool execution tracking.
    """

    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch(
        "agents.models.openai_responses.OpenAIResponsesModel.get_response"
    ) as mock_get_response:
        # Fake MCP call: a MagicMock whose class name is spoofed to "McpCall"
        # so the integration's isinstance/name-based detection picks it up.
        fake_mcp_call = MagicMock()
        fake_mcp_call.__class__.__name__ = "McpCall"
        fake_mcp_call.name = "test_mcp_tool"
        fake_mcp_call.arguments = '{"query": "search term"}'
        fake_mcp_call.output = "MCP tool executed successfully"
        fake_mcp_call.error = None

        # First model turn: the model emits the MCP tool call.
        first_turn = ModelResponse(
            output=[fake_mcp_call],
            usage=Usage(
                requests=1,
                input_tokens=10,
                output_tokens=5,
                total_tokens=15,
            ),
            response_id="resp_mcp_123",
        )

        # Second model turn: plain assistant message wrapping up the run.
        second_turn = ModelResponse(
            output=[
                ResponseOutputMessage(
                    id="msg_final",
                    type="message",
                    status="completed",
                    content=[
                        ResponseOutputText(
                            text="Task completed using MCP tool",
                            type="output_text",
                            annotations=[],
                        )
                    ],
                    role="assistant",
                )
            ],
            usage=Usage(
                requests=1,
                input_tokens=15,
                output_tokens=10,
                total_tokens=25,
            ),
            response_id="resp_final_123",
        )

        mock_get_response.side_effect = [first_turn, second_turn]

        sentry_init(
            integrations=[OpenAIAgentsIntegration()],
            traces_sample_rate=1.0,
            send_default_pii=True,
        )

        events = capture_events()

        await agents.Runner.run(
            test_agent,
            "Please use MCP tool",
            run_config=test_run_config,
        )

        (transaction,) = events

        # Locate the execute_tool span the integration should have emitted
        # for the MCP call.
        mcp_tool_span = next(
            (
                candidate
                for candidate in transaction["spans"]
                if candidate.get("description") == "execute_tool test_mcp_tool"
                and candidate.get("data", {}).get("gen_ai.tool.type") == "mcp"
            ),
            None,
        )

        # Verify the MCP tool span was created with the expected attributes.
        assert mcp_tool_span is not None, "MCP execute_tool span was not created"
        assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
        assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
        assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"
        assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}'
        assert (
            mcp_tool_span["data"]["gen_ai.tool.output"]
            == "MCP tool executed successfully"
        )

        # No error status expected, since the call's error field was None.
        assert mcp_tool_span.get("status") != "error"
784+
785+
@pytest.mark.asyncio
async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_agent):
    """
    Test that MCP tool calls with errors are tracked with error status.
    """

    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch(
        "agents.models.openai_responses.OpenAIResponsesModel.get_response"
    ) as mock_get_response:
        # Fake MCP call that reports a failure: class name spoofed to
        # "McpCall", no output, non-None error.
        failing_call = MagicMock()
        failing_call.__class__.__name__ = "McpCall"
        failing_call.name = "failing_mcp_tool"
        failing_call.arguments = '{"query": "test"}'
        failing_call.output = None
        failing_call.error = "MCP tool execution failed"

        # First model turn: the model emits the failing MCP call.
        first_turn = ModelResponse(
            output=[failing_call],
            usage=Usage(
                requests=1,
                input_tokens=10,
                output_tokens=5,
                total_tokens=15,
            ),
            response_id="resp_mcp_error_123",
        )

        # Second model turn: assistant message acknowledging the error.
        second_turn = ModelResponse(
            output=[
                ResponseOutputMessage(
                    id="msg_final",
                    type="message",
                    status="completed",
                    content=[
                        ResponseOutputText(
                            text="The MCP tool encountered an error",
                            type="output_text",
                            annotations=[],
                        )
                    ],
                    role="assistant",
                )
            ],
            usage=Usage(
                requests=1,
                input_tokens=15,
                output_tokens=10,
                total_tokens=25,
            ),
            response_id="resp_final_error_123",
        )

        mock_get_response.side_effect = [first_turn, second_turn]

        sentry_init(
            integrations=[OpenAIAgentsIntegration()],
            traces_sample_rate=1.0,
            send_default_pii=True,
        )

        events = capture_events()

        await agents.Runner.run(
            test_agent,
            "Please use failing MCP tool",
            run_config=test_run_config,
        )

        (transaction,) = events

        # Locate the execute_tool span for the failing MCP call.
        mcp_tool_span = next(
            (
                candidate
                for candidate in transaction["spans"]
                if candidate.get("description") == "execute_tool failing_mcp_tool"
                and candidate.get("data", {}).get("gen_ai.tool.type") == "mcp"
            ),
            None,
        )

        # Verify the span exists and carries the failing call's attributes.
        assert mcp_tool_span is not None, "MCP execute_tool span was not created"
        assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool"
        assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
        assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool"
        assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}'
        assert mcp_tool_span["data"]["gen_ai.tool.output"] is None

        # The non-None error field must have set the span status to error.
        assert mcp_tool_span["status"] == "error"
881+
882+
@pytest.mark.asyncio
async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_agent):
    """
    Test that MCP tool input/output are not included when send_default_pii is False.
    """

    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch(
        "agents.models.openai_responses.OpenAIResponsesModel.get_response"
    ) as mock_get_response:
        # Fake MCP call carrying sensitive-looking input/output so we can
        # verify neither leaks onto the span when PII capture is off.
        fake_mcp_call = MagicMock()
        fake_mcp_call.__class__.__name__ = "McpCall"
        fake_mcp_call.name = "test_mcp_tool"
        fake_mcp_call.arguments = '{"query": "sensitive data"}'
        fake_mcp_call.output = "Result with sensitive info"
        fake_mcp_call.error = None

        # First model turn: the model emits the MCP tool call.
        first_turn = ModelResponse(
            output=[fake_mcp_call],
            usage=Usage(
                requests=1,
                input_tokens=10,
                output_tokens=5,
                total_tokens=15,
            ),
            response_id="resp_mcp_123",
        )

        # Second model turn: closing assistant message.
        second_turn = ModelResponse(
            output=[
                ResponseOutputMessage(
                    id="msg_final",
                    type="message",
                    status="completed",
                    content=[
                        ResponseOutputText(
                            text="Task completed",
                            type="output_text",
                            annotations=[],
                        )
                    ],
                    role="assistant",
                )
            ],
            usage=Usage(
                requests=1,
                input_tokens=15,
                output_tokens=10,
                total_tokens=25,
            ),
            response_id="resp_final_123",
        )

        mock_get_response.side_effect = [first_turn, second_turn]

        sentry_init(
            integrations=[OpenAIAgentsIntegration()],
            traces_sample_rate=1.0,
            send_default_pii=False,  # PII disabled
        )

        events = capture_events()

        await agents.Runner.run(
            test_agent,
            "Please use MCP tool",
            run_config=test_run_config,
        )

        (transaction,) = events

        # Locate the execute_tool span for the MCP call.
        mcp_tool_span = next(
            (
                candidate
                for candidate in transaction["spans"]
                if candidate.get("description") == "execute_tool test_mcp_tool"
                and candidate.get("data", {}).get("gen_ai.tool.type") == "mcp"
            ),
            None,
        )

        # The span is still created, with name/type metadata intact...
        assert mcp_tool_span is not None, "MCP execute_tool span was not created"
        assert mcp_tool_span["description"] == "execute_tool test_mcp_tool"
        assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp"
        assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool"

        # ...but input and output are withheld when send_default_pii is False.
        assert "gen_ai.tool.input" not in mcp_tool_span["data"]
        assert "gen_ai.tool.output" not in mcp_tool_span["data"]
977+
978+
686979@pytest .mark .asyncio
687980async def test_multiple_agents_asyncio (
688981 sentry_init , capture_events , test_agent , mock_model_response
0 commit comments