Add test for DeepResearch (#42750)

nick863 · web-flow · commit 6df5c84a46e4 · 2025-08-28T10:25:06.000-07:00
diff --git a/sdk/ai/azure-ai-agents/assets.json b/sdk/ai/azure-ai-agents/assets.json
@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/ai/azure-ai-agents",
-  "Tag": "python/ai/azure-ai-agents_28d79451bb"
+  "Tag": "python/ai/azure-ai-agents_65510bf995"
 }
diff --git a/sdk/ai/azure-ai-agents/azure/ai/agents/telemetry/_ai_agents_instrumentor.py b/sdk/ai/azure-ai-agents/azure/ai/agents/telemetry/_ai_agents_instrumentor.py
@@ -459,6 +459,7 @@ def _process_tool_calls(self, step: RunStep) -> List[Dict[str, Any]]:
                     "server_label": t.server_label or ""
                 }
             else:
+                # Works for Deep research
                 tool_details = t.as_dict()[t.type]
 
                 tool_call = {
diff --git a/sdk/ai/azure-ai-agents/tests/gen_ai_trace_verifier.py b/sdk/ai/azure-ai-agents/tests/gen_ai_trace_verifier.py
@@ -127,7 +127,7 @@ def check_event_attributes(self, expected_dict, actual_dict):
                 actual_val = json.dumps(actual_dict)
             else:
                 actual_val = actual_dict
-            raise AssertionError("check_event_attributes: keys do not match: " + expected_val + "!=" + actual_val)
+            raise AssertionError(f"check_event_attributes: keys do not match: {set(expected_dict.keys())} != {set(actual_dict.keys())}. The actual dictionaries: {expected_val} != {actual_val}")
         for key, expected_val in expected_dict.items():
             if key not in actual_dict:
                 raise AssertionError(f"check_event_attributes: key {key} not found in actuals")
diff --git a/sdk/ai/azure-ai-agents/tests/test_ai_agents_instrumentor.py b/sdk/ai/azure-ai-agents/tests/test_ai_agents_instrumentor.py
@@ -764,6 +764,7 @@ def _do_test_run_steps_with_toolset_with_tracing_content_recording(
             tool: Optional[Tool] = None,
             have_submit_tools: bool = False,
             run_step_events: List[List[Dict[str, Any]]] = None,
+            has_annotations: bool = False,
             **kwargs
         ) -> None:
         """The helper method to check the recordings."""
@@ -820,6 +821,7 @@ def _do_test_run_steps_with_toolset_with_tracing_content_recording(
             tool_message_attribute_content=tool_message_attribute_content,
             event_contents=event_contents,
             run_step_events=run_step_events,
+            has_annotations=has_annotations,
         )
 
     @pytest.mark.usefixtures("instrument_with_content")
@@ -959,6 +961,27 @@ def test_telemetry_steps_with_mcp_tool(self, **kwargs):
             run_step_events=self.get_expected_mcp_spans(),
         )
 
+    @pytest.mark.usefixtures("instrument_with_content")
+    @agentClientPreparer()
+    @recorded_by_proxy
+    def test_telemetry_steps_with_deep_research_tool(self, **kwargs):
+        """Test run steps with the Deep Research tool (non-streaming) with tracing content recording."""
+        
+        self._do_test_run_steps_with_toolset_with_tracing_content_recording(
+            tool=self._get_deep_research_tool(**kwargs),
+            model="gpt-4o",
+            use_stream=False,
+            instructions="You are a helpful agent that assists in researching scientific topics.",
+            message="Research the benefits of renewable energy sources. Keep the response brief.",
+            recording_enabled=True,
+            tool_message_attribute_content='',
+            event_contents=[],
+            have_submit_tools=False,
+            run_step_events=self.get_expected_deep_research_spans(),
+            has_annotations=True,
+            **kwargs
+        )
+
 class MyEventHandler(AgentEventHandler):
 
     def on_message_delta(self, delta: "MessageDeltaChunk") -> None:
diff --git a/sdk/ai/azure-ai-agents/tests/test_ai_agents_instrumentor_async.py b/sdk/ai/azure-ai-agents/tests/test_ai_agents_instrumentor_async.py
@@ -466,6 +466,7 @@ async def _do_test_run_steps_with_toolset_with_tracing_content_recording(
             tool: Optional[Tool] = None,
             have_submit_tools=False,
             run_step_events: List[List[Dict[str, Any]]] = None,
+            has_annotations: bool = False,
             **kwargs
         ):
         """The helper method to check the recordings."""
@@ -519,6 +520,7 @@ async def _do_test_run_steps_with_toolset_with_tracing_content_recording(
             tool_message_attribute_content=tool_message_attribute_content,
             event_contents=event_contents,
             run_step_events=run_step_events,
+            has_annotations=has_annotations,
         )
 
     @pytest.mark.usefixtures("instrument_without_content")
@@ -678,6 +680,25 @@ async def test_telemetry_steps_with_mcp_tool(self, **kwargs):
             run_step_events=self.get_expected_mcp_spans(),
         )
 
+    @pytest.mark.usefixtures("instrument_with_content")
+    @agentClientPreparer()
+    @recorded_by_proxy_async
+    async def test_telemetry_steps_with_deep_research_tool(self, **kwargs):
+        """Test run steps with the Deep Research tool (non-streaming) with tracing content recording."""
+        await self._do_test_run_steps_with_toolset_with_tracing_content_recording(
+            tool=self._get_deep_research_tool(**kwargs),
+            model="gpt-4o",
+            use_stream=False,
+            instructions="You are a helpful agent that assists in researching scientific topics.",
+            message="Research the benefits of renewable energy sources. Keep the response brief.",
+            recording_enabled=True,
+            tool_message_attribute_content='',
+            event_contents=[],
+            have_submit_tools=False,
+            run_step_events=self.get_expected_deep_research_spans(),
+            has_annotations=True,
+            **kwargs
+        )
 
 class MyEventHandler(AsyncAgentEventHandler):
 
diff --git a/sdk/ai/azure-ai-agents/tests/test_ai_instrumentor_base.py b/sdk/ai/azure-ai-agents/tests/test_ai_instrumentor_base.py
@@ -13,6 +13,8 @@
 
 from azure.ai.agents.telemetry import AIAgentsInstrumentor
 
+from azure.ai.agents.models import DeepResearchTool
+
 from gen_ai_trace_verifier import GenAiTraceVerifier
 from memory_trace_exporter import MemoryTraceExporter
 from test_agents_client_base import TestAgentClientBase
@@ -49,6 +51,17 @@ def cleanup(self):
         trace._TRACER_PROVIDER = None
         os.environ.pop(CONTENT_TRACING_ENV_VARIABLE, None)
 
+    def _get_deep_research_tool(self, **kwargs):
+        """Get deep research tool."""
+        bing_conn_id = kwargs.pop("azure_ai_agents_tests_bing_connection_id")
+        deep_research_model = kwargs.pop("azure_ai_agents_tests_deep_research_model")
+
+        # Create DeepResearchTool
+        return DeepResearchTool(
+            bing_grounding_connection_id=bing_conn_id,
+            deep_research_model=deep_research_model,
+        )
+
     def _check_spans(
             self,
             model: str,
@@ -60,6 +73,7 @@ def _check_spans(
             tool_message_attribute_content: str,
             event_contents: List[str],
             run_step_events: Optional[List[List[Dict[str, Any]]]] = None,
+            has_annotations: bool = False,
         ):
         """Check the spans for correctness."""
         spans = self.exporter.get_spans_by_name("create_agent my-agent")
@@ -210,7 +224,7 @@ def _check_spans(
             assert events_match == True
         
         spans = self.exporter.get_spans_by_name("list_messages")
-        assert len(spans) == 2
+        assert len(spans) >= 2
         span = spans[0]
         expected_attributes = [
             ("gen_ai.system", "az.ai.agents"),
@@ -221,7 +235,13 @@ def _check_spans(
         attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes)
         assert attributes_match == True
         
-        content = '{"content": {"text": {"value": "*"}}, "role": "assistant"}' if recording_enabled else '{"role": "assistant"}'
+        if recording_enabled:
+            if has_annotations:
+                content = '{"content": {"text": {"value": "*", "annotations": "*"}}, "role": "assistant"}'
+            else:
+                content = '{"content": {"text": {"value": "*"}}, "role": "assistant"}'
+        else:
+            content = '{"role": "assistant"}'
         expected_events = [
             {
                 "name": "gen_ai.assistant.message",
@@ -239,7 +259,7 @@ def _check_spans(
         events_match = GenAiTraceVerifier().check_span_events(span, expected_events)
         assert events_match == True
 
-        span = spans[1]
+        span = spans[-1]
         attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes)
         assert attributes_match == True
         
@@ -260,15 +280,21 @@ def _check_spans(
         
         spans = self.exporter.get_spans_by_name("list_run_steps")
         if run_step_events:
-            assert len(spans) == len(run_step_events)
             expected_attributes = [
                 ("gen_ai.system", "az.ai.agents"),
                 ("gen_ai.operation.name", "list_run_steps"),
                 ("server.address", ""),
                 ("gen_ai.thread.id", ""),
                 ("gen_ai.thread.run.id", ""),
             ]
-            for span, expected_span_events in zip(spans, run_step_events):
+            if len(spans) < 5:
+                assert len(spans) == len(run_step_events)
+                zip_obj = zip(spans, run_step_events)
+            else:
+                assert len(run_step_events) == 5
+                # Deep research may yield extra list_run_steps spans; compare only the first three and last two.
+                zip_obj = zip(spans[:3] + spans[-2:], run_step_events)
+            for span, expected_span_events in zip_obj:
                 attributes_match = GenAiTraceVerifier().check_span_attributes(span, expected_attributes)
                 assert attributes_match == True
                 events_match = GenAiTraceVerifier().check_span_events(span, expected_span_events)
@@ -411,4 +437,57 @@ def get_expected_mcp_spans(self):
         ])
         expected_spans.append([])
         return expected_spans
-        
+
+    def get_expected_deep_research_spans(self):
+        expected_event_content = json.dumps(
+            {'tool_calls': 
+                [
+                    {
+                        "id": "*",
+                        "type": "deep_research",
+                        "deep_research": {
+                            "input": "*",
+                            "output": "*"
+                        },
+                    }
+                ]
+            }
+        )
+        
+        expected_spans = [
+            [
+                {
+                    "name": "gen_ai.run_step.message_creation",
+                    "attributes": {
+                        "gen_ai.system": "az.ai.agents",
+                        "gen_ai.thread.id": "*",
+                        "gen_ai.agent.id": "*",
+                        "gen_ai.thread.run.id": "*",
+                        "gen_ai.message.id": "*",
+                        "gen_ai.run_step.status": "completed",
+                        "gen_ai.run_step.start.timestamp": "*",
+                        "gen_ai.run_step.end.timestamp": "*",
+                        "gen_ai.usage.input_tokens": 0,
+                        "gen_ai.usage.output_tokens": 0,
+                    },
+                },
+            ]
+        ] * 4
+        expected_spans.append([
+            {
+                "name": "gen_ai.run_step.tool_calls",
+                "attributes": {
+                    "gen_ai.system": "az.ai.agents",
+                    "gen_ai.thread.id": "*",
+                    "gen_ai.agent.id": "*",
+                    "gen_ai.thread.run.id": "*",
+                    "gen_ai.run_step.status": "completed",
+                    "gen_ai.run_step.start.timestamp": "*",
+                    "gen_ai.run_step.end.timestamp": "*",
+                    "gen_ai.usage.input_tokens": "+",
+                    "gen_ai.usage.output_tokens": "+",
+                    "gen_ai.event.content": expected_event_content
+                },
+            },
+        ])
+        return expected_spans

Original file line number	Diff line number	Diff line change
`@@ -2,5 +2,5 @@`
`2`	`2`	`"AssetsRepo": "Azure/azure-sdk-assets",`
`3`	`3`	`"AssetsRepoPrefixPath": "python",`
`4`	`4`	`"TagPrefix": "python/ai/azure-ai-agents",`
`5`		`- "Tag": "python/ai/azure-ai-agents_28d79451bb"`
	`5`	`+ "Tag": "python/ai/azure-ai-agents_65510bf995"`
`6`	`6`	`}`
Original file line number	Diff line number	Diff line change
`@@ -459,6 +459,7 @@ def _process_tool_calls(self, step: RunStep) -> List[Dict[str, Any]]:`
`459`	`459`	`"server_label": t.server_label or ""`
`460`	`460`	`}`
`461`	`461`	`else:`
	`462`	`+ # Works for Deep research`
`462`	`463`	`tool_details = t.as_dict()[t.type]`
`463`	`464`
`464`	`465`	`tool_call = {`