Update deep research sample (#42268)

howieleung · Copilot · web-flow · commit 166f78d87554 · 2025-08-05T13:02:13.000-07:00
* Update deep research sample

* Update sdk/ai/azure-ai-agents/samples/agents_tools/sample_agents_deep_research.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update sdk/ai/azure-ai-agents/samples/agents_tools/sample_agents_deep_research.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* add async sample

* Resolved comments

* Resolve comment

* fix mypy

* update doc and pylint

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/sdk/ai/azure-ai-agents/CHANGELOG.md b/sdk/ai/azure-ai-agents/CHANGELOG.md
@@ -17,6 +17,10 @@
 - `AgentsResponseFormatOption`, `MessageInputContent`, `MessageAttachmentToolDefinition`, `AgentsToolChoiceOption` are now public.
 - Fixed issues where the `runs.create_and_process` API call did not correctly handle the `AzureAISearchTool`, `FileSearchTool`, and `CodeInterpreterTool` when specified in the toolset parameter.
 - Fixed `update_agent` to execute with body as a keyword parameter.
+
+### Sample updates
+
+- Updated `sample_agents_deep_research.py` and `sample_agents_deep_research_async.py` for citations.
   
 ## 1.1.0b4 (2025-07-11)
 
diff --git a/sdk/ai/azure-ai-agents/README.md b/sdk/ai/azure-ai-agents/README.md
@@ -394,11 +394,11 @@ Here is an example:
 <!-- SNIPPET:sample_agents_deep_research.create_agent_with_deep_research_tool -->
 
 ```python
-conn_id = project_client.connections.get(name=os.environ["BING_RESOURCE_NAME"]).id
+bing_connection = project_client.connections.get(name=os.environ["BING_RESOURCE_NAME"])
 
 # Initialize a Deep Research tool with Bing Connection ID and Deep Research model deployment name
 deep_research_tool = DeepResearchTool(
-    bing_grounding_connection_id=conn_id,
+    bing_grounding_connection_id=bing_connection.id,
     deep_research_model=os.environ["DEEP_RESEARCH_MODEL_DEPLOYMENT_NAME"],
 )
 
diff --git a/sdk/ai/azure-ai-agents/samples/agents_async/sample_agents_deep_research_async.py b/sdk/ai/azure-ai-agents/samples/agents_async/sample_agents_deep_research_async.py
@@ -34,59 +34,175 @@
 
 import asyncio
 import os
-from typing import Optional
+import re
+from typing import Optional, Dict, List
 
 from azure.ai.projects.aio import AIProjectClient
 from azure.ai.agents.aio import AgentsClient
 from azure.ai.agents.models import DeepResearchTool, MessageRole, ThreadMessage
 from azure.identity.aio import DefaultAzureCredential
 
 
+def convert_citations_to_superscript(markdown_content):
+    """
+    Convert citation markers in markdown content to HTML superscript format.
+
+    This function finds citation markers like 【78:12†source】 (delimited by U+3010, U+2020 and U+3011, not ASCII brackets or '+') and converts them to
+    HTML superscript tags <sup>12</sup> for better formatting in markdown documents.
+    It also consolidates consecutive citations by sorting and deduplicating them.
+
+    Args:
+        markdown_content (str): The markdown content containing citation markers
+
+    Returns:
+        str: The markdown content with citations converted to HTML superscript format
+    """
+    # Pattern to match 【number:number†source】 (U+3010 ... U+2020 "source" U+3011); only the second number is captured
+    pattern = re.compile(r"\u3010\d+:(\d+)\u2020source\u3011")
+
+    # Replace with <sup>captured_number</sup>
+    def replacement(match):
+        citation_number = match.group(1)
+        return f"<sup>{citation_number}</sup>"
+
+    # First, convert all citation markers to superscript
+    converted_text = pattern.sub(replacement, markdown_content)
+
+    # Then, consolidate consecutive superscript citations
+    # Pattern to match multiple superscript tags with optional commas/spaces
+    # Matches: <sup>5</sup>,<sup>4</sup>,<sup>5</sup> or <sup>5</sup><sup>4</sup><sup>5</sup>
+    consecutive_pattern = r"(<sup>\d+</sup>)(\s*,?\s*<sup>\d+</sup>)+"
+
+    def consolidate_and_sort_citations(match):
+        # Extract all citation numbers from the matched text
+        citation_text = match.group(0)
+        citation_numbers = re.findall(r"<sup>(\d+)</sup>", citation_text)
+
+        # Convert to integers, remove duplicates, and sort
+        unique_sorted_citations = sorted(set(int(num) for num in citation_numbers))
+
+        # If only one citation, return simple format
+        if len(unique_sorted_citations) == 1:
+            return f"<sup>{unique_sorted_citations[0]}</sup>"
+
+        # If multiple citations, return comma-separated format
+        citation_list = ",".join(str(num) for num in unique_sorted_citations)
+        return f"<sup>{citation_list}</sup>"
+
+    # Remove consecutive duplicate citations and sort them
+    final_text = re.sub(consecutive_pattern, consolidate_and_sort_citations, converted_text)
+
+    return final_text
+
+
 async def fetch_and_print_new_agent_response(
     thread_id: str,
     agents_client: AgentsClient,
     last_message_id: Optional[str] = None,
+    progress_filename: str = "research_progress.txt",
 ) -> Optional[str]:
+    """
+    Fetch the interim agent responses and citations from a thread and write them to a file.
+
+    Args:
+        thread_id (str): The ID of the thread to fetch messages from
+        agents_client (AgentsClient): The Azure AI agents client instance
+        last_message_id (Optional[str], optional): ID of the last processed message
+            to avoid duplicates. Defaults to None.
+        progress_filename (str, optional): Name of the file to write progress to.
+            Defaults to "research_progress.txt".
+
+    Returns:
+        Optional[str]: The ID of the latest message if new content was found,
+            otherwise returns the last_message_id
+    """
     response = await agents_client.messages.get_last_message_by_role(
         thread_id=thread_id,
         role=MessageRole.AGENT,
     )
 
     if not response or response.id == last_message_id:
+        return last_message_id  # No new content
+
+    # If not a "cot_summary", return.
+    if not any(t.text.value.startswith("cot_summary:") for t in response.text_messages):
         return last_message_id
 
     print("\nAgent response:")
-    print("\n".join(t.text.value for t in response.text_messages))
+    agent_text = "\n".join(t.text.value.replace("cot_summary:", "Reasoning:") for t in response.text_messages)
+    print(agent_text)
 
     # Print citation annotations (if any)
     for ann in response.url_citation_annotations:
         print(f"URL Citation: [{ann.url_citation.title}]({ann.url_citation.url})")
 
+    # Write progress to file
+    with open(progress_filename, "a", encoding="utf-8") as fp:
+        fp.write("\nAGENT>\n")
+        fp.write(agent_text)
+        fp.write("\n")
+
+        for ann in response.url_citation_annotations:
+            fp.write(f"Citation: [{ann.url_citation.title}]({ann.url_citation.url})\n")
+
     return response.id
 
 
-def create_research_summary(message: ThreadMessage, filepath: str = "research_summary.md") -> None:
+def create_research_summary(message: ThreadMessage, filepath: str = "research_report.md") -> None:
+    """
+    Create a formatted research report from an agent's thread message with numbered citations
+    and a references section.
+
+    Args:
+        message (ThreadMessage): The thread message containing the agent's research response
+        filepath (str, optional): Path where the research summary will be saved.
+            Defaults to "research_report.md".
+
+    Returns:
+        None: This function doesn't return a value, it writes to a file
+    """
     if not message:
-        print("No message content provided, cannot create research summary.")
+        print("No message content provided, cannot create research report.")
         return
 
     with open(filepath, "w", encoding="utf-8") as fp:
         # Write text summary
         text_summary = "\n\n".join([t.text.value.strip() for t in message.text_messages])
+        # Convert citations to superscript format
+        text_summary = convert_citations_to_superscript(text_summary)
         fp.write(text_summary)
 
-        # Write unique URL citations, if present
+        # Write unique URL citations with numbered bullets, if present
         if message.url_citation_annotations:
-            fp.write("\n\n## References\n")
+            fp.write("\n\n## Citations\n")
             seen_urls = set()
+            # Dictionary mapping full citation content to ordinal number
+            citations_ordinals: Dict[str, int] = {}
+            # List of citation URLs indexed by ordinal (0-based)
+            text_citation_list: List[str] = []
+
             for ann in message.url_citation_annotations:
                 url = ann.url_citation.url
                 title = ann.url_citation.title or url
+
                 if url not in seen_urls:
-                    fp.write(f"- [{title}]({url})\n")
+                    # Use the full annotation text as the key to avoid conflicts
+                    citation_key = ann.text if ann.text else f"fallback_{url}"
+
+                    # Only add if this citation content hasn't been seen before
+                    if citation_key not in citations_ordinals:
+                        # Assign next available ordinal number (1-based for display)
+                        ordinal = len(text_citation_list) + 1
+                        citations_ordinals[citation_key] = ordinal
+                        text_citation_list.append(f"[{title}]({url})")
+
                     seen_urls.add(url)
 
-    print(f"Research summary written to '{filepath}'.")
+            # Write citations in order they were added
+            for i, citation_text in enumerate(text_citation_list):
+                fp.write(f"{i + 1}. {citation_text}\n")
+
+    print(f"Research report written to '{filepath}'.")
 
 
 async def main() -> None:
@@ -96,6 +212,7 @@ async def main() -> None:
         credential=DefaultAzureCredential(),
     )
 
+    # [START create_agent_with_deep_research_tool]
     bing_connection = await project_client.connections.get(name=os.environ["BING_RESOURCE_NAME"])
 
     # Initialize a Deep Research tool with Bing Connection ID and Deep Research model deployment name
@@ -104,6 +221,7 @@ async def main() -> None:
         deep_research_model=os.environ["DEEP_RESEARCH_MODEL_DEPLOYMENT_NAME"],
     )
 
+    # Create Agent with the Deep Research tool and process Agent run
     async with project_client:
 
         agents_client = project_client.agents
@@ -145,6 +263,7 @@ async def main() -> None:
                 thread_id=thread.id,
                 agents_client=agents_client,
                 last_message_id=last_message_id,
+                progress_filename="research_progress.txt",
             )
             print(f"Run status: {run.status}")
 
diff --git a/sdk/ai/azure-ai-agents/samples/agents_async/sample_agents_stream_eventhandler_with_mcp_async.py b/sdk/ai/azure-ai-agents/samples/agents_async/sample_agents_stream_eventhandler_with_mcp_async.py
@@ -60,6 +60,7 @@
     allowed_tools=[],  # Optional: specify allowed tools
 )
 
+
 class MyEventHandler(AsyncAgentEventHandler[str]):
 
     def __init__(self, agents_client: AgentsClient) -> None:
@@ -143,9 +144,7 @@ async def main() -> None:
         print(f"Created thread, thread ID {thread.id}")
 
         message = await agents_client.messages.create(
-            thread_id=thread.id,
-            role="user",
-            content="Please summarize the Azure REST API specifications Readme"
+            thread_id=thread.id, role="user", content="Please summarize the Azure REST API specifications Readme"
         )
         print(f"Created message, message ID {message.id}")
 
diff --git a/sdk/ai/azure-ai-agents/samples/agents_response_formats/sample_agents_json_object_response_format.py b/sdk/ai/azure-ai-agents/samples/agents_response_formats/sample_agents_json_object_response_format.py
@@ -38,7 +38,7 @@
         model=os.environ["MODEL_DEPLOYMENT_NAME"],
         name="my-agent",
         instructions="You are helpful agent. You will respond with a JSON object.",
-        response_format=AgentsResponseFormat(type="json_object")
+        response_format=AgentsResponseFormat(type="json_object"),
     )
     print(f"Created agent, agent ID: {agent.id}")
 
@@ -48,7 +48,9 @@
     # List all threads for the agent
     threads = agents_client.threads.list()
 
-    message = agents_client.messages.create(thread_id=thread.id, role="user", content="Hello, give me a list of planets in our solar system.")
+    message = agents_client.messages.create(
+        thread_id=thread.id, role="user", content="Hello, give me a list of planets in our solar system."
+    )
     print(f"Created message, message ID: {message.id}")
 
     run = agents_client.runs.create_and_process(thread_id=thread.id, agent_id=agent.id)
diff --git a/sdk/ai/azure-ai-agents/samples/agents_response_formats/sample_agents_json_schema_response_format.py b/sdk/ai/azure-ai-agents/samples/agents_response_formats/sample_agents_json_schema_response_format.py
@@ -43,6 +43,7 @@ class Planets(str, Enum):
     Mars = "Mars"
     Mercury = "Mercury"
 
+
 class Planet(BaseModel):
     planet: Planets
     mass: float
diff --git a/sdk/ai/azure-ai-agents/samples/agents_response_formats/sample_agents_text_response_format.py b/sdk/ai/azure-ai-agents/samples/agents_response_formats/sample_agents_text_response_format.py
@@ -38,7 +38,7 @@
         model=os.environ["MODEL_DEPLOYMENT_NAME"],
         name="my-agent",
         instructions="You are helpful agent.",
-        response_format=AgentsResponseFormat(type="text")
+        response_format=AgentsResponseFormat(type="text"),
     )
     print(f"Created agent, agent ID: {agent.id}")
 
@@ -48,7 +48,9 @@
     # List all threads for the agent
     threads = agents_client.threads.list()
 
-    message = agents_client.messages.create(thread_id=thread.id, role="user", content="Hello, give me a list of planets in our solar system.")
+    message = agents_client.messages.create(
+        thread_id=thread.id, role="user", content="Hello, give me a list of planets in our solar system."
+    )
     print(f"Created message, message ID: {message.id}")
 
     run = agents_client.runs.create_and_process(thread_id=thread.id, agent_id=agent.id)
diff --git a/sdk/ai/azure-ai-agents/samples/agents_streaming/sample_agents_stream_iteration_with_mcp.py b/sdk/ai/azure-ai-agents/samples/agents_streaming/sample_agents_stream_iteration_with_mcp.py
@@ -7,7 +7,7 @@
 """
 DESCRIPTION:
     This sample demonstrates how to use agent operations with the
-    Model Context Protocol (MCP) tool from the Azure Agents service, and 
+    Model Context Protocol (MCP) tool from the Azure Agents service, and
     iteration in streaming. It uses a synchronous client.
     To learn more about Model Context Protocol, visit https://modelcontextprotocol.io/
 
@@ -41,7 +41,7 @@
     MessageDeltaTextUrlCitationAnnotation,
     RequiredMcpToolCall,
     SubmitToolApprovalAction,
-    ToolApproval
+    ToolApproval,
 )
 from azure.identity import DefaultAzureCredential
 
@@ -83,9 +83,7 @@
     print(f"Created thread, thread ID {thread.id}")
 
     message = agents_client.messages.create(
-        thread_id=thread.id,
-        role=MessageRole.USER,
-        content="Please summarize the Azure REST API specifications Readme"
+        thread_id=thread.id, role=MessageRole.USER, content="Please summarize the Azure REST API specifications Readme"
     )
     print(f"Created message, message ID {message.id}")
 
@@ -148,7 +146,10 @@
                         # Once we receive 'requires_action' status, the next event will be DONE.
                         # Here we associate our existing event handler to the next stream.
                         agents_client.runs.submit_tool_outputs_stream(
-                            thread_id=event_data.thread_id, run_id=event_data.id, tool_approvals=tool_approvals, event_handler=stream
+                            thread_id=event_data.thread_id,
+                            run_id=event_data.id,
+                            tool_approvals=tool_approvals,
+                            event_handler=stream,
                         )
 
             elif isinstance(event_data, RunStep):
diff --git a/sdk/ai/azure-ai-agents/samples/agents_telemetry/sample_agents_basics_with_console_tracing_custom_attributes.py b/sdk/ai/azure-ai-agents/samples/agents_telemetry/sample_agents_basics_with_console_tracing_custom_attributes.py
@@ -64,6 +64,8 @@ def on_start(self, span: Span, parent_context=None):
     def on_end(self, span: ReadableSpan):
         # Clean-up logic can be added here if necessary
         pass
+
+
 # [END custom_attribute_span_processor]
 
 # Setup tracing to console
diff --git a/sdk/ai/azure-ai-agents/samples/agents_tools/sample_agents_deep_research.py b/sdk/ai/azure-ai-agents/samples/agents_tools/sample_agents_deep_research.py
diff --git a/sdk/ai/azure-ai-agents/samples/agents_tools/sample_agents_enterprise_file_search.py b/sdk/ai/azure-ai-agents/samples/agents_tools/sample_agents_enterprise_file_search.py

Original file line number	Diff line number	Diff line change
`@@ -60,6 +60,7 @@`
`60`	`60`	`allowed_tools=[], # Optional: specify allowed tools`
`61`	`61`	`)`
`62`	`62`
	`63`	`+`
`63`	`64`	`class MyEventHandler(AsyncAgentEventHandler[str]):`
`64`	`65`
`65`	`66`	`def __init__(self, agents_client: AgentsClient) -> None:`
`@@ -143,9 +144,7 @@ async def main() -> None:`
`143`	`144`	`print(f"Created thread, thread ID {thread.id}")`
`144`	`145`
`145`	`146`	`message = await agents_client.messages.create(`
`146`		`- thread_id=thread.id,`
`147`		`- role="user",`
`148`		`- content="Please summarize the Azure REST API specifications Readme"`
	`147`	`+ thread_id=thread.id, role="user", content="Please summarize the Azure REST API specifications Readme"`
`149`	`148`	`)`
`150`	`149`	`print(f"Created message, message ID {message.id}")`
`151`	`150`