Skip to content

Commit 166f78d

Browse files
howieleung and Copilot authored
Update deep research sample (#42268)
* Update deep research sample * Update sdk/ai/azure-ai-agents/samples/agents_tools/sample_agents_deep_research.py Co-authored-by: Copilot <[email protected]> * Update sdk/ai/azure-ai-agents/samples/agents_tools/sample_agents_deep_research.py Co-authored-by: Copilot <[email protected]> * add async sample * Resolved comments * Resolve comment * fix mypy * update doc and pylint --------- Co-authored-by: Copilot <[email protected]>
1 parent 992c402 commit 166f78d

11 files changed

+363
-107
lines changed

sdk/ai/azure-ai-agents/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
- `AgentsResponseFormatOption`, `MessageInputContent`, `MessageAttachmentToolDefinition`, `AgentsToolChoiceOption` are now public.
1818
- Fixed issues where the `runs.create_and_process` API call did not correctly handle the `AzureAISearchTool`, `FileSearchTool`, and `CodeInterpreterTool` when specified in the toolset parameter.
1919
- Fixed `update_agent` to execute with body as a keyword parameter.
20+
21+
### Sample updates
22+
23+
- Updated `sample_agents_deep_research.py` and `sample_agents_deep_research_async.py` for citations.
2024

2125
## 1.1.0b4 (2025-07-11)
2226

sdk/ai/azure-ai-agents/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -394,11 +394,11 @@ Here is an example:
394394
<!-- SNIPPET:sample_agents_deep_research.create_agent_with_deep_research_tool -->
395395

396396
```python
397-
conn_id = project_client.connections.get(name=os.environ["BING_RESOURCE_NAME"]).id
397+
bing_connection = project_client.connections.get(name=os.environ["BING_RESOURCE_NAME"])
398398

399399
# Initialize a Deep Research tool with Bing Connection ID and Deep Research model deployment name
400400
deep_research_tool = DeepResearchTool(
401-
bing_grounding_connection_id=conn_id,
401+
bing_grounding_connection_id=bing_connection.id,
402402
deep_research_model=os.environ["DEEP_RESEARCH_MODEL_DEPLOYMENT_NAME"],
403403
)
404404

sdk/ai/azure-ai-agents/samples/agents_async/sample_agents_deep_research_async.py

Lines changed: 127 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,59 +34,175 @@
3434

3535
import asyncio
3636
import os
37-
from typing import Optional
37+
import re
38+
from typing import Optional, Dict, List
3839

3940
from azure.ai.projects.aio import AIProjectClient
4041
from azure.ai.agents.aio import AgentsClient
4142
from azure.ai.agents.models import DeepResearchTool, MessageRole, ThreadMessage
4243
from azure.identity.aio import DefaultAzureCredential
4344

4445

46+
def convert_citations_to_superscript(markdown_content):
    """
    Convert citation markers in markdown content to HTML superscript format.

    A citation marker has the form 【78:12†source】: U+3010, digits, a colon,
    digits, U+2020, the word "source", U+3011. Each marker is replaced by
    <sup>12</sup>, using the number that follows the colon.

    Runs of two or more adjacent superscript tags, optionally separated by a
    comma and/or whitespace, are then merged into a single tag whose numbers
    are de-duplicated and sorted in ascending order. For example
    <sup>5</sup>,<sup>4</sup>,<sup>5</sup> becomes <sup>4,5</sup>.

    Args:
        markdown_content (str): The markdown content containing citation markers

    Returns:
        str: The markdown content with citations converted to HTML superscript format
    """
    # Matches 【number:number†source】; only the second number is captured.
    citation_pattern = re.compile(r"\u3010\d+:(\d+)\u2020source\u3011")
    converted_text = citation_pattern.sub(r"<sup>\1</sup>", markdown_content)

    # One tag followed by ONE OR MORE further tags. The trailing "+" quantifier
    # is essential: without it a run of citations is never recognised and the
    # consolidation step silently does nothing.
    consecutive_pattern = re.compile(r"<sup>\d+</sup>(?:\s*,?\s*<sup>\d+</sup>)+")

    def consolidate_and_sort_citations(match):
        # Collect every number in the run, drop duplicates, sort numerically.
        citation_numbers = re.findall(r"<sup>(\d+)</sup>", match.group(0))
        unique_sorted_citations = sorted({int(num) for num in citation_numbers})
        citation_list = ",".join(str(num) for num in unique_sorted_citations)
        return f"<sup>{citation_list}</sup>"

    return consecutive_pattern.sub(consolidate_and_sort_citations, converted_text)
96+
97+
4598
async def fetch_and_print_new_agent_response(
    thread_id: str,
    agents_client: AgentsClient,
    last_message_id: Optional[str] = None,
    progress_filename: str = "research_progress.txt",
) -> Optional[str]:
    """
    Report the latest interim agent message of a thread on the console and in a progress file.

    The most recent message with the agent role is fetched. It is reported only
    when it differs from ``last_message_id`` and at least one of its text parts
    starts with "cot_summary:". Reported text has "cot_summary:" replaced by
    "Reasoning:" and is appended, together with any URL citations, to the
    progress file.

    Args:
        thread_id (str): The ID of the thread to fetch messages from
        agents_client (AgentsClient): The Azure AI agents client instance
        last_message_id (Optional[str], optional): ID of the last processed message
            to avoid duplicates. Defaults to None.
        progress_filename (str, optional): Name of the file to append progress to.
            Defaults to "research_progress.txt".

    Returns:
        Optional[str]: The ID of the reported message, or ``last_message_id``
            unchanged when there was nothing new to report
    """
    latest = await agents_client.messages.get_last_message_by_role(
        thread_id=thread_id,
        role=MessageRole.AGENT,
    )

    # Nothing fetched, or the same message that was handled on the previous call.
    if not latest or latest.id == last_message_id:
        return last_message_id

    # Only messages that carry a "cot_summary" part are reported.
    is_summary = False
    for part in latest.text_messages:
        if part.text.value.startswith("cot_summary:"):
            is_summary = True
            break
    if not is_summary:
        return last_message_id

    print("\nAgent response:")
    relabelled_parts = [part.text.value.replace("cot_summary:", "Reasoning:") for part in latest.text_messages]
    agent_text = "\n".join(relabelled_parts)
    print(agent_text)

    # Echo citation annotations (if any) to the console
    for annotation in latest.url_citation_annotations:
        citation = annotation.url_citation
        print(f"URL Citation: [{citation.title}]({citation.url})")

    # Append the same information to the progress file
    with open(progress_filename, "a", encoding="utf-8") as progress_file:
        progress_file.write("\nAGENT>\n")
        progress_file.write(agent_text)
        progress_file.write("\n")

        for annotation in latest.url_citation_annotations:
            citation = annotation.url_citation
            progress_file.write(f"Citation: [{citation.title}]({citation.url})\n")

    return latest.id
66149

67150

68-
def create_research_summary(message: ThreadMessage, filepath: str = "research_summary.md") -> None:
151+
def create_research_summary(message: ThreadMessage, filepath: str = "research_report.md") -> None:
    """
    Write a research report file from an agent's thread message.

    The report body is the message's text parts, each stripped and joined by a
    blank line, with citation markers converted by
    ``convert_citations_to_superscript``. If the message has URL citation
    annotations, a "## Citations" section follows as a numbered list. An
    annotation is listed only the first time its URL is seen and only if its
    annotation text has not already been listed.

    Args:
        message (ThreadMessage): The thread message containing the agent's research response
        filepath (str, optional): Path where the research report will be saved.
            Defaults to "research_report.md".

    Returns:
        None: The report is written to ``filepath``; nothing is returned
    """
    if not message:
        print("No message content provided, cannot create research report.")
        return

    with open(filepath, "w", encoding="utf-8") as fp:
        # Report body: stripped text parts separated by blank lines
        paragraphs = [t.text.value.strip() for t in message.text_messages]
        fp.write(convert_citations_to_superscript("\n\n".join(paragraphs)))

        # Numbered list of unique URL citations, if present
        if message.url_citation_annotations:
            fp.write("\n\n## Citations\n")
            seen_urls = set()
            # Annotation text (or a URL-based fallback key) -> 1-based ordinal
            citations_ordinals: Dict[str, int] = {}
            # Rendered markdown links, in the order they were first accepted
            text_citation_list: List[str] = []

            for ann in message.url_citation_annotations:
                url = ann.url_citation.url
                title = ann.url_citation.title or url

                if url in seen_urls:
                    continue

                # Key on the annotation text; fall back to the URL when it is empty
                citation_key = ann.text or f"fallback_{url}"
                if citation_key not in citations_ordinals:
                    citations_ordinals[citation_key] = len(text_citation_list) + 1
                    text_citation_list.append(f"[{title}]({url})")

                seen_urls.add(url)

            # Emit the entries in the order they were added, numbered from 1
            for ordinal, citation_text in enumerate(text_citation_list, start=1):
                fp.write(f"{ordinal}. {citation_text}\n")

    print(f"Research report written to '{filepath}'.")
90206

91207

92208
async def main() -> None:
@@ -96,6 +212,7 @@ async def main() -> None:
96212
credential=DefaultAzureCredential(),
97213
)
98214

215+
# [START create_agent_with_deep_research_tool]
99216
bing_connection = await project_client.connections.get(name=os.environ["BING_RESOURCE_NAME"])
100217

101218
# Initialize a Deep Research tool with Bing Connection ID and Deep Research model deployment name
@@ -104,6 +221,7 @@ async def main() -> None:
104221
deep_research_model=os.environ["DEEP_RESEARCH_MODEL_DEPLOYMENT_NAME"],
105222
)
106223

224+
# Create Agent with the Deep Research tool and process Agent run
107225
async with project_client:
108226

109227
agents_client = project_client.agents
@@ -145,6 +263,7 @@ async def main() -> None:
145263
thread_id=thread.id,
146264
agents_client=agents_client,
147265
last_message_id=last_message_id,
266+
progress_filename="research_progress.txt",
148267
)
149268
print(f"Run status: {run.status}")
150269

sdk/ai/azure-ai-agents/samples/agents_async/sample_agents_stream_eventhandler_with_mcp_async.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
allowed_tools=[], # Optional: specify allowed tools
6161
)
6262

63+
6364
class MyEventHandler(AsyncAgentEventHandler[str]):
6465

6566
def __init__(self, agents_client: AgentsClient) -> None:
@@ -143,9 +144,7 @@ async def main() -> None:
143144
print(f"Created thread, thread ID {thread.id}")
144145

145146
message = await agents_client.messages.create(
146-
thread_id=thread.id,
147-
role="user",
148-
content="Please summarize the Azure REST API specifications Readme"
147+
thread_id=thread.id, role="user", content="Please summarize the Azure REST API specifications Readme"
149148
)
150149
print(f"Created message, message ID {message.id}")
151150

sdk/ai/azure-ai-agents/samples/agents_response_formats/sample_agents_json_object_response_format.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
model=os.environ["MODEL_DEPLOYMENT_NAME"],
3939
name="my-agent",
4040
instructions="You are helpful agent. You will respond with a JSON object.",
41-
response_format=AgentsResponseFormat(type="json_object")
41+
response_format=AgentsResponseFormat(type="json_object"),
4242
)
4343
print(f"Created agent, agent ID: {agent.id}")
4444

@@ -48,7 +48,9 @@
4848
# List all threads for the agent
4949
threads = agents_client.threads.list()
5050

51-
message = agents_client.messages.create(thread_id=thread.id, role="user", content="Hello, give me a list of planets in our solar system.")
51+
message = agents_client.messages.create(
52+
thread_id=thread.id, role="user", content="Hello, give me a list of planets in our solar system."
53+
)
5254
print(f"Created message, message ID: {message.id}")
5355

5456
run = agents_client.runs.create_and_process(thread_id=thread.id, agent_id=agent.id)

sdk/ai/azure-ai-agents/samples/agents_response_formats/sample_agents_json_schema_response_format.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class Planets(str, Enum):
4343
Mars = "Mars"
4444
Mercury = "Mercury"
4545

46+
4647
class Planet(BaseModel):
4748
planet: Planets
4849
mass: float

sdk/ai/azure-ai-agents/samples/agents_response_formats/sample_agents_text_response_format.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
model=os.environ["MODEL_DEPLOYMENT_NAME"],
3939
name="my-agent",
4040
instructions="You are helpful agent.",
41-
response_format=AgentsResponseFormat(type="text")
41+
response_format=AgentsResponseFormat(type="text"),
4242
)
4343
print(f"Created agent, agent ID: {agent.id}")
4444

@@ -48,7 +48,9 @@
4848
# List all threads for the agent
4949
threads = agents_client.threads.list()
5050

51-
message = agents_client.messages.create(thread_id=thread.id, role="user", content="Hello, give me a list of planets in our solar system.")
51+
message = agents_client.messages.create(
52+
thread_id=thread.id, role="user", content="Hello, give me a list of planets in our solar system."
53+
)
5254
print(f"Created message, message ID: {message.id}")
5355

5456
run = agents_client.runs.create_and_process(thread_id=thread.id, agent_id=agent.id)

sdk/ai/azure-ai-agents/samples/agents_streaming/sample_agents_stream_iteration_with_mcp.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"""
88
DESCRIPTION:
99
This sample demonstrates how to use agent operations with the
10-
Model Context Protocol (MCP) tool from the Azure Agents service, and
10+
Model Context Protocol (MCP) tool from the Azure Agents service, and
1111
iteration in streaming. It uses a synchronous client.
1212
To learn more about Model Context Protocol, visit https://modelcontextprotocol.io/
1313
@@ -41,7 +41,7 @@
4141
MessageDeltaTextUrlCitationAnnotation,
4242
RequiredMcpToolCall,
4343
SubmitToolApprovalAction,
44-
ToolApproval
44+
ToolApproval,
4545
)
4646
from azure.identity import DefaultAzureCredential
4747

@@ -83,9 +83,7 @@
8383
print(f"Created thread, thread ID {thread.id}")
8484

8585
message = agents_client.messages.create(
86-
thread_id=thread.id,
87-
role=MessageRole.USER,
88-
content="Please summarize the Azure REST API specifications Readme"
86+
thread_id=thread.id, role=MessageRole.USER, content="Please summarize the Azure REST API specifications Readme"
8987
)
9088
print(f"Created message, message ID {message.id}")
9189

@@ -148,7 +146,10 @@
148146
# Once we receive 'requires_action' status, the next event will be DONE.
149147
# Here we associate our existing event handler to the next stream.
150148
agents_client.runs.submit_tool_outputs_stream(
151-
thread_id=event_data.thread_id, run_id=event_data.id, tool_approvals=tool_approvals, event_handler=stream
149+
thread_id=event_data.thread_id,
150+
run_id=event_data.id,
151+
tool_approvals=tool_approvals,
152+
event_handler=stream,
152153
)
153154

154155
elif isinstance(event_data, RunStep):

sdk/ai/azure-ai-agents/samples/agents_telemetry/sample_agents_basics_with_console_tracing_custom_attributes.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ def on_start(self, span: Span, parent_context=None):
6464
def on_end(self, span: ReadableSpan):
6565
# Clean-up logic can be added here if necessary
6666
pass
67+
68+
6769
# [END custom_attribute_span_processor]
6870

6971
# Setup tracing to console

0 commit comments

Comments
 (0)