34
34
35
35
import asyncio
36
36
import os
37
- from typing import Optional
37
+ import re
38
+ from typing import Optional , Dict , List
38
39
39
40
from azure .ai .projects .aio import AIProjectClient
40
41
from azure .ai .agents .aio import AgentsClient
41
42
from azure .ai .agents .models import DeepResearchTool , MessageRole , ThreadMessage
42
43
from azure .identity .aio import DefaultAzureCredential
43
44
44
45
46
# Matches a single citation marker such as 【78:12†source】 (U+3010 ... U+2020 ... U+3011)
# and captures the second number, which is the one shown to the reader.
_CITATION_MARKER_RE = re.compile(r"\u3010\d+:(\d+)\u2020source\u3011")

# Matches a run of two or more <sup>N</sup> tags, optionally separated by a comma
# and/or whitespace, e.g. <sup>5</sup>,<sup>4</sup> or <sup>5</sup><sup>4</sup><sup>5</sup>.
_CONSECUTIVE_SUP_RE = re.compile(r"<sup>\d+</sup>(?:\s*,?\s*<sup>\d+</sup>)+")

# Extracts the number from each individual <sup>N</sup> tag inside a matched run.
_SUP_NUMBER_RE = re.compile(r"<sup>(\d+)</sup>")


def convert_citations_to_superscript(markdown_content: str) -> str:
    """
    Convert citation markers in markdown content to HTML superscript format.

    Citation markers of the form 【78:12†source】 are replaced with <sup>12</sup>
    (only the number after the colon is kept). Runs of adjacent superscripts,
    optionally separated by commas/whitespace, are then merged into a single tag
    whose numbers are de-duplicated and sorted ascending, e.g.
    <sup>5</sup><sup>4</sup><sup>5</sup> becomes <sup>4,5</sup>.

    Args:
        markdown_content (str): The markdown content containing citation markers

    Returns:
        str: The markdown content with citations converted to HTML superscript format
    """
    # Step 1: turn every marker into its own <sup>N</sup> tag. No whitespace is
    # emitted inside the tag, otherwise step 2 would not recognize it.
    converted_text = _CITATION_MARKER_RE.sub(
        lambda match: f"<sup>{match.group(1)}</sup>",
        markdown_content,
    )

    def consolidate_and_sort_citations(match):
        # Collect every number in the run, drop duplicates, sort numerically.
        numbers = sorted({int(num) for num in _SUP_NUMBER_RE.findall(match.group(0))})
        # A single remaining number naturally yields <sup>N</sup>.
        return f"<sup>{','.join(str(num) for num in numbers)}</sup>"

    # Step 2: collapse each run of consecutive superscripts into one tag.
    return _CONSECUTIVE_SUP_RE.sub(consolidate_and_sort_citations, converted_text)
96
+
97
+
45
98
async def fetch_and_print_new_agent_response(
    thread_id: str,
    agents_client: AgentsClient,
    last_message_id: Optional[str] = None,
    progress_filename: str = "research_progress.txt",
) -> Optional[str]:
    """
    Fetch the interim agent responses and citations from a thread and write them to a file.

    The latest agent message is printed and appended to ``progress_filename`` only
    when it is new (its ID differs from ``last_message_id``) and at least one of its
    text parts starts with ``"cot_summary:"``.

    Args:
        thread_id (str): The ID of the thread to fetch messages from
        agents_client (AgentsClient): The Azure AI agents client instance
        last_message_id (Optional[str], optional): ID of the last processed message
            to avoid duplicates. Defaults to None.
        progress_filename (str, optional): Name of the file to write progress to.
            Defaults to "research_progress.txt".

    Returns:
        Optional[str]: The ID of the latest message if new content was found,
            otherwise returns the last_message_id
    """
    response = await agents_client.messages.get_last_message_by_role(
        thread_id=thread_id,
        role=MessageRole.AGENT,
    )

    if not response or response.id == last_message_id:
        return last_message_id  # No new content

    # If not a "cot_summary", return.
    if not any(t.text.value.startswith("cot_summary:") for t in response.text_messages):
        return last_message_id

    print("\nAgent response:")
    # Relabel the "cot_summary:" marker as "Reasoning:" for display and logging.
    agent_text = "\n".join(
        t.text.value.replace("cot_summary:", "Reasoning:") for t in response.text_messages
    )
    print(agent_text)

    # Print citation annotations (if any)
    for ann in response.url_citation_annotations:
        print(f"URL Citation: [{ann.url_citation.title}]({ann.url_citation.url})")

    # Write progress to file (append mode, so earlier entries are kept)
    with open(progress_filename, "a", encoding="utf-8") as fp:
        fp.write("\nAGENT>\n")
        fp.write(agent_text)
        fp.write("\n")

        for ann in response.url_citation_annotations:
            fp.write(f"Citation: [{ann.url_citation.title}]({ann.url_citation.url})\n")

    return response.id
66
149
67
150
68
def create_research_summary(message: ThreadMessage, filepath: str = "research_report.md") -> None:
    """
    Create a formatted research report from an agent's thread message with numbered citations
    and a references section.

    The message's text parts are joined, citation markers are converted to HTML
    superscripts, and the result is written to ``filepath`` (overwriting any existing
    file). If the message has URL citation annotations, a "## Citations" section with
    a numbered list of unique links follows.

    Args:
        message (ThreadMessage): The thread message containing the agent's research response
        filepath (str, optional): Path where the research summary will be saved.
            Defaults to "research_report.md".

    Returns:
        None: This function doesn't return a value, it writes to a file
    """
    if not message:
        print("No message content provided, cannot create research report.")
        return

    with open(filepath, "w", encoding="utf-8") as fp:
        # Write text summary
        text_summary = "\n\n".join([t.text.value.strip() for t in message.text_messages])
        # Convert citations to superscript format
        text_summary = convert_citations_to_superscript(text_summary)
        fp.write(text_summary)

        # Write unique URL citations with numbered bullets, if present
        if message.url_citation_annotations:
            fp.write("\n\n## Citations\n")
            seen_urls = set()
            # Dictionary mapping full citation content to ordinal number.
            # Only key membership is consulted below; the stored ordinal is not read back.
            citations_ordinals: Dict[str, int] = {}
            # List of citation links in the order they were first seen
            text_citation_list: List[str] = []

            for ann in message.url_citation_annotations:
                url = ann.url_citation.url
                title = ann.url_citation.title or url

                if url not in seen_urls:
                    # Use the full annotation text as the key to avoid conflicts
                    citation_key = ann.text if ann.text else f"fallback_{url}"

                    # Only add if this citation content hasn't been seen before
                    if citation_key not in citations_ordinals:
                        # Assign next available ordinal number (1-based for display)
                        ordinal = len(text_citation_list) + 1
                        citations_ordinals[citation_key] = ordinal
                        text_citation_list.append(f"[{title}]({url})")

                    seen_urls.add(url)

            # Write citations in order they were added.
            # NOTE(review): these list numbers are assigned by first appearance; nothing
            # in this function ties them to the superscript numbers derived from the
            # in-text markers — confirm the two numbering schemes are meant to line up.
            for number, citation_text in enumerate(text_citation_list, start=1):
                fp.write(f"{number}. {citation_text}\n")

    print(f"Research report written to '{filepath}'.")
90
206
91
207
92
208
async def main () -> None :
@@ -96,6 +212,7 @@ async def main() -> None:
96
212
credential = DefaultAzureCredential (),
97
213
)
98
214
215
+ # [START create_agent_with_deep_research_tool]
99
216
bing_connection = await project_client .connections .get (name = os .environ ["BING_RESOURCE_NAME" ])
100
217
101
218
# Initialize a Deep Research tool with Bing Connection ID and Deep Research model deployment name
@@ -104,6 +221,7 @@ async def main() -> None:
104
221
deep_research_model = os .environ ["DEEP_RESEARCH_MODEL_DEPLOYMENT_NAME" ],
105
222
)
106
223
224
+ # Create Agent with the Deep Research tool and process Agent run
107
225
async with project_client :
108
226
109
227
agents_client = project_client .agents
@@ -145,6 +263,7 @@ async def main() -> None:
145
263
thread_id = thread .id ,
146
264
agents_client = agents_client ,
147
265
last_message_id = last_message_id ,
266
+ progress_filename = "research_progress.txt" ,
148
267
)
149
268
print (f"Run status: { run .status } " )
150
269
0 commit comments