3434
3535import asyncio
3636import os
37- from typing import Optional
37+ import re
38+ from typing import Optional , Dict , List
3839
3940from azure .ai .projects .aio import AIProjectClient
4041from azure .ai .agents .aio import AgentsClient
4142from azure .ai .agents .models import DeepResearchTool , MessageRole , ThreadMessage
4243from azure .identity .aio import DefaultAzureCredential
4344
4445
# Deep Research citation marker, e.g. "【78:12†source】" (U+3010 ... U+2020 "source" U+3011).
# Group 1 captures the citation number (the digits after the colon).
_CITATION_MARKER_RE = re.compile(r"\u3010\d+:(\d+)\u2020source\u3011")
# A run of two or more <sup>N</sup> tags, optionally separated by a comma and/or whitespace.
_CONSECUTIVE_SUP_RE = re.compile(r"<sup>\d+</sup>(?:\s*,?\s*<sup>\d+</sup>)+")
# Extracts the number from a single <sup>N</sup> tag.
_SUP_NUMBER_RE = re.compile(r"<sup>(\d+)</sup>")


def convert_citations_to_superscript(markdown_content: str) -> str:
    """
    Convert citation markers in markdown content to HTML superscript format.

    Citation markers such as ``【78:12†source】`` are rewritten as
    ``<sup>12</sup>``. Runs of adjacent superscripts (optionally separated by a
    comma and/or whitespace) are then merged into a single tag whose numbers
    are de-duplicated and sorted numerically, e.g.
    ``<sup>5</sup>,<sup>4</sup>,<sup>5</sup>`` becomes ``<sup>4,5</sup>``.

    Args:
        markdown_content (str): The markdown content containing citation markers

    Returns:
        str: The markdown content with citations converted to HTML superscript format
    """
    # Step 1: turn every citation marker into a bare <sup>N</sup> tag. The tag
    # must contain no padding, otherwise step 2 cannot recognise it.
    converted_text = _CITATION_MARKER_RE.sub(r"<sup>\1</sup>", markdown_content)

    def consolidate_and_sort_citations(match):
        # Compare as integers so that 9 sorts before 10; the set drops repeats.
        numbers = sorted({int(num) for num in _SUP_NUMBER_RE.findall(match.group(0))})
        citation_list = ",".join(str(num) for num in numbers)
        return f"<sup>{citation_list}</sup>"

    # Step 2: collapse each run of consecutive superscripts into one tag.
    return _CONSECUTIVE_SUP_RE.sub(consolidate_and_sort_citations, converted_text)
96+
97+
async def fetch_and_print_new_agent_response(
    thread_id: str,
    agents_client: AgentsClient,
    last_message_id: Optional[str] = None,
    progress_filename: str = "research_progress.txt",
) -> Optional[str]:
    """
    Fetch the interim agent responses and citations from a thread and write them to a file.

    The latest agent message is printed and appended to ``progress_filename``
    only when it is new (its ID differs from ``last_message_id``) and at least
    one of its text parts starts with ``cot_summary:``.

    Args:
        thread_id (str): The ID of the thread to fetch messages from
        agents_client (AgentsClient): The Azure AI agents client instance
        last_message_id (Optional[str], optional): ID of the last processed message
            to avoid duplicates. Defaults to None.
        progress_filename (str, optional): Name of the file to write progress to.
            Defaults to "research_progress.txt".

    Returns:
        Optional[str]: The ID of the latest message if new "cot_summary" content
            was found and reported, otherwise the unchanged ``last_message_id``
    """
    response = await agents_client.messages.get_last_message_by_role(
        thread_id=thread_id,
        role=MessageRole.AGENT,
    )

    if not response or response.id == last_message_id:
        return last_message_id  # No new content

    # Only interim "cot_summary" messages are reported as progress; anything
    # else is skipped without advancing the last-seen ID.
    if not any(t.text.value.startswith("cot_summary:") for t in response.text_messages):
        return last_message_id

    agent_text = "\n".join(
        t.text.value.replace("cot_summary:", "Reasoning:") for t in response.text_messages
    )
    # Render each citation once as a markdown link, reused for console and file.
    citation_links = [
        f"[{ann.url_citation.title}]({ann.url_citation.url})"
        for ann in response.url_citation_annotations
    ]

    print("\nAgent response:")
    print(agent_text)

    # Print citation annotations (if any)
    for link in citation_links:
        print(f"URL Citation: {link}")

    # Append (not overwrite) so the file accumulates the whole run's progress.
    with open(progress_filename, "a", encoding="utf-8") as fp:
        fp.write("\nAGENT>\n")
        fp.write(agent_text)
        fp.write("\n")

        for link in citation_links:
            fp.write(f"Citation: {link}\n")

    return response.id
66149
67150
def create_research_summary(message: ThreadMessage, filepath: str = "research_report.md") -> None:
    """
    Create a formatted research report from an agent's thread message with superscript
    citations and a numbered "Citations" section.

    The report body is the message's text parts (each stripped, joined by blank
    lines) with citation markers converted by ``convert_citations_to_superscript``.
    If the message carries URL citation annotations, each distinct citation is
    listed once as a numbered markdown link, in order of first appearance.

    Args:
        message (ThreadMessage): The thread message containing the agent's research response
        filepath (str, optional): Path where the research report will be saved.
            Defaults to "research_report.md".

    Returns:
        None: This function doesn't return a value, it writes to a file
    """
    if not message:
        print("No message content provided, cannot create research report.")
        return

    with open(filepath, "w", encoding="utf-8") as fp:
        # Write text summary
        text_summary = "\n\n".join([t.text.value.strip() for t in message.text_messages])
        # Convert citations to superscript format
        text_summary = convert_citations_to_superscript(text_summary)
        fp.write(text_summary)

        # Write unique URL citations as a numbered list, if present
        if message.url_citation_annotations:
            fp.write("\n\n## Citations\n")
            seen_urls = set()
            # Annotation texts (or URL-based fallbacks) that already have an entry
            seen_citation_keys = set()
            # Rendered markdown links, in the order they will be numbered
            text_citation_list = []

            for ann in message.url_citation_annotations:
                url = ann.url_citation.url
                title = ann.url_citation.title or url

                if url in seen_urls:
                    continue
                seen_urls.add(url)

                # Use the full annotation text as the key to avoid conflicts;
                # fall back to a URL-derived key when the annotation has no text.
                citation_key = ann.text if ann.text else f"fallback_{url}"
                if citation_key not in seen_citation_keys:
                    seen_citation_keys.add(citation_key)
                    text_citation_list.append(f"[{title}]({url})")

            # "N. " with no space before the period is required for a markdown ordered list.
            for ordinal, citation_text in enumerate(text_citation_list, start=1):
                fp.write(f"{ordinal}. {citation_text}\n")

    print(f"Research report written to '{filepath}'.")
90206
91207
92208async def main () -> None :
@@ -96,6 +212,7 @@ async def main() -> None:
96212 credential = DefaultAzureCredential (),
97213 )
98214
215+ # [START create_agent_with_deep_research_tool]
99216 bing_connection = await project_client .connections .get (name = os .environ ["BING_RESOURCE_NAME" ])
100217
101218 # Initialize a Deep Research tool with Bing Connection ID and Deep Research model deployment name
@@ -104,6 +221,7 @@ async def main() -> None:
104221 deep_research_model = os .environ ["DEEP_RESEARCH_MODEL_DEPLOYMENT_NAME" ],
105222 )
106223
224+ # Create Agent with the Deep Research tool and process Agent run
107225 async with project_client :
108226
109227 agents_client = project_client .agents
@@ -145,6 +263,7 @@ async def main() -> None:
145263 thread_id = thread .id ,
146264 agents_client = agents_client ,
147265 last_message_id = last_message_id ,
266+ progress_filename = "research_progress.txt" ,
148267 )
149268 print (f"Run status: { run .status } " )
150269
0 commit comments