Skip to content

Commit 9149616

Browse files
Refactor citation extraction logic in send_chat_request for improved clarity and efficiency; streamline citation handling and response text extraction
1 parent e26159c commit 9149616

File tree

1 file changed

+60
-37
lines changed

1 file changed

+60
-37
lines changed

src/app.py

Lines changed: 60 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -154,32 +154,6 @@ def replace_marker(match):
154154
return re.sub(r'【(\d+:\d+)†source】', replace_marker, text)
155155

156156

157-
# Extract citations from run steps
158-
async def extract_citations_from_run_steps(project_client, thread_id, run_id, answer, streamed_titles=None):
    """Merge Azure AI Search citations from a run's steps into ``answer["citations"]``.

    Iterates the run steps returned by
    ``project_client.agents.run_steps.list(thread_id=..., run_id=...)``. For
    every tool call that carries an ``"azure_ai_search"`` entry with a
    non-empty ``"output"``, reads ``metadata["titles"]`` and
    ``metadata["get_urls"]`` and pairs them by position.

    Args:
        project_client: Client exposing ``agents.run_steps.list`` as an async
            iterable of run steps.
        thread_id: Thread identifier forwarded to ``run_steps.list``.
        run_id: Run identifier forwarded to ``run_steps.list``.
        answer: Dict with a ``"citations"`` list of ``{"title", "url"}``
            dicts. It is updated in place.
        streamed_titles: Optional collection of titles to keep. When empty or
            ``None``, every title found is accepted.

    Returns:
        None. Results are written to ``answer["citations"]``: an existing
        entry with the same title gets its ``"url"`` overwritten, otherwise a
        new entry is appended.
    """
    import logging

    streamed_titles = streamed_titles or set()
    citations = answer["citations"]

    # Index existing citations by title so each lookup is O(1). setdefault
    # keeps the first entry per title, which is the one a linear scan finds.
    citations_by_title = {}
    for citation in citations:
        if "title" in citation:
            citations_by_title.setdefault(citation["title"], citation)

    async for run_step in project_client.agents.run_steps.list(thread_id=thread_id, run_id=run_id):
        if not isinstance(run_step.step_details, RunStepToolCallDetails):
            continue

        for tool_call in run_step.step_details.tool_calls:
            if "azure_ai_search" not in tool_call:
                continue

            output_data = tool_call["azure_ai_search"].get("output")
            if not output_data:
                continue

            if isinstance(output_data, str):
                # The output arrives as a Python-literal string. A malformed
                # payload should cost us that tool call's citations only,
                # not abort the whole pass.
                try:
                    tool_output = ast.literal_eval(output_data)
                except (ValueError, SyntaxError, TypeError):
                    logging.getLogger(__name__).warning(
                        "Skipping unparsable azure_ai_search tool output"
                    )
                    continue
            else:
                tool_output = output_data

            if not isinstance(tool_output, dict):
                continue

            # "or" fallbacks also cover keys that are present but set to None.
            metadata = tool_output.get("metadata") or {}
            urls = metadata.get("get_urls") or []
            titles = metadata.get("titles") or []

            for i, title in enumerate(titles):
                # Titles without a positional URL get an empty string.
                url = urls[i] if i < len(urls) else ""

                if streamed_titles and title not in streamed_titles:
                    continue

                existing = citations_by_title.get(title)
                if existing is not None:
                    existing["url"] = url
                else:
                    new_citation = {"title": title, "url": url}
                    citations.append(new_citation)
                    citations_by_title[title] = new_citation
181-
182-
183157
async def send_chat_request(request_body, request_headers) -> AsyncGenerator[Dict[str, Any], None]:
184158
filtered_messages = []
185159
messages = request_body.get("messages", [])
@@ -224,28 +198,40 @@ async def send_chat_request(request_body, request_headers) -> AsyncGenerator[Dic
224198
async with ChatAgent(
225199
chat_client=chat_client,
226200
tool_choice="auto", # Let agent decide when to use Azure AI Search
227-
) as chat_agent:
201+
) as chat_agent:
228202
thread = chat_agent.get_new_thread()
229203

230204
if app_settings.azure_openai.stream:
231205
# Stream response
232206
async for chunk in chat_agent.run_stream(messages=conversation_prompt, thread=thread):
233-
chunk_text = str(chunk) if chunk is not None else ""
234-
235-
if chunk_text:
236-
answer["answer"] += chunk_text
207+
# Extract text from chunk
208+
if hasattr(chunk, 'text') and chunk.text:
209+
delta_text = chunk.text
210+
answer["answer"] += delta_text
237211

238212
# Check if citation markers are present
239-
has_citation_markers = bool(re.search(r'【(\d+:\d+)†source】', chunk_text))
213+
has_citation_markers = bool(re.search(r'【(\d+:\d+)†source】', delta_text))
240214
if has_citation_markers:
241215
yield {
242-
"answer": convert_citation_markers(chunk_text, doc_mapping),
216+
"answer": convert_citation_markers(delta_text, doc_mapping),
243217
"citations": json.dumps(answer["citations"])
244218
}
245219
else:
246220
yield {
247-
"answer": chunk_text
221+
"answer": delta_text
248222
}
223+
224+
# # Collect citations from annotations
225+
# if hasattr(chunk, 'contents') and chunk.contents:
226+
# for content in chunk.contents:
227+
# if hasattr(content, 'annotations') and content.annotations:
228+
# for annotation in content.annotations:
229+
# if hasattr(annotation, 'url') and hasattr(annotation, 'title'):
230+
# if annotation.url not in [c["url"] for c in answer["citations"]]:
231+
# answer["citations"].append({
232+
# "title": annotation.title,
233+
# "url": annotation.url
234+
# })
249235

250236
# Final citation update if needed
251237
has_final_citation_markers = bool(re.search(r'【(\d+:\d+)†source】', answer["answer"]))
@@ -256,9 +242,28 @@ async def send_chat_request(request_body, request_headers) -> AsyncGenerator[Dic
256242
else:
257243
# Non-streaming response
258244
result = await chat_agent.run(messages=conversation_prompt, thread=thread)
259-
response_text = str(result) if result is not None else ""
245+
246+
# Extract text from result
247+
if hasattr(result, 'text'):
248+
response_text = result.text
249+
else:
250+
response_text = str(result) if result is not None else ""
251+
260252
answer["answer"] = response_text
261253

254+
# # Collect citations from annotations
255+
# if hasattr(result, 'contents') and result.contents:
256+
# for content in result.contents:
257+
# if hasattr(content, 'annotations') and content.annotations:
258+
# for annotation in content.annotations:
259+
# if hasattr(annotation, 'url') and hasattr(annotation, 'title'):
260+
# if annotation.url not in [c["url"] for c in answer["citations"]]:
261+
# answer["citations"].append({
262+
# "title": annotation.title,
263+
# "url": annotation.url
264+
# })
265+
266+
# Check if citation markers are present
262267
has_citation_markers = bool(re.search(r'【(\d+:\d+)†source】', response_text))
263268
if has_citation_markers:
264269
yield {
@@ -267,7 +272,8 @@ async def send_chat_request(request_body, request_headers) -> AsyncGenerator[Dic
267272
}
268273
else:
269274
yield {
270-
"answer": response_text
275+
"answer": response_text,
276+
"citations": json.dumps(answer["citations"])
271277
}
272278

273279
# Generate Template
@@ -294,12 +300,29 @@ async def send_chat_request(request_body, request_headers) -> AsyncGenerator[Dic
294300
) as chat_agent:
295301
thread = chat_agent.get_new_thread()
296302
result = await chat_agent.run(messages=conversation_prompt, thread=thread)
297-
response_text = str(result) if result is not None else ""
303+
304+
# Extract text from result
305+
if hasattr(result, 'text'):
306+
response_text = result.text
307+
else:
308+
response_text = str(result) if result is not None else ""
298309

299310
# Remove citation markers from template
300311
response_text = re.sub(r'【(\d+:\d+)†source】', '', response_text)
301312
answer["answer"] = convert_citation_markers(response_text, doc_mapping)
302313

314+
# # Collect citations from annotations (if any)
315+
# if hasattr(result, 'contents') and result.contents:
316+
# for content in result.contents:
317+
# if hasattr(content, 'annotations') and content.annotations:
318+
# for annotation in content.annotations:
319+
# if hasattr(annotation, 'url') and hasattr(annotation, 'title'):
320+
# if annotation.url not in [c["url"] for c in answer["citations"]]:
321+
# answer["citations"].append({
322+
# "title": annotation.title,
323+
# "url": annotation.url
324+
# })
325+
303326
yield {
304327
"answer": answer["answer"],
305328
"citations": json.dumps(answer["citations"])

0 commit comments

Comments
 (0)