
Commit e3c5310

Merge branch 'main' into answersources
2 parents 0ee4f21 + 1507ea7 commit e3c5310

File tree: 17 files changed (+857 / -178 lines)

app/backend/app.py (3 additions, 0 deletions)

```diff
@@ -471,6 +471,7 @@ async def setup_clients():
     USE_CHAT_HISTORY_BROWSER = os.getenv("USE_CHAT_HISTORY_BROWSER", "").lower() == "true"
     USE_CHAT_HISTORY_COSMOS = os.getenv("USE_CHAT_HISTORY_COSMOS", "").lower() == "true"
     USE_AGENTIC_RETRIEVAL = os.getenv("USE_AGENTIC_RETRIEVAL", "").lower() == "true"
+    ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA = os.getenv("ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA", "").lower() == "true"
 
     # WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep
     RUNNING_ON_AZURE = os.getenv("WEBSITE_HOSTNAME") is not None or os.getenv("RUNNING_IN_PRODUCTION") is not None
@@ -689,6 +690,7 @@ async def setup_clients():
             query_speller=AZURE_SEARCH_QUERY_SPELLER,
             prompt_manager=prompt_manager,
             reasoning_effort=OPENAI_REASONING_EFFORT,
+            hydrate_references=ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA,
             multimodal_enabled=USE_MULTIMODAL,
             image_embeddings_client=image_embeddings_client,
             global_blob_manager=global_blob_manager,
@@ -716,6 +718,7 @@ async def setup_clients():
             query_speller=AZURE_SEARCH_QUERY_SPELLER,
             prompt_manager=prompt_manager,
             reasoning_effort=OPENAI_REASONING_EFFORT,
+            hydrate_references=ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA,
             multimodal_enabled=USE_MULTIMODAL,
             image_embeddings_client=image_embeddings_client,
             global_blob_manager=global_blob_manager,
```
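For orientation, here is a small self-contained sketch of the flag parsing this hunk adds to `setup_clients()`; only the environment variable name comes from the diff, the helper function is illustrative. Any value other than the literal string "true" (case-insensitive) leaves hydration off.

```python
# Sketch of the opt-in flag parsing used above; the function name is illustrative.
import os

def agentic_source_data_enabled() -> bool:
    # Only the exact string "true" (case-insensitive) enables the feature.
    return os.getenv("ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA", "").lower() == "true"

if __name__ == "__main__":
    os.environ["ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA"] = "True"
    print(agentic_source_data_enabled())  # True
    os.environ["ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA"] = "1"
    print(agentic_source_data_enabled())  # False: "1" does not count as true
```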

app/backend/approaches/approach.py (97 additions, 23 deletions)

```diff
@@ -162,6 +162,7 @@ def __init__(
         openai_host: str,
         prompt_manager: PromptManager,
         reasoning_effort: Optional[str] = None,
+        hydrate_references: bool = False,
         multimodal_enabled: bool = False,
         image_embeddings_client: Optional[ImageEmbeddings] = None,
         global_blob_manager: Optional[BlobManager] = None,
@@ -179,6 +180,7 @@ def __init__(
         self.openai_host = openai_host
         self.prompt_manager = prompt_manager
         self.reasoning_effort = reasoning_effort
+        self.hydrate_references = hydrate_references
         self.include_token_usage = True
         self.multimodal_enabled = multimodal_enabled
         self.image_embeddings_client = image_embeddings_client
@@ -236,7 +238,7 @@ async def search(
             vector_queries=search_vectors,
         )
 
-        documents = []
+        documents: list[Document] = []
         async for page in results.by_page():
             async for document in page:
                 documents.append(
@@ -299,40 +301,112 @@ async def run_agentic_retrieval(
             )
         )
 
-        # STEP 2: Generate a contextual and content specific answer using the search results and chat history
+        # Map activity id -> agent's internal search query
         activities = response.activity
-        activity_mapping = (
+        activity_mapping: dict[int, str] = (
             {
-                activity.id: activity.query.search if activity.query else ""
+                activity.id: activity.query.search
                 for activity in activities
-                if isinstance(activity, KnowledgeAgentSearchActivityRecord)
+                if (
+                    isinstance(activity, KnowledgeAgentSearchActivityRecord)
+                    and activity.query
+                    and activity.query.search is not None
+                )
             }
             if activities
             else {}
         )
 
-        results = []
-        if response and response.references:
-            if results_merge_strategy == "interleaved":
-                # Use interleaved reference order
-                references = sorted(response.references, key=lambda reference: int(reference.id))
-            else:
-                # Default to descending strategy
-                references = response.references
-            for reference in references:
-                if isinstance(reference, KnowledgeAgentAzureSearchDocReference) and reference.source_data:
-                    results.append(
+        # No refs? we're done
+        if not (response and response.references):
+            return response, []
+
+        # Extract references
+        refs = [r for r in response.references if isinstance(r, KnowledgeAgentAzureSearchDocReference)]
+
+        documents: list[Document] = []
+
+        if self.hydrate_references:
+            # Hydrate references to get full documents
+            documents = await self.hydrate_agent_references(
+                references=refs,
+                top=top,
+            )
+        else:
+            # Create documents from reference source data
+            for ref in refs:
+                if ref.source_data:
+                    documents.append(
                         Document(
-                            id=reference.doc_key,
-                            content=reference.source_data["content"],
-                            sourcepage=reference.source_data["sourcepage"],
-                            search_agent_query=activity_mapping[reference.activity_source],
+                            id=ref.doc_key,
+                            content=ref.source_data.get("content"),
+                            sourcepage=ref.source_data.get("sourcepage"),
                         )
                     )
-                if top and len(results) == top:
-                    break
+                if top and len(documents) >= top:
+                    break
+
+        # Build mappings for agent queries and sorting
+        ref_to_activity: dict[str, int] = {}
+        doc_to_ref_id: dict[str, str] = {}
+        for ref in refs:
+            if ref.doc_key:
+                ref_to_activity[ref.doc_key] = ref.activity_source
+                doc_to_ref_id[ref.doc_key] = ref.id
+
+        # Inject agent search queries into all documents
+        for doc in documents:
+            if doc.id and doc.id in ref_to_activity:
+                activity_id = ref_to_activity[doc.id]
+                doc.search_agent_query = activity_mapping.get(activity_id, "")
+
+        # Apply sorting strategy to the documents
+        if results_merge_strategy == "interleaved":  # Use interleaved reference order
+            documents = sorted(
+                documents,
+                key=lambda d: int(doc_to_ref_id.get(d.id, 0)) if d.id and doc_to_ref_id.get(d.id) else 0,
+            )
+        # else: Default - preserve original order
+
+        return response, documents
+
+    async def hydrate_agent_references(
+        self,
+        references: list[KnowledgeAgentAzureSearchDocReference],
+        top: Optional[int],
+    ) -> list[Document]:
+        doc_keys: set[str] = set()
+
+        for ref in references:
+            if not ref.doc_key:
+                continue
+            doc_keys.add(ref.doc_key)
+            if top and len(doc_keys) >= top:
+                break
+
+        if not doc_keys:
+            return []
+
+        # Build search filter only on unique doc IDs
+        id_csv = ",".join(doc_keys)
+        id_filter = f"search.in(id, '{id_csv}', ',')"
+
+        # Fetch full documents
+        hydrated_docs: list[Document] = await self.search(
+            top=len(doc_keys),
+            query_text=None,
+            filter=id_filter,
+            vectors=[],
+            use_text_search=False,
+            use_vector_search=False,
+            use_semantic_ranker=False,
+            use_semantic_captions=False,
+            minimum_search_score=None,
+            minimum_reranker_score=None,
+            use_query_rewriting=False,
+        )
 
-        return response, results
+        return hydrated_docs
 
     async def get_sources_content(
         self,
```
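The new `hydrate_agent_references` method boils down to one extra index round trip: collect the unique doc keys from the agent's references, build a `search.in` filter over the `id` field, and fetch the full documents. Below is a minimal standalone sketch of that idea, assuming an already-configured async `SearchClient` from azure-search-documents and an index whose `id` field is filterable; the function and parameter names are illustrative, not the repo's API.

```python
# Sketch only: the diff routes this through Approach.search(); here the same
# search.in filter is issued directly against an async SearchClient.
from azure.search.documents.aio import SearchClient

async def fetch_documents_by_key(search_client: SearchClient, doc_keys: set[str]) -> list[dict]:
    if not doc_keys:
        return []
    # search.in matches any document whose id appears in the comma-separated list
    id_filter = f"search.in(id, '{','.join(doc_keys)}', ',')"
    results = await search_client.search(search_text=None, filter=id_filter, top=len(doc_keys))
    return [doc async for doc in results]
```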

app/backend/approaches/chatreadretrieveread.py (2 additions, 0 deletions)

```diff
@@ -57,6 +57,7 @@ def __init__(
         query_speller: str,
         prompt_manager: PromptManager,
         reasoning_effort: Optional[str] = None,
+        hydrate_references: bool = False,
         multimodal_enabled: bool = False,
         image_embeddings_client: Optional[ImageEmbeddings] = None,
         global_blob_manager: Optional[BlobManager] = None,
@@ -84,6 +85,7 @@ def __init__(
         self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
         self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty")
         self.reasoning_effort = reasoning_effort
+        self.hydrate_references = hydrate_references
         self.include_token_usage = True
         self.multimodal_enabled = multimodal_enabled
         self.image_embeddings_client = image_embeddings_client
```

app/backend/approaches/retrievethenread.py (2 additions, 0 deletions)

```diff
@@ -46,6 +46,7 @@ def __init__(
         query_speller: str,
         prompt_manager: PromptManager,
         reasoning_effort: Optional[str] = None,
+        hydrate_references: bool = False,
         multimodal_enabled: bool = False,
         image_embeddings_client: Optional[ImageEmbeddings] = None,
         global_blob_manager: Optional[BlobManager] = None,
@@ -73,6 +74,7 @@ def __init__(
         self.answer_prompt = self.prompt_manager.load_prompt("ask_answer_question.prompty")
         self.reasoning_effort = reasoning_effort
         self.include_token_usage = True
+        self.hydrate_references = hydrate_references
         self.multimodal_enabled = multimodal_enabled
         self.image_embeddings_client = image_embeddings_client
         self.global_blob_manager = global_blob_manager
```

app/backend/requirements.txt (10 additions, 12 deletions)

```diff
@@ -30,7 +30,7 @@ azure-cognitiveservices-speech==1.40.0
     # via -r requirements.in
 azure-common==1.1.28
     # via azure-search-documents
-azure-core==1.30.2
+azure-core==1.35.0
     # via
     #   azure-ai-documentintelligence
     #   azure-core-tracing-opentelemetry
@@ -50,6 +50,7 @@ azure-cosmos==4.9.0
 azure-identity==1.17.1
     # via
     #   -r requirements.in
+    #   azure-monitor-opentelemetry-exporter
     #   msgraph-sdk
 azure-monitor-opentelemetry==1.6.13
     # via -r requirements.in
@@ -79,7 +80,7 @@ cffi==1.17.0
     # via cryptography
 charset-normalizer==3.3.2
     # via requests
-click==8.1.7
+click==8.1.8
     # via
     #   flask
     #   prompty
@@ -92,10 +93,6 @@ cryptography==44.0.1
     #   azure-storage-blob
     #   msal
     #   pyjwt
-deprecated==1.2.14
-    # via
-    #   opentelemetry-api
-    #   opentelemetry-semantic-conventions
 distro==1.9.0
     # via openai
 exceptiongroup==1.3.0
@@ -117,11 +114,11 @@ h11==0.16.0
     #   hypercorn
     #   uvicorn
     #   wsproto
-h2==4.1.0
+h2==4.3.0
     # via
     #   httpx
     #   hypercorn
-hpack==4.0.0
+hpack==4.1.0
     # via h2
 httpcore==1.0.9
     # via httpx
@@ -132,7 +129,7 @@ httpx[http2]==0.27.0
     #   openai
 hypercorn==0.17.3
     # via quart
-hyperframe==6.0.1
+hyperframe==6.1.0
     # via h2
 idna==3.10
     # via
@@ -366,7 +363,7 @@ quart==0.20.0
     #   quart-cors
 quart-cors==0.7.0
     # via -r requirements.in
-regex==2024.11.6
+regex==2025.7.34
     # via tiktoken
 requests==2.32.4
     # via
@@ -394,7 +391,7 @@ std-uritemplate==2.0.5
     # via microsoft-kiota-abstractions
 taskgroup==0.2.2
     # via hypercorn
-tenacity==9.0.0
+tenacity==9.1.2
     # via -r requirements.in
 tiktoken==0.8.0
     # via
@@ -426,7 +423,9 @@ typing-extensions==4.13.2
     #   exceptiongroup
     #   hypercorn
     #   openai
+    #   opentelemetry-api
     #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
     #   pydantic
     #   pydantic-core
     #   pypdf
@@ -445,7 +444,6 @@ werkzeug==3.0.6
     #   quart
 wrapt==1.16.0
     # via
-    #   deprecated
     #   opentelemetry-instrumentation
     #   opentelemetry-instrumentation-aiohttp-client
     #   opentelemetry-instrumentation-dbapi
```
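Not part of the commit, but a quick, standard-library-only way to confirm that the bumped pins above are what is actually installed in a local environment:

```python
# Print installed versions of the packages whose pins changed in this file.
from importlib.metadata import PackageNotFoundError, version

for pkg in ("azure-core", "click", "h2", "hpack", "hyperframe", "regex", "tenacity"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")
```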

docs/agentic_retrieval.md (16 additions, 4 deletions)

````diff
@@ -34,21 +34,33 @@ See the agentic retrieval documentation.
    azd env set AZURE_OPENAI_SEARCHAGENT_MODEL_VERSION 2025-04-14
    ```
 
-3. **Update the infrastructure and application:**
+3. **(Optional) Enable extra field hydration**
+
+   By default, agentic retrieval only returns fields included in the semantic configuration.
+
+   You can enable this optional feature below, to include all fields from the search index in the result.
+   ⚠️ This feature is currently only compatible with indexes set up with integrated vectorization,
+   or indexes that otherwise have an "id" field marked as filterable.
+
+   ```shell
+   azd env set ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA true
+   ```
+
+4. **Update the infrastructure and application:**
 
    Execute `azd up` to provision the infrastructure changes (only the new model, if you ran `up` previously) and deploy the application code with the updated environment variables.
 
-4. **Try out the feature:**
+5. **Try out the feature:**
 
   Open the web app and start a new chat. Agentic retrieval will be used to find all sources.
 
-5. **Experiment with max subqueries:**
+6. **Experiment with max subqueries:**
 
   Select the developer options in the web app and change max subqueries to any value between 1 and 20. This controls the maximum amount of subqueries that can be created in the query plan.
 
   ![Max subqueries screenshot](./images/max-subqueries.png)
 
-6. **Review the query plan**
+7. **Review the query plan**
 
   Agentic retrieval use additional billed tokens behind the scenes for the planning process.
   To see the token usage, select the lightbulb icon on a chat answer. This will open the "Thought process" tab, which shows the amount of tokens used by and the queries produced by the planning process
````
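To make the documented behavior concrete, here is a small self-contained sketch (illustrative names, plain dicts instead of the app's Document class) of what the ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA opt-in changes: with hydration off, results carry only the fields the agent echoes back in each reference's source_data; with hydration on, the doc keys are collected so the full documents can be re-fetched from the index.

```python
# Sketch of the two result-building paths the flag selects between.
from typing import Any, Callable, Optional

def build_results(
    references: list[dict[str, Any]],
    hydrate: bool,
    fetch_full_docs: Callable[[list[str]], list[dict[str, Any]]],
    top: Optional[int] = None,
) -> list[dict[str, Any]]:
    if hydrate:
        # Hydration on: re-fetch full index documents for the referenced keys.
        keys = [r["doc_key"] for r in references if r.get("doc_key")]
        return fetch_full_docs(keys[:top] if top else keys)
    # Default: keep only the fields echoed back in source_data.
    docs: list[dict[str, Any]] = []
    for ref in references:
        if ref.get("source_data"):
            docs.append({"id": ref.get("doc_key"), **ref["source_data"]})
        if top and len(docs) >= top:
            break
    return docs
```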
