
Commit e3c5310

Merge branch 'main' into answersources
2 parents 0ee4f21 + 1507ea7 commit e3c5310

File tree: 17 files changed (+857 / -178 lines)

app/backend/app.py (3 additions, 0 deletions)

```diff
@@ -471,6 +471,7 @@ async def setup_clients():
     USE_CHAT_HISTORY_BROWSER = os.getenv("USE_CHAT_HISTORY_BROWSER", "").lower() == "true"
     USE_CHAT_HISTORY_COSMOS = os.getenv("USE_CHAT_HISTORY_COSMOS", "").lower() == "true"
     USE_AGENTIC_RETRIEVAL = os.getenv("USE_AGENTIC_RETRIEVAL", "").lower() == "true"
+    ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA = os.getenv("ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA", "").lower() == "true"
 
     # WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep
     RUNNING_ON_AZURE = os.getenv("WEBSITE_HOSTNAME") is not None or os.getenv("RUNNING_IN_PRODUCTION") is not None
@@ -689,6 +690,7 @@ async def setup_clients():
             query_speller=AZURE_SEARCH_QUERY_SPELLER,
             prompt_manager=prompt_manager,
             reasoning_effort=OPENAI_REASONING_EFFORT,
+            hydrate_references=ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA,
             multimodal_enabled=USE_MULTIMODAL,
             image_embeddings_client=image_embeddings_client,
             global_blob_manager=global_blob_manager,
@@ -716,6 +718,7 @@ async def setup_clients():
             query_speller=AZURE_SEARCH_QUERY_SPELLER,
             prompt_manager=prompt_manager,
             reasoning_effort=OPENAI_REASONING_EFFORT,
+            hydrate_references=ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA,
             multimodal_enabled=USE_MULTIMODAL,
             image_embeddings_client=image_embeddings_client,
             global_blob_manager=global_blob_manager,
```
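For orientation, here is a small self-contained sketch of the flag parsing this hunk adds to `setup_clients()`; only the environment variable name comes from the diff, the helper function is illustrative. Any value other than the literal string "true" (case-insensitive) leaves hydration off.

```python
# Sketch of the opt-in flag parsing used above; the function name is illustrative.
import os

def agentic_source_data_enabled() -> bool:
    # Only the exact string "true" (case-insensitive) enables the feature.
    return os.getenv("ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA", "").lower() == "true"

if __name__ == "__main__":
    os.environ["ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA"] = "True"
    print(agentic_source_data_enabled())  # True
    os.environ["ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA"] = "1"
    print(agentic_source_data_enabled())  # False: "1" does not count as true
```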

app/backend/approaches/approach.py (97 additions, 23 deletions)

```diff
@@ -162,6 +162,7 @@ def __init__(
         openai_host: str,
         prompt_manager: PromptManager,
         reasoning_effort: Optional[str] = None,
+        hydrate_references: bool = False,
         multimodal_enabled: bool = False,
         image_embeddings_client: Optional[ImageEmbeddings] = None,
         global_blob_manager: Optional[BlobManager] = None,
@@ -179,6 +180,7 @@ def __init__(
         self.openai_host = openai_host
         self.prompt_manager = prompt_manager
         self.reasoning_effort = reasoning_effort
+        self.hydrate_references = hydrate_references
         self.include_token_usage = True
         self.multimodal_enabled = multimodal_enabled
         self.image_embeddings_client = image_embeddings_client
@@ -236,7 +238,7 @@ async def search(
             vector_queries=search_vectors,
         )
 
-        documents = []
+        documents: list[Document] = []
         async for page in results.by_page():
             async for document in page:
                 documents.append(
@@ -299,40 +301,112 @@ async def run_agentic_retrieval(
             )
         )
 
-        # STEP 2: Generate a contextual and content specific answer using the search results and chat history
+        # Map activity id -> agent's internal search query
         activities = response.activity
-        activity_mapping = (
+        activity_mapping: dict[int, str] = (
             {
-                activity.id: activity.query.search if activity.query else ""
+                activity.id: activity.query.search
                 for activity in activities
-                if isinstance(activity, KnowledgeAgentSearchActivityRecord)
+                if (
+                    isinstance(activity, KnowledgeAgentSearchActivityRecord)
+                    and activity.query
+                    and activity.query.search is not None
+                )
             }
             if activities
             else {}
         )
 
-        results = []
-        if response and response.references:
-            if results_merge_strategy == "interleaved":
-                # Use interleaved reference order
-                references = sorted(response.references, key=lambda reference: int(reference.id))
-            else:
-                # Default to descending strategy
-                references = response.references
-            for reference in references:
-                if isinstance(reference, KnowledgeAgentAzureSearchDocReference) and reference.source_data:
-                    results.append(
+        # No refs? we're done
+        if not (response and response.references):
+            return response, []
+
+        # Extract references
+        refs = [r for r in response.references if isinstance(r, KnowledgeAgentAzureSearchDocReference)]
+
+        documents: list[Document] = []
+
+        if self.hydrate_references:
+            # Hydrate references to get full documents
+            documents = await self.hydrate_agent_references(
+                references=refs,
+                top=top,
+            )
+        else:
+            # Create documents from reference source data
+            for ref in refs:
+                if ref.source_data:
+                    documents.append(
                         Document(
-                            id=reference.doc_key,
-                            content=reference.source_data["content"],
-                            sourcepage=reference.source_data["sourcepage"],
-                            search_agent_query=activity_mapping[reference.activity_source],
+                            id=ref.doc_key,
+                            content=ref.source_data.get("content"),
+                            sourcepage=ref.source_data.get("sourcepage"),
                         )
                     )
-                if top and len(results) == top:
-                    break
+                if top and len(documents) >= top:
+                    break
+
+        # Build mappings for agent queries and sorting
+        ref_to_activity: dict[str, int] = {}
+        doc_to_ref_id: dict[str, str] = {}
+        for ref in refs:
+            if ref.doc_key:
+                ref_to_activity[ref.doc_key] = ref.activity_source
+                doc_to_ref_id[ref.doc_key] = ref.id
+
+        # Inject agent search queries into all documents
+        for doc in documents:
+            if doc.id and doc.id in ref_to_activity:
+                activity_id = ref_to_activity[doc.id]
+                doc.search_agent_query = activity_mapping.get(activity_id, "")
+
+        # Apply sorting strategy to the documents
+        if results_merge_strategy == "interleaved":  # Use interleaved reference order
+            documents = sorted(
+                documents,
+                key=lambda d: int(doc_to_ref_id.get(d.id, 0)) if d.id and doc_to_ref_id.get(d.id) else 0,
+            )
+        # else: Default - preserve original order
+
+        return response, documents
+
+    async def hydrate_agent_references(
+        self,
+        references: list[KnowledgeAgentAzureSearchDocReference],
+        top: Optional[int],
+    ) -> list[Document]:
+        doc_keys: set[str] = set()
+
+        for ref in references:
+            if not ref.doc_key:
+                continue
+            doc_keys.add(ref.doc_key)
+            if top and len(doc_keys) >= top:
+                break
+
+        if not doc_keys:
+            return []
+
+        # Build search filter only on unique doc IDs
+        id_csv = ",".join(doc_keys)
+        id_filter = f"search.in(id, '{id_csv}', ',')"
+
+        # Fetch full documents
+        hydrated_docs: list[Document] = await self.search(
+            top=len(doc_keys),
+            query_text=None,
+            filter=id_filter,
+            vectors=[],
+            use_text_search=False,
+            use_vector_search=False,
+            use_semantic_ranker=False,
+            use_semantic_captions=False,
+            minimum_search_score=None,
+            minimum_reranker_score=None,
+            use_query_rewriting=False,
+        )
 
-        return response, results
+        return hydrated_docs
 
     async def get_sources_content(
         self,
```
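The new `hydrate_agent_references` method boils down to one extra index round trip: collect the unique doc keys from the agent's references, build a `search.in` filter over the `id` field, and fetch the full documents. Below is a minimal standalone sketch of that idea, assuming an already-configured async `SearchClient` from azure-search-documents and an index whose `id` field is filterable; the function and parameter names are illustrative, not the repo's API.

```python
# Sketch only: the diff routes this through Approach.search(); here the same
# search.in filter is issued directly against an async SearchClient.
from azure.search.documents.aio import SearchClient

async def fetch_documents_by_key(search_client: SearchClient, doc_keys: set[str]) -> list[dict]:
    if not doc_keys:
        return []
    # search.in matches any document whose id appears in the comma-separated list
    id_filter = f"search.in(id, '{','.join(doc_keys)}', ',')"
    results = await search_client.search(search_text=None, filter=id_filter, top=len(doc_keys))
    return [doc async for doc in results]
```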

app/backend/approaches/chatreadretrieveread.py (2 additions, 0 deletions)

```diff
@@ -57,6 +57,7 @@ def __init__(
         query_speller: str,
         prompt_manager: PromptManager,
         reasoning_effort: Optional[str] = None,
+        hydrate_references: bool = False,
         multimodal_enabled: bool = False,
         image_embeddings_client: Optional[ImageEmbeddings] = None,
         global_blob_manager: Optional[BlobManager] = None,
@@ -84,6 +85,7 @@ def __init__(
         self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
         self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty")
         self.reasoning_effort = reasoning_effort
+        self.hydrate_references = hydrate_references
         self.include_token_usage = True
         self.multimodal_enabled = multimodal_enabled
         self.image_embeddings_client = image_embeddings_client
```

app/backend/approaches/retrievethenread.py (2 additions, 0 deletions)

```diff
@@ -46,6 +46,7 @@ def __init__(
         query_speller: str,
         prompt_manager: PromptManager,
         reasoning_effort: Optional[str] = None,
+        hydrate_references: bool = False,
         multimodal_enabled: bool = False,
         image_embeddings_client: Optional[ImageEmbeddings] = None,
         global_blob_manager: Optional[BlobManager] = None,
@@ -73,6 +74,7 @@ def __init__(
         self.answer_prompt = self.prompt_manager.load_prompt("ask_answer_question.prompty")
         self.reasoning_effort = reasoning_effort
         self.include_token_usage = True
+        self.hydrate_references = hydrate_references
         self.multimodal_enabled = multimodal_enabled
         self.image_embeddings_client = image_embeddings_client
         self.global_blob_manager = global_blob_manager
```

app/backend/requirements.txt (10 additions, 12 deletions)

```diff
@@ -30,7 +30,7 @@ azure-cognitiveservices-speech==1.40.0
     # via -r requirements.in
 azure-common==1.1.28
     # via azure-search-documents
-azure-core==1.30.2
+azure-core==1.35.0
     # via
     #   azure-ai-documentintelligence
     #   azure-core-tracing-opentelemetry
@@ -50,6 +50,7 @@ azure-cosmos==4.9.0
 azure-identity==1.17.1
     # via
     #   -r requirements.in
+    #   azure-monitor-opentelemetry-exporter
     #   msgraph-sdk
 azure-monitor-opentelemetry==1.6.13
     # via -r requirements.in
@@ -79,7 +80,7 @@ cffi==1.17.0
     # via cryptography
 charset-normalizer==3.3.2
     # via requests
-click==8.1.7
+click==8.1.8
     # via
     #   flask
     #   prompty
@@ -92,10 +93,6 @@ cryptography==44.0.1
     #   azure-storage-blob
     #   msal
     #   pyjwt
-deprecated==1.2.14
-    # via
-    #   opentelemetry-api
-    #   opentelemetry-semantic-conventions
 distro==1.9.0
     # via openai
 exceptiongroup==1.3.0
@@ -117,11 +114,11 @@ h11==0.16.0
     #   hypercorn
     #   uvicorn
     #   wsproto
-h2==4.1.0
+h2==4.3.0
     # via
     #   httpx
     #   hypercorn
-hpack==4.0.0
+hpack==4.1.0
     # via h2
 httpcore==1.0.9
     # via httpx
@@ -132,7 +129,7 @@ httpx[http2]==0.27.0
     #   openai
 hypercorn==0.17.3
     # via quart
-hyperframe==6.0.1
+hyperframe==6.1.0
     # via h2
 idna==3.10
     # via
@@ -366,7 +363,7 @@ quart==0.20.0
     #   quart-cors
 quart-cors==0.7.0
     # via -r requirements.in
-regex==2024.11.6
+regex==2025.7.34
     # via tiktoken
 requests==2.32.4
     # via
@@ -394,7 +391,7 @@ std-uritemplate==2.0.5
     # via microsoft-kiota-abstractions
 taskgroup==0.2.2
     # via hypercorn
-tenacity==9.0.0
+tenacity==9.1.2
     # via -r requirements.in
 tiktoken==0.8.0
     # via
@@ -426,7 +423,9 @@ typing-extensions==4.13.2
     #   exceptiongroup
     #   hypercorn
     #   openai
+    #   opentelemetry-api
     #   opentelemetry-sdk
+    #   opentelemetry-semantic-conventions
     #   pydantic
     #   pydantic-core
     #   pypdf
@@ -445,7 +444,6 @@ werkzeug==3.0.6
     #   quart
 wrapt==1.16.0
     # via
-    #   deprecated
     #   opentelemetry-instrumentation
     #   opentelemetry-instrumentation-aiohttp-client
     #   opentelemetry-instrumentation-dbapi
```
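Not part of the commit, but a quick, standard-library-only way to confirm that the bumped pins above are what is actually installed in a local environment:

```python
# Print installed versions of the packages whose pins changed in this file.
from importlib.metadata import PackageNotFoundError, version

for pkg in ("azure-core", "click", "h2", "hpack", "hyperframe", "regex", "tenacity"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")
```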

docs/agentic_retrieval.md (16 additions, 4 deletions)

````diff
@@ -34,21 +34,33 @@ See the agentic retrieval documentation.
    azd env set AZURE_OPENAI_SEARCHAGENT_MODEL_VERSION 2025-04-14
    ```
 
-3. **Update the infrastructure and application:**
+3. **(Optional) Enable extra field hydration**
+
+   By default, agentic retrieval only returns fields included in the semantic configuration.
+
+   You can enable this optional feature below, to include all fields from the search index in the result.
+   ⚠️ This feature is currently only compatible with indexes set up with integrated vectorization,
+   or indexes that otherwise have an "id" field marked as filterable.
+
+   ```shell
+   azd env set ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA true
+   ```
+
+4. **Update the infrastructure and application:**
 
    Execute `azd up` to provision the infrastructure changes (only the new model, if you ran `up` previously) and deploy the application code with the updated environment variables.
 
-4. **Try out the feature:**
+5. **Try out the feature:**
 
   Open the web app and start a new chat. Agentic retrieval will be used to find all sources.
 
-5. **Experiment with max subqueries:**
+6. **Experiment with max subqueries:**
 
   Select the developer options in the web app and change max subqueries to any value between 1 and 20. This controls the maximum amount of subqueries that can be created in the query plan.
 
   ![Max subqueries screenshot](./images/max-subqueries.png)
 
-6. **Review the query plan**
+7. **Review the query plan**
 
   Agentic retrieval use additional billed tokens behind the scenes for the planning process.
   To see the token usage, select the lightbulb icon on a chat answer. This will open the "Thought process" tab, which shows the amount of tokens used by and the queries produced by the planning process
````
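To make the documented behavior concrete, here is a small self-contained sketch (illustrative names, plain dicts instead of the app's Document class) of what the ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA opt-in changes: with hydration off, results carry only the fields the agent echoes back in each reference's source_data; with hydration on, the doc keys are collected so the full documents can be re-fetched from the index.

```python
# Sketch of the two result-building paths the flag selects between.
from typing import Any, Callable, Optional

def build_results(
    references: list[dict[str, Any]],
    hydrate: bool,
    fetch_full_docs: Callable[[list[str]], list[dict[str, Any]]],
    top: Optional[int] = None,
) -> list[dict[str, Any]]:
    if hydrate:
        # Hydration on: re-fetch full index documents for the referenced keys.
        keys = [r["doc_key"] for r in references if r.get("doc_key")]
        return fetch_full_docs(keys[:top] if top else keys)
    # Default: keep only the fields echoed back in source_data.
    docs: list[dict[str, Any]] = []
    for ref in references:
        if ref.get("source_data"):
            docs.append({"id": ref.get("doc_key"), **ref["source_data"]})
        if top and len(docs) >= top:
            break
    return docs
```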
