Azure-Samples · mattgotteiner · May 19, 2025 · Apr 30, 2025 · Apr 30, 2025 · Apr 30, 2025
diff --git a/.github/workflows/python-test.yaml b/.github/workflows/python-test.yaml
@@ -36,6 +36,7 @@ jobs:
             version: "0.4.20"
             cache-dependency-glob: "requirements**.txt"
             python-version: ${{ matrix.python_version }}
+            activate-environment: true
         - name: Setup node
           uses: actions/setup-node@v4
           with:

diff --git a/app/backend/app.py b/app/backend/app.py
@@ -23,6 +23,7 @@
     get_bearer_token_provider,
 )
 from azure.monitor.opentelemetry import configure_azure_monitor
+from azure.search.documents.agent.aio import KnowledgeAgentRetrievalClient
 from azure.search.documents.aio import SearchClient
 from azure.search.documents.indexes.aio import SearchIndexClient
 from azure.storage.blob.aio import ContainerClient
@@ -57,6 +58,8 @@
 from approaches.retrievethenreadvision import RetrieveThenReadVisionApproach
 from chat_history.cosmosdb import chat_history_cosmosdb_bp
 from config import (
+    CONFIG_AGENT_CLIENT,
+    CONFIG_AGENTIC_RETRIEVAL_ENABLED,
     CONFIG_ASK_APPROACH,
     CONFIG_ASK_VISION_APPROACH,
     CONFIG_AUTH_CLIENT,
@@ -308,6 +311,7 @@ def config():
             "showSpeechOutputAzure": current_app.config[CONFIG_SPEECH_OUTPUT_AZURE_ENABLED],
             "showChatHistoryBrowser": current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED],
             "showChatHistoryCosmos": current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED],
+            "showAgenticRetrievalOption": current_app.config[CONFIG_AGENTIC_RETRIEVAL_ENABLED],
         }
     )
 
@@ -424,10 +428,14 @@ async def setup_clients():
     AZURE_USERSTORAGE_ACCOUNT = os.environ.get("AZURE_USERSTORAGE_ACCOUNT")
     AZURE_USERSTORAGE_CONTAINER = os.environ.get("AZURE_USERSTORAGE_CONTAINER")
     AZURE_SEARCH_SERVICE = os.environ["AZURE_SEARCH_SERVICE"]
+    AZURE_SEARCH_ENDPOINT = f"https://{AZURE_SEARCH_SERVICE}.search.windows.net"
     AZURE_SEARCH_INDEX = os.environ["AZURE_SEARCH_INDEX"]
+    AZURE_SEARCH_AGENT = os.getenv("AZURE_SEARCH_AGENT", "")
     # Shared by all OpenAI deployments
     OPENAI_HOST = os.getenv("OPENAI_HOST", "azure")
     OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"]
+    AZURE_OPENAI_SEARCHAGENT_MODEL = os.getenv("AZURE_OPENAI_SEARCHAGENT_MODEL")
+    AZURE_OPENAI_SEARCHAGENT_DEPLOYMENT = os.getenv("AZURE_OPENAI_SEARCHAGENT_DEPLOYMENT")
     OPENAI_EMB_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-ada-002")
     OPENAI_EMB_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMB_DIMENSIONS") or 1536)
     OPENAI_REASONING_EFFORT = os.getenv("AZURE_OPENAI_REASONING_EFFORT")
@@ -479,6 +487,7 @@ async def setup_clients():
     USE_SPEECH_OUTPUT_AZURE = os.getenv("USE_SPEECH_OUTPUT_AZURE", "").lower() == "true"
     USE_CHAT_HISTORY_BROWSER = os.getenv("USE_CHAT_HISTORY_BROWSER", "").lower() == "true"
     USE_CHAT_HISTORY_COSMOS = os.getenv("USE_CHAT_HISTORY_COSMOS", "").lower() == "true"
+    USE_AGENTIC_RETRIEVAL = os.getenv("USE_AGENTIC_RETRIEVAL", "").lower() == "true"
 
     # WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep
     RUNNING_ON_AZURE = os.getenv("WEBSITE_HOSTNAME") is not None or os.getenv("RUNNING_IN_PRODUCTION") is not None
@@ -513,10 +522,13 @@ async def setup_clients():
 
     # Set up clients for AI Search and Storage
     search_client = SearchClient(
-        endpoint=f"https://{AZURE_SEARCH_SERVICE}.search.windows.net",
+        endpoint=AZURE_SEARCH_ENDPOINT,
         index_name=AZURE_SEARCH_INDEX,
         credential=azure_credential,
     )
+    agent_client = KnowledgeAgentRetrievalClient(
+        endpoint=AZURE_SEARCH_ENDPOINT, agent_name=AZURE_SEARCH_AGENT, credential=azure_credential
+    )
 
     blob_container_client = ContainerClient(
         f"https://{AZURE_STORAGE_ACCOUNT}.blob.core.windows.net", AZURE_STORAGE_CONTAINER, credential=azure_credential
@@ -527,7 +539,7 @@ async def setup_clients():
     if AZURE_USE_AUTHENTICATION:
         current_app.logger.info("AZURE_USE_AUTHENTICATION is true, setting up search index client")
         search_index_client = SearchIndexClient(
-            endpoint=f"https://{AZURE_SEARCH_SERVICE}.search.windows.net",
+            endpoint=AZURE_SEARCH_ENDPOINT,
             credential=azure_credential,
         )
         search_index = await search_index_client.get_index(AZURE_SEARCH_INDEX)
@@ -645,6 +657,7 @@ async def setup_clients():
 
     current_app.config[CONFIG_OPENAI_CLIENT] = openai_client
     current_app.config[CONFIG_SEARCH_CLIENT] = search_client
+    current_app.config[CONFIG_AGENT_CLIENT] = agent_client
     current_app.config[CONFIG_BLOB_CONTAINER_CLIENT] = blob_container_client
     current_app.config[CONFIG_AUTH_CLIENT] = auth_helper
 
@@ -668,13 +681,18 @@ async def setup_clients():
     current_app.config[CONFIG_SPEECH_OUTPUT_AZURE_ENABLED] = USE_SPEECH_OUTPUT_AZURE
     current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED] = USE_CHAT_HISTORY_BROWSER
     current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED] = USE_CHAT_HISTORY_COSMOS
+    current_app.config[CONFIG_AGENTIC_RETRIEVAL_ENABLED] = USE_AGENTIC_RETRIEVAL
 
     prompt_manager = PromptyManager()
 
     # Set up the two default RAG approaches for /ask and /chat
     # RetrieveThenReadApproach is used by /ask for single-turn Q&A
     current_app.config[CONFIG_ASK_APPROACH] = RetrieveThenReadApproach(
         search_client=search_client,
+        search_index_name=AZURE_SEARCH_INDEX,
+        agent_model=AZURE_OPENAI_SEARCHAGENT_MODEL,
+        agent_deployment=AZURE_OPENAI_SEARCHAGENT_DEPLOYMENT,
+        agent_client=agent_client,
         openai_client=openai_client,
         auth_helper=auth_helper,
         chatgpt_model=OPENAI_CHATGPT_MODEL,
@@ -694,6 +712,10 @@ async def setup_clients():
     # ChatReadRetrieveReadApproach is used by /chat for multi-turn conversation
     current_app.config[CONFIG_CHAT_APPROACH] = ChatReadRetrieveReadApproach(
         search_client=search_client,
+        search_index_name=AZURE_SEARCH_INDEX,
+        agent_model=AZURE_OPENAI_SEARCHAGENT_MODEL,
+        agent_deployment=AZURE_OPENAI_SEARCHAGENT_DEPLOYMENT,
+        agent_client=agent_client,
         openai_client=openai_client,
         auth_helper=auth_helper,
         chatgpt_model=OPENAI_CHATGPT_MODEL,

diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py
@@ -2,17 +2,20 @@
 from abc import ABC
 from collections.abc import AsyncGenerator, Awaitable
 from dataclasses import dataclass
-from typing import (
-    Any,
-    Callable,
-    Optional,
-    TypedDict,
-    Union,
-    cast,
-)
+from typing import Any, Callable, Optional, TypedDict, Union, cast
 from urllib.parse import urljoin
 
 import aiohttp
+from azure.search.documents.agent.aio import KnowledgeAgentRetrievalClient
+from azure.search.documents.agent.models import (
+    KnowledgeAgentAzureSearchDocReference,
+    KnowledgeAgentIndexParams,
+    KnowledgeAgentMessage,
+    KnowledgeAgentMessageTextContent,
+    KnowledgeAgentRetrievalRequest,
+    KnowledgeAgentRetrievalResponse,
+    KnowledgeAgentSearchActivityRecord,
+)
 from azure.search.documents.aio import SearchClient
 from azure.search.documents.models import (
     QueryCaptionResult,
@@ -36,16 +39,17 @@
 
 @dataclass
 class Document:
-    id: Optional[str]
-    content: Optional[str]
-    category: Optional[str]
-    sourcepage: Optional[str]
-    sourcefile: Optional[str]
-    oids: Optional[list[str]]
-    groups: Optional[list[str]]
-    captions: list[QueryCaptionResult]
+    id: Optional[str] = None
+    content: Optional[str] = None
+    category: Optional[str] = None
+    sourcepage: Optional[str] = None
+    sourcefile: Optional[str] = None
+    oids: Optional[list[str]] = None
+    groups: Optional[list[str]] = None
+    captions: Optional[list[QueryCaptionResult]] = None
     score: Optional[float] = None
     reranker_score: Optional[float] = None
+    search_agent_query: Optional[str] = None
 
     def serialize_for_results(self) -> dict[str, Any]:
         result_dict = {
@@ -70,6 +74,7 @@ def serialize_for_results(self) -> dict[str, Any]:
             ),
             "score": self.score,
             "reranker_score": self.reranker_score,
+            "search_agent_query": self.search_agent_query,
         }
         return result_dict
 
@@ -247,6 +252,67 @@ async def search(
 
         return qualified_documents
 
+    async def run_agentic_retrieval(
+        self,
+        messages: list[ChatCompletionMessageParam],
+        agent_client: KnowledgeAgentRetrievalClient,
+        search_index_name: str,
+        top: Optional[int] = None,
+        filter_add_on: Optional[str] = None,
+        minimum_reranker_score: Optional[float] = None,
+        max_docs_for_reranker: Optional[int] = None,
+    ) -> tuple[KnowledgeAgentRetrievalResponse, list[Document]]:
+        """Run agentic retrieval; return the raw response and its Documents (at most `top` when set).
+
+        System messages are not forwarded to the agent.
+        """
+        # STEP 1: Invoke agentic retrieval
+        response = await agent_client.retrieve(
+            retrieval_request=KnowledgeAgentRetrievalRequest(
+                messages=[
+                    KnowledgeAgentMessage(
+                        role=str(msg["role"]), content=[KnowledgeAgentMessageTextContent(text=str(msg["content"]))]
+                    )
+                    for msg in messages
+                    if msg["role"] != "system"
+                ],
+                target_index_params=[
+                    KnowledgeAgentIndexParams(
+                        index_name=search_index_name,
+                        reranker_threshold=minimum_reranker_score,
+                        max_docs_for_reranker=max_docs_for_reranker,
+                        filter_add_on=filter_add_on,
+                        include_reference_source_data=True,
+                    )
+                ],
+            )
+        )
+
+        # STEP 2: Map each search activity id to the query the agent issued for it
+        activity_mapping = {
+            activity.id: activity.query.search if activity.query else ""
+            for activity in response.activity or []
+            if isinstance(activity, KnowledgeAgentSearchActivityRecord)
+        }
+
+        # STEP 3: Convert references that carry source data into Documents, stopping at `top`
+        results: list[Document] = []
+        for reference in response.references or []:
+            if isinstance(reference, KnowledgeAgentAzureSearchDocReference) and reference.source_data:
+                results.append(
+                    Document(
+                        id=reference.doc_key,
+                        content=reference.source_data["content"],
+                        sourcepage=reference.source_data["sourcepage"],
+                        # .get: a reference whose activity is not a recorded search must not raise KeyError
+                        search_agent_query=activity_mapping.get(reference.activity_source),
+                    )
+                )
+            if top and len(results) == top:
+                break
+
+        return response, results
+
     def get_sources_content(
         self, results: list[Document], use_semantic_captions: bool, use_image_citation: bool
     ) -> list[str]: