Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions app/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ async def setup_clients():
USE_CHAT_HISTORY_BROWSER = os.getenv("USE_CHAT_HISTORY_BROWSER", "").lower() == "true"
USE_CHAT_HISTORY_COSMOS = os.getenv("USE_CHAT_HISTORY_COSMOS", "").lower() == "true"
USE_AGENTIC_RETRIEVAL = os.getenv("USE_AGENTIC_RETRIEVAL", "").lower() == "true"
ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA = os.getenv("ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA", "").lower() == "true"

# WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep
RUNNING_ON_AZURE = os.getenv("WEBSITE_HOSTNAME") is not None or os.getenv("RUNNING_IN_PRODUCTION") is not None
Expand Down Expand Up @@ -689,6 +690,7 @@ async def setup_clients():
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
reasoning_effort=OPENAI_REASONING_EFFORT,
hydrate_references=ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA,
multimodal_enabled=USE_MULTIMODAL,
image_embeddings_client=image_embeddings_client,
global_blob_manager=global_blob_manager,
Expand Down Expand Up @@ -716,6 +718,7 @@ async def setup_clients():
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
reasoning_effort=OPENAI_REASONING_EFFORT,
hydrate_references=ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA,
multimodal_enabled=USE_MULTIMODAL,
image_embeddings_client=image_embeddings_client,
global_blob_manager=global_blob_manager,
Expand Down
120 changes: 97 additions & 23 deletions app/backend/approaches/approach.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def __init__(
openai_host: str,
prompt_manager: PromptManager,
reasoning_effort: Optional[str] = None,
hydrate_references: bool = False,
multimodal_enabled: bool = False,
image_embeddings_client: Optional[ImageEmbeddings] = None,
global_blob_manager: Optional[BlobManager] = None,
Expand All @@ -179,6 +180,7 @@ def __init__(
self.openai_host = openai_host
self.prompt_manager = prompt_manager
self.reasoning_effort = reasoning_effort
self.hydrate_references = hydrate_references
self.include_token_usage = True
self.multimodal_enabled = multimodal_enabled
self.image_embeddings_client = image_embeddings_client
Expand Down Expand Up @@ -236,7 +238,7 @@ async def search(
vector_queries=search_vectors,
)

documents = []
documents: list[Document] = []
async for page in results.by_page():
async for document in page:
documents.append(
Expand Down Expand Up @@ -299,40 +301,112 @@ async def run_agentic_retrieval(
)
)

# STEP 2: Generate a contextual and content specific answer using the search results and chat history
# Map activity id -> agent's internal search query
activities = response.activity
activity_mapping = (
activity_mapping: dict[int, str] = (
{
activity.id: activity.query.search if activity.query else ""
activity.id: activity.query.search
for activity in activities
if isinstance(activity, KnowledgeAgentSearchActivityRecord)
if (
isinstance(activity, KnowledgeAgentSearchActivityRecord)
and activity.query
and activity.query.search is not None
)
}
if activities
else {}
)

results = []
if response and response.references:
if results_merge_strategy == "interleaved":
# Use interleaved reference order
references = sorted(response.references, key=lambda reference: int(reference.id))
else:
# Default to descending strategy
references = response.references
for reference in references:
if isinstance(reference, KnowledgeAgentAzureSearchDocReference) and reference.source_data:
results.append(
# No refs? we're done
if not (response and response.references):
return response, []

# Extract references
refs = [r for r in response.references if isinstance(r, KnowledgeAgentAzureSearchDocReference)]

documents: list[Document] = []

if self.hydrate_references:
# Hydrate references to get full documents
documents = await self.hydrate_agent_references(
references=refs,
top=top,
)
else:
# Create documents from reference source data
for ref in refs:
if ref.source_data:
documents.append(
Document(
id=reference.doc_key,
content=reference.source_data["content"],
sourcepage=reference.source_data["sourcepage"],
search_agent_query=activity_mapping[reference.activity_source],
id=ref.doc_key,
content=ref.source_data.get("content"),
sourcepage=ref.source_data.get("sourcepage"),
)
)
if top and len(results) == top:
break
if top and len(documents) >= top:
break

# Build mappings for agent queries and sorting
ref_to_activity: dict[str, int] = {}
doc_to_ref_id: dict[str, str] = {}
for ref in refs:
if ref.doc_key:
ref_to_activity[ref.doc_key] = ref.activity_source
doc_to_ref_id[ref.doc_key] = ref.id

# Inject agent search queries into all documents
for doc in documents:
if doc.id and doc.id in ref_to_activity:
activity_id = ref_to_activity[doc.id]
doc.search_agent_query = activity_mapping.get(activity_id, "")

# Apply sorting strategy to the documents
if results_merge_strategy == "interleaved": # Use interleaved reference order
documents = sorted(
documents,
key=lambda d: int(doc_to_ref_id.get(d.id, 0)) if d.id and doc_to_ref_id.get(d.id) else 0,
)
# else: Default - preserve original order

return response, documents

async def hydrate_agent_references(
    self,
    references: list[KnowledgeAgentAzureSearchDocReference],
    top: Optional[int],
) -> list[Document]:
    """Fetch full documents from the search index for agent references.

    Agentic retrieval references only carry the fields included in the
    semantic configuration; this re-queries the index by document id to
    "hydrate" each reference into a full Document.

    :param references: Doc references returned by the knowledge agent.
    :param top: Optional cap on the number of unique documents fetched.
    :return: Hydrated documents; empty list if no reference had a doc_key.
    """
    # Collect unique doc keys, honoring the optional cap.
    doc_keys: set[str] = set()
    for ref in references:
        if not ref.doc_key:
            continue
        doc_keys.add(ref.doc_key)
        if top and len(doc_keys) >= top:
            break

    if not doc_keys:
        return []

    # Build a search filter over the unique doc ids only.
    # NOTE(review): assumes the "id" field is filterable and that doc keys
    # never contain the ',' delimiter used by search.in — confirm for
    # indexes not set up with integrated vectorization.
    id_csv = ",".join(doc_keys)
    id_filter = f"search.in(id, '{id_csv}', ',')"

    # Plain filtered lookup — no text, vector, or semantic ranking needed,
    # since we are fetching known documents by id.
    hydrated_docs: list[Document] = await self.search(
        top=len(doc_keys),
        query_text=None,
        filter=id_filter,
        vectors=[],
        use_text_search=False,
        use_vector_search=False,
        use_semantic_ranker=False,
        use_semantic_captions=False,
        minimum_search_score=None,
        minimum_reranker_score=None,
        use_query_rewriting=False,
    )

    return hydrated_docs

async def get_sources_content(
self,
Expand Down
2 changes: 2 additions & 0 deletions app/backend/approaches/chatreadretrieveread.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(
query_speller: str,
prompt_manager: PromptManager,
reasoning_effort: Optional[str] = None,
hydrate_references: bool = False,
multimodal_enabled: bool = False,
image_embeddings_client: Optional[ImageEmbeddings] = None,
global_blob_manager: Optional[BlobManager] = None,
Expand Down Expand Up @@ -84,6 +85,7 @@ def __init__(
self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty")
self.reasoning_effort = reasoning_effort
self.hydrate_references = hydrate_references
self.include_token_usage = True
self.multimodal_enabled = multimodal_enabled
self.image_embeddings_client = image_embeddings_client
Expand Down
2 changes: 2 additions & 0 deletions app/backend/approaches/retrievethenread.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(
query_speller: str,
prompt_manager: PromptManager,
reasoning_effort: Optional[str] = None,
hydrate_references: bool = False,
multimodal_enabled: bool = False,
image_embeddings_client: Optional[ImageEmbeddings] = None,
global_blob_manager: Optional[BlobManager] = None,
Expand Down Expand Up @@ -73,6 +74,7 @@ def __init__(
self.answer_prompt = self.prompt_manager.load_prompt("ask_answer_question.prompty")
self.reasoning_effort = reasoning_effort
self.include_token_usage = True
self.hydrate_references = hydrate_references
self.multimodal_enabled = multimodal_enabled
self.image_embeddings_client = image_embeddings_client
self.global_blob_manager = global_blob_manager
Expand Down
20 changes: 16 additions & 4 deletions docs/agentic_retrieval.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,33 @@ See the agentic retrieval documentation.
azd env set AZURE_OPENAI_SEARCHAGENT_MODEL_VERSION 2025-04-14
```

3. **Update the infrastructure and application:**
3. **(Optional) Enable extra field hydration**

By default, agentic retrieval only returns fields included in the semantic configuration.

You can enable the optional feature below to include all fields from the search index in the results.
⚠️ This feature is currently only compatible with indexes set up with integrated vectorization,
or indexes that otherwise have an "id" field marked as filterable.

```shell
azd env set ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA true
```

4. **Update the infrastructure and application:**

Execute `azd up` to provision the infrastructure changes (only the new model, if you ran `up` previously) and deploy the application code with the updated environment variables.

4. **Try out the feature:**
5. **Try out the feature:**

Open the web app and start a new chat. Agentic retrieval will be used to find all sources.

5. **Experiment with max subqueries:**
6. **Experiment with max subqueries:**

Select the developer options in the web app and change max subqueries to any value between 1 and 20. This controls the maximum amount of subqueries that can be created in the query plan.

![Max subqueries screenshot](./images/max-subqueries.png)

6. **Review the query plan**
7. **Review the query plan**

Agentic retrieval uses additional billed tokens behind the scenes for the planning process.
To see the token usage, select the lightbulb icon on a chat answer. This will open the "Thought process" tab, which shows the number of tokens used and the queries produced by the planning process
Expand Down
2 changes: 2 additions & 0 deletions infra/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ param storageSkuName string // Set in main.parameters.json

param defaultReasoningEffort string // Set in main.parameters.json
param useAgenticRetrieval bool // Set in main.parameters.json
param enableAgenticRetrievalSourceData bool // Set in main.parameters.json

param userStorageAccountName string = ''
param userStorageContainerName string = 'user-content'
Expand Down Expand Up @@ -423,6 +424,7 @@ var appEnvVariables = {
USE_SPEECH_OUTPUT_BROWSER: useSpeechOutputBrowser
USE_SPEECH_OUTPUT_AZURE: useSpeechOutputAzure
USE_AGENTIC_RETRIEVAL: useAgenticRetrieval
ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA: enableAgenticRetrievalSourceData
// Chat history settings
USE_CHAT_HISTORY_BROWSER: useChatHistoryBrowser
USE_CHAT_HISTORY_COSMOS: useChatHistoryCosmos
Expand Down
43 changes: 23 additions & 20 deletions infra/main.parameters.json
Original file line number Diff line number Diff line change
Expand Up @@ -104,79 +104,79 @@
"backendServiceName": {
"value": "${AZURE_APP_SERVICE}"
},
"chatGptModelName":{
"chatGptModelName": {
"value": "${AZURE_OPENAI_CHATGPT_MODEL}"
},
"chatGptDeploymentName": {
"value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT}"
},
"chatGptDeploymentVersion":{
"chatGptDeploymentVersion": {
"value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION}"
},
"chatGptDeploymentSkuName":{
"chatGptDeploymentSkuName": {
"value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU}"
},
"chatGptDeploymentCapacity":{
"chatGptDeploymentCapacity": {
"value": "${AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY}"
},
"embeddingModelName":{
"embeddingModelName": {
"value": "${AZURE_OPENAI_EMB_MODEL_NAME}"
},
"embeddingDeploymentName": {
"value": "${AZURE_OPENAI_EMB_DEPLOYMENT}"
},
"embeddingDeploymentVersion":{
"embeddingDeploymentVersion": {
"value": "${AZURE_OPENAI_EMB_DEPLOYMENT_VERSION}"
},
"embeddingDeploymentSkuName":{
"embeddingDeploymentSkuName": {
"value": "${AZURE_OPENAI_EMB_DEPLOYMENT_SKU}"
},
"embeddingDeploymentCapacity":{
"embeddingDeploymentCapacity": {
"value": "${AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY}"
},
"embeddingDimensions": {
"value": "${AZURE_OPENAI_EMB_DIMENSIONS}"
},
"evalModelName":{
"evalModelName": {
"value": "${AZURE_OPENAI_EVAL_MODEL}"
},
"evalModelVersion":{
"evalModelVersion": {
"value": "${AZURE_OPENAI_EVAL_MODEL_VERSION}"
},
"evalDeploymentName": {
"value": "${AZURE_OPENAI_EVAL_DEPLOYMENT}"
},
"evalDeploymentSkuName":{
"evalDeploymentSkuName": {
"value": "${AZURE_OPENAI_EVAL_DEPLOYMENT_SKU}"
},
"evalDeploymentCapacity":{
"evalDeploymentCapacity": {
"value": "${AZURE_OPENAI_EVAL_DEPLOYMENT_CAPACITY}"
},
"searchAgentModelName":{
"searchAgentModelName": {
"value": "${AZURE_OPENAI_SEARCHAGENT_MODEL}"
},
"searchAgentModelVersion":{
"searchAgentModelVersion": {
"value": "${AZURE_OPENAI_SEARCHAGENT_MODEL_VERSION}"
},
"searchAgentDeploymentName": {
"value": "${AZURE_OPENAI_SEARCHAGENT_DEPLOYMENT}"
},
"searchAgentDeploymentSkuName":{
"searchAgentDeploymentSkuName": {
"value": "${AZURE_OPENAI_SEARCHAGENT_DEPLOYMENT_SKU}"
},
"searchAgentDeploymentCapacity":{
"searchAgentDeploymentCapacity": {
"value": "${AZURE_OPENAI_SEARCHAGENT_DEPLOYMENT_CAPACITY}"
},
"openAiHost": {
"value": "${OPENAI_HOST=azure}"
},
"azureOpenAiCustomUrl":{
"azureOpenAiCustomUrl": {
"value": "${AZURE_OPENAI_CUSTOM_URL}"
},
"azureOpenAiApiVersion":{
"azureOpenAiApiVersion": {
"value": "${AZURE_OPENAI_API_VERSION}"
},
"azureOpenAiApiKey":{
"azureOpenAiApiKey": {
"value": "${AZURE_OPENAI_API_KEY_OVERRIDE}"
},
"azureOpenAiDisableKeys": {
Expand Down Expand Up @@ -324,7 +324,7 @@
"value": "${DEPLOYMENT_TARGET=containerapps}"
},
"webAppExists": {
"value": "${SERVICE_WEB_RESOURCE_EXISTS=false}"
"value": "${SERVICE_WEB_RESOURCE_EXISTS=false}"
},
"azureContainerAppsWorkloadProfile": {
"value": "${AZURE_CONTAINER_APPS_WORKLOAD_PROFILE=Consumption}"
Expand All @@ -338,6 +338,9 @@
"useAgenticRetrieval": {
"value": "${USE_AGENTIC_RETRIEVAL=false}"
},
"enableAgenticRetrievalSourceData": {
"value": "${ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA=false}"
},
"ragSearchTextEmbeddings": {
"value": "${RAG_SEARCH_TEXT_EMBEDDINGS=true}"
},
Expand Down
Loading
Loading