diff --git a/.github/workflows/python-test.yaml b/.github/workflows/python-test.yaml index ee14ea05e0..2d99da68cf 100644 --- a/.github/workflows/python-test.yaml +++ b/.github/workflows/python-test.yaml @@ -49,6 +49,8 @@ jobs: cd ./app/frontend npm install npm run build + - name: Check i18n translations + run: npx -y @lingual/i18n-check --locales app/frontend/src/locales -s en -f i18next -r summary - name: Install dependencies run: | uv pip install -r requirements-dev.txt diff --git a/app/backend/app.py b/app/backend/app.py index d391e8b779..62707e0cd7 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -471,7 +471,6 @@ async def setup_clients(): USE_CHAT_HISTORY_BROWSER = os.getenv("USE_CHAT_HISTORY_BROWSER", "").lower() == "true" USE_CHAT_HISTORY_COSMOS = os.getenv("USE_CHAT_HISTORY_COSMOS", "").lower() == "true" USE_AGENTIC_RETRIEVAL = os.getenv("USE_AGENTIC_RETRIEVAL", "").lower() == "true" - ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA = os.getenv("ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA", "").lower() == "true" # WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep RUNNING_ON_AZURE = os.getenv("WEBSITE_HOSTNAME") is not None or os.getenv("RUNNING_IN_PRODUCTION") is not None @@ -690,7 +689,6 @@ async def setup_clients(): query_speller=AZURE_SEARCH_QUERY_SPELLER, prompt_manager=prompt_manager, reasoning_effort=OPENAI_REASONING_EFFORT, - hydrate_references=ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA, multimodal_enabled=USE_MULTIMODAL, image_embeddings_client=image_embeddings_client, global_blob_manager=global_blob_manager, @@ -718,7 +716,6 @@ async def setup_clients(): query_speller=AZURE_SEARCH_QUERY_SPELLER, prompt_manager=prompt_manager, reasoning_effort=OPENAI_REASONING_EFFORT, - hydrate_references=ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA, multimodal_enabled=USE_MULTIMODAL, image_embeddings_client=image_embeddings_client, global_blob_manager=global_blob_manager, diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py index b813ab7722..04a74a8818 100644 --- a/app/backend/approaches/approach.py +++ b/app/backend/approaches/approach.py @@ -6,13 +6,13 @@ from azure.search.documents.agent.aio import KnowledgeAgentRetrievalClient from azure.search.documents.agent.models import ( - KnowledgeAgentAzureSearchDocReference, - KnowledgeAgentIndexParams, KnowledgeAgentMessage, KnowledgeAgentMessageTextContent, KnowledgeAgentRetrievalRequest, KnowledgeAgentRetrievalResponse, - KnowledgeAgentSearchActivityRecord, + KnowledgeAgentSearchIndexActivityRecord, + KnowledgeAgentSearchIndexReference, + SearchIndexKnowledgeSourceParams, ) from azure.search.documents.aio import SearchClient from azure.search.documents.models import ( @@ -162,7 +162,6 @@ def __init__( openai_host: str, prompt_manager: PromptManager, reasoning_effort: Optional[str] = None, - hydrate_references: bool = False, multimodal_enabled: bool = False, image_embeddings_client: Optional[ImageEmbeddings] = None, global_blob_manager: Optional[BlobManager] = None, @@ -180,7 +179,6 @@ def __init__( self.openai_host = openai_host self.prompt_manager = prompt_manager self.reasoning_effort = reasoning_effort - self.hydrate_references = hydrate_references self.include_token_usage = True self.multimodal_enabled = multimodal_enabled self.image_embeddings_client = image_embeddings_client @@ -276,7 +274,6 @@ async def run_agentic_retrieval( top: Optional[int] = None, filter_add_on: Optional[str] = None, minimum_reranker_score: Optional[float] = None, - max_docs_for_reranker: Optional[int] = None, results_merge_strategy: Optional[str] = None, ) -> tuple[KnowledgeAgentRetrievalResponse, list[Document]]: # STEP 1: Invoke agentic retrieval @@ -289,13 +286,10 @@ async def run_agentic_retrieval( for msg in messages if msg["role"] != "system" ], - target_index_params=[ - KnowledgeAgentIndexParams( - index_name=search_index_name, - reranker_threshold=minimum_reranker_score, - max_docs_for_reranker=max_docs_for_reranker, + knowledge_source_params=[ + SearchIndexKnowledgeSourceParams( + knowledge_source_name=search_index_name, filter_add_on=filter_add_on, - include_reference_source_data=True, ) ], ) @@ -305,12 +299,12 @@ async def run_agentic_retrieval( activities = response.activity activity_mapping: dict[int, str] = ( { - activity.id: activity.query.search + activity.id: activity.search_index_arguments.search for activity in activities if ( - isinstance(activity, KnowledgeAgentSearchActivityRecord) - and activity.query - and activity.query.search is not None + isinstance(activity, KnowledgeAgentSearchIndexActivityRecord) + and activity.search_index_arguments + and activity.search_index_arguments.search is not None ) } if activities @@ -322,92 +316,42 @@ async def run_agentic_retrieval( return response, [] # Extract references - refs = [r for r in response.references if isinstance(r, KnowledgeAgentAzureSearchDocReference)] - + refs = [r for r in response.references if isinstance(r, KnowledgeAgentSearchIndexReference)] documents: list[Document] = [] - - if self.hydrate_references: - # Hydrate references to get full documents - documents = await self.hydrate_agent_references( - references=refs, - top=top, - ) - else: - # Create documents from reference source data - for ref in refs: - if ref.source_data: - documents.append( - Document( - id=ref.doc_key, - content=ref.source_data.get("content"), - sourcepage=ref.source_data.get("sourcepage"), - ) - ) - if top and len(documents) >= top: - break - - # Build mappings for agent queries and sorting - ref_to_activity: dict[str, int] = {} doc_to_ref_id: dict[str, str] = {} + + # Create documents from reference source data for ref in refs: - if ref.doc_key: - ref_to_activity[ref.doc_key] = ref.activity_source + if ref.source_data and ref.doc_key: + # Note that ref.doc_key is the same as source_data["id"] + documents.append( + Document( + id=ref.doc_key, + content=ref.source_data.get("content"), + category=ref.source_data.get("category"), + sourcepage=ref.source_data.get("sourcepage"), + sourcefile=ref.source_data.get("sourcefile"), + oids=ref.source_data.get("oids"), + groups=ref.source_data.get("groups"), + reranker_score=ref.reranker_score, + images=ref.source_data.get("images"), + search_agent_query=activity_mapping[ref.activity_source], + ) + ) doc_to_ref_id[ref.doc_key] = ref.id + if top and len(documents) >= top: + break - # Inject agent search queries into all documents - for doc in documents: - if doc.id and doc.id in ref_to_activity: - activity_id = ref_to_activity[doc.id] - doc.search_agent_query = activity_mapping.get(activity_id, "") + if minimum_reranker_score is not None: + documents = [doc for doc in documents if (doc.reranker_score or 0) >= minimum_reranker_score] - # Apply sorting strategy to the documents - if results_merge_strategy == "interleaved": # Use interleaved reference order + if results_merge_strategy == "interleaved": documents = sorted( documents, key=lambda d: int(doc_to_ref_id.get(d.id, 0)) if d.id and doc_to_ref_id.get(d.id) else 0, ) - # else: Default - preserve original order - return response, documents - async def hydrate_agent_references( - self, - references: list[KnowledgeAgentAzureSearchDocReference], - top: Optional[int], - ) -> list[Document]: - doc_keys: set[str] = set() - - for ref in references: - if not ref.doc_key: - continue - doc_keys.add(ref.doc_key) - if top and len(doc_keys) >= top: - break - - if not doc_keys: - return [] - - # Build search filter only on unique doc IDs - id_csv = ",".join(doc_keys) - id_filter = f"search.in(id, '{id_csv}', ',')" - - # Fetch full documents - hydrated_docs: list[Document] = await self.search( - top=len(doc_keys), - query_text=None, - filter=id_filter, - vectors=[], - use_text_search=False, - use_vector_search=False, - use_semantic_ranker=False, - use_semantic_captions=False, - minimum_search_score=None, - minimum_reranker_score=None, - use_query_rewriting=False, - ) - - return hydrated_docs - async def get_sources_content( self, results: list[Document], diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 5096ebaf4f..bc51dc107a 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -56,7 +56,6 @@ def __init__( query_speller: str, prompt_manager: PromptManager, reasoning_effort: Optional[str] = None, - hydrate_references: bool = False, multimodal_enabled: bool = False, image_embeddings_client: Optional[ImageEmbeddings] = None, global_blob_manager: Optional[BlobManager] = None, @@ -84,7 +83,6 @@ def __init__( self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json") self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty") self.reasoning_effort = reasoning_effort - self.hydrate_references = hydrate_references self.include_token_usage = True self.multimodal_enabled = multimodal_enabled self.image_embeddings_client = image_embeddings_client @@ -390,13 +388,10 @@ async def run_agentic_retrieval_approach( overrides: dict[str, Any], auth_claims: dict[str, Any], ): - minimum_reranker_score = overrides.get("minimum_reranker_score", 0) search_index_filter = self.build_filter(overrides, auth_claims) + minimum_reranker_score = overrides.get("minimum_reranker_score", 0) top = overrides.get("top", 3) - max_subqueries = overrides.get("max_subqueries", 10) results_merge_strategy = overrides.get("results_merge_strategy", "interleaved") - # 50 is the amount of documents that the reranker can process per query - max_docs_for_reranker = max_subqueries * 50 send_text_sources = overrides.get("send_text_sources", True) send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled @@ -407,7 +402,6 @@ async def run_agentic_retrieval_approach( top=top, filter_add_on=search_index_filter, minimum_reranker_score=minimum_reranker_score, - max_docs_for_reranker=max_docs_for_reranker, results_merge_strategy=results_merge_strategy, ) @@ -426,7 +420,6 @@ async def run_agentic_retrieval_approach( messages, { "reranker_threshold": minimum_reranker_score, - "max_docs_for_reranker": max_docs_for_reranker, "results_merge_strategy": results_merge_strategy, "filter": search_index_filter, }, diff --git a/app/backend/approaches/retrievethenread.py b/app/backend/approaches/retrievethenread.py index b3f3db8058..ca92eaff64 100644 --- a/app/backend/approaches/retrievethenread.py +++ b/app/backend/approaches/retrievethenread.py @@ -46,7 +46,6 @@ def __init__( query_speller: str, prompt_manager: PromptManager, reasoning_effort: Optional[str] = None, - hydrate_references: bool = False, multimodal_enabled: bool = False, image_embeddings_client: Optional[ImageEmbeddings] = None, global_blob_manager: Optional[BlobManager] = None, @@ -74,7 +73,6 @@ def __init__( self.answer_prompt = self.prompt_manager.load_prompt("ask_answer_question.prompty") self.reasoning_effort = reasoning_effort self.include_token_usage = True - self.hydrate_references = hydrate_references self.multimodal_enabled = multimodal_enabled self.image_embeddings_client = image_embeddings_client self.global_blob_manager = global_blob_manager @@ -229,10 +227,7 @@ async def run_agentic_retrieval_approach( minimum_reranker_score = overrides.get("minimum_reranker_score", 0) search_index_filter = self.build_filter(overrides, auth_claims) top = overrides.get("top", 3) - max_subqueries = overrides.get("max_subqueries", 10) results_merge_strategy = overrides.get("results_merge_strategy", "interleaved") - # 50 is the amount of documents that the reranker can process per query - max_docs_for_reranker = max_subqueries * 50 send_text_sources = overrides.get("send_text_sources", True) send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled @@ -243,7 +238,6 @@ async def run_agentic_retrieval_approach( top=top, filter_add_on=search_index_filter, minimum_reranker_score=minimum_reranker_score, - max_docs_for_reranker=max_docs_for_reranker, results_merge_strategy=results_merge_strategy, ) @@ -263,7 +257,6 @@ async def run_agentic_retrieval_approach( messages, { "reranker_threshold": minimum_reranker_score, - "max_docs_for_reranker": max_docs_for_reranker, "results_merge_strategy": results_merge_strategy, "filter": search_index_filter, }, diff --git a/app/backend/prepdocslib/searchmanager.py b/app/backend/prepdocslib/searchmanager.py index 0fdf57580f..7ab8018e5c 100644 --- a/app/backend/prepdocslib/searchmanager.py +++ b/app/backend/prepdocslib/searchmanager.py @@ -14,12 +14,14 @@ KnowledgeAgent, KnowledgeAgentAzureOpenAIModel, KnowledgeAgentRequestLimits, - KnowledgeAgentTargetIndex, + KnowledgeSourceReference, RescoringOptions, SearchableField, SearchField, SearchFieldDataType, SearchIndex, + SearchIndexKnowledgeSource, + SearchIndexKnowledgeSourceParameters, SemanticConfiguration, SemanticField, SemanticPrioritizedFields, @@ -83,7 +85,6 @@ async def create_index(self): logger.info("Checking whether search index %s exists...", self.search_info.index_name) async with self.search_info.create_search_index_client() as search_index_client: - embedding_field = None images_field = None text_vector_search_profile = None @@ -439,13 +440,29 @@ async def create_agent(self): if self.search_info.agent_name: logger.info(f"Creating search agent named {self.search_info.agent_name}") + field_names = ["id", "sourcepage", "sourcefile", "content", "category"] + if self.use_acls: + field_names.extend(["oids", "groups"]) + if self.search_images: + field_names.append("images/url") async with self.search_info.create_search_index_client() as search_index_client: + knowledge_source = SearchIndexKnowledgeSource( + name=self.search_info.index_name, # Use the same name for convenience + description="Default knowledge source using the main search index", + search_index_parameters=SearchIndexKnowledgeSourceParameters( + search_index_name=self.search_info.index_name, + source_data_select=",".join(field_names), + ), + ) + await search_index_client.create_or_update_knowledge_source( + knowledge_source=knowledge_source, api_version="2025-08-01-preview" + ) await search_index_client.create_or_update_agent( agent=KnowledgeAgent( name=self.search_info.agent_name, - target_indexes=[ - KnowledgeAgentTargetIndex( - index_name=self.search_info.index_name, default_include_reference_source_data=True + knowledge_sources=[ + KnowledgeSourceReference( + name=knowledge_source.name, include_references=True, include_reference_source_data=True ) ], models=[ diff --git a/app/backend/requirements.in b/app/backend/requirements.in index ac889f2b9d..1110ef5546 100644 --- a/app/backend/requirements.in +++ b/app/backend/requirements.in @@ -7,7 +7,7 @@ tenacity azure-ai-documentintelligence==1.0.0b4 azure-cognitiveservices-speech azure-cosmos -azure-search-documents==11.6.0b12 +azure-search-documents==11.7.0b1 azure-storage-blob azure-storage-file-datalake uvicorn diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index 4de1b17ad8..33d7140c7b 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -56,7 +56,7 @@ azure-monitor-opentelemetry==1.6.13 # via -r requirements.in azure-monitor-opentelemetry-exporter==1.0.0b40 # via azure-monitor-opentelemetry -azure-search-documents==11.6.0b12 +azure-search-documents==11.7.0b1 # via -r requirements.in azure-storage-blob==12.22.0 # via @@ -122,7 +122,7 @@ hpack==4.1.0 # via h2 httpcore==1.0.9 # via httpx -httpx[http2]==0.27.0 +httpx==0.27.0 # via # microsoft-kiota-http # msgraph-core @@ -343,7 +343,7 @@ pydantic-core==2.20.1 # via pydantic pygments==2.18.0 # via rich -pyjwt[crypto]==2.10.1 +pyjwt==2.10.1 # via # -r requirements.in # msal diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index e5a95f4891..b240dc87a7 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -14,7 +14,6 @@ export type ChatAppRequestOverrides = { exclude_category?: string; seed?: number; top?: number; - max_subqueries?: number; results_merge_strategy?: string; temperature?: number; minimum_search_score?: number; diff --git a/app/frontend/src/components/AnalysisPanel/AgentPlan.tsx b/app/frontend/src/components/AnalysisPanel/AgentPlan.tsx index 21e246eb12..c891da8102 100644 --- a/app/frontend/src/components/AnalysisPanel/AgentPlan.tsx +++ b/app/frontend/src/components/AnalysisPanel/AgentPlan.tsx @@ -8,16 +8,16 @@ SyntaxHighlighter.registerLanguage("json", json); type ModelQueryPlanningStep = { id: number; - type: "ModelQueryPlanning"; + type: "modelQueryPlanning"; input_tokens: number; output_tokens: number; }; type AzureSearchQueryStep = { id: number; - type: "AzureSearchQuery"; - target_index: string; - query: { search: string }; + type: "searchIndex"; + knowledge_source_name: string; + search_index_arguments: { search: string }; query_time: string; count: number; elapsed_ms: number; @@ -32,10 +32,10 @@ interface Props { export const AgentPlan: React.FC = ({ query_plan, description }) => { // find the planning step - const planning = query_plan.find((step): step is ModelQueryPlanningStep => step.type === "ModelQueryPlanning"); + const planning = query_plan.find((step): step is ModelQueryPlanningStep => step.type === "modelQueryPlanning"); // collect all search query steps - const queries = query_plan.filter((step): step is AzureSearchQueryStep => step.type === "AzureSearchQuery"); + const queries = query_plan.filter((step): step is AzureSearchQueryStep => step.type === "searchIndex"); return (
@@ -65,7 +65,7 @@ export const AgentPlan: React.FC = ({ query_plan, description }) => { {queries.map(q => ( - {q.query.search} + {q.search_index_arguments.search} {q.count} {q.elapsed_ms} diff --git a/app/frontend/src/components/Settings/Settings.tsx b/app/frontend/src/components/Settings/Settings.tsx index b5e9174ce6..852dd850c0 100644 --- a/app/frontend/src/components/Settings/Settings.tsx +++ b/app/frontend/src/components/Settings/Settings.tsx @@ -13,7 +13,6 @@ export interface SettingsProps { promptTemplate: string; temperature: number; retrieveCount: number; - maxSubqueryCount: number; resultsMergeStrategy: string; seed: number | null; minimumSearchScore: number; @@ -55,7 +54,6 @@ export const Settings = ({ promptTemplate, temperature, retrieveCount, - maxSubqueryCount, resultsMergeStrategy, seed, minimumSearchScore, @@ -109,8 +107,6 @@ export const Settings = ({ const rerankerScoreFieldId = useId("rerankerScoreField"); const retrieveCountId = useId("retrieveCount"); const retrieveCountFieldId = useId("retrieveCountField"); - const maxSubqueryCountId = useId("maxSubqueryCount"); - const maxSubqueryCountFieldId = useId("maxSubqueryCountField"); const resultsMergeStrategyFieldId = useId("resultsMergeStrategy"); const includeCategoryId = useId("includeCategory"); const includeCategoryFieldId = useId("includeCategoryField"); @@ -209,20 +205,6 @@ export const Settings = ({ onRenderLabel={props => renderLabel(props, rerankerScoreId, rerankerScoreFieldId, t("helpTexts.rerankerScore"))} /> )} - {showAgenticRetrievalOption && useAgenticRetrieval && ( - onChange("maxSubqueryCount", parseInt(val || "10"))} - aria-labelledby={maxSubqueryCountId} - onRenderLabel={props => renderLabel(props, maxSubqueryCountId, maxSubqueryCountFieldId, t("helpTexts.maxSubqueryCount"))} - /> - )} {showAgenticRetrievalOption && useAgenticRetrieval && ( (0); const [retrievalMode, setRetrievalMode] = useState(RetrievalMode.Hybrid); const [retrieveCount, setRetrieveCount] = useState(3); - const [maxSubqueryCount, setMaxSubqueryCount] = useState(10); const [resultsMergeStrategy, setResultsMergeStrategy] = useState("interleaved"); const [useSemanticRanker, setUseSemanticRanker] = useState(true); const [useSemanticCaptions, setUseSemanticCaptions] = useState(false); @@ -148,7 +147,6 @@ export function Component(): JSX.Element { include_category: includeCategory.length === 0 ? undefined : includeCategory, exclude_category: excludeCategory.length === 0 ? undefined : excludeCategory, top: retrieveCount, - max_subqueries: maxSubqueryCount, results_merge_strategy: resultsMergeStrategy, temperature: temperature, minimum_reranker_score: minimumRerankerScore, @@ -208,9 +206,6 @@ export function Component(): JSX.Element { case "retrieveCount": setRetrieveCount(value); break; - case "maxSubqueryCount": - setMaxSubqueryCount(value); - break; case "resultsMergeStrategy": setResultsMergeStrategy(value); break; @@ -369,7 +364,6 @@ export function Component(): JSX.Element { promptTemplateSuffix={promptTemplateSuffix} temperature={temperature} retrieveCount={retrieveCount} - maxSubqueryCount={maxSubqueryCount} resultsMergeStrategy={resultsMergeStrategy} seed={seed} minimumSearchScore={minimumSearchScore} diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx index c5bc85911d..3f3d386e39 100644 --- a/app/frontend/src/pages/chat/Chat.tsx +++ b/app/frontend/src/pages/chat/Chat.tsx @@ -35,7 +35,6 @@ const Chat = () => { const [minimumRerankerScore, setMinimumRerankerScore] = useState(0); const [minimumSearchScore, setMinimumSearchScore] = useState(0); const [retrieveCount, setRetrieveCount] = useState(3); - const [maxSubqueryCount, setMaxSubqueryCount] = useState(10); const [resultsMergeStrategy, setResultsMergeStrategy] = useState("interleaved"); const [retrievalMode, setRetrievalMode] = useState(RetrievalMode.Hybrid); const [useSemanticRanker, setUseSemanticRanker] = useState(true); @@ -213,7 +212,6 @@ const Chat = () => { include_category: includeCategory.length === 0 ? undefined : includeCategory, exclude_category: excludeCategory.length === 0 ? undefined : excludeCategory, top: retrieveCount, - max_subqueries: maxSubqueryCount, results_merge_strategy: resultsMergeStrategy, temperature: temperature, minimum_reranker_score: minimumRerankerScore, @@ -310,9 +308,6 @@ const Chat = () => { case "retrieveCount": setRetrieveCount(value); break; - case "maxSubqueryCount": - setMaxSubqueryCount(value); - break; case "resultsMergeStrategy": setResultsMergeStrategy(value); break; @@ -543,7 +538,6 @@ const Chat = () => { promptTemplate={promptTemplate} temperature={temperature} retrieveCount={retrieveCount} - maxSubqueryCount={maxSubqueryCount} resultsMergeStrategy={resultsMergeStrategy} seed={seed} minimumSearchScore={minimumSearchScore} diff --git a/docs/agentic_retrieval.md b/docs/agentic_retrieval.md index baa55994c1..74d71bf716 100644 --- a/docs/agentic_retrieval.md +++ b/docs/agentic_retrieval.md @@ -1,28 +1,18 @@ # RAG chat: Using agentic retrieval -This repository includes an optional feature that uses agentic retrieval to find the most relevant content given a user's conversation history. +This repository includes an optional feature that uses [agentic retrieval from Azure AI Search](https://learn.microsoft.com/azure/search/search-agentic-retrieval-concept) to find the most relevant content given a user's conversation history. The agentic retrieval feature uses a LLM to analyze the conversation and generate multiple search queries to find relevant content. This can improve the quality of the responses, especially for complex or multi-faceted questions. -## Using the feature - -### Supported Models - -See the agentic retrieval documentation. - -### Prerequisites - -* A deployment of any of the supported agentic retrieval models in the [supported regions](https://learn.microsoft.com/azure/ai-services/openai/concepts/models#standard-deployment-model-availability). If you're not sure, try to create a gpt-4.1-mini deployment from your Azure OpenAI deployments page. - -### Deployment +## Deployment 1. **Enable agentic retrieval:** - Set the environment variables for your Azure OpenAI GPT deployments to your reasoning model + Set the azd environment variable to enable the agentic retrieval feature: ```shell azd env set USE_AGENTIC_RETRIEVAL true ``` -2. **(Optional) Set the agentic retrieval model** +2. **(Optional) Customize the agentic retrieval model** You can configure which model agentic retrieval uses. By default, gpt-4.1-mini is used. @@ -34,35 +24,19 @@ See the agentic retrieval documentation. azd env set AZURE_OPENAI_SEARCHAGENT_MODEL_VERSION 2025-04-14 ``` -3. **(Optional) Enable extra field hydration** + You can only change it to one of the [supported models](https://learn.microsoft.com/azure/search/search-agentic-retrieval-how-to-create#supported-models). - By default, agentic retrieval only returns fields included in the semantic configuration. +3. **Update the infrastructure and application:** - You can enable this optional feature below, to include all fields from the search index in the result. - ⚠️ This feature is currently only compatible with indexes set up with integrated vectorization, - or indexes that otherwise have an "id" field marked as filterable. + Execute `azd up` to provision the infrastructure changes (only the new model, if you ran `up` previously) and deploy the application code with the updated environment variables. The post-provision script will configure Azure AI Search with a Knowledge agent pointing at the search index. - ```shell - azd env set ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA true - ``` - -4. **Update the infrastructure and application:** - - Execute `azd up` to provision the infrastructure changes (only the new model, if you ran `up` previously) and deploy the application code with the updated environment variables. - -5. **Try out the feature:** +4. **Try out the feature:** Open the web app and start a new chat. Agentic retrieval will be used to find all sources. -6. **Experiment with max subqueries:** - - Select the developer options in the web app and change max subqueries to any value between 1 and 20. This controls the maximum amount of subqueries that can be created in the query plan. - - ![Max subqueries screenshot](./images/max-subqueries.png) - -7. **Review the query plan** +5. **Review the query plan** - Agentic retrieval use additional billed tokens behind the scenes for the planning process. + Agentic retrieval uses additional billed tokens behind the scenes for the planning process. To see the token usage, select the lightbulb icon on a chat answer. This will open the "Thought process" tab, which shows the amount of tokens used by and the queries produced by the planning process ![Thought process token usage](./images/query-plan.png) diff --git a/docs/deploy_features.md b/docs/deploy_features.md index 5efdeb89ae..fe7b1d30f4 100644 --- a/docs/deploy_features.md +++ b/docs/deploy_features.md @@ -152,8 +152,6 @@ To enable reasoning models, follow the steps in [the reasoning models guide](./r ## Using agentic retrieval -⚠️ This feature is not fully compatible with [multimodal feature](./multimodal.md). - This feature allows you to use agentic retrieval in place of the Search API. To enable agentic retrieval, follow the steps in [the agentic retrieval guide](./agentic_retrieval.md) ## Using different embedding models @@ -231,8 +229,6 @@ If you have already deployed: ## Enabling multimodal embeddings and answering -⚠️ This feature is not currently compatible with [agentic retrieval](./agentic_retrieval.md). - When your documents include images, you can optionally enable this feature that can use image embeddings when searching and also use images when answering questions. diff --git a/docs/multimodal.md b/docs/multimodal.md index bf5f347c32..b547cc1c37 100644 --- a/docs/multimodal.md +++ b/docs/multimodal.md @@ -113,7 +113,4 @@ and you may still see good results with just text inputs, since the inputs conta ## Compatibility * This feature is **not** compatible with [integrated vectorization](./deploy_features.md#enabling-integrated-vectorization), as the currently configured built-in skills do not process images or store image embeddings. Azure AI Search does now offer built-in skills for multimodal support, as demonstrated in [azure-ai-search-multimodal-sample](https://github.com/Azure-Samples/azure-ai-search-multimodal-sample), but we have not integrated them in this project. Instead, we are working on making a custom skill based off the data ingestion code in this repository, and hosting that skill on Azure Functions. Stay tuned to the releases to find out when that's available. -* This feature is **not** fully compatible with the [agentic retrieval](./agentic_retrieval.md) feature. -The agent *will* perform the multimodal vector embedding search, but it will not return images in the response, -so we cannot send the images to the chat completion model. * This feature *is* compatible with the [reasoning models](./reasoning.md) feature, as long as you use a model that [supports image inputs](https://learn.microsoft.com/azure/ai-services/openai/how-to/reasoning?tabs=python-secure%2Cpy#api--feature-support). diff --git a/evals/evaluate_config.json b/evals/evaluate_config.json index 846eee0787..b94adeef64 100644 --- a/evals/evaluate_config.json +++ b/evals/evaluate_config.json @@ -6,7 +6,6 @@ "target_parameters": { "overrides": { "top": 3, - "max_subqueries": 10, "results_merge_strategy": "interleaved", "temperature": 0.3, "minimum_reranker_score": 0, diff --git a/evals/evaluate_config_multimodal.json b/evals/evaluate_config_multimodal.json index 959cef666c..62dfa4a3c6 100644 --- a/evals/evaluate_config_multimodal.json +++ b/evals/evaluate_config_multimodal.json @@ -6,7 +6,6 @@ "target_parameters": { "overrides": { "top": 3, - "max_subqueries": 10, "results_merge_strategy": "interleaved", "temperature": 0.3, "minimum_reranker_score": 0, diff --git a/infra/main.bicep b/infra/main.bicep index 6964b9dc75..2dc2e5612e 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -24,7 +24,7 @@ param searchServiceLocation string = '' // Set in main.parameters.json @allowed(['free', 'basic', 'standard', 'standard2', 'standard3', 'storage_optimized_l1', 'storage_optimized_l2']) param searchServiceSkuName string // Set in main.parameters.json param searchIndexName string // Set in main.parameters.json -param searchAgentName string = useAgenticRetrieval ? '${searchIndexName}-agent' : '' +param searchAgentName string = useAgenticRetrieval ? '${searchIndexName}-agent-upgrade' : '' param searchQueryLanguage string // Set in main.parameters.json param searchQuerySpeller string // Set in main.parameters.json param searchServiceSemanticRankerLevel string // Set in main.parameters.json @@ -41,7 +41,6 @@ param storageSkuName string // Set in main.parameters.json param defaultReasoningEffort string // Set in main.parameters.json param useAgenticRetrieval bool // Set in main.parameters.json -param enableAgenticRetrievalSourceData bool // Set in main.parameters.json param userStorageAccountName string = '' param userStorageContainerName string = 'user-content' @@ -424,7 +423,6 @@ var appEnvVariables = { USE_SPEECH_OUTPUT_BROWSER: useSpeechOutputBrowser USE_SPEECH_OUTPUT_AZURE: useSpeechOutputAzure USE_AGENTIC_RETRIEVAL: useAgenticRetrieval - ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA: enableAgenticRetrievalSourceData // Chat history settings USE_CHAT_HISTORY_BROWSER: useChatHistoryBrowser USE_CHAT_HISTORY_COSMOS: useChatHistoryCosmos diff --git a/infra/main.parameters.json b/infra/main.parameters.json index dd047dc56f..7a637c8022 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -338,9 +338,6 @@ "useAgenticRetrieval": { "value": "${USE_AGENTIC_RETRIEVAL=false}" }, - "enableAgenticRetrievalSourceData": { - "value": "${ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA=false}" - }, "ragSearchTextEmbeddings": { "value": "${RAG_SEARCH_TEXT_EMBEDDINGS=true}" }, diff --git a/tests/conftest.py b/tests/conftest.py index 89c1c66711..50b0eb32e3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,6 +18,8 @@ KnowledgeAgent, SearchField, SearchIndex, + SearchIndexKnowledgeSource, + SearchIndexKnowledgeSourceParameters, ) from azure.storage.blob.aio import BlobServiceClient, ContainerClient from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding @@ -47,8 +49,6 @@ MockResponse, MockTransport, mock_retrieval_response, - mock_retrieval_response_with_duplicates, - mock_retrieval_response_with_missing_doc_key, mock_retrieval_response_with_sorting, mock_retrieval_response_with_top_limit, mock_speak_text_cancelled, @@ -64,7 +64,20 @@ SearchField(name="groups", type="Collection(Edm.String)"), ], ) -MockAgent = KnowledgeAgent(name="test", models=[], target_indexes=[], request_limits=[]) +MockAgent = KnowledgeAgent( + name="test", + models=[], + knowledge_sources=[ + SearchIndexKnowledgeSource( + name="test", + description="The default index for searching", + search_index_parameters=SearchIndexKnowledgeSourceParameters( + search_index_name="test", include_reference_source_data=True + ), + ) + ], + request_limits=[], +) async def mock_search(self, *args, **kwargs): @@ -75,28 +88,23 @@ async def mock_search(self, *args, **kwargs): def create_mock_retrieve(response_type="default"): """Create a mock_retrieve function that returns different response types. - Args: - response_type: Type of response to return. Options: - - "default": mock_retrieval_response() - - "sorting": mock_retrieval_response_with_sorting() - - "duplicates": mock_retrieval_response_with_duplicates() - - "missing_doc_key": mock_retrieval_response_with_missing_doc_key() - - "top_limit": mock_retrieval_response_with_top_limit() + Supported response_type values: + - "default": single reference response + - "sorting": multiple refs to test ordering / interleaving + - "top_limit": many refs to test early breaking via top limit """ async def mock_retrieve_parameterized(self, *args, **kwargs): retrieval_request = kwargs.get("retrieval_request") assert retrieval_request is not None - assert retrieval_request.target_index_params is not None - assert len(retrieval_request.target_index_params) == 1 - self.filter = retrieval_request.target_index_params[0].filter_add_on + assert retrieval_request.knowledge_source_params is not None + assert len(retrieval_request.knowledge_source_params) == 1 + params_list = retrieval_request.knowledge_source_params + params = params_list[0] + self.filter = getattr(params, "filter_add_on", None) if response_type == "sorting": return mock_retrieval_response_with_sorting() - elif response_type == "duplicates": - return mock_retrieval_response_with_duplicates() - elif response_type == "missing_doc_key": - return mock_retrieval_response_with_missing_doc_key() elif response_type == "top_limit": return mock_retrieval_response_with_top_limit() else: # default @@ -447,7 +455,6 @@ async def mock_exists(*args, **kwargs): "AZURE_OPENAI_SEARCHAGENT_MODEL": "gpt-4.1-mini", "AZURE_OPENAI_SEARCHAGENT_DEPLOYMENT": "gpt-4.1-mini", "USE_AGENTIC_RETRIEVAL": "true", - "ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA": "true", } ] @@ -461,7 +468,6 @@ async def mock_exists(*args, **kwargs): "AZURE_OPENAI_SEARCHAGENT_MODEL": "gpt-4.1-mini", "AZURE_OPENAI_SEARCHAGENT_DEPLOYMENT": "gpt-4.1-mini", "USE_AGENTIC_RETRIEVAL": "true", - "ENABLE_AGENTIC_RETRIEVAL_SOURCE_DATA": "true", "AZURE_USE_AUTHENTICATION": "true", "AZURE_SERVER_APP_ID": "SERVER_APP", "AZURE_SERVER_APP_SECRET": "SECRET", @@ -1160,38 +1166,3 @@ def chat_approach(): credential=MockAzureCredential(), ), ) - - -@pytest.fixture -def chat_approach_with_hydration(): - return ChatReadRetrieveReadApproach( - search_client=SearchClient(endpoint="", index_name="", credential=AzureKeyCredential("")), - search_index_name=None, - agent_model=None, - agent_deployment=None, - agent_client=None, - auth_helper=None, - openai_client=None, - chatgpt_model="gpt-4.1-mini", - chatgpt_deployment="chat", - embedding_deployment="embeddings", - embedding_model=MOCK_EMBEDDING_MODEL_NAME, - embedding_dimensions=MOCK_EMBEDDING_DIMENSIONS, - embedding_field="embedding3", - sourcepage_field="", - content_field="", - query_language="en-us", - query_speller="lexicon", - prompt_manager=PromptyManager(), - hydrate_references=True, - user_blob_manager=AdlsBlobManager( - endpoint="https://test-userstorage-account.dfs.core.windows.net", - container="test-userstorage-container", - credential=MockAzureCredential(), - ), - global_blob_manager=BlobManager( # on normal Azure storage - endpoint="https://test-globalstorage-account.blob.core.windows.net", - container="test-globalstorage-container", - credential=MockAzureCredential(), - ), - ) diff --git a/tests/mocks.py b/tests/mocks.py index de84fa470e..503b5b5f99 100644 --- a/tests/mocks.py +++ b/tests/mocks.py @@ -13,13 +13,13 @@ HttpRequest, ) from azure.search.documents.agent.models import ( - KnowledgeAgentAzureSearchDocReference, KnowledgeAgentMessage, KnowledgeAgentMessageTextContent, KnowledgeAgentModelQueryPlanningActivityRecord, KnowledgeAgentRetrievalResponse, - KnowledgeAgentSearchActivityRecord, - KnowledgeAgentSearchActivityRecordQuery, + KnowledgeAgentSearchIndexActivityArguments, + KnowledgeAgentSearchIndexActivityRecord, + KnowledgeAgentSearchIndexReference, ) from azure.search.documents.models import ( VectorQuery, @@ -268,69 +268,6 @@ def __init__(self, search_text, vector_queries: Optional[list[VectorQuery]]): }, ] ] - elif search_text == "hydrated": - self.data = [ - [ - { - "sourcepage": "Benefit_Options-2.pdf", - "sourcefile": "Benefit_Options.pdf", - "content": "There is a whistleblower policy.", - "embedding": [], - "category": "benefits", - "id": "Benefit_Options-2.pdf", - "@search.score": 0.03279569745063782, - "@search.reranker_score": 3.4577205181121826, - "@search.highlights": None, - "@search.captions": [MockCaption("Caption: A whistleblower policy.")], - }, - ] - ] - elif search_text == "hydrated_multi": - self.data = [ - [ - { - "id": "doc1", - "content": "Hydrated content 1", - "sourcepage": "page1.pdf", - "sourcefile": "file1.pdf", - "category": "category1", - "@search.score": 0.9, - "@search.reranker_score": 3.5, - "@search.highlights": None, - "@search.captions": [], - }, - { - "id": "doc2", - "content": "Hydrated content 2", - "sourcepage": "page2.pdf", - "sourcefile": "file2.pdf", - "category": "category2", - "@search.score": 0.8, - "@search.reranker_score": 3.2, - "@search.highlights": None, - "@search.captions": [], - }, - ] - ] - elif search_text == "hydrated_single": - self.data = [ - [ - { - "id": "doc1", - "content": "Hydrated content 1", - "sourcepage": "page1.pdf", - "sourcefile": "file1.pdf", - "category": "category1", - "@search.score": 0.9, - "@search.reranker_score": 3.5, - "@search.highlights": None, - "@search.captions": [], - }, - ] - ] - elif search_text == "hydrated_empty": - # Mock search results for empty hydration - self.data = [[]] else: self.data = [ [ @@ -436,20 +373,26 @@ def mock_retrieval_response(): ], activity=[ KnowledgeAgentModelQueryPlanningActivityRecord(id=0, input_tokens=10, output_tokens=20, elapsed_ms=200), - KnowledgeAgentSearchActivityRecord( + KnowledgeAgentSearchIndexActivityRecord( id=1, - target_index="index", - query=KnowledgeAgentSearchActivityRecordQuery(search="whistleblower query"), + knowledge_source_name="index", + search_index_arguments=KnowledgeAgentSearchIndexActivityArguments(search="whistleblower query"), count=10, elapsed_ms=50, ), ], references=[ - KnowledgeAgentAzureSearchDocReference( + KnowledgeAgentSearchIndexReference( id=0, activity_source=1, - doc_key="Benefit_Options-2.pdf", - source_data={"content": "There is a whistleblower policy.", "sourcepage": "Benefit_Options-2.pdf"}, + doc_key="file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", + reranker_score=3.4577205181121826, + source_data={ + "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", + "content": "There is a whistleblower policy.", + "sourcepage": "Benefit_Options-2.pdf", + "sourcefile": "Benefit_Options.pdf", + }, ) ], ) @@ -465,122 +408,35 @@ def mock_retrieval_response_with_sorting(): ) ], activity=[ - KnowledgeAgentSearchActivityRecord( + KnowledgeAgentSearchIndexActivityRecord( id=1, - target_index="index", - query=KnowledgeAgentSearchActivityRecordQuery(search="first query"), + knowledge_source_name="index", + search_index_arguments=KnowledgeAgentSearchIndexActivityArguments(search="first query"), count=10, elapsed_ms=50, ), - KnowledgeAgentSearchActivityRecord( + KnowledgeAgentSearchIndexActivityRecord( id=2, - target_index="index", - query=KnowledgeAgentSearchActivityRecordQuery(search="second query"), + knowledge_source_name="index", + search_index_arguments=KnowledgeAgentSearchIndexActivityArguments(search="second query"), count=10, elapsed_ms=50, ), ], references=[ - KnowledgeAgentAzureSearchDocReference( + KnowledgeAgentSearchIndexReference( id="2", # Higher ID for testing interleaved sorting activity_source=2, doc_key="doc2", - source_data={"content": "Content 2", "sourcepage": "page2.pdf"}, + source_data={"id": "doc2", "content": "Content 2", "sourcepage": "page2.pdf"}, + reranker_score=3.7, ), - KnowledgeAgentAzureSearchDocReference( + KnowledgeAgentSearchIndexReference( id="1", # Lower ID for testing interleaved sorting activity_source=1, doc_key="doc1", - source_data={"content": "Content 1", "sourcepage": "page1.pdf"}, - ), - ], - ) - - -def mock_retrieval_response_with_duplicates(): - """Mock response with duplicate doc_keys for testing deduplication""" - return KnowledgeAgentRetrievalResponse( - response=[ - KnowledgeAgentMessage( - role="assistant", - content=[KnowledgeAgentMessageTextContent(text="Test response")], - ) - ], - activity=[ - KnowledgeAgentSearchActivityRecord( - id=1, - target_index="index", - query=KnowledgeAgentSearchActivityRecordQuery(search="query for doc1"), - count=10, - elapsed_ms=50, - ), - KnowledgeAgentSearchActivityRecord( - id=2, - target_index="index", - query=KnowledgeAgentSearchActivityRecordQuery(search="another query for doc1"), - count=10, - elapsed_ms=50, - ), - ], - references=[ - KnowledgeAgentAzureSearchDocReference( - id="1", - activity_source=1, - doc_key="doc1", # Same doc_key - source_data={"content": "Content 1", "sourcepage": "page1.pdf"}, - ), - KnowledgeAgentAzureSearchDocReference( - id="2", - activity_source=2, - doc_key="doc1", # Duplicate doc_key - source_data={"content": "Content 1", "sourcepage": "page1.pdf"}, - ), - KnowledgeAgentAzureSearchDocReference( - id="3", - activity_source=1, - doc_key="doc2", # Different doc_key - source_data={"content": "Content 2", "sourcepage": "page2.pdf"}, - ), - ], - ) - - -def mock_retrieval_response_with_missing_doc_key(): - """Mock response with missing doc_key to test continue condition""" - return KnowledgeAgentRetrievalResponse( - response=[ - KnowledgeAgentMessage( - role="assistant", - content=[KnowledgeAgentMessageTextContent(text="Test response")], - ) - ], - activity=[ - KnowledgeAgentSearchActivityRecord( - id=1, - target_index="index", - query=KnowledgeAgentSearchActivityRecordQuery(search="query"), - count=10, - elapsed_ms=50, - ), - ], - references=[ - KnowledgeAgentAzureSearchDocReference( - id="1", - activity_source=1, - doc_key=None, # Missing doc_key - source_data={"content": "Content 1", "sourcepage": "page1.pdf"}, - ), - KnowledgeAgentAzureSearchDocReference( - id="2", - activity_source=1, - doc_key="", # Empty doc_key - source_data={"content": "Content 2", "sourcepage": "page2.pdf"}, - ), - KnowledgeAgentAzureSearchDocReference( - id="3", - activity_source=1, - doc_key="doc3", # Valid doc_key - source_data={"content": "Content 3", "sourcepage": "page3.pdf"}, + source_data={"id": "doc1", "content": "Content 1", "sourcepage": "page1.pdf"}, + reranker_score=3.5, ), ], ) @@ -591,11 +447,11 @@ def mock_retrieval_response_with_top_limit(): references = [] for i in range(15): # More than any reasonable top limit references.append( - KnowledgeAgentAzureSearchDocReference( + KnowledgeAgentSearchIndexReference( id=str(i), activity_source=1, doc_key=f"doc{i}", - source_data={"content": f"Content {i}", "sourcepage": f"page{i}.pdf"}, + source_data={"id": f"doc{i}", "content": f"Content {i}", "sourcepage": f"page{i}.pdf"}, ) ) @@ -607,10 +463,10 @@ def mock_retrieval_response_with_top_limit(): ) ], activity=[ - KnowledgeAgentSearchActivityRecord( + KnowledgeAgentSearchIndexActivityRecord( id=1, - target_index="index", - query=KnowledgeAgentSearchActivityRecordQuery(search="query"), + knowledge_source_name="index", + search_index_arguments=KnowledgeAgentSearchIndexActivityArguments(search="query"), count=10, elapsed_ms=50, ), diff --git a/tests/snapshots/test_app/test_ask_rtr_text_agent/agent_client0/result.json b/tests/snapshots/test_app/test_ask_rtr_text_agent/agent_client0/result.json index 73ce16656a..2d15ccaba9 100644 --- a/tests/snapshots/test_app/test_ask_rtr_text_agent/agent_client0/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_text_agent/agent_client0/result.json @@ -19,7 +19,6 @@ ], "props": { "filter": null, - "max_docs_for_reranker": 500, "reranker_threshold": 0, "results_merge_strategy": "interleaved" }, @@ -28,13 +27,7 @@ { "description": [ { - "captions": [ - { - "additional_properties": {}, - "highlights": [], - "text": "Caption: A whistleblower policy." - } - ], + "captions": [], "category": null, "content": "There is a whistleblower policy.", "groups": null, @@ -42,8 +35,8 @@ "images": null, "oids": null, "reranker_score": 3.4577205181121826, - "score": 0.03279569745063782, - "search_agent_query": null, + "score": null, + "search_agent_query": "whistleblower query", "sourcefile": "Benefit_Options.pdf", "sourcepage": "Benefit_Options-2.pdf" } @@ -57,17 +50,17 @@ "id": 0, "input_tokens": 10, "output_tokens": 20, - "type": "ModelQueryPlanning" + "type": "modelQueryPlanning" }, { "count": 10, "elapsed_ms": 50, "id": 1, - "query": { + "knowledge_source_name": "index", + "search_index_arguments": { "search": "whistleblower query" }, - "target_index": "index", - "type": "AzureSearchQuery" + "type": "searchIndex" } ] }, diff --git a/tests/snapshots/test_app/test_ask_rtr_text_agent_filter/agent_auth_client0/result.json b/tests/snapshots/test_app/test_ask_rtr_text_agent_filter/agent_auth_client0/result.json index 81fb7e9a3b..5ed04de27d 100644 --- a/tests/snapshots/test_app/test_ask_rtr_text_agent_filter/agent_auth_client0/result.json +++ b/tests/snapshots/test_app/test_ask_rtr_text_agent_filter/agent_auth_client0/result.json @@ -19,7 +19,6 @@ ], "props": { "filter": "category ne 'excluded' and (oids/any(g:search.in(g, 'OID_X')) or groups/any(g:search.in(g, 'GROUP_Y, GROUP_Z')))", - "max_docs_for_reranker": 500, "reranker_threshold": 0, "results_merge_strategy": "interleaved" }, @@ -28,13 +27,7 @@ { "description": [ { - "captions": [ - { - "additional_properties": {}, - "highlights": [], - "text": "Caption: A whistleblower policy." - } - ], + "captions": [], "category": null, "content": "There is a whistleblower policy.", "groups": null, @@ -42,8 +35,8 @@ "images": null, "oids": null, "reranker_score": 3.4577205181121826, - "score": 0.03279569745063782, - "search_agent_query": null, + "score": null, + "search_agent_query": "whistleblower query", "sourcefile": "Benefit_Options.pdf", "sourcepage": "Benefit_Options-2.pdf" } @@ -57,17 +50,17 @@ "id": 0, "input_tokens": 10, "output_tokens": 20, - "type": "ModelQueryPlanning" + "type": "modelQueryPlanning" }, { "count": 10, "elapsed_ms": 50, "id": 1, - "query": { + "knowledge_source_name": "index", + "search_index_arguments": { "search": "whistleblower query" }, - "target_index": "index", - "type": "AzureSearchQuery" + "type": "searchIndex" } ] }, diff --git a/tests/snapshots/test_app/test_chat_text_agent/agent_client0/result.json b/tests/snapshots/test_app/test_chat_text_agent/agent_client0/result.json index c38c8475ef..b6a07b7a28 100644 --- a/tests/snapshots/test_app/test_chat_text_agent/agent_client0/result.json +++ b/tests/snapshots/test_app/test_chat_text_agent/agent_client0/result.json @@ -20,7 +20,6 @@ ], "props": { "filter": null, - "max_docs_for_reranker": 500, "reranker_threshold": 0, "results_merge_strategy": "interleaved" }, @@ -29,13 +28,7 @@ { "description": [ { - "captions": [ - { - "additional_properties": {}, - "highlights": [], - "text": "Caption: A whistleblower policy." - } - ], + "captions": [], "category": null, "content": "There is a whistleblower policy.", "groups": null, @@ -43,8 +36,8 @@ "images": null, "oids": null, "reranker_score": 3.4577205181121826, - "score": 0.03279569745063782, - "search_agent_query": null, + "score": null, + "search_agent_query": "whistleblower query", "sourcefile": "Benefit_Options.pdf", "sourcepage": "Benefit_Options-2.pdf" } @@ -58,17 +51,17 @@ "id": 0, "input_tokens": 10, "output_tokens": 20, - "type": "ModelQueryPlanning" + "type": "modelQueryPlanning" }, { "count": 10, "elapsed_ms": 50, "id": 1, - "query": { + "knowledge_source_name": "index", + "search_index_arguments": { "search": "whistleblower query" }, - "target_index": "index", - "type": "AzureSearchQuery" + "type": "searchIndex" } ] }, diff --git a/tests/snapshots/test_app/test_chat_text_filter_agent/agent_auth_client0/result.json b/tests/snapshots/test_app/test_chat_text_filter_agent/agent_auth_client0/result.json index bb7baf210c..aa00a0ddd4 100644 --- a/tests/snapshots/test_app/test_chat_text_filter_agent/agent_auth_client0/result.json +++ b/tests/snapshots/test_app/test_chat_text_filter_agent/agent_auth_client0/result.json @@ -20,7 +20,6 @@ ], "props": { "filter": "category ne 'excluded' and (oids/any(g:search.in(g, 'OID_X')) or groups/any(g:search.in(g, 'GROUP_Y, GROUP_Z')))", - "max_docs_for_reranker": 500, "reranker_threshold": 0, "results_merge_strategy": "interleaved" }, @@ -29,13 +28,7 @@ { "description": [ { - "captions": [ - { - "additional_properties": {}, - "highlights": [], - "text": "Caption: A whistleblower policy." - } - ], + "captions": [], "category": null, "content": "There is a whistleblower policy.", "groups": null, @@ -43,8 +36,8 @@ "images": null, "oids": null, "reranker_score": 3.4577205181121826, - "score": 0.03279569745063782, - "search_agent_query": null, + "score": null, + "search_agent_query": "whistleblower query", "sourcefile": "Benefit_Options.pdf", "sourcepage": "Benefit_Options-2.pdf" } @@ -58,17 +51,17 @@ "id": 0, "input_tokens": 10, "output_tokens": 20, - "type": "ModelQueryPlanning" + "type": "modelQueryPlanning" }, { "count": 10, "elapsed_ms": 50, "id": 1, - "query": { + "knowledge_source_name": "index", + "search_index_arguments": { "search": "whistleblower query" }, - "target_index": "index", - "type": "AzureSearchQuery" + "type": "searchIndex" } ] }, diff --git a/tests/test_agentic_retrieval.py b/tests/test_agentic_retrieval.py index 656a3fccbc..aeb3d58ba0 100644 --- a/tests/test_agentic_retrieval.py +++ b/tests/test_agentic_retrieval.py @@ -1,22 +1,19 @@ +"""Agentic retrieval tests""" + import pytest from azure.core.credentials import AzureKeyCredential from azure.search.documents.agent.aio import KnowledgeAgentRetrievalClient from azure.search.documents.agent.models import ( - KnowledgeAgentAzureSearchDocReference, KnowledgeAgentMessage, KnowledgeAgentRetrievalResponse, ) -from azure.search.documents.aio import SearchClient from .conftest import create_mock_retrieve -from .mocks import ( - MockAsyncSearchResultsIterator, -) @pytest.mark.asyncio -async def test_agentic_retrieval_non_hydrated_default_sort(chat_approach, monkeypatch): - """Test non-hydrated path with default sorting (preserve original order)""" +async def test_agentic_retrieval_default_sort(chat_approach, monkeypatch): + """Test default sorting (preserve original order)""" monkeypatch.setattr(KnowledgeAgentRetrievalClient, "retrieve", create_mock_retrieve("sorting")) @@ -41,8 +38,8 @@ async def test_agentic_retrieval_non_hydrated_default_sort(chat_approach, monkey @pytest.mark.asyncio -async def test_agentic_retrieval_non_hydrated_interleaved_sort(chat_approach, monkeypatch): - """Test non-hydrated path with interleaved sorting""" +async def test_agentic_retrieval_interleaved_sort(chat_approach, monkeypatch): + """Test interleaved sorting""" monkeypatch.setattr(KnowledgeAgentRetrievalClient, "retrieve", create_mock_retrieve("sorting")) @@ -66,61 +63,6 @@ async def test_agentic_retrieval_non_hydrated_interleaved_sort(chat_approach, mo assert results[1].search_agent_query == "second query" -@pytest.mark.asyncio -async def test_agentic_retrieval_hydrated_with_sorting(chat_approach_with_hydration, monkeypatch): - """Test hydrated path with sorting""" - - monkeypatch.setattr(KnowledgeAgentRetrievalClient, "retrieve", create_mock_retrieve("sorting")) - - async def mock_search(self, *args, **kwargs): - # For hydration, we expect a filter like "search.in(id, 'doc1,doc2', ',')" - return MockAsyncSearchResultsIterator("hydrated_multi", None) - - monkeypatch.setattr(SearchClient, "search", mock_search) - - agent_client = KnowledgeAgentRetrievalClient(endpoint="", agent_name="", credential=AzureKeyCredential("")) - - _, results = await chat_approach_with_hydration.run_agentic_retrieval( - messages=[], - agent_client=agent_client, - search_index_name="test-index", - results_merge_strategy="interleaved", - ) - - assert len(results) == 2 - # Should have hydrated content, not source_data content - assert results[0].content == "Hydrated content 1" - assert results[1].content == "Hydrated content 2" - # Should still have agent queries injected - assert results[0].search_agent_query == "first query" - assert results[1].search_agent_query == "second query" - - -@pytest.mark.asyncio -async def test_hydrate_agent_references_deduplication(chat_approach_with_hydration, monkeypatch): - """Test that hydrate_agent_references deduplicates doc_keys""" - - monkeypatch.setattr(KnowledgeAgentRetrievalClient, "retrieve", create_mock_retrieve("duplicates")) - - async def mock_search(self, *args, **kwargs): - # For deduplication test, we expect doc1 and doc2 to be in the filter - return MockAsyncSearchResultsIterator("hydrated_multi", None) - - monkeypatch.setattr(SearchClient, "search", mock_search) - - agent_client = KnowledgeAgentRetrievalClient(endpoint="", agent_name="", credential=AzureKeyCredential("")) - - _, results = await chat_approach_with_hydration.run_agentic_retrieval( - messages=[], agent_client=agent_client, search_index_name="test-index" - ) - - # Should only get 2 unique documents despite 3 references (doc1 appears twice) - assert len(results) == 2 - doc_ids = [doc.id for doc in results] - assert "doc1" in doc_ids - assert "doc2" in doc_ids - - @pytest.mark.asyncio async def test_agentic_retrieval_no_references(chat_approach, monkeypatch): """Test behavior when agent returns no references""" @@ -143,126 +85,9 @@ async def mock_retrieval(*args, **kwargs): assert len(results) == 0 -@pytest.mark.asyncio -async def test_activity_mapping_injection(chat_approach, monkeypatch): - """Test that search_agent_query is properly injected from activity mapping""" - - monkeypatch.setattr(KnowledgeAgentRetrievalClient, "retrieve", create_mock_retrieve("sorting")) - - agent_client = KnowledgeAgentRetrievalClient(endpoint="", agent_name="", credential=AzureKeyCredential("")) - - _, results = await chat_approach.run_agentic_retrieval( - messages=[], agent_client=agent_client, search_index_name="test-index" - ) - - # Verify that search_agent_query is correctly mapped from activity - assert len(results) == 2 - - # Find each document and verify its query - doc1 = next(doc for doc in results if doc.id == "doc1") - doc2 = next(doc for doc in results if doc.id == "doc2") - - assert doc1.search_agent_query == "first query" # From activity_source=1 - assert doc2.search_agent_query == "second query" # From activity_source=2 - - -@pytest.mark.asyncio -async def test_hydrate_agent_references_missing_doc_keys(chat_approach_with_hydration, monkeypatch): - """Test that hydrate_agent_references handles missing/empty doc_keys correctly""" - - monkeypatch.setattr( - KnowledgeAgentRetrievalClient, - "retrieve", - create_mock_retrieve("missing_doc_key"), - ) - - async def mock_search(self, *args, **kwargs): - return MockAsyncSearchResultsIterator("hydrated_single", None) - - monkeypatch.setattr(SearchClient, "search", mock_search) - - agent_client = KnowledgeAgentRetrievalClient(endpoint="", agent_name="", credential=AzureKeyCredential("")) - - _, results = await chat_approach_with_hydration.run_agentic_retrieval( - messages=[], agent_client=agent_client, search_index_name="test-index" - ) - - # Should only get doc3 since doc_key was missing/empty for others - assert len(results) == 1 - assert results[0].id == "doc1" # From mock search result - assert results[0].content == "Hydrated content 1" - - -@pytest.mark.asyncio -async def test_hydrate_agent_references_empty_doc_keys(chat_approach_with_hydration, monkeypatch): - """Test that hydrate_agent_references handles case with no valid doc_keys""" - - async def mock_retrieval_no_valid_keys(*args, **kwargs): - return KnowledgeAgentRetrievalResponse( - response=[KnowledgeAgentMessage(role="assistant", content=[])], - activity=[], - references=[ - KnowledgeAgentAzureSearchDocReference( - id="1", - activity_source=1, - doc_key=None, # No valid doc_key - source_data={"content": "Content 1", "sourcepage": "page1.pdf"}, - ), - ], - ) - - monkeypatch.setattr(KnowledgeAgentRetrievalClient, "retrieve", mock_retrieval_no_valid_keys) - # No need to mock search since it should never be called - - agent_client = KnowledgeAgentRetrievalClient(endpoint="", agent_name="", credential=AzureKeyCredential("")) - - _, results = await chat_approach_with_hydration.run_agentic_retrieval( - messages=[], agent_client=agent_client, search_index_name="test-index" - ) - - # Should get empty results since no valid doc_keys - assert len(results) == 0 - - -@pytest.mark.asyncio -async def test_hydrate_agent_references_search_returns_empty(chat_approach_with_hydration, monkeypatch): - """Test that hydrate_agent_references handles case where search returns no results""" - - async def mock_retrieval_valid_keys(*args, **kwargs): - return KnowledgeAgentRetrievalResponse( - response=[KnowledgeAgentMessage(role="assistant", content=[])], - activity=[], - references=[ - KnowledgeAgentAzureSearchDocReference( - id="1", - activity_source=1, - doc_key="nonexistent_doc", # Valid doc_key but document doesn't exist - source_data={"content": "Content 1", "sourcepage": "page1.pdf"}, - ), - ], - ) - - monkeypatch.setattr(KnowledgeAgentRetrievalClient, "retrieve", mock_retrieval_valid_keys) - - async def mock_search(self, *args, **kwargs): - return MockAsyncSearchResultsIterator("hydrated_empty", None) - - monkeypatch.setattr(SearchClient, "search", mock_search) - - agent_client = KnowledgeAgentRetrievalClient(endpoint="", agent_name="", credential=AzureKeyCredential("")) - - _, results = await chat_approach_with_hydration.run_agentic_retrieval( - messages=[], agent_client=agent_client, search_index_name="test-index" - ) - - # When hydration is enabled but returns empty results, we should get empty list - # rather than falling back to source_data (this is the expected behavior) - assert len(results) == 0 - - @pytest.mark.asyncio async def test_agentic_retrieval_with_top_limit_during_building(chat_approach, monkeypatch): - """Test that document building respects top limit and breaks early (non-hydrated path)""" + """Test that document building respects top limit and breaks early""" monkeypatch.setattr(KnowledgeAgentRetrievalClient, "retrieve", create_mock_retrieve("top_limit")) @@ -280,29 +105,3 @@ async def test_agentic_retrieval_with_top_limit_during_building(chat_approach, m for i, result in enumerate(results): assert result.id == f"doc{i}" assert result.content == f"Content {i}" - - -@pytest.mark.asyncio -async def test_hydrate_agent_references_with_top_limit_during_collection(chat_approach_with_hydration, monkeypatch): - """Test that hydration respects top limit when collecting doc_keys""" - - monkeypatch.setattr(KnowledgeAgentRetrievalClient, "retrieve", create_mock_retrieve("top_limit")) - - async def mock_search(self, *args, **kwargs): - return MockAsyncSearchResultsIterator("hydrated_multi", None) - - monkeypatch.setattr(SearchClient, "search", mock_search) - - agent_client = KnowledgeAgentRetrievalClient(endpoint="", agent_name="", credential=AzureKeyCredential("")) - - _, results = await chat_approach_with_hydration.run_agentic_retrieval( - messages=[], - agent_client=agent_client, - search_index_name="test-index", - top=2, # Limit to 2 documents - ) - - # Should get exactly 2 documents due to top limit during doc_keys collection - assert len(results) == 2 - assert results[0].content == "Hydrated content 1" - assert results[1].content == "Hydrated content 2" diff --git a/tests/test_app.py b/tests/test_app.py index 96e5c03941..3cb78e58a9 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -558,7 +558,6 @@ async def test_chat_text_agent(agent_client, snapshot): ) assert response.status_code == 200 result = await response.get_json() - assert result["context"]["thoughts"][0]["props"]["max_docs_for_reranker"] == 500 assert result["context"]["thoughts"][0]["props"]["reranker_threshold"] == 0 snapshot.assert_match(json.dumps(result, indent=4), "result.json") diff --git a/tests/test_searchmanager.py b/tests/test_searchmanager.py index f7d95c5d27..e13656710d 100644 --- a/tests/test_searchmanager.py +++ b/tests/test_searchmanager.py @@ -717,3 +717,73 @@ async def mock_list_index_names(self): profiles = indexes[0].vector_search.profiles assert any(p.name == "images_embedding_profile" for p in profiles), "Should have an image embedding profile" assert any(p.name == "embedding3-profile" for p in profiles), "Should have a text embedding profile" + + +@pytest.mark.asyncio +async def test_create_agent_field_names_with_acls_and_images(monkeypatch, search_info): + """Covers create_agent logic adding oids/groups/images and creating knowledge source (lines 443-447,449,457).""" + + # Provide a SearchInfo configured for agentic retrieval and image search + search_info_agent = SearchInfo( + endpoint=search_info.endpoint, + credential=search_info.credential, + index_name=search_info.index_name, + use_agentic_retrieval=True, + agent_name="test-agent", + agent_max_output_tokens=1024, + azure_openai_searchagent_model="gpt-4o-mini", + azure_openai_searchagent_deployment="gpt-4o-mini", + azure_openai_endpoint="https://openaidummy.openai.azure.com/", + azure_vision_endpoint="https://visiondummy.cognitiveservices.azure.com/", + ) + + created_indexes = [] + knowledge_sources = [] + agents = [] + + async def mock_list_index_names(self): + for index in []: + yield index # pragma: no cover + + async def mock_create_index(self, index): + created_indexes.append(index) + + async def mock_create_or_update_knowledge_source(self, knowledge_source, *args, **kwargs): + knowledge_sources.append(knowledge_source) + return knowledge_source + + async def mock_create_or_update_agent(self, agent, *args, **kwargs): + agents.append(agent) + return agent + + monkeypatch.setattr(SearchIndexClient, "list_index_names", mock_list_index_names) + monkeypatch.setattr(SearchIndexClient, "create_index", mock_create_index) + monkeypatch.setattr(SearchIndexClient, "create_or_update_knowledge_source", mock_create_or_update_knowledge_source) + monkeypatch.setattr(SearchIndexClient, "create_or_update_agent", mock_create_or_update_agent) + + manager = SearchManager(search_info_agent, use_acls=True, search_images=True) + + # Act + await manager.create_index() + + # Assert index created + assert len(created_indexes) == 1, "Index should be created before agent creation" + # Assert index has images and ACL fields + index = created_indexes[0] + assert any(field.name == "images" for field in index.fields), "Index should have images field" + assert any(field.name == "oids" for field in index.fields), "Index should have oids field" + assert any(field.name == "groups" for field in index.fields), "Index should have groups field" + + # Assert knowledge source was created with expected selected fields + assert len(knowledge_sources) == 1, "Knowledge source should be created" + ks = knowledge_sources[0] + selected = ks.search_index_parameters.source_data_select.split(",") + # Required baseline fields + for f in ["id", "sourcepage", "sourcefile", "content", "category", "oids", "groups", "images/url"]: + assert f in selected, f"Missing field {f} in knowledge source selection" + + # Assert agent created referencing the knowledge source + assert len(agents) == 1, "Agent should be created" + agent = agents[0] + assert agent.name == "test-agent" + assert any(ks_ref.name == ks.name for ks_ref in agent.knowledge_sources), "Agent should reference knowledge source"