6
6
7
7
from azure .search .documents .agent .aio import KnowledgeAgentRetrievalClient
8
8
from azure .search .documents .agent .models import (
9
- KnowledgeAgentAzureSearchDocReference ,
10
- KnowledgeAgentIndexParams ,
11
9
KnowledgeAgentMessage ,
12
10
KnowledgeAgentMessageTextContent ,
13
11
KnowledgeAgentRetrievalRequest ,
14
12
KnowledgeAgentRetrievalResponse ,
15
- KnowledgeAgentSearchActivityRecord ,
13
+ KnowledgeAgentSearchIndexActivityRecord ,
14
+ KnowledgeAgentSearchIndexReference ,
15
+ SearchIndexKnowledgeSourceParams ,
16
16
)
17
17
from azure .search .documents .aio import SearchClient
18
18
from azure .search .documents .models import (
@@ -162,7 +162,6 @@ def __init__(
162
162
openai_host : str ,
163
163
prompt_manager : PromptManager ,
164
164
reasoning_effort : Optional [str ] = None ,
165
- hydrate_references : bool = False ,
166
165
multimodal_enabled : bool = False ,
167
166
image_embeddings_client : Optional [ImageEmbeddings ] = None ,
168
167
global_blob_manager : Optional [BlobManager ] = None ,
@@ -180,7 +179,6 @@ def __init__(
180
179
self .openai_host = openai_host
181
180
self .prompt_manager = prompt_manager
182
181
self .reasoning_effort = reasoning_effort
183
- self .hydrate_references = hydrate_references
184
182
self .include_token_usage = True
185
183
self .multimodal_enabled = multimodal_enabled
186
184
self .image_embeddings_client = image_embeddings_client
@@ -276,7 +274,6 @@ async def run_agentic_retrieval(
276
274
top : Optional [int ] = None ,
277
275
filter_add_on : Optional [str ] = None ,
278
276
minimum_reranker_score : Optional [float ] = None ,
279
- max_docs_for_reranker : Optional [int ] = None ,
280
277
results_merge_strategy : Optional [str ] = None ,
281
278
) -> tuple [KnowledgeAgentRetrievalResponse , list [Document ]]:
282
279
# STEP 1: Invoke agentic retrieval
@@ -289,13 +286,10 @@ async def run_agentic_retrieval(
289
286
for msg in messages
290
287
if msg ["role" ] != "system"
291
288
],
292
- target_index_params = [
293
- KnowledgeAgentIndexParams (
294
- index_name = search_index_name ,
295
- reranker_threshold = minimum_reranker_score ,
296
- max_docs_for_reranker = max_docs_for_reranker ,
289
+ knowledge_source_params = [
290
+ SearchIndexKnowledgeSourceParams (
291
+ knowledge_source_name = search_index_name ,
297
292
filter_add_on = filter_add_on ,
298
- include_reference_source_data = True ,
299
293
)
300
294
],
301
295
)
@@ -305,12 +299,12 @@ async def run_agentic_retrieval(
305
299
activities = response .activity
306
300
activity_mapping : dict [int , str ] = (
307
301
{
308
- activity .id : activity .query .search
302
+ activity .id : activity .search_index_arguments .search
309
303
for activity in activities
310
304
if (
311
- isinstance (activity , KnowledgeAgentSearchActivityRecord )
312
- and activity .query
313
- and activity .query .search is not None
305
+ isinstance (activity , KnowledgeAgentSearchIndexActivityRecord )
306
+ and activity .search_index_arguments
307
+ and activity .search_index_arguments .search is not None
314
308
)
315
309
}
316
310
if activities
@@ -322,92 +316,42 @@ async def run_agentic_retrieval(
322
316
return response , []
323
317
324
318
# Extract references
325
- refs = [r for r in response .references if isinstance (r , KnowledgeAgentAzureSearchDocReference )]
326
-
319
+ refs = [r for r in response .references if isinstance (r , KnowledgeAgentSearchIndexReference )]
327
320
documents : list [Document ] = []
328
-
329
- if self .hydrate_references :
330
- # Hydrate references to get full documents
331
- documents = await self .hydrate_agent_references (
332
- references = refs ,
333
- top = top ,
334
- )
335
- else :
336
- # Create documents from reference source data
337
- for ref in refs :
338
- if ref .source_data :
339
- documents .append (
340
- Document (
341
- id = ref .doc_key ,
342
- content = ref .source_data .get ("content" ),
343
- sourcepage = ref .source_data .get ("sourcepage" ),
344
- )
345
- )
346
- if top and len (documents ) >= top :
347
- break
348
-
349
- # Build mappings for agent queries and sorting
350
- ref_to_activity : dict [str , int ] = {}
351
321
doc_to_ref_id : dict [str , str ] = {}
322
+
323
+ # Create documents from reference source data
352
324
for ref in refs :
353
- if ref .doc_key :
354
- ref_to_activity [ref .doc_key ] = ref .activity_source
325
+ if ref .source_data and ref .doc_key :
326
+ # Note that ref.doc_key is the same as source_data["id"]
327
+ documents .append (
328
+ Document (
329
+ id = ref .doc_key ,
330
+ content = ref .source_data .get ("content" ),
331
+ category = ref .source_data .get ("category" ),
332
+ sourcepage = ref .source_data .get ("sourcepage" ),
333
+ sourcefile = ref .source_data .get ("sourcefile" ),
334
+ oids = ref .source_data .get ("oids" ),
335
+ groups = ref .source_data .get ("groups" ),
336
+ reranker_score = ref .reranker_score ,
337
+ images = ref .source_data .get ("images" ),
338
+ search_agent_query = activity_mapping [ref .activity_source ],
339
+ )
340
+ )
355
341
doc_to_ref_id [ref .doc_key ] = ref .id
342
+ if top and len (documents ) >= top :
343
+ break
356
344
357
- # Inject agent search queries into all documents
358
- for doc in documents :
359
- if doc .id and doc .id in ref_to_activity :
360
- activity_id = ref_to_activity [doc .id ]
361
- doc .search_agent_query = activity_mapping .get (activity_id , "" )
345
+ if minimum_reranker_score is not None :
346
+ documents = [doc for doc in documents if (doc .reranker_score or 0 ) >= minimum_reranker_score ]
362
347
363
- # Apply sorting strategy to the documents
364
- if results_merge_strategy == "interleaved" : # Use interleaved reference order
348
+ if results_merge_strategy == "interleaved" :
365
349
documents = sorted (
366
350
documents ,
367
351
key = lambda d : int (doc_to_ref_id .get (d .id , 0 )) if d .id and doc_to_ref_id .get (d .id ) else 0 ,
368
352
)
369
- # else: Default - preserve original order
370
-
371
353
return response , documents
372
354
373
- async def hydrate_agent_references (
374
- self ,
375
- references : list [KnowledgeAgentAzureSearchDocReference ],
376
- top : Optional [int ],
377
- ) -> list [Document ]:
378
- doc_keys : set [str ] = set ()
379
-
380
- for ref in references :
381
- if not ref .doc_key :
382
- continue
383
- doc_keys .add (ref .doc_key )
384
- if top and len (doc_keys ) >= top :
385
- break
386
-
387
- if not doc_keys :
388
- return []
389
-
390
- # Build search filter only on unique doc IDs
391
- id_csv = "," .join (doc_keys )
392
- id_filter = f"search.in(id, '{ id_csv } ', ',')"
393
-
394
- # Fetch full documents
395
- hydrated_docs : list [Document ] = await self .search (
396
- top = len (doc_keys ),
397
- query_text = None ,
398
- filter = id_filter ,
399
- vectors = [],
400
- use_text_search = False ,
401
- use_vector_search = False ,
402
- use_semantic_ranker = False ,
403
- use_semantic_captions = False ,
404
- minimum_search_score = None ,
405
- minimum_reranker_score = None ,
406
- use_query_rewriting = False ,
407
- )
408
-
409
- return hydrated_docs
410
-
411
355
async def get_sources_content (
412
356
self ,
413
357
results : list [Document ],
0 commit comments