6
6
7
7
from azure .search .documents .agent .aio import KnowledgeAgentRetrievalClient
8
8
from azure .search .documents .agent .models import (
9
- KnowledgeAgentAzureSearchDocReference ,
10
- KnowledgeAgentIndexParams ,
11
9
KnowledgeAgentMessage ,
12
10
KnowledgeAgentMessageTextContent ,
13
11
KnowledgeAgentRetrievalRequest ,
14
12
KnowledgeAgentRetrievalResponse ,
15
- KnowledgeAgentSearchActivityRecord ,
13
+ KnowledgeAgentSearchIndexReference ,
14
+ SearchIndexKnowledgeSourceParams ,
16
15
)
17
16
from azure .search .documents .aio import SearchClient
18
17
from azure .search .documents .models import (
@@ -162,7 +161,6 @@ def __init__(
162
161
openai_host : str ,
163
162
prompt_manager : PromptManager ,
164
163
reasoning_effort : Optional [str ] = None ,
165
- hydrate_references : bool = False ,
166
164
multimodal_enabled : bool = False ,
167
165
image_embeddings_client : Optional [ImageEmbeddings ] = None ,
168
166
global_blob_manager : Optional [BlobManager ] = None ,
@@ -180,7 +178,6 @@ def __init__(
180
178
self .openai_host = openai_host
181
179
self .prompt_manager = prompt_manager
182
180
self .reasoning_effort = reasoning_effort
183
- self .hydrate_references = hydrate_references
184
181
self .include_token_usage = True
185
182
self .multimodal_enabled = multimodal_enabled
186
183
self .image_embeddings_client = image_embeddings_client
@@ -275,139 +272,58 @@ async def run_agentic_retrieval(
275
272
search_index_name : str ,
276
273
top : Optional [int ] = None ,
277
274
filter_add_on : Optional [str ] = None ,
278
- minimum_reranker_score : Optional [float ] = None ,
279
- max_docs_for_reranker : Optional [int ] = None ,
280
275
results_merge_strategy : Optional [str ] = None ,
281
276
) -> tuple [KnowledgeAgentRetrievalResponse , list [Document ]]:
282
277
# STEP 1: Invoke agentic retrieval
283
278
response = await agent_client .retrieve (
284
279
retrieval_request = KnowledgeAgentRetrievalRequest (
285
280
messages = [
286
281
KnowledgeAgentMessage (
287
- role = str (msg ["role" ]), content = [KnowledgeAgentMessageTextContent (text = str (msg ["content" ]))]
282
+ role = str (msg ["role" ]),
283
+ content = [KnowledgeAgentMessageTextContent (text = str (msg ["content" ]))],
288
284
)
289
285
for msg in messages
290
286
if msg ["role" ] != "system"
291
287
],
292
- target_index_params = [
293
- KnowledgeAgentIndexParams (
294
- index_name = search_index_name ,
295
- reranker_threshold = minimum_reranker_score ,
296
- max_docs_for_reranker = max_docs_for_reranker ,
288
+ knowledge_source_params = [
289
+ SearchIndexKnowledgeSourceParams (
290
+ knowledge_source_name = "default-knowledge-source" ,
297
291
filter_add_on = filter_add_on ,
298
- include_reference_source_data = True ,
299
292
)
300
293
],
301
294
)
302
295
)
303
296
304
- # Map activity id -> agent's internal search query
305
- activities = response .activity
306
- activity_mapping : dict [int , str ] = (
307
- {
308
- activity .id : activity .query .search
309
- for activity in activities
310
- if (
311
- isinstance (activity , KnowledgeAgentSearchActivityRecord )
312
- and activity .query
313
- and activity .query .search is not None
314
- )
315
- }
316
- if activities
317
- else {}
318
- )
319
-
320
297
# No refs? we're done
321
298
if not (response and response .references ):
322
299
return response , []
323
300
324
301
# Extract references
325
- refs = [r for r in response .references if isinstance (r , KnowledgeAgentAzureSearchDocReference )]
302
+ refs = [r for r in response .references if isinstance (r , KnowledgeAgentSearchIndexReference )]
326
303
327
304
documents : list [Document ] = []
328
305
329
- if self .hydrate_references :
330
- # Hydrate references to get full documents
331
- documents = await self .hydrate_agent_references (
332
- references = refs ,
333
- top = top ,
334
- )
335
- else :
336
- # Create documents from reference source data
337
- for ref in refs :
338
- if ref .source_data :
339
- documents .append (
340
- Document (
341
- id = ref .doc_key ,
342
- content = ref .source_data .get ("content" ),
343
- sourcepage = ref .source_data .get ("sourcepage" ),
344
- )
345
- )
346
- if top and len (documents ) >= top :
347
- break
348
-
349
- # Build mappings for agent queries and sorting
350
- ref_to_activity : dict [str , int ] = {}
351
- doc_to_ref_id : dict [str , str ] = {}
306
+ # Create documents from reference source data
352
307
for ref in refs :
353
- if ref .doc_key :
354
- ref_to_activity [ref .doc_key ] = ref .activity_source
355
- doc_to_ref_id [ref .doc_key ] = ref .id
356
-
357
- # Inject agent search queries into all documents
358
- for doc in documents :
359
- if doc .id and doc .id in ref_to_activity :
360
- activity_id = ref_to_activity [doc .id ]
361
- doc .search_agent_query = activity_mapping .get (activity_id , "" )
362
-
363
- # Apply sorting strategy to the documents
364
- if results_merge_strategy == "interleaved" : # Use interleaved reference order
365
- documents = sorted (
366
- documents ,
367
- key = lambda d : int (doc_to_ref_id .get (d .id , 0 )) if d .id and doc_to_ref_id .get (d .id ) else 0 ,
368
- )
369
- # else: Default - preserve original order
308
+ if ref .source_data :
309
+ documents .append (
310
+ Document (
311
+ id = ref .source_data .get ("id" ),
312
+ content = ref .source_data .get ("content" ),
313
+ category = ref .source_data .get ("category" ),
314
+ sourcepage = ref .source_data .get ("sourcepage" ),
315
+ sourcefile = ref .source_data .get ("sourcefile" ),
316
+ oids = ref .source_data .get ("oids" ),
317
+ groups = ref .source_data .get ("groups" ),
318
+ reranker_score = ref .reranker_score ,
319
+ images = ref .source_data .get ("images" ),
320
+ )
321
+ )
322
+ if top and len (documents ) >= top :
323
+ break
370
324
371
325
return response , documents
372
326
373
- async def hydrate_agent_references (
374
- self ,
375
- references : list [KnowledgeAgentAzureSearchDocReference ],
376
- top : Optional [int ],
377
- ) -> list [Document ]:
378
- doc_keys : set [str ] = set ()
379
-
380
- for ref in references :
381
- if not ref .doc_key :
382
- continue
383
- doc_keys .add (ref .doc_key )
384
- if top and len (doc_keys ) >= top :
385
- break
386
-
387
- if not doc_keys :
388
- return []
389
-
390
- # Build search filter only on unique doc IDs
391
- id_csv = "," .join (doc_keys )
392
- id_filter = f"search.in(id, '{ id_csv } ', ',')"
393
-
394
- # Fetch full documents
395
- hydrated_docs : list [Document ] = await self .search (
396
- top = len (doc_keys ),
397
- query_text = None ,
398
- filter = id_filter ,
399
- vectors = [],
400
- use_text_search = False ,
401
- use_vector_search = False ,
402
- use_semantic_ranker = False ,
403
- use_semantic_captions = False ,
404
- minimum_search_score = None ,
405
- minimum_reranker_score = None ,
406
- use_query_rewriting = False ,
407
- )
408
-
409
- return hydrated_docs
410
-
411
327
async def get_sources_content (
412
328
self ,
413
329
results : list [Document ],
@@ -535,7 +451,11 @@ async def compute_multimodal_embedding(self, q: str):
535
451
if not self .image_embeddings_client :
536
452
raise ValueError ("Approach is missing an image embeddings client for multimodal queries" )
537
453
multimodal_query_vector = await self .image_embeddings_client .create_embedding_for_text (q )
538
- return VectorizedQuery (vector = multimodal_query_vector , k_nearest_neighbors = 50 , fields = "images/embedding" )
454
+ return VectorizedQuery (
455
+ vector = multimodal_query_vector ,
456
+ k_nearest_neighbors = 50 ,
457
+ fields = "images/embedding" ,
458
+ )
539
459
540
460
def get_system_prompt_variables (self , override_prompt : Optional [str ]) -> dict [str , str ]:
541
461
# Allows client to replace the entire prompt, or to inject into the existing prompt using >>>
0 commit comments