@@ -83,6 +83,7 @@ async def create_index(self):
83
83
logger .info ("Checking whether search index %s exists..." , self .search_info .index_name )
84
84
85
85
async with self .search_info .create_search_index_client () as search_index_client :
86
+
86
87
embedding_field = None
87
88
images_field = None
88
89
text_vector_search_profile = None
@@ -230,12 +231,7 @@ async def create_index(self):
230
231
type = "Edm.String" ,
231
232
analyzer_name = self .search_analyzer_name ,
232
233
),
233
- SimpleField (
234
- name = "category" ,
235
- type = "Edm.String" ,
236
- filterable = True ,
237
- facetable = True ,
238
- ),
234
+ SimpleField (name = "category" , type = "Edm.String" , filterable = True , facetable = True ),
239
235
SimpleField (
240
236
name = "sourcepage" ,
241
237
type = "Edm.String" ,
@@ -280,10 +276,7 @@ async def create_index(self):
280
276
vector_algorithms : list [VectorSearchAlgorithmConfiguration ] = []
281
277
vector_compressions : list [VectorSearchCompression ] = []
282
278
if embedding_field :
283
- logger .info (
284
- "Including %s field for text vectors in new index" ,
285
- embedding_field .name ,
286
- )
279
+ logger .info ("Including %s field for text vectors in new index" , embedding_field .name )
287
280
fields .append (embedding_field )
288
281
if text_vectorizer is not None :
289
282
vectorizers .append (text_vectorizer )
@@ -298,10 +291,7 @@ async def create_index(self):
298
291
vector_compressions .append (text_vector_compression )
299
292
300
293
if images_field :
301
- logger .info (
302
- "Including %s field for image descriptions and vectors in new index" ,
303
- images_field .name ,
304
- )
294
+ logger .info ("Including %s field for image descriptions and vectors in new index" , images_field .name )
305
295
fields .append (images_field )
306
296
if image_vector_search_profile is None or image_vector_algorithm is None :
307
297
raise ValueError ("Image search profile and algorithm must be set" )
@@ -338,10 +328,7 @@ async def create_index(self):
338
328
logger .info ("Search index %s already exists" , self .search_info .index_name )
339
329
existing_index = await search_index_client .get_index (self .search_info .index_name )
340
330
if not any (field .name == "storageUrl" for field in existing_index .fields ):
341
- logger .info (
342
- "Adding storageUrl field to index %s" ,
343
- self .search_info .index_name ,
344
- )
331
+ logger .info ("Adding storageUrl field to index %s" , self .search_info .index_name )
345
332
existing_index .fields .append (
346
333
SimpleField (
347
334
name = "storageUrl" ,
@@ -406,10 +393,7 @@ async def create_index(self):
406
393
407
394
if existing_index .semantic_search :
408
395
if not existing_index .semantic_search .default_configuration_name :
409
- logger .info (
410
- "Adding default semantic configuration to index %s" ,
411
- self .search_info .index_name ,
412
- )
396
+ logger .info ("Adding default semantic configuration to index %s" , self .search_info .index_name )
413
397
existing_index .semantic_search .default_configuration_name = "default"
414
398
415
399
if existing_index .semantic_search .configurations :
@@ -419,10 +403,7 @@ async def create_index(self):
419
403
and existing_semantic_config .prioritized_fields .title_field
420
404
and not existing_semantic_config .prioritized_fields .title_field .field_name == "sourcepage"
421
405
):
422
- logger .info (
423
- "Updating semantic configuration for index %s" ,
424
- self .search_info .index_name ,
425
- )
406
+ logger .info ("Updating semantic configuration for index %s" , self .search_info .index_name )
426
407
existing_semantic_config .prioritized_fields .title_field = SemanticField (
427
408
field_name = "sourcepage"
428
409
)
@@ -432,10 +413,7 @@ async def create_index(self):
432
413
or len (existing_index .vector_search .vectorizers ) == 0
433
414
):
434
415
if self .embeddings is not None and isinstance (self .embeddings , AzureOpenAIEmbeddingService ):
435
- logger .info (
436
- "Adding vectorizer to search index %s" ,
437
- self .search_info .index_name ,
438
- )
416
+ logger .info ("Adding vectorizer to search index %s" , self .search_info .index_name )
439
417
existing_index .vector_search .vectorizers = [
440
418
AzureOpenAIVectorizer (
441
419
vectorizer_name = f"{ self .search_info .index_name } -vectorizer" ,
@@ -467,8 +445,7 @@ async def create_agent(self):
467
445
name = self .search_info .agent_name ,
468
446
target_indexes = [
469
447
KnowledgeAgentTargetIndex (
470
- index_name = self .search_info .index_name ,
471
- default_include_reference_source_data = True ,
448
+ index_name = self .search_info .index_name , default_include_reference_source_data = True
472
449
)
473
450
],
474
451
models = [
@@ -494,35 +471,33 @@ async def update_content(self, sections: list[Section], url: Optional[str] = Non
494
471
495
472
async with self .search_info .create_search_client () as search_client :
496
473
for batch_index , batch in enumerate (section_batches ):
497
- image_fields = {}
498
- if self . search_images :
499
- image_fields = {
500
- "images" : [
501
- {
502
- "url " : image . url ,
503
- "description" : image . description ,
504
- "boundingbox " : image .bbox ,
505
- "embedding " : image .embedding ,
506
- }
507
- for section in batch
508
- for image in section . chunk . images
509
- ]
510
- }
511
- documents = [
512
- {
474
+ documents = []
475
+ for section_index , section in enumerate ( batch ) :
476
+ image_fields = {}
477
+ if self . search_images :
478
+ image_fields = {
479
+ "images " : [
480
+ {
481
+ "url " : image .url ,
482
+ "description " : image .description ,
483
+ "boundingbox" : image . bbox ,
484
+ "embedding" : image . embedding ,
485
+ }
486
+ for image in section . chunk . images
487
+ ]
488
+ }
489
+ document = {
513
490
"id" : f"{ section .content .filename_to_id ()} -page-{ section_index + batch_index * MAX_BATCH_SIZE } " ,
514
491
"content" : section .chunk .text ,
515
492
"category" : section .category ,
516
493
"sourcepage" : BlobManager .sourcepage_from_file_page (
517
- filename = section .content .filename (),
518
- page = section .chunk .page_num ,
494
+ filename = section .content .filename (), page = section .chunk .page_num
519
495
),
520
496
"sourcefile" : section .content .filename (),
521
497
** image_fields ,
522
498
** section .content .acls ,
523
499
}
524
- for section_index , section in enumerate (batch )
525
- ]
500
+ documents .append (document )
526
501
if url :
527
502
for document in documents :
528
503
document ["storageUrl" ] = url
@@ -544,9 +519,7 @@ async def update_content(self, sections: list[Section], url: Optional[str] = Non
544
519
545
520
async def remove_content (self , path : Optional [str ] = None , only_oid : Optional [str ] = None ):
546
521
logger .info (
547
- "Removing sections from '{%s or '<all>'}' from search index '%s'" ,
548
- path ,
549
- self .search_info .index_name ,
522
+ "Removing sections from '{%s or '<all>'}' from search index '%s'" , path , self .search_info .index_name
550
523
)
551
524
async with self .search_info .create_search_client () as search_client :
552
525
while True :
@@ -558,10 +531,7 @@ async def remove_content(self, path: Optional[str] = None, only_oid: Optional[st
558
531
filter = f"sourcefile eq '{ path_for_filter } '"
559
532
max_results = 1000
560
533
result = await search_client .search (
561
- search_text = "" ,
562
- filter = filter ,
563
- top = max_results ,
564
- include_total_count = True ,
534
+ search_text = "" , filter = filter , top = max_results , include_total_count = True
565
535
)
566
536
result_count = await result .get_count ()
567
537
if result_count == 0 :
0 commit comments