@@ -471,28 +471,33 @@ async def update_content(self, sections: list[Section], url: Optional[str] = Non
471
471
472
472
async with self .search_info .create_search_client () as search_client :
473
473
for batch_index , batch in enumerate (section_batches ):
474
- documents = [
475
- {
474
+ documents = []
475
+ for section_index , section in enumerate (batch ):
476
+ image_fields = {}
477
+ if self .search_images :
478
+ image_fields = {
479
+ "images" : [
480
+ {
481
+ "url" : image .url ,
482
+ "description" : image .description ,
483
+ "boundingbox" : image .bbox ,
484
+ "embedding" : image .embedding ,
485
+ }
486
+ for image in section .chunk .images
487
+ ]
488
+ }
489
+ document = {
476
490
"id" : f"{ section .content .filename_to_id ()} -page-{ section_index + batch_index * MAX_BATCH_SIZE } " ,
477
491
"content" : section .chunk .text ,
478
492
"category" : section .category ,
479
493
"sourcepage" : BlobManager .sourcepage_from_file_page (
480
494
filename = section .content .filename (), page = section .chunk .page_num
481
495
),
482
496
"sourcefile" : section .content .filename (),
483
- "images" : [
484
- {
485
- "url" : image .url ,
486
- "description" : image .description ,
487
- "boundingbox" : image .bbox ,
488
- "embedding" : image .embedding ,
489
- }
490
- for image in section .chunk .images
491
- ],
497
+ ** image_fields ,
492
498
** section .content .acls ,
493
499
}
494
- for section_index , section in enumerate (batch )
495
- ]
500
+ documents .append (document )
496
501
if url :
497
502
for document in documents :
498
503
document ["storageUrl" ] = url
0 commit comments