26
26
from azure .search .documents .agent .aio import KnowledgeAgentRetrievalClient
27
27
from azure .search .documents .aio import SearchClient
28
28
from azure .search .documents .indexes .aio import SearchIndexClient
29
- from azure .storage .blob .aio import ContainerClient
30
29
from azure .storage .blob .aio import StorageStreamDownloader as BlobDownloader
31
- from azure .storage .filedatalake .aio import FileSystemClient
32
30
from azure .storage .filedatalake .aio import StorageStreamDownloader as DatalakeDownloader
33
31
from opentelemetry .instrumentation .aiohttp_client import AioHttpClientInstrumentor
34
32
from opentelemetry .instrumentation .asgi import OpenTelemetryMiddleware
59
57
CONFIG_AGENTIC_RETRIEVAL_ENABLED ,
60
58
CONFIG_ASK_APPROACH ,
61
59
CONFIG_AUTH_CLIENT ,
62
- CONFIG_BLOB_CONTAINER_CLIENT ,
63
60
CONFIG_CHAT_APPROACH ,
64
61
CONFIG_CHAT_HISTORY_BROWSER_ENABLED ,
65
62
CONFIG_CHAT_HISTORY_COSMOS_ENABLED ,
66
63
CONFIG_CREDENTIAL ,
67
64
CONFIG_DEFAULT_REASONING_EFFORT ,
68
- CONFIG_IMAGE_BLOB_CONTAINER_CLIENT , # Added this line
69
- CONFIG_IMAGE_DATALAKE_CLIENT ,
65
+ CONFIG_GLOBAL_BLOB_MANAGER ,
70
66
CONFIG_INGESTER ,
71
67
CONFIG_LANGUAGE_PICKER_ENABLED ,
72
68
CONFIG_MULTIMODAL_ENABLED ,
87
83
CONFIG_SPEECH_SERVICE_TOKEN ,
88
84
CONFIG_SPEECH_SERVICE_VOICE ,
89
85
CONFIG_STREAMING_ENABLED ,
90
- CONFIG_USER_BLOB_CONTAINER_CLIENT ,
86
+ CONFIG_USER_BLOB_MANAGER ,
91
87
CONFIG_USER_UPLOAD_ENABLED ,
92
88
CONFIG_VECTOR_SEARCH_ENABLED ,
93
89
)
104
100
setup_openai_client ,
105
101
setup_search_info ,
106
102
)
107
- from prepdocslib .blobmanager import AdlsBlobManager
103
+ from prepdocslib .blobmanager import AdlsBlobManager , BlobManager
108
104
from prepdocslib .embeddings import ImageEmbeddings
109
105
from prepdocslib .filestrategy import UploadUserFileStrategy
110
106
from prepdocslib .listfilestrategy import File
@@ -153,19 +149,16 @@ async def content_file(path: str, auth_claims: dict[str, Any]):
153
149
path_parts = path .rsplit ("#page=" , 1 )
154
150
path = path_parts [0 ]
155
151
current_app .logger .info ("Opening file %s" , path )
156
- blob_container_client : ContainerClient = current_app .config [CONFIG_BLOB_CONTAINER_CLIENT ]
152
+ blob_manager : BlobManager = current_app .config [CONFIG_GLOBAL_BLOB_MANAGER ]
157
153
blob : Union [BlobDownloader , DatalakeDownloader ]
158
- try :
159
- blob = await blob_container_client .get_blob_client (path ).download_blob ()
160
- except ResourceNotFoundError :
154
+ blob = await blob_manager .download_blob (path )
155
+ if blob is None :
161
156
current_app .logger .info ("Path not found in general Blob container: %s" , path )
162
157
if current_app .config [CONFIG_USER_UPLOAD_ENABLED ]:
163
158
try :
164
159
user_oid = auth_claims ["oid" ]
165
- user_blob_container_client = current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ]
166
- user_directory_client : FileSystemClient = user_blob_container_client .get_directory_client (user_oid )
167
- file_client = user_directory_client .get_file_client (path )
168
- blob = await file_client .download_file ()
160
+ user_blob_manager : AdlsBlobManager = current_app .config [CONFIG_USER_BLOB_MANAGER ]
161
+ blob = await user_blob_manager .download_blob (path , user_oid = user_oid )
169
162
except ResourceNotFoundError :
170
163
current_app .logger .exception ("Path not found in DataLake: %s" , path )
171
164
abort (404 )
@@ -364,7 +357,7 @@ async def upload(auth_claims: dict[str, Any]):
364
357
365
358
user_oid = auth_claims ["oid" ]
366
359
file = request_files .getlist ("file" )[0 ]
367
- adls_manager = AdlsBlobManager ( current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ])
360
+ adls_manager : AdlsBlobManager = current_app .config [CONFIG_USER_BLOB_MANAGER ]
368
361
file_url = await adls_manager .upload_blob (file , file .filename , user_oid )
369
362
ingester : UploadUserFileStrategy = current_app .config [CONFIG_INGESTER ]
370
363
await ingester .add_file (File (content = file , url = file_url , acls = {"oids" : [user_oid ]}), user_oid = user_oid )
@@ -377,9 +370,9 @@ async def delete_uploaded(auth_claims: dict[str, Any]):
377
370
request_json = await request .get_json ()
378
371
filename = request_json .get ("filename" )
379
372
user_oid = auth_claims ["oid" ]
380
- adls_manager = AdlsBlobManager ( current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ])
373
+ adls_manager : AdlsBlobManager = current_app .config [CONFIG_USER_BLOB_MANAGER ]
381
374
await adls_manager .remove_blob (filename , user_oid )
382
- ingester = current_app .config [CONFIG_INGESTER ]
375
+ ingester : UploadUserFileStrategy = current_app .config [CONFIG_INGESTER ]
383
376
await ingester .remove_file (filename , user_oid )
384
377
return jsonify ({"message" : f"File { filename } deleted successfully" }), 200
385
378
@@ -391,7 +384,7 @@ async def list_uploaded(auth_claims: dict[str, Any]):
391
384
Only returns files directly in the user's directory, not in subdirectories.
392
385
Excludes image files and the images directory."""
393
386
user_oid = auth_claims ["oid" ]
394
- adls_manager = AdlsBlobManager ( current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ])
387
+ adls_manager : AdlsBlobManager = current_app .config [CONFIG_USER_BLOB_MANAGER ]
395
388
files = await adls_manager .list_blobs (user_oid )
396
389
return jsonify (files ), 200
397
390
@@ -514,18 +507,14 @@ async def setup_clients():
514
507
endpoint = AZURE_SEARCH_ENDPOINT , agent_name = AZURE_SEARCH_AGENT , credential = azure_credential
515
508
)
516
509
517
- blob_container_client = ContainerClient (
518
- f"https://{ AZURE_STORAGE_ACCOUNT } .blob.core.windows.net" , AZURE_STORAGE_CONTAINER , credential = azure_credential
510
+ # Set up the global blob storage manager (used for global content/images, but not user uploads)
511
+ global_blob_manager = BlobManager (
512
+ endpoint = f"https://{ AZURE_STORAGE_ACCOUNT } .blob.core.windows.net" ,
513
+ credential = azure_credential ,
514
+ container = AZURE_STORAGE_CONTAINER ,
515
+ image_container = AZURE_IMAGESTORAGE_CONTAINER ,
519
516
)
520
-
521
- # Set up the image storage container client if configured
522
- image_blob_container_client = None
523
- if AZURE_IMAGESTORAGE_CONTAINER :
524
- image_blob_container_client = ContainerClient (
525
- f"https://{ AZURE_STORAGE_ACCOUNT } .blob.core.windows.net" ,
526
- AZURE_IMAGESTORAGE_CONTAINER ,
527
- credential = azure_credential ,
528
- )
517
+ current_app .config [CONFIG_GLOBAL_BLOB_MANAGER ] = global_blob_manager
529
518
530
519
# Set up authentication helper
531
520
search_index = None
@@ -572,19 +561,19 @@ async def setup_clients():
572
561
openai_organization = OPENAI_ORGANIZATION ,
573
562
)
574
563
575
- user_blob_container_client = None
564
+ user_image_blob_manager = None
576
565
if USE_USER_UPLOAD :
577
566
current_app .logger .info ("USE_USER_UPLOAD is true, setting up user upload feature" )
578
567
if not AZURE_USERSTORAGE_ACCOUNT or not AZURE_USERSTORAGE_CONTAINER :
579
568
raise ValueError (
580
569
"AZURE_USERSTORAGE_ACCOUNT and AZURE_USERSTORAGE_CONTAINER must be set when USE_USER_UPLOAD is true"
581
570
)
582
- user_blob_container_client = FileSystemClient (
583
- f"https://{ AZURE_USERSTORAGE_ACCOUNT } .dfs.core.windows.net" ,
584
- AZURE_USERSTORAGE_CONTAINER ,
571
+ user_blob_manager = AdlsBlobManager (
572
+ endpoint = f"https://{ AZURE_USERSTORAGE_ACCOUNT } .dfs.core.windows.net" ,
573
+ container = AZURE_USERSTORAGE_CONTAINER ,
585
574
credential = azure_credential ,
586
575
)
587
- current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ] = user_blob_container_client
576
+ current_app .config [CONFIG_USER_BLOB_MANAGER ] = user_blob_manager
588
577
589
578
# Set up ingester
590
579
file_processors = setup_file_processors (
@@ -627,7 +616,7 @@ async def setup_clients():
627
616
embeddings = text_embeddings_service ,
628
617
image_embeddings = image_embeddings_service ,
629
618
search_field_name_embedding = AZURE_SEARCH_FIELD_NAME_EMBEDDING ,
630
- blob_manager = AdlsBlobManager ( user_blob_container_client ) ,
619
+ blob_manager = user_image_blob_manager ,
631
620
)
632
621
current_app .config [CONFIG_INGESTER ] = ingester
633
622
@@ -638,9 +627,6 @@ async def setup_clients():
638
627
current_app .config [CONFIG_OPENAI_CLIENT ] = openai_client
639
628
current_app .config [CONFIG_SEARCH_CLIENT ] = search_client
640
629
current_app .config [CONFIG_AGENT_CLIENT ] = agent_client
641
- current_app .config [CONFIG_BLOB_CONTAINER_CLIENT ] = blob_container_client
642
- if image_blob_container_client :
643
- current_app .config [CONFIG_IMAGE_BLOB_CONTAINER_CLIENT ] = image_blob_container_client
644
630
current_app .config [CONFIG_AUTH_CLIENT ] = auth_helper
645
631
646
632
current_app .config [CONFIG_SEMANTIC_RANKER_DEPLOYED ] = AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
@@ -695,8 +681,8 @@ async def setup_clients():
695
681
reasoning_effort = OPENAI_REASONING_EFFORT ,
696
682
multimodal_enabled = USE_MULTIMODAL ,
697
683
image_embeddings_client = image_embeddings_client ,
698
- image_blob_container_client = image_blob_container_client ,
699
- image_datalake_client = user_blob_container_client ,
684
+ global_blob_manager = global_blob_manager ,
685
+ user_blob_manager = user_blob_manager ,
700
686
)
701
687
702
688
# ChatReadRetrieveReadApproach is used by /chat for multi-turn conversation
@@ -722,21 +708,14 @@ async def setup_clients():
722
708
reasoning_effort = OPENAI_REASONING_EFFORT ,
723
709
multimodal_enabled = USE_MULTIMODAL ,
724
710
image_embeddings_client = image_embeddings_client ,
725
- image_blob_container_client = image_blob_container_client ,
726
- image_datalake_client = user_blob_container_client ,
711
+ global_blob_manager = global_blob_manager ,
712
+ user_blob_manager = user_blob_manager ,
727
713
)
728
714
729
715
730
716
@bp .after_app_serving
731
717
async def close_clients ():
732
718
await current_app .config [CONFIG_SEARCH_CLIENT ].close ()
733
- await current_app .config [CONFIG_BLOB_CONTAINER_CLIENT ].close ()
734
- if current_app .config .get (CONFIG_USER_BLOB_CONTAINER_CLIENT ):
735
- await current_app .config [CONFIG_USER_BLOB_CONTAINER_CLIENT ].close ()
736
- if current_app .config .get (CONFIG_IMAGE_BLOB_CONTAINER_CLIENT ):
737
- await current_app .config [CONFIG_IMAGE_BLOB_CONTAINER_CLIENT ].close ()
738
- if current_app .config .get (CONFIG_IMAGE_DATALAKE_CLIENT ):
739
- await current_app .config [CONFIG_IMAGE_DATALAKE_CLIENT ].close ()
740
719
741
720
742
721
def create_app ():
0 commit comments