@@ -179,7 +179,7 @@ async def upload_blob(self, file: Union[File, IO], filename: str, user_oid: str)
179
179
str: The URL of the uploaded file, with forward slashes (not URL-encoded)
180
180
"""
181
181
# Ensure user directory exists but don't create a subdirectory
182
- user_directory_client = await self ._ensure_directory (user_oid , owner = user_oid )
182
+ user_directory_client = await self ._ensure_directory (directory_path = user_oid , user_oid = user_oid )
183
183
184
184
# Create file directly in user directory
185
185
file_client = user_directory_client .get_file_client (filename )
@@ -240,12 +240,12 @@ async def upload_document_image(
240
240
Returns:
241
241
str: The URL of the uploaded file, with forward slashes (not URL-encoded)
242
242
"""
243
- await self ._ensure_directory (user_oid , owner = user_oid )
244
- directory_path = self ._get_image_directory_path (document_filename , user_oid , image_page_num )
245
- image_directory_client = await self ._ensure_directory (directory_path , owner = user_oid )
243
+ await self ._ensure_directory (directory_path = user_oid , user_oid = user_oid )
244
+ image_directory_path = self ._get_image_directory_path (document_filename , user_oid , image_page_num )
245
+ image_directory_client = await self ._ensure_directory (directory_path = image_directory_path , user_oid = user_oid )
246
246
file_client = image_directory_client .get_file_client (image_filename )
247
247
image_bytes = BaseBlobManager .add_image_citation (image_bytes , document_filename , image_filename , image_page_num )
248
- logger .info ("Uploading document image '%s' to '%s'" , image_filename , directory_path )
248
+ logger .info ("Uploading document image '%s' to '%s'" , image_filename , image_directory_path )
249
249
await file_client .upload_data (image_bytes , overwrite = True , metadata = {"UploadedBy" : user_oid })
250
250
return unquote (file_client .url )
251
251
@@ -284,7 +284,7 @@ async def download_blob(
284
284
filename = path_parts [- 1 ]
285
285
286
286
try :
287
- user_directory_client = self ._ensure_directory (directory_path , user_oid )
287
+ user_directory_client = self ._ensure_directory (directory_path = directory_path , user_oid = user_oid )
288
288
file_client = user_directory_client .get_file_client (filename )
289
289
blob = await file_client .download_file ()
290
290
return blob
@@ -310,15 +310,17 @@ async def remove_blob(self, filename: str, user_oid: str) -> None:
310
310
ResourceNotFoundError: If the file does not exist
311
311
"""
312
312
# Ensure the user directory exists
313
- user_directory_client = await self ._ensure_directory (user_oid , owner = user_oid )
313
+ user_directory_client = await self ._ensure_directory (directory_path = user_oid , user_oid = user_oid )
314
314
# Delete the main document file
315
315
file_client = user_directory_client .get_file_client (filename )
316
316
await file_client .delete_file ()
317
317
318
318
# Try to delete any associated image directories
319
319
try :
320
320
image_directory_path = self ._get_image_directory_path (filename , user_oid )
321
- image_directory_client = await self ._ensure_directory (image_directory_path , owner = user_oid )
321
+ image_directory_client = await self ._ensure_directory (
322
+ directory_path = image_directory_path , user_oid = user_oid
323
+ )
322
324
await image_directory_client .delete_directory ()
323
325
logger .info (f"Deleted associated image directory: { image_directory_path } " )
324
326
except ResourceNotFoundError :
@@ -338,7 +340,7 @@ async def list_blobs(self, user_oid: str) -> list[str]:
338
340
Returns:
339
341
list[str]: List of filenames that belong to the user
340
342
"""
341
- user_directory_client = await self ._ensure_directory (user_oid , owner = user_oid )
343
+ user_directory_client = await self ._ensure_directory (directory_path = user_oid , user_oid = user_oid )
342
344
files = []
343
345
try :
344
346
all_paths = user_directory_client .get_paths ()
@@ -389,21 +391,30 @@ def __init__(
389
391
self .subscription_id = subscription_id
390
392
self .image_container = image_container
391
393
392
- async def upload_blob (self , file : File ) -> Optional [list [str ]]:
393
- async with BlobServiceClient (
394
+ def get_managedidentity_connectionstring (self ):
395
+ if not self .account or not self .resource_group or not self .subscription_id :
396
+ raise ValueError ("Account, resource group, and subscription ID must be set to generate connection string." )
397
+ return f"ResourceId=/subscriptions/{ self .subscription_id } /resourceGroups/{ self .resource_group } /providers/Microsoft.Storage/storageAccounts/{ self .account } ;"
398
+
399
+ async def _get_service_client (self ) -> BlobServiceClient :
400
+ return BlobServiceClient (
394
401
account_url = self .endpoint , credential = self .credential , max_single_put_size = 4 * 1024 * 1024
395
- ) as service_client , service_client .get_container_client (self .container ) as container_client :
396
- if not await container_client .exists ():
397
- await container_client .create_container ()
398
-
399
- # Re-open and upload the original file
400
- if file .url is None :
401
- with open (file .content .name , "rb" ) as reopened_file :
402
- blob_name = self .blob_name_from_file_name (file .content .name )
403
- logger .info ("Uploading blob for document '%s'" , blob_name )
404
- blob_client = await container_client .upload_blob (blob_name , reopened_file , overwrite = True )
405
- file .url = blob_client .url
406
- return None
402
+ )
403
+
404
+ async def upload_blob (self , file : File ) -> Optional [list [str ]]:
405
+ blob_service_client = await self ._get_service_client ()
406
+ container_client = blob_service_client .get_container_client (self .container )
407
+ if not await container_client .exists ():
408
+ await container_client .create_container ()
409
+
410
+ # Re-open and upload the original file
411
+ if file .url is None :
412
+ with open (file .content .name , "rb" ) as reopened_file :
413
+ blob_name = self .blob_name_from_file_name (file .content .name )
414
+ logger .info ("Uploading blob for document '%s'" , blob_name )
415
+ blob_client = await container_client .upload_blob (blob_name , reopened_file , overwrite = True )
416
+ file .url = blob_client .url
417
+ blob_service_client .close ()
407
418
408
419
async def upload_document_image (
409
420
self ,
@@ -421,22 +432,16 @@ async def upload_document_image(
421
432
raise ValueError (
422
433
"user_oid is not supported for BlobManager. Use AdlsBlobManager for user-specific operations."
423
434
)
424
- async with BlobServiceClient (
425
- account_url = self .endpoint , credential = self .credential , max_single_put_size = 4 * 1024 * 1024
426
- ) as service_client , service_client .get_container_client (self .image_container ) as container_client :
427
- if not await container_client .exists ():
428
- await container_client .create_container ()
429
- image_bytes = self .add_image_citation (image_bytes , document_filename , image_filename , image_page_num )
430
- blob_name = f"{ self .blob_name_from_file_name (document_filename )} /page{ image_page_num } /{ image_filename } "
431
- logger .info ("Uploading blob for document image '%s'" , blob_name )
432
- blob_client = await container_client .upload_blob (blob_name , image_bytes , overwrite = True )
433
- return blob_client .url
434
- return None
435
-
436
- def get_managedidentity_connectionstring (self ):
437
- if not self .account or not self .resource_group or not self .subscription_id :
438
- raise ValueError ("Account, resource group, and subscription ID must be set to generate connection string." )
439
- return f"ResourceId=/subscriptions/{ self .subscription_id } /resourceGroups/{ self .resource_group } /providers/Microsoft.Storage/storageAccounts/{ self .account } ;"
435
+ blob_service_client = await self ._get_service_client ()
436
+ container_client = blob_service_client .get_container_client (self .container )
437
+ if not await container_client .exists ():
438
+ await container_client .create_container ()
439
+ image_bytes = self .add_image_citation (image_bytes , document_filename , image_filename , image_page_num )
440
+ blob_name = f"{ self .blob_name_from_file_name (document_filename )} /page{ image_page_num } /{ image_filename } "
441
+ logger .info ("Uploading blob for document image '%s'" , blob_name )
442
+ blob_client = await container_client .upload_blob (blob_name , image_bytes , overwrite = True )
443
+ blob_service_client .close ()
444
+ return blob_client .url
440
445
441
446
async def download_blob (
442
447
self , blob_path : str , user_oid : Optional [str ] = None
@@ -445,43 +450,43 @@ async def download_blob(
445
450
raise ValueError (
446
451
"user_oid is not supported for BlobManager. Use AdlsBlobManager for user-specific operations."
447
452
)
448
-
449
- async with BlobServiceClient (
450
- account_url = self .endpoint , credential = self .credential , max_single_put_size = 4 * 1024 * 1024
451
- ) as service_client , service_client .get_container_client (self .container ) as container_client :
452
- if not await container_client .exists ():
453
- return None
454
- blob_client = container_client .get_blob_client (blob_path )
455
- try :
456
- blob = await blob_client .download_blob ()
457
- if not blob .properties :
458
- logger .warning (f"No blob exists for { blob_path } " )
459
- return None
460
- return blob
461
- except ResourceNotFoundError :
462
- logger .warning ("Blob not found: %s" , blob_path )
453
+ blob_service_client = await self ._get_service_client ()
454
+ container_client = blob_service_client .get_container_client (self .container )
455
+ if not await container_client .exists ():
456
+ return None
457
+ if len (blob_path ) == 0 :
458
+ logger .warning ("Blob path is empty" )
459
+ return None
460
+ blob_client = container_client .get_blob_client (blob_path )
461
+ try :
462
+ blob = await blob_client .download_blob ()
463
+ if not blob .properties :
464
+ logger .warning (f"No blob exists for { blob_path } " )
463
465
return None
466
+ return blob
467
+ except ResourceNotFoundError :
468
+ logger .warning ("Blob not found: %s" , blob_path )
469
+ return None
470
+ # TODO: close client properly
464
471
465
472
async def remove_blob (self , path : Optional [str ] = None ):
466
- async with BlobServiceClient (
467
- account_url = self .endpoint , credential = self .credential
468
- ) as service_client , service_client .get_container_client (self .container ) as container_client :
469
- if not await container_client .exists ():
470
- return
471
- if path is None :
472
- prefix = None
473
- blobs = container_client .list_blob_names ()
474
- else :
475
- prefix = os .path .splitext (os .path .basename (path ))[0 ]
476
- blobs = container_client .list_blob_names (name_starts_with = os .path .splitext (os .path .basename (prefix ))[0 ])
477
- async for blob_path in blobs :
478
- # This still supports PDFs split into individual pages, but we could remove in future to simplify code
479
- if (
480
- prefix is not None
481
- and (
482
- not re .match (rf"{ prefix } -\d+\.pdf" , blob_path ) or not re .match (rf"{ prefix } -\d+\.png" , blob_path )
483
- )
484
- ) or (path is not None and blob_path == os .path .basename (path )):
485
- continue
486
- logger .info ("Removing blob %s" , blob_path )
487
- await container_client .delete_blob (blob_path )
473
+ blob_service_client = await self ._get_service_client ()
474
+ container_client = blob_service_client .get_container_client (self .container )
475
+ if not await container_client .exists ():
476
+ return
477
+ if path is None :
478
+ prefix = None
479
+ blobs = container_client .list_blob_names ()
480
+ else :
481
+ prefix = os .path .splitext (os .path .basename (path ))[0 ]
482
+ blobs = container_client .list_blob_names (name_starts_with = os .path .splitext (os .path .basename (prefix ))[0 ])
483
+ async for blob_path in blobs :
484
+ # This still supports PDFs split into individual pages, but we could remove in future to simplify code
485
+ if (
486
+ prefix is not None
487
+ and (not re .match (rf"{ prefix } -\d+\.pdf" , blob_path ) or not re .match (rf"{ prefix } -\d+\.png" , blob_path ))
488
+ ) or (path is not None and blob_path == os .path .basename (path )):
489
+ continue
490
+ logger .info ("Removing blob %s" , blob_path )
491
+ await container_client .delete_blob (blob_path )
492
+ blob_service_client .close ()
0 commit comments