1
1
import os
2
2
import sys
3
3
from tempfile import NamedTemporaryFile
4
+ from unittest .mock import MagicMock
4
5
5
6
import azure .storage .blob .aio
7
+ import azure .storage .filedatalake .aio
6
8
import pytest
7
9
8
- from prepdocslib .blobmanager import BlobManager
10
+ # The pythonpath is configured in pyproject.toml to include app/backend
11
+ from prepdocslib .blobmanager import AdlsBlobManager , BlobManager
9
12
from prepdocslib .listfilestrategy import File
10
13
11
- from .mocks import MockAzureCredential
14
+ from .mocks import MockAzureCredential , MockBlob
12
15
13
16
14
17
@pytest .fixture
15
- def blob_manager (monkeypatch ):
18
+ def blob_manager ():
16
19
return BlobManager (
17
20
endpoint = f"https://{ os .environ ['AZURE_STORAGE_ACCOUNT' ]} .blob.core.windows.net" ,
18
21
credential = MockAzureCredential (),
@@ -23,6 +26,15 @@ def blob_manager(monkeypatch):
23
26
)
24
27
25
28
29
@pytest.fixture
def adls_blob_manager():
    """Return an AdlsBlobManager pointed at a placeholder storage account.

    The manager is built with a fixed Data Lake (dfs) endpoint, a fixed
    container name, and a MockAzureCredential in place of a real credential.
    No other fixtures are requested because none are used here.
    """
    return AdlsBlobManager(
        endpoint="https://test-storage-account.dfs.core.windows.net",
        container="test-storage-container",
        credential=MockAzureCredential(),
    )
36
+
37
+
26
38
@pytest .mark .asyncio
27
39
@pytest .mark .skipif (sys .version_info .minor < 10 , reason = "requires Python 3.10 or higher" )
28
40
async def test_upload_and_remove (monkeypatch , mock_env , mock_blob_container_client_exists , blob_manager ):
@@ -219,6 +231,55 @@ async def mock_upload_blob(self, name, *args, **kwargs):
219
231
assert result_url == "https://test.blob.core.windows.net/test-image-container/test-image-url"
220
232
221
233
234
@pytest.mark.asyncio
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
async def test_adls_upload_document_image(monkeypatch, mock_env, adls_blob_manager):
    """Check AdlsBlobManager.upload_document_image against mocked ADLS clients.

    The test replaces ``AdlsBlobManager._ensure_directory`` and the file
    client's ``upload_data`` with local coroutines, then verifies that:

    * the directory requested is either the user's root or the image directory,
    * the upload is performed with ``overwrite=True`` and ``UploadedBy`` metadata,
    * the uploaded payload is larger than the input image,
    * the returned URL equals the file client's URL,
    * omitting ``user_oid`` raises ``ValueError``.
    """
    # Test parameters
    document_filename = "test_document.pdf"
    image_bytes = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x04\x00\x00\x00\xb5\x1c\x0c\x02\x00\x00\x00\x0bIDATx\xdac\xfc\xff\xff?\x00\x05\xfe\x02\xfe\xa3\xb8\xfb\x26\x00\x00\x00\x00IEND\xaeB`\x82"
    image_filename = "test_image.png"
    image_page_num = 0
    user_oid = "test-user-123"

    # Directory the image is expected to land in, and the URL the mocked file client reports
    image_directory_path = f"{user_oid}/images/{document_filename}/page_{image_page_num}"
    expected_url = f"https://test-storage-account.dfs.core.windows.net/{image_directory_path}/{image_filename}"

    # Stand-in directory/file clients so no Azure Data Lake Storage account is needed
    mock_directory_client = MagicMock()
    mock_file_client = MagicMock()
    mock_directory_client.get_file_client.return_value = mock_file_client
    mock_file_client.url = expected_url

    async def mock_ensure_directory(self, directory_path, user_oid):
        # Only the user's root directory and the image directory may be requested
        assert directory_path in [user_oid, image_directory_path]
        return mock_directory_client

    monkeypatch.setattr(AdlsBlobManager, "_ensure_directory", mock_ensure_directory)

    # Record every upload so the test fails if upload_data is never awaited;
    # otherwise the assertions inside the mock could be skipped silently.
    uploaded_payloads = []

    async def mock_upload_data(data, overwrite=True, metadata=None):
        assert overwrite is True
        assert metadata == {"UploadedBy": user_oid}
        # Verify we're adding the citation to the image
        assert len(data) > len(image_bytes)  # The citation adds to the size
        uploaded_payloads.append(data)

    mock_file_client.upload_data = mock_upload_data

    # Call the method and verify the results
    result_url = await adls_blob_manager.upload_document_image(
        document_filename, image_bytes, image_filename, image_page_num, user_oid
    )

    # Verify the URL is correct and unquoted
    assert result_url == expected_url
    # Verify the image was actually handed to the file client
    assert uploaded_payloads, "upload_data was never awaited"

    # Test with missing user_oid
    with pytest.raises(ValueError, match="user_oid must be provided for user-specific operations."):
        await adls_blob_manager.upload_document_image(document_filename, image_bytes, image_filename, image_page_num)
281
+
282
+
222
283
def test_get_managed_identity_connection_string (mock_env , blob_manager ):
223
284
assert (
224
285
blob_manager .get_managedidentity_connectionstring ()
@@ -311,3 +372,64 @@ async def test_download_blob_with_user_oid(monkeypatch, mock_env, blob_manager):
311
372
await blob_manager .download_blob ("test_document.pdf" , user_oid = "user123" )
312
373
313
374
assert "user_oid is not supported for BlobManager" in str (excinfo .value )
375
+
376
+
377
@pytest.mark.asyncio
async def test_adls_download_blob_permission_denied(mock_env, adls_blob_manager):
    """Test that AdlsBlobManager.download_blob returns None for a blob under another user's path.

    Two cases are covered: a caller whose user_oid differs from the leading
    path segment, and a caller that supplies no user_oid at all.
    """
    user_oid = "test-user-123"
    other_user_oid = "another-user-456"
    blob_path = f"{other_user_oid}/document.pdf"  # Path belonging to another user

    # A different user asks for the blob: access is denied and None is returned
    result = await adls_blob_manager.download_blob(blob_path, user_oid)
    assert result is None

    # No user_oid is provided: the result is also None
    result = await adls_blob_manager.download_blob(blob_path, None)
    assert result is None
393
+
394
+
395
@pytest.mark.asyncio
async def test_adls_download_blob_with_permission(monkeypatch, mock_data_lake_service_client, adls_blob_manager):
    """Test that AdlsBlobManager.download_blob works when a user has permission to access a blob.

    ``FileSystemClient.get_directory_client`` is patched to return a local
    directory client whose reported owner equals the requesting user_oid, and
    whose file client records each download and returns a ``MockBlob``.
    """
    # The same OID is used as the directory owner, the path prefix and the caller
    owner_oid = "OID_X"

    # Track downloaded files
    downloaded_files = []

    class MockFileClient:
        def __init__(self, path_name):
            self.path_name = path_name

        async def download_file(self):
            # Record which file was requested before handing back the fake blob
            downloaded_files.append(self.path_name)
            return MockBlob()

    # Mock directory client for _ensure_directory method
    class MockDirectoryClient:
        async def get_directory_properties(self):
            # Return dummy properties to indicate directory exists
            return {"name": "test-directory"}

        async def get_access_control(self):
            # Report an owner that matches the user_oid passed to download_blob below
            return {"owner": owner_oid}

        def get_file_client(self, filename):
            # Return a file client for the given filename
            return MockFileClient(filename)

    # Mock get_directory_client to return our MockDirectoryClient
    monkeypatch.setattr(
        azure.storage.filedatalake.aio.FileSystemClient,
        "get_directory_client",
        lambda *args, **kwargs: MockDirectoryClient(),
    )

    content, properties = await adls_blob_manager.download_blob(f"{owner_oid}/document.pdf", owner_oid)

    # The content must begin with the 8-byte PNG signature
    assert content.startswith(b"\x89PNG\r\n\x1a\n")
    assert properties["content_settings"]["content_type"] == "application/octet-stream"
    # Only the file name, without the user directory prefix, reaches the file client
    assert downloaded_files == ["document.pdf"]
0 commit comments