Skip to content

Commit 874082b

Browse files
committed
Increase coverage
1 parent da270e5 commit 874082b

File tree

2 files changed

+125
-16
lines changed

2 files changed

+125
-16
lines changed

tests/test_blob_manager.py

Lines changed: 125 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
11
import os
22
import sys
33
from tempfile import NamedTemporaryFile
4+
from unittest.mock import MagicMock
45

56
import azure.storage.blob.aio
7+
import azure.storage.filedatalake.aio
68
import pytest
79

8-
from prepdocslib.blobmanager import BlobManager
10+
# The pythonpath is configured in pyproject.toml to include app/backend
11+
from prepdocslib.blobmanager import AdlsBlobManager, BlobManager
912
from prepdocslib.listfilestrategy import File
1013

11-
from .mocks import MockAzureCredential
14+
from .mocks import MockAzureCredential, MockBlob
1215

1316

1417
@pytest.fixture
15-
def blob_manager(monkeypatch):
18+
def blob_manager():
1619
return BlobManager(
1720
endpoint=f"https://{os.environ['AZURE_STORAGE_ACCOUNT']}.blob.core.windows.net",
1821
credential=MockAzureCredential(),
@@ -23,6 +26,15 @@ def blob_manager(monkeypatch):
2326
)
2427

2528

29+
@pytest.fixture
def adls_blob_manager():
    """Provide an AdlsBlobManager configured with a test endpoint and container.

    The manager is built with the test suite's MockAzureCredential. No other
    fixture is needed, so none is requested (same as the blob_manager fixture).
    """
    return AdlsBlobManager(
        endpoint="https://test-storage-account.dfs.core.windows.net",
        container="test-storage-container",
        credential=MockAzureCredential(),
    )
36+
37+
2638
@pytest.mark.asyncio
2739
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
2840
async def test_upload_and_remove(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
@@ -219,6 +231,55 @@ async def mock_upload_blob(self, name, *args, **kwargs):
219231
assert result_url == "https://test.blob.core.windows.net/test-image-container/test-image-url"
220232

221233

234+
@pytest.mark.asyncio
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
async def test_adls_upload_document_image(monkeypatch, mock_env, adls_blob_manager):
    """Check AdlsBlobManager.upload_document_image against mocked directory and file clients.

    Verifies the arguments passed to ``upload_data``, the returned URL, and that
    omitting ``user_oid`` raises ``ValueError``.
    """
    # Test parameters
    document_filename = "test_document.pdf"
    image_bytes = b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x04\x00\x00\x00\xb5\x1c\x0c\x02\x00\x00\x00\x0bIDATx\xdac\xfc\xff\xff?\x00\x05\xfe\x02\xfe\xa3\xb8\xfb\x26\x00\x00\x00\x00IEND\xaeB`\x82"
    image_filename = "test_image.png"
    image_page_num = 0
    user_oid = "test-user-123"

    # Directory the image is expected to be stored under, and the resulting file URL
    image_directory_path = f"{user_oid}/images/{document_filename}/page_{image_page_num}"
    expected_url = f"https://test-storage-account.dfs.core.windows.net/{image_directory_path}/{image_filename}"

    # Mock the _ensure_directory method to avoid needing Azure Data Lake Storage
    mock_directory_client = MagicMock()
    mock_file_client = MagicMock()
    mock_directory_client.get_file_client.return_value = mock_file_client
    mock_file_client.url = expected_url

    async def mock_ensure_directory(self, directory_path, user_oid):
        # Only the user's root directory or the image directory may be requested
        assert directory_path in [user_oid, image_directory_path]
        return mock_directory_client

    monkeypatch.setattr(AdlsBlobManager, "_ensure_directory", mock_ensure_directory)

    # Mock file_client.upload_data to avoid actual upload
    async def mock_upload_data(data, overwrite=True, metadata=None):
        assert overwrite is True
        assert metadata == {"UploadedBy": user_oid}
        # Verify we're adding the citation to the image
        assert len(data) > len(image_bytes)  # The citation adds to the size

    mock_file_client.upload_data = mock_upload_data

    # Call the method and verify the results
    result_url = await adls_blob_manager.upload_document_image(
        document_filename, image_bytes, image_filename, image_page_num, user_oid
    )

    # Verify the URL is correct and unquoted
    assert result_url == expected_url

    # Test with missing user_oid
    with pytest.raises(ValueError, match="user_oid must be provided for user-specific operations."):
        await adls_blob_manager.upload_document_image(document_filename, image_bytes, image_filename, image_page_num)
281+
282+
222283
def test_get_managed_identity_connection_string(mock_env, blob_manager):
223284
assert (
224285
blob_manager.get_managedidentity_connectionstring()
@@ -311,3 +372,64 @@ async def test_download_blob_with_user_oid(monkeypatch, mock_env, blob_manager):
311372
await blob_manager.download_blob("test_document.pdf", user_oid="user123")
312373

313374
assert "user_oid is not supported for BlobManager" in str(excinfo.value)
375+
376+
377+
@pytest.mark.asyncio
async def test_adls_download_blob_permission_denied(mock_env, adls_blob_manager):
    """Test that AdlsBlobManager.download_blob returns None when a user tries to access a blob that doesn't belong to them."""
    user_oid = "test-user-123"
    other_user_oid = "another-user-456"
    blob_path = f"{other_user_oid}/document.pdf"  # Path belonging to another user

    # Attempt to download a blob stored under a different user's directory
    result = await adls_blob_manager.download_blob(blob_path, user_oid)

    # Verify the blob access is denied and the method returns None
    assert result is None

    # Also test the case where no user_oid is provided
    result = await adls_blob_manager.download_blob(blob_path, None)
    assert result is None
393+
394+
395+
@pytest.mark.asyncio
async def test_adls_download_blob_with_permission(monkeypatch, mock_data_lake_service_client, adls_blob_manager):
    """Test that AdlsBlobManager.download_blob works when a user has permission to access a blob."""

    # Names of the files whose download was requested through the mock file client
    fetched_paths = []

    class MockFileClient:
        """Stand-in file client that records its path when downloaded."""

        def __init__(self, path_name):
            self.path_name = path_name

        async def download_file(self):
            fetched_paths.append(self.path_name)
            return MockBlob()

    class MockDirectoryClient:
        """Stand-in directory client handed out by the patched get_directory_client."""

        async def get_directory_properties(self):
            # Dummy properties, so the directory appears to exist
            return {"name": "test-directory"}

        async def get_access_control(self):
            # The owner is the same OID that is passed to download_blob below
            return {"owner": "OID_X"}

        def get_file_client(self, filename):
            # Hand back a recording file client for the requested filename
            return MockFileClient(filename)

    def fake_get_directory_client(*args, **kwargs):
        # Ignore whatever directory is requested and always return the mock
        return MockDirectoryClient()

    # Route every directory lookup on the file system client to the mock
    monkeypatch.setattr(
        azure.storage.filedatalake.aio.FileSystemClient,
        "get_directory_client",
        fake_get_directory_client,
    )

    content, properties = await adls_blob_manager.download_blob("OID_X/document.pdf", "OID_X")

    assert content.startswith(b"\x89PNG\r\n\x1a\n")
    assert properties["content_settings"]["content_type"] == "application/octet-stream"
    # Only the filename part of the blob path is passed to get_file_client
    assert fetched_paths == ["document.pdf"]

tests/test_content_file.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,6 @@ async def download_file(self):
125125
lambda *args, **kwargs: MockDirectoryClient(),
126126
)
127127

128-
# Original mock for download_file (keep for backward compatibility)
129-
async def mock_download_file(self):
130-
downloaded_files.append(self.path_name)
131-
return MockBlob()
132-
133-
monkeypatch.setattr(azure.storage.filedatalake.aio.DataLakeFileClient, "download_file", mock_download_file)
134-
135128
response = await auth_client.get("/content/userdoc.pdf", headers={"Authorization": "Bearer test"})
136129
assert response.status_code == 200
137130
assert len(downloaded_files) == 1
@@ -179,11 +172,5 @@ async def download_file(self):
179172
lambda *args, **kwargs: MockDirectoryClient(),
180173
)
181174

182-
# Original mock for download_file (keep for backward compatibility)
183-
async def mock_download_file(self):
184-
raise ResourceNotFoundError(MockAiohttpClientResponse404("userdoc.pdf", b""))
185-
186-
monkeypatch.setattr(azure.storage.filedatalake.aio.DataLakeFileClient, "download_file", mock_download_file)
187-
188175
response = await auth_client.get("/content/userdoc.pdf", headers={"Authorization": "Bearer test"})
189176
assert response.status_code == 404

0 commit comments

Comments
 (0)