Skip to content

Commit 952fd44

Browse files
committed
Add more tests for blobmanger
1 parent 0a097df commit 952fd44

File tree

6 files changed

+143
-35
lines changed

6 files changed

+143
-35
lines changed

app/backend/app.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -357,13 +357,17 @@ async def upload(auth_claims: dict[str, Any]):
357357
if "file" not in request_files:
358358
return jsonify({"message": "No file part in the request", "status": "failed"}), 400
359359

360-
user_oid = auth_claims["oid"]
361-
file = request_files.getlist("file")[0]
362-
adls_manager: AdlsBlobManager = current_app.config[CONFIG_USER_BLOB_MANAGER]
363-
file_url = await adls_manager.upload_blob(file, file.filename, user_oid)
364-
ingester: UploadUserFileStrategy = current_app.config[CONFIG_INGESTER]
365-
await ingester.add_file(File(content=file, url=file_url, acls={"oids": [user_oid]}), user_oid=user_oid)
366-
return jsonify({"message": "File uploaded successfully"}), 200
360+
try:
361+
user_oid = auth_claims["oid"]
362+
file = request_files.getlist("file")[0]
363+
adls_manager: AdlsBlobManager = current_app.config[CONFIG_USER_BLOB_MANAGER]
364+
file_url = await adls_manager.upload_blob(file, file.filename, user_oid)
365+
ingester: UploadUserFileStrategy = current_app.config[CONFIG_INGESTER]
366+
await ingester.add_file(File(content=file, url=file_url, acls={"oids": [user_oid]}), user_oid=user_oid)
367+
return jsonify({"message": "File uploaded successfully"}), 200
368+
except Exception as error:
369+
current_app.logger.error("Error uploading file: %s", error)
370+
return jsonify({"message": "Error uploading file, check server logs for details.", "status": "failed"}), 500
367371

368372

369373
@bp.post("/delete_uploaded")

app/backend/prepdocslib/blobmanager.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,6 @@ def sourcepage_from_file_page(cls, filename, page=0) -> str:
3838
else:
3939
return os.path.basename(filename)
4040

41-
@classmethod
42-
def blob_image_name_from_file_page(cls, filename, page=0) -> str:
43-
return os.path.splitext(os.path.basename(filename))[0] + f"-{page+1}" + ".png"
44-
4541
@classmethod
4642
def blob_name_from_file_name(cls, filename) -> str:
4743
return os.path.basename(filename)

tests/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,14 @@ async def mock_exists(*args, **kwargs):
308308
monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.exists", mock_exists)
309309

310310

311+
@pytest.fixture
312+
def mock_blob_container_client_does_not_exist(monkeypatch):
313+
async def mock_exists(*args, **kwargs):
314+
return False
315+
316+
monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.exists", mock_exists)
317+
318+
311319
envs = [
312320
{
313321
"OPENAI_HOST": "openai",

tests/test_blob_manager.py

Lines changed: 92 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,12 @@ def blob_manager(monkeypatch):
2525

2626
@pytest.mark.asyncio
2727
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
28-
async def test_upload_and_remove(monkeypatch, mock_env, blob_manager):
28+
async def test_upload_and_remove(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
2929
with NamedTemporaryFile(suffix=".pdf") as temp_file:
3030
f = File(temp_file.file)
3131
filename = os.path.basename(f.content.name)
3232

33-
# Set up mocks used by upload_blob
34-
async def mock_exists(*args, **kwargs):
35-
return True
36-
37-
monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.exists", mock_exists)
38-
33+
# Set up mock of upload_blob
3934
async def mock_upload_blob(self, name, *args, **kwargs):
4035
assert name == filename
4136
return azure.storage.blob.aio.BlobClient.from_blob_url(
@@ -78,17 +73,12 @@ async def mock_delete_blob(self, name, *args, **kwargs):
7873

7974
@pytest.mark.asyncio
8075
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
81-
async def test_upload_and_remove_all(monkeypatch, mock_env, blob_manager):
76+
async def test_upload_and_remove_all(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
8277
with NamedTemporaryFile(suffix=".pdf") as temp_file:
8378
f = File(temp_file.file)
8479
filename = os.path.basename(f.content.name)
8580

86-
# Set up mocks used by upload_blob
87-
async def mock_exists(*args, **kwargs):
88-
return True
89-
90-
monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.exists", mock_exists)
91-
81+
# Set up mock of upload_blob
9282
async def mock_upload_blob(self, name, *args, **kwargs):
9383
assert name == filename
9484
return azure.storage.blob.aio.BlobClient.from_blob_url(
@@ -161,12 +151,9 @@ async def mock_upload_blob(self, name, *args, **kwargs):
161151

162152
@pytest.mark.asyncio
163153
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
164-
async def test_dont_remove_if_no_container(monkeypatch, mock_env, blob_manager):
165-
async def mock_exists(*args, **kwargs):
166-
return False
167-
168-
monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.exists", mock_exists)
169-
154+
async def test_dont_remove_if_no_container(
155+
monkeypatch, mock_env, mock_blob_container_client_does_not_exist, blob_manager
156+
):
170157
async def mock_delete_blob(*args, **kwargs):
171158
assert False, "delete_blob() shouldn't have been called"
172159

@@ -177,7 +164,8 @@ async def mock_delete_blob(*args, **kwargs):
177164

178165
@pytest.mark.asyncio
179166
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
180-
async def test_upload_document_image(monkeypatch, mock_env):
167+
@pytest.mark.parametrize("directory_exists", [True, False])
168+
async def test_upload_document_image(monkeypatch, mock_env, directory_exists):
181169
# Create a blob manager with an image container
182170
blob_manager = BlobManager(
183171
endpoint=f"https://{os.environ['AZURE_STORAGE_ACCOUNT']}.blob.core.windows.net",
@@ -202,10 +190,15 @@ async def test_upload_document_image(monkeypatch, mock_env):
202190

203191
# Mock container client operations
204192
async def mock_exists(*args, **kwargs):
205-
return True
193+
return directory_exists
206194

207195
monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.exists", mock_exists)
208196

197+
async def mock_create_container(*args, **kwargs):
198+
return
199+
200+
monkeypatch.setattr("azure.storage.blob.aio.ContainerClient.create_container", mock_create_container)
201+
209202
expected_blob_name = f"{os.path.basename(temp_file.name)}/page{image_page_num}/{image_filename}"
210203

211204
async def mock_upload_blob(self, name, *args, **kwargs):
@@ -241,3 +234,80 @@ def test_sourcepage_from_file_page():
241234
def test_blob_name_from_file_name():
242235
assert BlobManager.blob_name_from_file_name("tmp/test.pdf") == "test.pdf"
243236
assert BlobManager.blob_name_from_file_name("tmp/test.html") == "test.html"
237+
238+
239+
@pytest.mark.asyncio
240+
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
241+
async def test_download_blob(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
242+
# Mock the download_blob method
243+
test_content = b"test content bytes"
244+
245+
class MockDownloadResponse:
246+
def __init__(self):
247+
# Create properties with content_settings
248+
class ContentSettings:
249+
content_type = "application/pdf"
250+
251+
class Properties:
252+
def __init__(self):
253+
self.content_settings = ContentSettings()
254+
255+
self.properties = Properties()
256+
257+
async def readall(self):
258+
return test_content
259+
260+
async def mock_download_blob(*args, **kwargs):
261+
return MockDownloadResponse()
262+
263+
monkeypatch.setattr("azure.storage.blob.aio.BlobClient.download_blob", mock_download_blob)
264+
265+
result = await blob_manager.download_blob("test_document.pdf")
266+
267+
assert result is not None
268+
content, properties = result
269+
assert content == test_content
270+
assert properties["content_settings"]["content_type"] == "application/pdf"
271+
272+
273+
@pytest.mark.asyncio
274+
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
275+
async def test_download_blob_not_found(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
276+
# Mock the download_blob method to raise ResourceNotFoundError
277+
async def mock_download_blob(*args, **kwargs):
278+
from azure.core.exceptions import ResourceNotFoundError
279+
280+
raise ResourceNotFoundError("Blob not found")
281+
282+
monkeypatch.setattr("azure.storage.blob.aio.BlobClient.download_blob", mock_download_blob)
283+
284+
result = await blob_manager.download_blob("nonexistent.pdf")
285+
286+
assert result is None
287+
288+
289+
@pytest.mark.asyncio
290+
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
291+
async def test_download_blob_container_not_exist(
292+
monkeypatch, mock_env, mock_blob_container_client_does_not_exist, blob_manager
293+
):
294+
result = await blob_manager.download_blob("test_document.pdf")
295+
296+
assert result is None
297+
298+
299+
@pytest.mark.asyncio
300+
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
301+
async def test_download_blob_empty_path(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
302+
result = await blob_manager.download_blob("")
303+
304+
assert result is None
305+
306+
307+
@pytest.mark.asyncio
308+
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
309+
async def test_download_blob_with_user_oid(monkeypatch, mock_env, blob_manager):
310+
with pytest.raises(ValueError) as excinfo:
311+
await blob_manager.download_blob("test_document.pdf", user_oid="user123")
312+
313+
assert "user_oid is not supported for BlobManager" in str(excinfo.value)

tests/test_upload.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from .mocks import MockClient, MockEmbeddingsClient
1919

2020

21-
# parameterize for directory existing or not
2221
@pytest.mark.asyncio
2322
@pytest.mark.parametrize("directory_exists", [True, False])
2423
async def test_upload_file(auth_client, monkeypatch, mock_data_lake_service_client, directory_exists):
@@ -115,6 +114,38 @@ async def mock_upload_documents(self, documents):
115114
assert directory_created[0] == (not directory_exists)
116115

117116

117+
@pytest.mark.asyncio
118+
async def test_upload_file_error_wrong_directory_owner(auth_client, monkeypatch, mock_data_lake_service_client):
119+
120+
# Create a mock class for DataLakeDirectoryClient that includes the _client attribute
121+
class MockDataLakeDirectoryClient:
122+
def __init__(self, *args, **kwargs):
123+
self._client = object()
124+
self.url = "https://test.blob.core.windows.net/container/path"
125+
126+
async def get_directory_properties(self, *args, **kwargs):
127+
return {"name": "test-directory"}
128+
129+
async def get_access_control(self, *args, **kwargs):
130+
return {"owner": "OID_Y"}
131+
132+
# Replace the DataLakeDirectoryClient with our mock
133+
monkeypatch.setattr(
134+
azure.storage.filedatalake.aio.FileSystemClient,
135+
"get_directory_client",
136+
lambda *args, **kwargs: MockDataLakeDirectoryClient(),
137+
)
138+
139+
response = await auth_client.post(
140+
"/upload",
141+
headers={"Authorization": "Bearer test"},
142+
files={"file": FileStorage(BytesIO(b"foo;bar"), filename="a.txt")},
143+
)
144+
message = (await response.get_json())["message"]
145+
assert message == "Error uploading file, check server logs for details."
146+
assert response.status_code == 500
147+
148+
118149
@pytest.mark.asyncio
119150
async def test_list_uploaded(auth_client, monkeypatch, mock_data_lake_service_client):
120151
response = await auth_client.get("/list_uploaded", headers={"Authorization": "Bearer test"})

todo.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ TODO:
44
* DocIntelligence does not support JSON/MD - note in docs
55
* Verbalization strategy from Sri's skillset
66
* Filter images to match page numbers, using shaping + conditional skills
7-
* Update all TODOs in the code/docs
87
* Fix/add unit tests - check coverage
98

109

0 commit comments

Comments
 (0)