Skip to content

Commit 6acc94a

Browse files
Commit message: Increase test coverage
1 parent 874082b commit 6acc94a

File tree

3 files changed: 202 additions (+202) and 13 deletions (-13).

app/backend/prepdocslib/listfilestrategy.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,7 @@ def filename(self) -> str:
5353
if content_name and content_name != "file":
5454
return os.path.basename(content_name)
5555

56-
# If we couldn't determine a name, return a default
57-
return "unknown"
56+
raise ValueError("The content object does not have a filename or name attribute. ")
5857

5958
def file_extension(self):
6059
return os.path.splitext(self.filename())[1]

tests/test_blob_manager.py

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def adls_blob_manager(monkeypatch):
3636

3737

3838
@pytest.mark.asyncio
39-
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
39+
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher (due to NamedTemporaryFile)")
4040
async def test_upload_and_remove(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
4141
with NamedTemporaryFile(suffix=".pdf") as temp_file:
4242
f = File(temp_file.file)
@@ -84,7 +84,7 @@ async def mock_delete_blob(self, name, *args, **kwargs):
8484

8585

8686
@pytest.mark.asyncio
87-
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
87+
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher (due to NamedTemporaryFile)")
8888
async def test_upload_and_remove_all(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
8989
with NamedTemporaryFile(suffix=".pdf") as temp_file:
9090
f = File(temp_file.file)
@@ -132,7 +132,7 @@ async def mock_delete_blob(self, name, *args, **kwargs):
132132

133133

134134
@pytest.mark.asyncio
135-
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
135+
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher (due to NamedTemporaryFile)")
136136
async def test_create_container_upon_upload(monkeypatch, mock_env, blob_manager):
137137
with NamedTemporaryFile(suffix=".pdf") as temp_file:
138138
f = File(temp_file.file)
@@ -162,7 +162,6 @@ async def mock_upload_blob(self, name, *args, **kwargs):
162162

163163

164164
@pytest.mark.asyncio
165-
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
166165
async def test_dont_remove_if_no_container(
167166
monkeypatch, mock_env, mock_blob_container_client_does_not_exist, blob_manager
168167
):
@@ -175,7 +174,7 @@ async def mock_delete_blob(*args, **kwargs):
175174

176175

177176
@pytest.mark.asyncio
178-
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
177+
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher (due to NamedTemporaryFile)")
179178
@pytest.mark.parametrize("directory_exists", [True, False])
180179
async def test_upload_document_image(monkeypatch, mock_env, directory_exists):
181180
# Create a blob manager with an image container
@@ -232,7 +231,6 @@ async def mock_upload_blob(self, name, *args, **kwargs):
232231

233232

234233
@pytest.mark.asyncio
235-
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
236234
async def test_adls_upload_document_image(monkeypatch, mock_env, adls_blob_manager):
237235

238236
# Test parameters
@@ -298,7 +296,6 @@ def test_blob_name_from_file_name():
298296

299297

300298
@pytest.mark.asyncio
301-
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
302299
async def test_download_blob(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
303300
# Mock the download_blob method
304301
test_content = b"test content bytes"
@@ -332,7 +329,6 @@ async def mock_download_blob(*args, **kwargs):
332329

333330

334331
@pytest.mark.asyncio
335-
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
336332
async def test_download_blob_not_found(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
337333
# Mock the download_blob method to raise ResourceNotFoundError
338334
async def mock_download_blob(*args, **kwargs):
@@ -348,7 +344,6 @@ async def mock_download_blob(*args, **kwargs):
348344

349345

350346
@pytest.mark.asyncio
351-
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
352347
async def test_download_blob_container_not_exist(
353348
monkeypatch, mock_env, mock_blob_container_client_does_not_exist, blob_manager
354349
):
@@ -358,22 +353,44 @@ async def test_download_blob_container_not_exist(
358353

359354

360355
@pytest.mark.asyncio
361-
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
362356
async def test_download_blob_empty_path(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
363357
result = await blob_manager.download_blob("")
364358

365359
assert result is None
366360

367361

368362
@pytest.mark.asyncio
369-
@pytest.mark.skipif(sys.version_info.minor < 10, reason="requires Python 3.10 or higher")
370363
async def test_download_blob_with_user_oid(monkeypatch, mock_env, blob_manager):
371364
with pytest.raises(ValueError) as excinfo:
372365
await blob_manager.download_blob("test_document.pdf", user_oid="user123")
373366

374367
assert "user_oid is not supported for BlobManager" in str(excinfo.value)
375368

376369

370+
@pytest.mark.asyncio
371+
async def test_download_blob_properties_none(monkeypatch, mock_env, mock_blob_container_client_exists, blob_manager):
372+
"""Test that BlobManager.download_blob returns None when download_response.properties is None."""
373+
374+
# Mock the download_blob method with properties=None
375+
class MockDownloadResponseWithNoProperties:
376+
def __init__(self):
377+
self.properties = None # This is the condition we're testing
378+
379+
async def readall(self):
380+
return b"This content should not be returned"
381+
382+
async def mock_download_blob(*args, **kwargs):
383+
return MockDownloadResponseWithNoProperties()
384+
385+
monkeypatch.setattr("azure.storage.blob.aio.BlobClient.download_blob", mock_download_blob)
386+
387+
# Call the download_blob method
388+
result = await blob_manager.download_blob("test_document.pdf")
389+
390+
# Verify the result is None due to properties being None
391+
assert result is None
392+
393+
377394
@pytest.mark.asyncio
378395
async def test_adls_download_blob_permission_denied(monkeypatch, mock_env, adls_blob_manager):
379396
"""Test that AdlsBlobManager.download_blob returns None when a user tries to access a blob that doesn't belong to them."""

tests/test_searchmanager.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,3 +496,176 @@ async def mock_delete_documents(self, documents):
496496
assert len(searched_filters) == 1, "It should have searched once"
497497
assert searched_filters[0] == "sourcefile eq 'foo.pdf'"
498498
assert len(deleted_documents) == 0, "It should have deleted no documents"
499+
500+
501+
@pytest.mark.asyncio
502+
async def test_create_index_with_search_images(monkeypatch, search_info):
503+
"""Test that SearchManager correctly creates an index with image search capabilities."""
504+
indexes = []
505+
506+
async def mock_create_index(self, index):
507+
indexes.append(index)
508+
509+
async def mock_list_index_names(self):
510+
for index in []:
511+
yield index
512+
513+
monkeypatch.setattr(SearchIndexClient, "create_index", mock_create_index)
514+
monkeypatch.setattr(SearchIndexClient, "list_index_names", mock_list_index_names)
515+
516+
# Create a SearchInfo with an Azure Vision endpoint
517+
search_info_with_vision = SearchInfo(
518+
endpoint=search_info.endpoint,
519+
credential=search_info.credential,
520+
index_name=search_info.index_name,
521+
azure_vision_endpoint="https://testvision.cognitiveservices.azure.com/",
522+
)
523+
524+
# Create a SearchManager with search_images=True
525+
manager = SearchManager(search_info_with_vision, search_images=True, field_name_embedding="embedding")
526+
await manager.create_index()
527+
528+
# Verify the index was created correctly
529+
assert len(indexes) == 1, "It should have created one index"
530+
assert indexes[0].name == "test"
531+
532+
# Find the "images" field in the index
533+
images_field = next((field for field in indexes[0].fields if field.name == "images"), None)
534+
assert images_field is not None, "The index should include an 'images' field"
535+
536+
# Verify the "images" field structure
537+
assert images_field.type.startswith(
538+
"Collection(Edm.ComplexType)"
539+
), "The 'images' field should be a collection of complex type"
540+
541+
# Check subfields of the images field
542+
image_subfields = images_field.fields
543+
assert len(image_subfields) == 4, "The 'images' field should have 4 subfields"
544+
545+
# Verify specific subfields
546+
assert any(field.name == "embedding" for field in image_subfields), "Should have an 'embedding' subfield"
547+
assert any(field.name == "url" for field in image_subfields), "Should have a 'url' subfield"
548+
assert any(field.name == "description" for field in image_subfields), "Should have a 'description' subfield"
549+
assert any(field.name == "boundingbox" for field in image_subfields), "Should have a 'boundingbox' subfield"
550+
551+
# Verify vector search configuration
552+
vectorizers = indexes[0].vector_search.vectorizers
553+
assert any(
554+
v.vectorizer_name == "images-vision-vectorizer" for v in vectorizers
555+
), "Should have an AI Vision vectorizer"
556+
557+
# Verify vector search profile
558+
profiles = indexes[0].vector_search.profiles
559+
assert any(p.name == "images_embedding_profile" for p in profiles), "Should have an image embedding profile"
560+
561+
562+
@pytest.mark.asyncio
563+
async def test_create_index_with_search_images_no_endpoint(monkeypatch, search_info):
564+
"""Test that SearchManager raises an error when search_images=True but no Azure Vision endpoint is provided."""
565+
indexes = []
566+
567+
async def mock_create_index(self, index):
568+
indexes.append(index)
569+
570+
async def mock_list_index_names(self):
571+
for index in []:
572+
yield index
573+
574+
monkeypatch.setattr(SearchIndexClient, "create_index", mock_create_index)
575+
monkeypatch.setattr(SearchIndexClient, "list_index_names", mock_list_index_names)
576+
577+
# Create a SearchManager with search_images=True but no Azure Vision endpoint
578+
manager = SearchManager(
579+
search_info, # search_info doesn't have azure_vision_endpoint
580+
search_images=True,
581+
field_name_embedding="embedding",
582+
)
583+
584+
# Verify that create_index raises a ValueError
585+
with pytest.raises(ValueError) as excinfo:
586+
await manager.create_index()
587+
588+
# Check the error message
589+
assert "Azure AI Vision endpoint must be provided to use image embeddings" in str(excinfo.value)
590+
591+
592+
@pytest.mark.asyncio
593+
async def test_create_index_with_search_images_and_embeddings(monkeypatch, search_info):
594+
"""Test that SearchManager correctly creates an index with both image search and embeddings."""
595+
indexes = []
596+
597+
async def mock_create_index(self, index):
598+
indexes.append(index)
599+
600+
async def mock_list_index_names(self):
601+
for index in []:
602+
yield index
603+
604+
async def mock_create_client(*args, **kwargs):
605+
return MockClient(
606+
embeddings_client=MockEmbeddingsClient(
607+
create_embedding_response=openai.types.CreateEmbeddingResponse(
608+
object="list",
609+
data=[
610+
openai.types.Embedding(
611+
embedding=[0.1, 0.2, 0.3],
612+
index=0,
613+
object="embedding",
614+
)
615+
],
616+
model=MOCK_EMBEDDING_MODEL_NAME,
617+
usage=Usage(prompt_tokens=8, total_tokens=8),
618+
)
619+
)
620+
)
621+
622+
monkeypatch.setattr(SearchIndexClient, "create_index", mock_create_index)
623+
monkeypatch.setattr(SearchIndexClient, "list_index_names", mock_list_index_names)
624+
625+
# Create a SearchInfo with an Azure Vision endpoint
626+
search_info_with_vision = SearchInfo(
627+
endpoint=search_info.endpoint,
628+
credential=search_info.credential,
629+
index_name=search_info.index_name,
630+
azure_vision_endpoint="https://testvision.cognitiveservices.azure.com/",
631+
)
632+
633+
# Create embeddings service
634+
embeddings = AzureOpenAIEmbeddingService(
635+
open_ai_service="x",
636+
open_ai_deployment="x",
637+
open_ai_model_name=MOCK_EMBEDDING_MODEL_NAME,
638+
open_ai_dimensions=MOCK_EMBEDDING_DIMENSIONS,
639+
open_ai_api_version="test-api-version",
640+
credential=AzureKeyCredential("test"),
641+
disable_batch=True,
642+
)
643+
monkeypatch.setattr(embeddings, "create_client", mock_create_client)
644+
645+
# Create a SearchManager with both search_images and embeddings
646+
manager = SearchManager(
647+
search_info_with_vision, search_images=True, embeddings=embeddings, field_name_embedding="embedding3"
648+
)
649+
await manager.create_index()
650+
651+
# Verify the index was created correctly
652+
assert len(indexes) == 1, "It should have created one index"
653+
654+
# Find both the embeddings field and images field
655+
embedding_field = next((field for field in indexes[0].fields if field.name == "embedding3"), None)
656+
images_field = next((field for field in indexes[0].fields if field.name == "images"), None)
657+
658+
assert embedding_field is not None, "The index should include an 'embedding3' field"
659+
assert images_field is not None, "The index should include an 'images' field"
660+
661+
# Verify vector search configuration includes both text and image vectorizers
662+
vectorizers = indexes[0].vector_search.vectorizers
663+
assert any(
664+
v.vectorizer_name == "images-vision-vectorizer" for v in vectorizers
665+
), "Should have an AI Vision vectorizer"
666+
assert any(hasattr(v, "ai_services_vision_parameters") for v in vectorizers), "Should have AI vision parameters"
667+
668+
# Verify vector search profiles for both text and images
669+
profiles = indexes[0].vector_search.profiles
670+
assert any(p.name == "images_embedding_profile" for p in profiles), "Should have an image embedding profile"
671+
assert any(p.name == "embedding3-profile" for p in profiles), "Should have a text embedding profile"

0 commit comments

Comments
 (0)