Skip to content

Commit 46bbaf7

Browse files
committed
Cleanup function test
1 parent 9fcaa55 commit 46bbaf7

File tree

1 file changed

+70
-100
lines changed

1 file changed

+70
-100
lines changed

tests/test_function_apps.py

Lines changed: 70 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import base64
2-
import importlib
32
import json
3+
import logging
44
from collections.abc import Iterable
55
from dataclasses import dataclass, field
66
from typing import Any
@@ -10,6 +10,9 @@
1010

1111
from document_extractor import function_app as document_extractor
1212
from figure_processor import function_app as figure_processor
13+
from prepdocslib.fileprocessor import FileProcessor
14+
from prepdocslib.textparser import TextParser
15+
from prepdocslib.textsplitter import SentenceTextSplitter
1316
from tests.mocks import TEST_PNG_BYTES
1417
from text_processor import function_app as text_processor
1518

@@ -75,8 +78,6 @@ async def parse(self, content: Any):
7578
page = document_extractor.Page(page_num=0, offset=0, text=page_text, images=[figure])
7679

7780
# Set up mock file processors and settings
78-
from prepdocslib.fileprocessor import FileProcessor
79-
8081
mock_file_processors = {
8182
".pdf": FileProcessor(StubParser([page]), None),
8283
}
@@ -123,8 +124,6 @@ async def parse(self, content: Any):
123124

124125
@pytest.mark.asyncio
125126
async def test_document_extractor_requires_single_record(monkeypatch: pytest.MonkeyPatch) -> None:
126-
from prepdocslib.fileprocessor import FileProcessor
127-
128127
mock_settings = document_extractor.GlobalSettings(
129128
file_processors={".pdf": FileProcessor(None, None)},
130129
azure_credential=object(),
@@ -138,8 +137,6 @@ async def test_document_extractor_requires_single_record(monkeypatch: pytest.Mon
138137

139138
@pytest.mark.asyncio
140139
async def test_document_extractor_handles_processing_exception(monkeypatch: pytest.MonkeyPatch) -> None:
141-
from prepdocslib.fileprocessor import FileProcessor
142-
143140
async def failing_process(data: dict[str, Any]) -> dict[str, Any]:
144141
raise RuntimeError("boom")
145142

@@ -179,8 +176,6 @@ async def test_document_extractor_invalid_json_returns_error() -> None:
179176

180177
@pytest.mark.asyncio
181178
async def test_document_extractor_process_document_http_error(monkeypatch: pytest.MonkeyPatch) -> None:
182-
from prepdocslib.fileprocessor import FileProcessor
183-
184179
class FailingParser:
185180
async def parse(self, content):
186181
raise document_extractor.HttpResponseError(message="fail")
@@ -215,11 +210,10 @@ def test_document_extractor_missing_file_data() -> None:
215210

216211
def test_document_extractor_managed_identity_reload(monkeypatch: pytest.MonkeyPatch) -> None:
217212
monkeypatch.setenv("AZURE_CLIENT_ID", "client-123")
218-
module = importlib.reload(document_extractor)
219-
module.configure_global_settings()
220-
assert isinstance(module.settings.azure_credential, module.ManagedIdentityCredential)
213+
document_extractor.configure_global_settings()
214+
assert isinstance(document_extractor.settings.azure_credential, document_extractor.ManagedIdentityCredential)
221215
monkeypatch.delenv("AZURE_CLIENT_ID", raising=False)
222-
importlib.reload(document_extractor)
216+
document_extractor.configure_global_settings()
223217

224218

225219
@pytest.mark.asyncio
@@ -297,64 +291,82 @@ def test_figure_processor_initialisation_with_env(monkeypatch: pytest.MonkeyPatc
297291
monkeypatch.setenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "deploy")
298292
monkeypatch.setenv("AZURE_VISION_ENDPOINT", "https://vision")
299293

300-
import sys
301-
from pathlib import Path
302-
303-
fp_root = Path(__file__).parent.parent / "app" / "functions" / "figure_processor"
304-
sys.path.insert(0, str(fp_root))
305-
306-
fp_servicesetup = importlib.import_module("prepdocslib.servicesetup")
307-
fp_embeddings = importlib.import_module("prepdocslib.embeddings")
294+
call_state: dict[str, Any] = {}
295+
296+
class StubCredential:
297+
def __init__(self, client_id: str | None = None):
298+
call_state["credential_client_id"] = client_id
299+
300+
def fake_setup_blob_manager(**kwargs: Any) -> str:
301+
call_state["blob_manager_kwargs"] = kwargs
302+
return "blob"
303+
304+
def fake_setup_figure_processor(**kwargs: Any) -> str:
305+
call_state["figure_processor_kwargs"] = kwargs
306+
return "figproc"
307+
308+
def fake_setup_openai_client(
309+
*,
310+
openai_host: Any,
311+
azure_credential: Any,
312+
azure_openai_service: str | None,
313+
azure_openai_custom_url: str | None,
314+
) -> tuple[str, None]:
315+
call_state["openai_client_args"] = {
316+
"openai_host": openai_host,
317+
"azure_credential": azure_credential,
318+
"azure_openai_service": azure_openai_service,
319+
"azure_openai_custom_url": azure_openai_custom_url,
320+
}
321+
return ("openai-client", None)
308322

309-
monkeypatch.setattr(fp_servicesetup, "setup_blob_manager", lambda **_: "blob")
310-
monkeypatch.setattr(fp_servicesetup, "setup_figure_processor", lambda **_: "figproc")
311-
monkeypatch.setattr(fp_servicesetup, "setup_openai_client", lambda **_: ("openai-client", None))
323+
def fake_get_bearer_token_provider(credential: Any, scope: str):
324+
call_state["token_scope"] = scope
325+
call_state["token_credential"] = credential
326+
return lambda: "token"
312327

313328
class DummyImageEmbeddings:
314329
def __init__(self, endpoint: str, token_provider):
315330
self.endpoint = endpoint
316331
self.token_provider = token_provider
317332

318-
monkeypatch.setattr(fp_embeddings, "ImageEmbeddings", DummyImageEmbeddings)
319-
monkeypatch.setattr("azure.identity.aio.get_bearer_token_provider", lambda *_, **__: lambda: "token")
333+
monkeypatch.setattr(figure_processor, "ManagedIdentityCredential", StubCredential)
334+
monkeypatch.setattr(figure_processor, "setup_blob_manager", fake_setup_blob_manager)
335+
monkeypatch.setattr(figure_processor, "setup_figure_processor", fake_setup_figure_processor)
336+
monkeypatch.setattr(figure_processor, "setup_openai_client", fake_setup_openai_client)
337+
monkeypatch.setattr(figure_processor, "get_bearer_token_provider", fake_get_bearer_token_provider)
338+
monkeypatch.setattr(figure_processor, "ImageEmbeddings", DummyImageEmbeddings)
339+
monkeypatch.setattr(figure_processor, "settings", None)
320340

321-
module = importlib.reload(figure_processor)
322-
module.configure_global_settings()
341+
figure_processor.configure_global_settings()
323342

324-
assert module.settings.blob_manager == "blob"
325-
assert module.settings.figure_processor == "figproc"
326-
assert isinstance(module.settings.image_embeddings, DummyImageEmbeddings)
343+
assert figure_processor.settings is not None
344+
assert figure_processor.settings.blob_manager == "blob"
345+
assert figure_processor.settings.figure_processor == "figproc"
346+
embeddings = figure_processor.settings.image_embeddings
347+
assert isinstance(embeddings, DummyImageEmbeddings)
348+
assert embeddings.endpoint == "https://vision"
349+
assert embeddings.token_provider() == "token"
327350

328-
# Reset module to default configuration for subsequent tests
329-
for var in [
330-
"AZURE_CLIENT_ID",
331-
"AZURE_STORAGE_ACCOUNT",
332-
"AZURE_IMAGESTORAGE_CONTAINER",
333-
"USE_MULTIMODAL",
334-
"AZURE_OPENAI_SERVICE",
335-
"AZURE_OPENAI_CHATGPT_DEPLOYMENT",
336-
"AZURE_VISION_ENDPOINT",
337-
]:
338-
monkeypatch.delenv(var, raising=False)
339-
sys.path.remove(str(fp_root))
340-
importlib.reload(figure_processor)
351+
assert call_state["credential_client_id"] == "client-456"
352+
assert call_state["blob_manager_kwargs"]["storage_account"] == "acct"
353+
assert call_state["figure_processor_kwargs"]["use_multimodal"] is True
354+
assert call_state["token_scope"] == "https://cognitiveservices.azure.com/.default"
355+
assert isinstance(call_state["token_credential"], StubCredential)
356+
assert call_state["openai_client_args"]["azure_openai_service"] == "svc"
357+
assert call_state["openai_client_args"]["azure_credential"] is call_state["token_credential"]
341358

342359

343-
def test_figure_processor_warns_when_openai_incomplete(monkeypatch: pytest.MonkeyPatch) -> None:
360+
def test_figure_processor_warns_when_openai_incomplete(monkeypatch: pytest.MonkeyPatch, caplog) -> None:
344361
"""Figure processor is created with warning when USE_MULTIMODAL is true but OpenAI config is incomplete."""
345362
monkeypatch.setenv("USE_MULTIMODAL", "true")
346363
monkeypatch.setenv("AZURE_STORAGE_ACCOUNT", "acct")
347364
monkeypatch.setenv("AZURE_IMAGESTORAGE_CONTAINER", "images")
348365
# OpenAI config missing, so figure_processor will be created but won't work properly
349-
module = importlib.reload(figure_processor)
350-
module.configure_global_settings()
366+
figure_processor.configure_global_settings()
351367
# A FigureProcessor object is created even with incomplete config
352-
assert module.settings.figure_processor is not None
353-
# But it will raise ValueError when trying to describe images due to missing OpenAI client
354-
monkeypatch.delenv("USE_MULTIMODAL", raising=False)
355-
monkeypatch.delenv("AZURE_STORAGE_ACCOUNT", raising=False)
356-
monkeypatch.delenv("AZURE_IMAGESTORAGE_CONTAINER", raising=False)
357-
importlib.reload(figure_processor)
368+
assert figure_processor.settings.figure_processor is not None
369+
assert "USE_MULTIMODAL is true but Azure OpenAI configuration incomplete" in caplog.text
358370

359371

360372
@pytest.mark.asyncio
@@ -371,9 +383,6 @@ async def create_embeddings(self, texts: list[str]) -> list[list[float]]:
371383
return [[0.41, 0.42, 0.43] for _ in texts]
372384

373385
# Set up mock file processors with stub splitter
374-
from prepdocslib.fileprocessor import FileProcessor
375-
from prepdocslib.textparser import TextParser
376-
377386
mock_file_processors = {
378387
".pdf": FileProcessor(TextParser(), StubSplitter()),
379388
}
@@ -562,21 +571,12 @@ async def test_text_processor_invalid_json(monkeypatch: pytest.MonkeyPatch) -> N
562571
@pytest.mark.asyncio
563572
async def test_text_processor_with_client_id(monkeypatch: pytest.MonkeyPatch) -> None:
564573
"""Test text processor uses ManagedIdentityCredential with client ID."""
565-
import os
566-
567574
# Set the AZURE_CLIENT_ID environment variable
568-
original_client_id = os.environ.get("AZURE_CLIENT_ID")
569-
os.environ["AZURE_CLIENT_ID"] = "test-client-id"
570-
571-
try:
572-
# Force reimport to trigger module initialization with the env var set
573-
importlib.reload(text_processor)
574-
finally:
575-
# Restore original value
576-
if original_client_id:
577-
os.environ["AZURE_CLIENT_ID"] = original_client_id
578-
else:
579-
os.environ.pop("AZURE_CLIENT_ID", None)
575+
monkeypatch.setenv("AZURE_CLIENT_ID", "test-client-id")
576+
text_processor.configure_global_settings()
577+
# NOTE: nothing is asserted here; a proper check would verify the configured credential type
578+
monkeypatch.delenv("AZURE_CLIENT_ID", raising=False)
579+
text_processor.configure_global_settings()
580580

581581

582582
@pytest.mark.asyncio
@@ -589,10 +589,6 @@ async def test_text_processor_embeddings_setup(monkeypatch: pytest.MonkeyPatch)
589589
@pytest.mark.asyncio
590590
async def test_text_processor_no_sections(monkeypatch: pytest.MonkeyPatch) -> None:
591591
"""Test text processor handles empty sections."""
592-
from prepdocslib.fileprocessor import FileProcessor
593-
from prepdocslib.textparser import TextParser
594-
from prepdocslib.textsplitter import SentenceTextSplitter
595-
596592
mock_file_processors = {
597593
".pdf": FileProcessor(TextParser(), SentenceTextSplitter()),
598594
}
@@ -640,12 +636,6 @@ def mock_process_text(pages, file, splitter, category):
640636
@pytest.mark.asyncio
641637
async def test_text_processor_embeddings_not_initialized(monkeypatch: pytest.MonkeyPatch, caplog) -> None:
642638
"""Test text processor logs warning when embeddings requested but not initialized."""
643-
import logging
644-
645-
from prepdocslib.fileprocessor import FileProcessor
646-
from prepdocslib.textparser import TextParser
647-
from prepdocslib.textsplitter import SentenceTextSplitter
648-
649639
mock_file_processors = {
650640
".pdf": FileProcessor(TextParser(), SentenceTextSplitter()),
651641
}
@@ -690,10 +680,6 @@ def mock_process_text(pages, file, splitter, category):
690680
@pytest.mark.asyncio
691681
async def test_text_processor_empty_chunk_skipped(monkeypatch: pytest.MonkeyPatch) -> None:
692682
"""Test text processor skips empty chunks."""
693-
from prepdocslib.fileprocessor import FileProcessor
694-
from prepdocslib.textparser import TextParser
695-
from prepdocslib.textsplitter import SentenceTextSplitter
696-
697683
mock_file_processors = {
698684
".pdf": FileProcessor(TextParser(), SentenceTextSplitter()),
699685
}
@@ -744,10 +730,6 @@ def mock_process_text(pages, file, splitter, category):
744730
@pytest.mark.asyncio
745731
async def test_text_processor_with_multimodal_embeddings(monkeypatch: pytest.MonkeyPatch) -> None:
746732
"""Test text processor includes image embeddings when use_multimodal is true."""
747-
from prepdocslib.fileprocessor import FileProcessor
748-
from prepdocslib.textparser import TextParser
749-
from prepdocslib.textsplitter import SentenceTextSplitter
750-
751733
mock_file_processors = {
752734
".pdf": FileProcessor(TextParser(), SentenceTextSplitter()),
753735
}
@@ -810,12 +792,6 @@ def mock_process_text(pages, file, splitter, category):
810792
@pytest.mark.asyncio
811793
async def test_text_processor_embedding_dimension_mismatch(monkeypatch: pytest.MonkeyPatch, caplog) -> None:
812794
"""Test text processor logs warning when embedding dimensions don't match."""
813-
import logging
814-
815-
from prepdocslib.fileprocessor import FileProcessor
816-
from prepdocslib.textparser import TextParser
817-
from prepdocslib.textsplitter import SentenceTextSplitter
818-
819795
mock_embedding_service = type("MockEmbeddingService", (), {})()
820796

821797
async def mock_create_embeddings(texts):
@@ -867,12 +843,6 @@ def mock_process_text(pages, file, splitter, category):
867843
@pytest.mark.asyncio
868844
async def test_text_processor_embeddings_missing_warning(monkeypatch: pytest.MonkeyPatch, caplog) -> None:
869845
"""Test text processor logs warning when embeddings are requested but missing."""
870-
import logging
871-
872-
from prepdocslib.fileprocessor import FileProcessor
873-
from prepdocslib.textparser import TextParser
874-
from prepdocslib.textsplitter import SentenceTextSplitter
875-
876846
mock_embedding_service = type("MockEmbeddingService", (), {})()
877847

878848
async def mock_create_embeddings(texts):

0 commit comments

Comments
 (0)