Skip to content

Commit 6d4e490

Browse files
committed
Address Copilot feedback on tests
1 parent 1db5f14 commit 6d4e490

File tree

4 files changed

+42
-49
lines changed

4 files changed

+42
-49
lines changed

AGENTS.md

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,28 @@ If necessary, edit this file to ensure it accurately reflects the current state
1717
* app/backend/approaches/prompts/chat_query_rewrite.prompty: Prompt used to rewrite the query, based on the search history, into a better search query
1818
* app/backend/approaches/prompts/chat_query_rewrite_tools.json: Tools used by the query rewriting prompt
1919
* app/backend/approaches/prompts/chat_answer_question.prompty: Prompt used by the Chat approach to actually answer the question based on the sources
20-
* app/backend/prepdocslib/cloudingestionstrategy.py: Builds the Azure AI Search indexer and skillset for the cloud ingestion pipeline
21-
* app/backend/prepdocslib/pdfparser.py: Uses Azure Document Intelligence to emit page text plus figure placeholders
22-
* app/backend/prepdocslib/figureprocessor.py: Shared helper that generates figure descriptions for both local ingestion and the cloud figure-processor skill
20+
* app/backend/prepdocslib: Contains the document ingestion library used by both local and cloud ingestion
21+
* app/backend/prepdocslib/blobmanager.py: Manages uploads to Azure Blob Storage
22+
* app/backend/prepdocslib/cloudingestionstrategy.py: Builds the Azure AI Search indexer and skillset for the cloud ingestion pipeline
23+
* app/backend/prepdocslib/csvparser.py: Parses CSV files
24+
* app/backend/prepdocslib/embeddings.py: Generates embeddings for text and images using Azure OpenAI
25+
* app/backend/prepdocslib/figureprocessor.py: Generates figure descriptions for both local ingestion and the cloud figure-processor skill
26+
* app/backend/prepdocslib/fileprocessor.py: Orchestrates parsing and chunking of individual files
27+
* app/backend/prepdocslib/filestrategy.py: Strategy for uploading and indexing files (local ingestion)
28+
* app/backend/prepdocslib/htmlparser.py: Parses HTML files
29+
* app/backend/prepdocslib/integratedvectorizerstrategy.py: Strategy using Azure AI Search integrated vectorization
30+
* app/backend/prepdocslib/jsonparser.py: Parses JSON files
31+
* app/backend/prepdocslib/listfilestrategy.py: Lists files from local filesystem or Azure Data Lake
32+
* app/backend/prepdocslib/mediadescriber.py: Interfaces for describing images (Azure OpenAI GPT-4o, Content Understanding)
33+
* app/backend/prepdocslib/page.py: Data classes for pages, images, and chunks
34+
* app/backend/prepdocslib/parser.py: Base parser interface
35+
* app/backend/prepdocslib/pdfparser.py: Parses PDFs using Azure Document Intelligence or local parser
36+
* app/backend/prepdocslib/searchmanager.py: Manages Azure AI Search index creation and updates
37+
* app/backend/prepdocslib/servicesetup.py: Shared service setup helpers for OpenAI, embeddings, blob storage, etc.
38+
* app/backend/prepdocslib/strategy.py: Base strategy interface for document ingestion
39+
* app/backend/prepdocslib/textparser.py: Parses plain text and markdown files
40+
* app/backend/prepdocslib/textprocessor.py: Processes text chunks for cloud ingestion (merges figures, generates embeddings)
41+
* app/backend/prepdocslib/textsplitter.py: Splits text into chunks using different strategies
2342
* app/backend/app.py: The main entry point for the backend application.
2443
* app/functions: Azure Functions used for cloud ingestion custom skills (document extraction, figure processing, text processing). Each function bundles a synchronized copy of `prepdocslib`; run `python scripts/copy_prepdocslib.py` to refresh the local copies if you modify the library.
2544
* app/frontend: Contains the React frontend code, written in TypeScript and built with Vite.

app/backend/prepdocslib/figureprocessor.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,9 @@ async def process_page_image(
114114
*,
115115
image: "ImageOnPage",
116116
document_filename: str,
117-
blob_manager: Optional["BaseBlobManager"],
118-
image_embeddings_client: Optional["ImageEmbeddings"],
119-
figure_processor: Optional["FigureProcessor"] = None,
117+
blob_manager: Optional[BaseBlobManager],
118+
image_embeddings_client: Optional[ImageEmbeddings],
119+
figure_processor: Optional[FigureProcessor] = None,
120120
user_oid: Optional[str] = None,
121121
) -> "ImageOnPage":
122122
"""Generate description, upload image, and optionally compute embedding for a figure."""

requirements-dev.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,3 @@ pip-tools
1414
mypy==1.14.1
1515
diff_cover
1616
axe-playwright-python
17-
python-Levenshtein

tests/test_servicesetup.py

Lines changed: 17 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,21 @@
1+
import openai
12
import pytest
3+
from openai.types.create_embedding_response import Usage
24

5+
from prepdocslib.embeddings import OpenAIEmbeddings
6+
from prepdocslib.figureprocessor import FigureProcessor, MediaDescriptionStrategy
7+
from prepdocslib.pdfparser import DocumentAnalysisParser
38
from prepdocslib.servicesetup import (
49
OpenAIHost,
10+
select_parser,
511
setup_blob_manager,
612
setup_embeddings_service,
13+
setup_figure_processor,
14+
setup_image_embeddings_service,
715
setup_openai_client,
16+
setup_search_info,
817
)
18+
from prepdocslib.textparser import TextParser
919

1020
from .mocks import (
1121
MOCK_EMBEDDING_DIMENSIONS,
@@ -38,9 +48,7 @@ def __init__(
3848
captured["subscription_id"] = subscription_id
3949
captured["image_container"] = image_container
4050

41-
import prepdocslib.servicesetup as servicesetup_module
42-
43-
monkeypatch.setattr(servicesetup_module, "BlobManager", StubBlobManager)
51+
monkeypatch.setattr("prepdocslib.servicesetup.BlobManager", StubBlobManager)
4452

4553
result = setup_blob_manager(
4654
azure_credential=MockAzureCredential(),
@@ -58,9 +66,6 @@ def __init__(
5866

5967

6068
def test_setup_embeddings_service_populates_azure_metadata() -> None:
61-
import openai
62-
from openai.types.create_embedding_response import Usage
63-
6469
embeddings = setup_embeddings_service(
6570
open_ai_client=MockClient(
6671
MockEmbeddingsClient(
@@ -79,17 +84,12 @@ def test_setup_embeddings_service_populates_azure_metadata() -> None:
7984
azure_openai_endpoint="https://service.openai.azure.com",
8085
)
8186

82-
from prepdocslib.embeddings import OpenAIEmbeddings
83-
8487
assert isinstance(embeddings, OpenAIEmbeddings)
8588
assert embeddings.azure_deployment_name == "deployment"
8689
assert embeddings.azure_endpoint == "https://service.openai.azure.com"
8790

8891

8992
def test_setup_embeddings_service_requires_endpoint_for_azure() -> None:
90-
import openai
91-
from openai.types.create_embedding_response import Usage
92-
9393
with pytest.raises(ValueError):
9494
setup_embeddings_service(
9595
open_ai_client=MockClient(
@@ -111,9 +111,6 @@ def test_setup_embeddings_service_requires_endpoint_for_azure() -> None:
111111

112112

113113
def test_setup_embeddings_service_requires_deployment_for_azure() -> None:
114-
import openai
115-
from openai.types.create_embedding_response import Usage
116-
117114
with pytest.raises(ValueError):
118115
setup_embeddings_service(
119116
open_ai_client=MockClient(
@@ -142,10 +139,10 @@ class StubAsyncOpenAI:
142139
def __init__(self, *, base_url: str, api_key, **kwargs) -> None:
143140
captured_base_url.append(base_url)
144141

145-
import prepdocslib.servicesetup as servicesetup_module
146-
147-
monkeypatch.setattr(servicesetup_module, "AsyncOpenAI", StubAsyncOpenAI)
148-
monkeypatch.setattr(servicesetup_module, "get_bearer_token_provider", lambda *args, **kwargs: lambda: "fake_token")
142+
monkeypatch.setattr("prepdocslib.servicesetup.AsyncOpenAI", StubAsyncOpenAI)
143+
monkeypatch.setattr(
144+
"prepdocslib.servicesetup.get_bearer_token_provider", lambda *args, **kwargs: lambda: "fake_token"
145+
)
149146

150147
client, endpoint = setup_openai_client(
151148
openai_host=OpenAIHost.AZURE,
@@ -167,9 +164,7 @@ class StubAsyncOpenAI:
167164
def __init__(self, *, base_url: str, api_key, **kwargs) -> None:
168165
captured_base_url.append(base_url)
169166

170-
import prepdocslib.servicesetup as servicesetup_module
171-
172-
monkeypatch.setattr(servicesetup_module, "AsyncOpenAI", StubAsyncOpenAI)
167+
monkeypatch.setattr("prepdocslib.servicesetup.AsyncOpenAI", StubAsyncOpenAI)
173168

174169
client, endpoint = setup_openai_client(
175170
openai_host=OpenAIHost.AZURE_CUSTOM,
@@ -192,9 +187,7 @@ class StubAsyncOpenAI:
192187
def __init__(self, *, base_url: str, api_key: str, **kwargs) -> None:
193188
captured_api_key.append(api_key)
194189

195-
import prepdocslib.servicesetup as servicesetup_module
196-
197-
monkeypatch.setattr(servicesetup_module, "AsyncOpenAI", StubAsyncOpenAI)
190+
monkeypatch.setattr("prepdocslib.servicesetup.AsyncOpenAI", StubAsyncOpenAI)
198191

199192
client, endpoint = setup_openai_client(
200193
openai_host=OpenAIHost.AZURE,
@@ -238,8 +231,6 @@ def test_setup_openai_client_azure_custom_requires_url() -> None:
238231

239232
def test_setup_search_info_agentic_retrieval_without_model():
240233
"""Test that setup_search_info raises ValueError when using agentic retrieval without search agent model."""
241-
from prepdocslib.servicesetup import setup_search_info
242-
243234
with pytest.raises(ValueError, match="SearchAgent model must be specified"):
244235
setup_search_info(
245236
azure_credential=MockAzureCredential(),
@@ -252,8 +243,6 @@ def test_setup_search_info_agentic_retrieval_without_model():
252243

253244
def test_setup_image_embeddings_multimodal_without_vision():
254245
"""Test that setup_image_embeddings_service raises ValueError when using multimodal without vision endpoint."""
255-
from prepdocslib.servicesetup import setup_image_embeddings_service
256-
257246
with pytest.raises(ValueError, match="Azure AI Vision endpoint must be provided"):
258247
setup_image_embeddings_service(
259248
use_multimodal=True,
@@ -264,9 +253,6 @@ def test_setup_image_embeddings_multimodal_without_vision():
264253

265254
def test_setup_figure_processor_content_understanding():
266255
"""Test that setup_figure_processor returns correct processor for content understanding."""
267-
from prepdocslib.figureprocessor import FigureProcessor, MediaDescriptionStrategy
268-
from prepdocslib.servicesetup import setup_figure_processor
269-
270256
processor = setup_figure_processor(
271257
use_multimodal=False,
272258
use_content_understanding=True,
@@ -283,9 +269,6 @@ def test_setup_figure_processor_content_understanding():
283269

284270
def test_setup_parser_document_intelligence_with_key():
285271
"""Test that select_parser uses key credential when provided."""
286-
from prepdocslib.pdfparser import DocumentAnalysisParser
287-
from prepdocslib.servicesetup import select_parser
288-
289272
parser = select_parser(
290273
file_name="test.pdf",
291274
content_type="application/pdf",
@@ -300,9 +283,6 @@ def test_setup_parser_document_intelligence_with_key():
300283

301284
def test_setup_parser_text_file():
302285
"""Test that select_parser returns TextParser for text files."""
303-
from prepdocslib.servicesetup import select_parser
304-
from prepdocslib.textparser import TextParser
305-
306286
parser = select_parser(
307287
file_name="test.txt",
308288
content_type="text/plain",
@@ -315,9 +295,6 @@ def test_setup_parser_text_file():
315295

316296
def test_setup_parser_application_type_with_di():
317297
"""Test that select_parser uses DI for application/* content types."""
318-
from prepdocslib.pdfparser import DocumentAnalysisParser
319-
from prepdocslib.servicesetup import select_parser
320-
321298
parser = select_parser(
322299
file_name="test.unknown",
323300
content_type="application/unknown",
@@ -330,8 +307,6 @@ def test_setup_parser_application_type_with_di():
330307

331308
def test_setup_parser_unsupported_file_type():
332309
"""Test that select_parser raises ValueError for unsupported file types."""
333-
from prepdocslib.servicesetup import select_parser
334-
335310
with pytest.raises(ValueError, match="Unsupported file type"):
336311
select_parser(
337312
file_name="test.xyz",

0 commit comments

Comments
 (0)