Skip to content

Commit b1e6225

Browse files
committed
Fix all mypy issues
1 parent b470901 commit b1e6225

File tree

5 files changed

+13
-5
lines changed

5 files changed

+13
-5
lines changed

app/backend/app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@
9494
from decorators import authenticated, authenticated_path
9595
from error import error_dict, error_response
9696
from prepdocs import (
97+
OpenAIHost,
9798
clean_key_if_exists,
9899
setup_embeddings_service,
99100
setup_file_processors,
@@ -583,7 +584,7 @@ async def setup_clients():
583584
)
584585
text_embeddings_service = setup_embeddings_service(
585586
azure_credential=azure_credential,
586-
openai_host=OPENAI_HOST,
587+
openai_host=OpenAIHost(OPENAI_HOST),
587588
emb_model_name=OPENAI_EMB_MODEL,
588589
emb_model_dimensions=OPENAI_EMB_DIMENSIONS,
589590
azure_openai_service=AZURE_OPENAI_SERVICE,

app/backend/prepdocs.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,17 +188,19 @@ def setup_embeddings_service(
188188

189189
def setup_openai_client(
190190
openai_host: OpenAIHost,
191+
azure_credential: AsyncTokenCredential,
191192
azure_openai_api_key: Union[str, None] = None,
192193
azure_openai_api_version: Union[str, None] = None,
193194
azure_openai_service: Union[str, None] = None,
194195
azure_openai_custom_url: Union[str, None] = None,
195-
azure_credential: AsyncTokenCredential = None,
196196
openai_api_key: Union[str, None] = None,
197197
openai_organization: Union[str, None] = None,
198198
):
199199
if openai_host not in OpenAIHost:
200200
raise ValueError(f"Invalid OPENAI_HOST value: {openai_host}. Must be one of {[h.value for h in OpenAIHost]}.")
201201

202+
openai_client: AsyncOpenAI
203+
202204
if openai_host in [OpenAIHost.AZURE, OpenAIHost.AZURE_CUSTOM]:
203205
if openai_host == OpenAIHost.AZURE_CUSTOM:
204206
logger.info("OPENAI_HOST is azure_custom, setting up Azure OpenAI custom client")
@@ -501,11 +503,11 @@ async def main(strategy: Strategy, setup_index: bool = True):
501503
)
502504
openai_client = setup_openai_client(
503505
openai_host=openai_host,
506+
azure_credential=azd_credential,
504507
azure_openai_api_version=azure_openai_api_version,
505508
azure_openai_service=os.getenv("AZURE_OPENAI_SERVICE"),
506509
azure_openai_custom_url=os.getenv("AZURE_OPENAI_CUSTOM_URL"),
507510
azure_openai_api_key=os.getenv("AZURE_OPENAI_API_KEY_OVERRIDE"),
508-
azure_credential=azd_credential,
509511
openai_api_key=clean_key_if_exists(os.getenv("OPENAI_API_KEY")),
510512
openai_organization=os.getenv("OPENAI_ORGANIZATION"),
511513
)

app/backend/prepdocslib/filestrategy.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ async def parse_file(
3030
pages = [page async for page in processor.parser.parse(content=file.content)]
3131
for page in pages:
3232
for image in page.images:
33+
if not blob_manager or not image_embeddings_client:
34+
raise ValueError("BlobManager and ImageEmbeddingsClient must be provided to parse images in the file.")
3335
if image.url is None:
3436
image.url = await blob_manager.upload_document_image(file, image.bytes, image.filename, image.page_num)
3537
if image_embeddings_client:

app/backend/prepdocslib/pdfparser.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,9 @@ def crop_image_from_pdf_page(
298298
"""
299299
# Scale the bounding box to 72 DPI
300300
bbox_dpi = 72
301-
bbox_pixels = tuple(x * bbox_dpi for x in bbox_inches) # Convert to tuple
301+
# Scale each coordinate, then unpack into four names so the resulting tuple has exactly four elements
302+
x0, y0, x1, y1 = (x * bbox_dpi for x in bbox_inches)
303+
bbox_pixels = (x0, y0, x1, y1)
302304
rect = pymupdf.Rect(bbox_pixels)
303305
# Assume that the PDF has 300 DPI,
304306
# and use the matrix to convert between the 2 DPIs

todo.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@ TODO:
33
* Fix/add unit tests - check coverage
44
* mypy
55
* Test with integrated vectorization
6+
* Test with user upload feature
67
* Update all TODOs in the code/docs
7-
8+
* Should I truncate the image_urls to "....." for the JSON display?
89

910
Decide:
1011
* In conftest, should I make a new env for vision? Currently I mashed it into the existing env, but a separate one might be cleaner, since I now have to pass llm_inputs explicitly in the tests to turn off image responses.

0 commit comments

Comments
 (0)