Skip to content

Commit 004d576

Browse files
committed
fix: minor fixes for unit tests
1 parent 96f3b98 commit 004d576

File tree

3 files changed

+9
-3
lines changed

3 files changed

+9
-3
lines changed

src/unstructured_client/_hooks/custom/pdf_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import io
44
import logging
5-
from typing import cast, Optional, BinaryIO
5+
from typing import cast, Optional, BinaryIO, Union
66

77
from pypdf import PdfReader
88
from pypdf.errors import PdfReadError
@@ -17,7 +17,7 @@
1717
pdf_logger = logging.getLogger("pypdf")
1818
pdf_logger.setLevel(logging.ERROR)
1919

20-
def read_pdf(pdf_file: BinaryIO) -> Optional[PdfReader]:
20+
def read_pdf(pdf_file: Union[BinaryIO, bytes]) -> Optional[PdfReader]:
2121
"""Reads the given PDF file.
2222
2323
Args:

src/unstructured_client/_hooks/custom/request_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ def get_multipart_stream_fields(request: httpx.Request) -> dict[str, Any]:
3232
3333
Returns:
3434
The multipart fields.
35+
36+
Raises:
37+
ValueError: If a file field's filename is None, empty, or whitespace-only.
3538
"""
3639
content_type = request.headers.get("Content-Type", "")
3740
if "multipart" not in content_type:
@@ -50,6 +53,8 @@ def get_multipart_stream_fields(request: httpx.Request) -> dict[str, Any]:
5053
mapped_fields[name].append(field.value)
5154
mapped_fields[field.name] = field.value
5255
elif isinstance(field, FileField):
56+
if field.filename is None or not field.filename.strip():
57+
raise ValueError("Filename can't be an empty string.")
5358
mapped_fields[field.name] = {
5459
"filename": field.filename,
5560
"content_type": field.headers.get("Content-Type", ""),
@@ -75,6 +80,7 @@ def create_pdf_chunk_request_params(
7580
PARTITION_FORM_SPLIT_PDF_ALLOW_FAILED_KEY,
7681
PARTITION_FORM_FILES_KEY,
7782
PARTITION_FORM_PAGE_RANGE_KEY,
83+
PARTITION_FORM_PAGE_RANGE_KEY.replace("[]", ""),
7884
PARTITION_FORM_STARTING_PAGE_NUMBER_KEY,
7985
]
8086
chunk_payload = {key: form_data[key] for key in form_data if key not in fields_to_drop}

src/unstructured_client/_hooks/custom/split_pdf_hook.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ def before_request(
312312

313313
page_range_start, page_range_end = form_utils.get_page_range(
314314
form_data,
315-
key=PARTITION_FORM_PAGE_RANGE_KEY,
315+
key=PARTITION_FORM_PAGE_RANGE_KEY.replace("[]", ""),
316316
max_pages=pdf.get_num_pages(),
317317
)
318318

0 commit comments

Comments
 (0)