Skip to content

Commit e6481b8

Browse files
committed
chore: removed unnecessary logger, corrected docstrings
1 parent 971f456 commit e6481b8

File tree

1 file changed

+6
-16
lines changed

1 file changed

+6
-16
lines changed

src/unstructured_client/_hooks/custom/split_pdf_hook.py

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -294,18 +294,6 @@ def before_request(
294294
)
295295
# force free PDF object memory
296296
del pdf
297-
logger.info(
298-
"Partitioning %d files with %d page(s) each.",
299-
math.floor(page_count / split_size),
300-
split_size,
301-
)
302-
303-
# Log the remainder pages if there are any
304-
if page_count % split_size > 0:
305-
logger.info(
306-
"Partitioning 1 file with %d page(s).",
307-
page_count % split_size,
308-
)
309297

310298
# Use a variable to adjust the httpx client timeout, or default to 30 minutes
311299
# When we're able to reuse the SDK to make these calls, we can remove this var
@@ -374,9 +362,10 @@ def _get_pdf_pages(
374362
split_size: int = 1,
375363
page_start: int = 1,
376364
page_end: Optional[int] = None
377-
) -> Generator[Tuple[io.BytesIO, int], None, None]:
378-
"""Reads given bytes of a pdf file and split it into n file-like objects, each
379-
with `split_size` pages.
365+
) -> Generator[Tuple[BinaryIO, int], None, None]:
366+
"""Reads given bytes of a pdf file and splits it into n pdf-chunks, each
367+
with `split_size` pages. The chunks are written into temporary files in
368+
a temporary directory corresponding to the operation_id.
380369
381370
Args:
382371
file_content: Content of the PDF file.
@@ -387,7 +376,7 @@ def _get_pdf_pages(
387376
page_end: If provided, split up to and including this page number
388377
389378
Yields:
390-
The file contents with their page number and overall pages number of the original document.
379+
The file object of each pdf-chunk together with its page offset.
391380
"""
392381

393382
offset = page_start - 1
@@ -423,6 +412,7 @@ def _get_pdf_pages(
423412
except Exception: # pylint: disable=broad-except
424413
if pdf_chunk_file and not pdf_chunk_file.closed:
425414
pdf_chunk_file.close()
415+
raise
426416
yield pdf_chunk_file, offset
427417

428418
def _await_elements(

0 commit comments

Comments
 (0)