Skip to content

Commit 0aaa38a

Browse files
committed
nit
1 parent 5c70d08 commit 0aaa38a

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

src/unstructured_client/_hooks/custom/pdf_utils.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -33,9 +33,9 @@ def read_pdf(pdf_file: Union[BinaryIO, bytes]) -> Optional[PdfReader]:
3333
return reader
3434

3535
# TODO(klaijan) - remove once debugged
36-
pdf_logger.debug("Initial PdfReader parsing failed, attempting fallbacks.")
36+
pdf_logger.debug("Primary PdfReader parse failed, attempting multipart and raw extraction fallbacks.")
3737

38-
# load raw bytes
38+
# Load raw bytes
3939
# case bytes
4040
if isinstance(pdf_file, bytes):
4141
raw = pdf_file
@@ -49,7 +49,7 @@ def read_pdf(pdf_file: Union[BinaryIO, bytes]) -> Optional[PdfReader]:
4949
else:
5050
raise TypeError("Expected bytes or a file-like object with 'read()' method")
5151

52-
# multipart extraction
52+
# This looks for multipart extraction
5353
try:
5454
msg = BytesParser(policy=default).parsebytes(raw)
5555
for part in msg.walk():
@@ -63,7 +63,7 @@ def read_pdf(pdf_file: Union[BinaryIO, bytes]) -> Optional[PdfReader]:
6363
except Exception as e:
6464
pdf_logger.debug(f"Multipart extraction failed: {e}")
6565

66-
# look for %PDF-
66+
# This looks for %PDF-
6767
try:
6868
start = raw.find(b"%PDF-")
6969
if start != -1:

0 commit comments

Comments
 (0)