File tree Expand file tree Collapse file tree 1 file changed +4
-4
lines changed
src/unstructured_client/_hooks/custom Expand file tree Collapse file tree 1 file changed +4
-4
lines changed Original file line number Diff line number Diff line change @@ -33,9 +33,9 @@ def read_pdf(pdf_file: Union[BinaryIO, bytes]) -> Optional[PdfReader]:
3333 return reader
3434
3535 # TODO(klaijan) - remove once debugged
36- pdf_logger .debug ("Initial PdfReader parsing failed, attempting fallbacks." )
36+ pdf_logger .debug ("Primary PdfReader parse failed, attempting multipart and raw extraction fallbacks." )
3737
38- # load raw bytes
38+ # Load raw bytes
3939 # case bytes
4040 if isinstance (pdf_file , bytes ):
4141 raw = pdf_file
@@ -49,7 +49,7 @@ def read_pdf(pdf_file: Union[BinaryIO, bytes]) -> Optional[PdfReader]:
4949 else :
5050 raise TypeError ("Expected bytes or a file-like object with 'read()' method" )
5151
52- # multipart extraction
52+ # Attempt multipart extraction
5353 try :
5454 msg = BytesParser (policy = default ).parsebytes (raw )
5555 for part in msg .walk ():
@@ -63,7 +63,7 @@ def read_pdf(pdf_file: Union[BinaryIO, bytes]) -> Optional[PdfReader]:
6363 except Exception as e :
6464 pdf_logger .debug (f"Multipart extraction failed: { e } " )
6565
66- # look for %PDF-
66+ # Search the raw bytes for the %PDF- marker
6767 try :
6868 start = raw .find (b"%PDF-" )
6969 if start != - 1 :
You can’t perform that action at this time.
0 commit comments