You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# RecordParseError is raised when the file can't be parsed because of a problem with the file content (either the file is not supported or the file is corrupted)
180
182
# if the skip_unprocessable_files flag is set, we log a warning and pass the error as part of the document
181
183
# otherwise, we raise the error to fail the sync
182
184
if format.skip_unprocessable_files:
183
185
exception_str=str(e)
184
-
logger.warn(f"File {file.uri} caused an error during parsing: {exception_str}.")
186
+
logger.warning(f"File {file.uri} caused an error during parsing: {exception_str}.")
logger.warn(f"File {file.uri} cannot be parsed. Skipping it.")
194
+
logger.warning(f"File {file.uri} cannot be parsed. Skipping it.")
191
195
else:
192
196
raise e
193
197
except Exception as e:
@@ -370,24 +374,25 @@ def _read_file_locally(
370
374
# check whether unstructured library is actually available for better error message and to ensure proper typing (can't be None after this point)
371
375
raise Exception("unstructured library is not available")
372
376
373
-
file: Any=file_handle
374
-
375
377
# before the parsing logic is entered, the file is read completely to make sure it is in local memory
376
378
file_handle.seek(0)
377
-
file_handle.read()
379
+
file_content=file_handle.read()
378
380
file_handle.seek(0)
379
381
380
382
try:
381
-
if filetype == FileType.PDF:
382
-
# for PDF, read the file into a BytesIO object because some code paths in pdf parsing are doing an instance check on the file object and don't work with file-like objects
383
-
file_handle.seek(0)
384
-
with BytesIO(file_handle.read()) as file:
385
-
file_handle.seek(0)
383
+
# For all file types, create a fresh BytesIO to avoid issues with file-like objects
0 commit comments