File tree Expand file tree Collapse file tree 2 files changed +9
-6
lines changed
unstructured/ingest/doc_processor Expand file tree Collapse file tree 2 files changed +9
-6
lines changed Original file line number Diff line number Diff line change 66
77### Fixes
88
9+ * Fix `process_document` to clean up the file even when processing fails
10+
911## 0.4.16
1012
1113### Enhancements
Original file line number Diff line number Diff line change 11"""Process arbitrary files with the Unstructured library"""
22
3- import logging
3+ from typing import Any , Dict , List , Optional
44
55from unstructured_inference .models .detectron2 import MODEL_TYPES
66
7+ from unstructured .ingest .interfaces import BaseIngestDoc as IngestDoc
8+ from unstructured .logger import logger
9+
710
811def initialize ():
912 """Download models (avoids subprocesses all doing the same)"""
@@ -14,7 +17,7 @@ def initialize():
1417 MODEL_TYPES [None ]["config_path" ]
1518
1619
17- def process_document (doc ) :
20+ def process_document (doc : "IngestDoc" ) -> Optional [ List [ Dict [ str , Any ]]] :
1821 """Process any IngestDoc-like class of document with Unstructured's auto partition logic."""
1922 isd_elems_no_filename = None
2023 try :
@@ -28,11 +31,9 @@ def process_document(doc):
2831 # the results. Instead, the MainProcess (caller) may work with the aggregate
2932 # results across all docs in memory.
3033 doc .write_result ()
31-
3234 except Exception :
3335 # TODO(crag) save the exception instead of print?
34- logging .error (f"Failed to process { doc } " , exc_info = True )
35- else :
36- doc .cleanup_file ()
36+ logger .error (f"Failed to process { doc } " , exc_info = True )
3737 finally :
38+ doc .cleanup_file ()
3839 return isd_elems_no_filename
You can’t perform that action at this time.
0 commit comments