File tree Expand file tree Collapse file tree 7 files changed +22
-8
lines changed
unstructured_inference/inference Expand file tree Collapse file tree 7 files changed +22
-8
lines changed Original file line number Diff line number Diff line change 55
66## 0.6.4
77
8+ * Add functionality to keep extracted image elements while merging inferred layout with extracted layout
89* add a function to automatically scale table crop images based on text height so the text height is optimal for the `tesseract` OCR task
910* add the new image auto-scaling parameters to `config.py`
1011
Load Diff This file was deleted.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change 1+ unstructured-inference
2+ pymupdf
3+ pypdf2
Original file line number Diff line number Diff line change @@ -441,7 +441,7 @@ def annotate(
441441 width = style ["width" ]
442442 for region in getattr (self , attribute ):
443443 if isinstance (region , Rectangle ):
444- required_source = getattr (el , "source" , None )
444+ required_source = getattr (region , "source" , None )
445445 if "all" in sources or required_source in sources :
446446 img = draw_bbox (
447447 img ,
Original file line number Diff line number Diff line change @@ -148,13 +148,24 @@ def merge_inferred_layout_with_extracted_layout(
148148 )
149149 if same_bbox :
150150 # Looks like these represent the same region
151- grow_region_to_match_region (inferred_region , extracted_region )
152- inferred_region .text = extracted_region .text
153- region_matched = True
154- elif extracted_is_subregion_of_inferred and inferred_is_text and extracted_is_image :
155- grow_region_to_match_region (inferred_region , extracted_region )
156- region_matched = True
151+ if extracted_is_image :
152+ # keep extracted region, remove inferred region
153+ inferred_regions_to_remove .append (inferred_region )
154+ else :
155+ # keep inferred region, remove extracted region
156+ grow_region_to_match_region (inferred_region , extracted_region )
157+ inferred_region .text = extracted_region .text
158+ region_matched = True
159+ elif extracted_is_subregion_of_inferred and inferred_is_text :
160+ if extracted_is_image :
161+ # keep both extracted and inferred regions
162+ region_matched = False
163+ else :
164+ # keep inferred region, remove extracted region
165+ grow_region_to_match_region (inferred_region , extracted_region )
166+ region_matched = True
157167 elif either_region_is_subregion_of_other and inferred_region .type != "Table" :
168+ # keep extracted region, remove inferred region
158169 inferred_regions_to_remove .append (inferred_region )
159170 if not region_matched :
160171 extracted_elements_to_add .append (extracted_region )
You can’t perform that action at this time.
0 commit comments