Skip to content

Commit d607914

Browse files
maxmnemonic, Maxim Lysak, and cau-git
authored
fix: validation and typechecks in TF post processing and OTSL to HTML conversion function (#18)
* Changes in this commit:
  - Improved the TF bbox post-processing step that corrects bbox overlap; it should now produce more correct bboxes.
  - Made the bbox overlap correction step optional, via an extra parameter in multi_table_predict.
  - Corrected otsl_to_html to be fault tolerant to an empty rs_list.

  Signed-off-by: Maxim Lysak <[email protected]>

* Safety check that outputs_class and outputs_coord are proper torch tensors before sending them to the device.

  Signed-off-by: Maxim Lysak <[email protected]>

* Proper fix for empty outputs from the bbox decoder.

---------

Signed-off-by: Maxim Lysak <[email protected]>
Co-authored-by: Maxim Lysak <[email protected]>
Co-authored-by: Christoph Auer <[email protected]>
1 parent cb0b2d4 commit d607914

File tree

4 files changed

+73
-29
lines changed

4 files changed

+73
-29
lines changed

docling_ibm_models/tableformer/data_management/matching_post_processor.py

Lines changed: 44 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#
55
import json
66
import logging
7+
import math
78
import statistics
89

910
import docling_ibm_models.tableformer.settings as s
@@ -403,45 +404,63 @@ def correct_overlap(box1, box2):
403404
# Push horizontally
404405
if x1_min < x2_min:
405406
# Move box1 to the left and box2 to the right
406-
box1["bbox"][2] -= overlap_x
407-
box2["bbox"][0] += overlap_x
407+
box1["bbox"][2] -= math.ceil(overlap_x / 2) + 2
408+
box2["bbox"][0] += math.floor(overlap_x / 2)
408409
else:
409410
# Move box2 to the left and box1 to the right
410-
box2["bbox"][2] -= overlap_x
411-
box1["bbox"][0] += overlap_x
411+
box2["bbox"][2] -= math.ceil(overlap_x / 2) + 2
412+
box1["bbox"][0] += math.floor(overlap_x / 2)
412413
else:
413414
# Push vertically
414415
if y1_min < y2_min:
415416
# Move box1 up and box2 down
416-
box1["bbox"][3] -= overlap_y
417-
box2["bbox"][1] += overlap_y
417+
box1["bbox"][3] -= math.ceil(overlap_y / 2) + 2
418+
box2["bbox"][1] += math.floor(overlap_y / 2)
418419
else:
419420
# Move box2 up and box1 down
420-
box2["bbox"][3] -= overlap_y
421-
box1["bbox"][1] += overlap_y
421+
box2["bbox"][3] -= math.ceil(overlap_y / 2) + 2
422+
box1["bbox"][1] += math.floor(overlap_y / 2)
423+
424+
# Will flip coordinates in proper order, if previous operations reversed it
425+
box1["bbox"] = [
426+
min(box1["bbox"][0], box1["bbox"][2]),
427+
min(box1["bbox"][1], box1["bbox"][3]),
428+
max(box1["bbox"][0], box1["bbox"][2]),
429+
max(box1["bbox"][1], box1["bbox"][3]),
430+
]
431+
box2["bbox"] = [
432+
min(box2["bbox"][0], box2["bbox"][2]),
433+
min(box2["bbox"][1], box2["bbox"][3]),
434+
max(box2["bbox"][0], box2["bbox"][2]),
435+
max(box2["bbox"][1], box2["bbox"][3]),
436+
]
422437

423438
return box1, box2
424439

425440
def do_boxes_overlap(box1, box2):
426-
# print("{} - {}".format(box1["bbox"], box2["bbox"]))
427-
# Extract coordinates from the bounding boxes
428-
x1_min, y1_min, x1_max, y1_max = box1["bbox"]
429-
x2_min, y2_min, x2_max, y2_max = box2["bbox"]
430-
# Check if one box is to the left of the other
431-
if x1_max < x2_min or x2_max < x1_min:
441+
B1 = box1["bbox"]
442+
B2 = box2["bbox"]
443+
if (
444+
(B1[0] >= B2[2])
445+
or (B1[2] <= B2[0])
446+
or (B1[3] <= B2[1])
447+
or (B1[1] >= B2[3])
448+
):
432449
return False
433-
# Check if one box is above the other
434-
if y1_max < y2_min or y2_max < y1_min:
435-
return False
436-
return True
450+
else:
451+
return True
437452

438453
def find_overlapping_pairs_indexes(bboxes):
439454
overlapping_indexes = []
440455
# Compare each box with every other box (combinations)
441456
for i in range(len(bboxes)):
442457
for j in range(i + 1, len(bboxes)):
443-
if do_boxes_overlap(bboxes[i], bboxes[j]):
444-
bboxes[i], bboxes[j] = correct_overlap(bboxes[i], bboxes[j])
458+
if i != j:
459+
if bboxes[i] != bboxes[j]:
460+
if do_boxes_overlap(bboxes[i], bboxes[j]):
461+
bboxes[i], bboxes[j] = correct_overlap(
462+
bboxes[i], bboxes[j]
463+
)
445464

446465
return overlapping_indexes, bboxes
447466

@@ -1144,7 +1163,7 @@ def _clear_pdf_cells(self, pdf_cells):
11441163
new_pdf_cells.append(pdf_cells[i])
11451164
return new_pdf_cells
11461165

1147-
def process(self, matching_details):
1166+
def process(self, matching_details, correct_overlapping_cells=False):
11481167
r"""
11491168
Do post processing, see details in the comments below
11501169
@@ -1348,9 +1367,10 @@ def process(self, matching_details):
13481367
table_cells_wo = po2
13491368
max_cell_id = po3
13501369

1351-
# As the last step - correct cell bboxes in a way that they don't overlap:
1352-
if len(table_cells_wo) <= 300: # For performance reasons
1353-
table_cells_wo = self._find_overlapping(table_cells_wo)
1370+
if correct_overlapping_cells:
1371+
# As the last step - correct cell bboxes in a way that they don't overlap:
1372+
if len(table_cells_wo) <= 300: # For performance reasons
1373+
table_cells_wo = self._find_overlapping(table_cells_wo)
13541374

13551375
self._log().debug("*** final_matches_wo")
13561376
self._log().debug(final_matches_wo)

docling_ibm_models/tableformer/data_management/tf_predictor.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -523,8 +523,9 @@ def resize_img(self, image, width=None, height=None, inter=cv2.INTER_AREA):
523523
# return the resized image
524524
return resized, sf
525525

526-
def multi_table_predict(self, iocr_page, table_bboxes, do_matching=True):
527-
# def multi_table_predict(self, iocr_page, page_image, table_bboxes):
526+
def multi_table_predict(
527+
self, iocr_page, table_bboxes, do_matching=True, correct_overlapping_cells=False
528+
):
528529
multi_tf_output = []
529530
page_image = iocr_page["image"]
530531

@@ -546,7 +547,12 @@ def multi_table_predict(self, iocr_page, table_bboxes, do_matching=True):
546547
# Predict
547548
if do_matching:
548549
tf_responses, predict_details = self.predict(
549-
iocr_page, table_bbox, table_image, scale_factor, None
550+
iocr_page,
551+
table_bbox,
552+
table_image,
553+
scale_factor,
554+
None,
555+
correct_overlapping_cells,
550556
)
551557
else:
552558
tf_responses, predict_details = self.predict_dummy(
@@ -733,7 +739,13 @@ def predict_dummy(
733739
return tf_output, matching_details
734740

735741
def predict(
736-
self, iocr_page, table_bbox, table_image, scale_factor, eval_res_preds=None
742+
self,
743+
iocr_page,
744+
table_bbox,
745+
table_image,
746+
scale_factor,
747+
eval_res_preds=None,
748+
correct_overlapping_cells=False,
737749
):
738750
r"""
739751
Predict the table out of an image in memory
@@ -744,6 +756,8 @@ def predict(
744756
Docling provided table data
745757
eval_res_preds : dict
746758
Ready predictions provided by the evaluation results
759+
correct_overlapping_cells : boolean
760+
Enables or disables last post-processing step, that fixes cell bboxes to remove overlap
747761
748762
Returns
749763
-------
@@ -834,7 +848,9 @@ def predict(
834848
): # There are at least some pdf cells to match with
835849
if self.enable_post_process:
836850
AggProfiler().begin("post_process", self._prof)
837-
matching_details = self._post_processor.process(matching_details)
851+
matching_details = self._post_processor.process(
852+
matching_details, correct_overlapping_cells
853+
)
838854
AggProfiler().end("post_process", self._prof)
839855

840856
# Generate the expected Docling responses

docling_ibm_models/tableformer/models/table04_rs/bbox_decoder_rs.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,12 @@ def inference(self, encoder_out, tag_H):
157157
predictions_classes.append(self._class_embed(h))
158158
if len(predictions_bboxes) > 0:
159159
predictions_bboxes = torch.stack([x[0] for x in predictions_bboxes])
160+
else:
161+
predictions_bboxes = torch.empty(0)
162+
160163
if len(predictions_classes) > 0:
161164
predictions_classes = torch.stack([x[0] for x in predictions_classes])
165+
else:
166+
predictions_classes = torch.empty(0)
162167

163168
return predictions_classes, predictions_bboxes

docling_ibm_models/tableformer/otsl.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,9 @@ def otsl_check_right(rs_split, x, y):
123123

124124

125125
def otsl_to_html(rs_list, logdebug):
126+
if len(rs_list) == 0:
127+
return []
128+
126129
if rs_list[0] not in ["fcel", "ched", "rhed", "srow", "ecel"]:
127130
# Most likely already HTML...
128131
return rs_list

0 commit comments

Comments
 (0)