Skip to content

Commit e8f396d

Browse files
maxmnemonic and Maxim Lysak
authored
fix: Table cell overlap removal in TF post-processing: (#10)
- As the last step, correct cell bboxes so that they don't overlap; apply this only if the number of table cells is reasonable.
- Corrected RGB palette for one of the sample images.
Signed-off-by: Maxim Lysak <[email protected]>
Co-authored-by: Maxim Lysak <[email protected]>
1 parent bc89a57 commit e8f396d

File tree

3 files changed

+85
-14
lines changed

3 files changed

+85
-14
lines changed

docling_ibm_models/tableformer/data_management/matching_post_processor.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,71 @@ def _run_intersection_match(self, cell_matcher, table_cells, pdf_cells):
383383
clean_matches = json.loads(clean_matches_string)
384384
return clean_matches
385385

386+
def _find_overlapping(self, table_cells):
387+
388+
def correct_overlap(box1, box2):
389+
# Extract coordinates from the bounding boxes
390+
x1_min, y1_min, x1_max, y1_max = box1["bbox"]
391+
x2_min, y2_min, x2_max, y2_max = box2["bbox"]
392+
393+
# Calculate the overlap in both x and y directions
394+
overlap_x = min(x1_max, x2_max) - max(x1_min, x2_min)
395+
overlap_y = min(y1_max, y2_max) - max(y1_min, y2_min)
396+
397+
# If there is no overlap, return the original boxes
398+
if overlap_x <= 0 or overlap_y <= 0:
399+
return box1, box2
400+
401+
# Decide how to push the boxes apart
402+
if overlap_x < overlap_y:
403+
# Push horizontally
404+
if x1_min < x2_min:
405+
# Move box1 to the left and box2 to the right
406+
box1["bbox"][2] -= overlap_x
407+
box2["bbox"][0] += overlap_x
408+
else:
409+
# Move box2 to the left and box1 to the right
410+
box2["bbox"][2] -= overlap_x
411+
box1["bbox"][0] += overlap_x
412+
else:
413+
# Push vertically
414+
if y1_min < y2_min:
415+
# Move box1 up and box2 down
416+
box1["bbox"][3] -= overlap_y
417+
box2["bbox"][1] += overlap_y
418+
else:
419+
# Move box2 up and box1 down
420+
box2["bbox"][3] -= overlap_y
421+
box1["bbox"][1] += overlap_y
422+
423+
return box1, box2
424+
425+
def do_boxes_overlap(box1, box2):
426+
# print("{} - {}".format(box1["bbox"], box2["bbox"]))
427+
# Extract coordinates from the bounding boxes
428+
x1_min, y1_min, x1_max, y1_max = box1["bbox"]
429+
x2_min, y2_min, x2_max, y2_max = box2["bbox"]
430+
# Check if one box is to the left of the other
431+
if x1_max < x2_min or x2_max < x1_min:
432+
return False
433+
# Check if one box is above the other
434+
if y1_max < y2_min or y2_max < y1_min:
435+
return False
436+
return True
437+
438+
def find_overlapping_pairs_indexes(bboxes):
439+
overlapping_indexes = []
440+
# Compare each box with every other box (combinations)
441+
for i in range(len(bboxes)):
442+
for j in range(i + 1, len(bboxes)):
443+
if do_boxes_overlap(bboxes[i], bboxes[j]):
444+
bboxes[i], bboxes[j] = correct_overlap(bboxes[i], bboxes[j])
445+
446+
return overlapping_indexes, bboxes
447+
448+
overlapping_indexes, table_cells = find_overlapping_pairs_indexes(table_cells)
449+
return table_cells
450+
386451
def _align_table_cells_to_pdf(self, table_cells, pdf_cells, matches):
387452
r"""
388453
USED in 8.a step
@@ -1261,7 +1326,9 @@ def process(self, matching_details):
12611326
dedupl_table_cells, key=lambda k: k["cell_id"]
12621327
)
12631328

1264-
if len(pdf_cells) > 300:
1329+
if (
1330+
len(pdf_cells) > 300
1331+
): # For performance, skip this step if there are too many pdf_cells
12651332
aligned_table_cells2 = dedupl_table_cells_sorted
12661333
else:
12671334
aligned_table_cells2 = self._align_table_cells_to_pdf(
@@ -1281,6 +1348,10 @@ def process(self, matching_details):
12811348
table_cells_wo = po2
12821349
max_cell_id = po3
12831350

1351+
# As the last step - correct cell bboxes in a way that they don't overlap:
1352+
if len(table_cells_wo) <= 300: # For performance reasons
1353+
table_cells_wo = self._find_overlapping(table_cells_wo)
1354+
12841355
self._log().debug("*** final_matches_wo")
12851356
self._log().debug(final_matches_wo)
12861357
self._log().debug("*** table_cells_wo")
111 KB
Loading

tests/test_tf_predictor.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ def test_tf_predictor():
508508
# List of dicts per table: [{"tf_responses":[...], "predict_details": {}}]
509509

510510
multi_tf_output = predictor.multi_table_predict(
511-
iocr_page, table_bboxes, False
511+
iocr_page, table_bboxes, True
512512
)
513513

514514
# Test output for validity, create visualizations...
@@ -539,10 +539,10 @@ def test_tf_predictor():
539539
img1.rectangle(((xi0, yi0), (xi1, yi1)), outline="gray")
540540
# Visualize original docling_ibm_models.tableformer predictions:
541541
for predicted_bbox in predict_details["prediction_bboxes_page"]:
542-
xp0 = predicted_bbox[0] - 2
543-
yp0 = predicted_bbox[1] - 2
544-
xp1 = predicted_bbox[2] + 2
545-
yp1 = predicted_bbox[3] + 2
542+
xp0 = predicted_bbox[0] - 1
543+
yp0 = predicted_bbox[1] - 1
544+
xp1 = predicted_bbox[2] + 1
545+
yp1 = predicted_bbox[3] + 1
546546
img1.rectangle(((xp0, yp0), (xp1, yp1)), outline="green")
547547

548548
# Check the structure of the list items
@@ -569,26 +569,26 @@ def test_tf_predictor():
569569
yc1 = text_cell["b"]
570570
img1.rectangle(((xc0, yc0), (xc1, yc1)), outline="red")
571571

572-
x0 = response["bbox"]["l"] - 6
573-
y0 = response["bbox"]["t"] - 6
574-
x1 = response["bbox"]["r"] + 6
575-
y1 = response["bbox"]["b"] + 6
572+
x0 = response["bbox"]["l"] - 2
573+
y0 = response["bbox"]["t"] - 2
574+
x1 = response["bbox"]["r"] + 2
575+
y1 = response["bbox"]["b"] + 2
576576

577577
if response["column_header"]:
578578
img1.rectangle(
579-
((x0, y0), (x1, y1)), outline="blue", width=5
579+
((x0, y0), (x1, y1)), outline="blue", width=2
580580
)
581581
elif response["row_header"]:
582582
img1.rectangle(
583-
((x0, y0), (x1, y1)), outline="magenta", width=5
583+
((x0, y0), (x1, y1)), outline="magenta", width=2
584584
)
585585
elif response["row_section"]:
586586
img1.rectangle(
587-
((x0, y0), (x1, y1)), outline="brown", width=5
587+
((x0, y0), (x1, y1)), outline="brown", width=2
588588
)
589589
else:
590590
img1.rectangle(
591-
((x0, y0), (x1, y1)), outline="blue", width=1
591+
((x0, y0), (x1, y1)), outline="black", width=1
592592
)
593593
if viz:
594594
viz_root = "./tests/test_data/viz/"

0 commit comments

Comments
 (0)