Skip to content

Commit 82372a1

Browse files
fix: ReadingOrderPredictor: Improve the algorithm for the bounding boxes dilation (#128)
Signed-off-by: Nikos Livathinos <[email protected]>
1 parent bf12e99 commit 82372a1

File tree

2 files changed

+34
-12
lines changed

2 files changed

+34
-12
lines changed

docling_ibm_models/reading_order/reading_order_rb.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ class ReadingOrderPredictor:
5353
def __init__(self):
5454
self.dilated_page_element = True
5555

56+
# Apply horizontal dilation only if neither side of the box would widen by more than this threshold, expressed as a fraction of the page width
57+
self._horizontal_dilation_threshold_norm = 0.15
58+
5659
self.initialise()
5760

5861
def initialise(self):
@@ -236,6 +239,7 @@ def _predict_page(self, page_elements: List[PageElement]) -> List[PageElement]:
236239
dilated_page_elements: List[PageElement] = copy.deepcopy(
237240
page_elements
238241
) # deep-copy
242+
239243
dilated_page_elements = self._do_horizontal_dilation(
240244
page_elements, dilated_page_elements
241245
)
@@ -397,6 +401,11 @@ def _has_sequence_interruption(
397401
return False
398402

399403
def _do_horizontal_dilation(self, page_elems, dilated_page_elems):
404+
# Compute the dilation threshold
405+
th = 0.0
406+
if page_elems:
407+
page_size = page_elems[0].page_size
408+
th = self._horizontal_dilation_threshold_norm * page_size.width
400409

401410
for i, pelem_i in enumerate(dilated_page_elems):
402411

@@ -409,14 +418,24 @@ def _do_horizontal_dilation(self, page_elems, dilated_page_elems):
409418
if i in self.up_map and len(self.up_map[i]) > 0:
410419
pelem_up = page_elems[self.up_map[i][0]]
411420

412-
x0 = min(x0, pelem_up.l)
413-
x1 = max(x1, pelem_up.r)
421+
# Dilate towards the element above only if neither side would widen by more than the threshold
422+
x0_dil = min(x0, pelem_up.l)
423+
x1_dil = max(x1, pelem_up.r)
424+
if (x0 - x0_dil) > th or (x1_dil - x1) > th:
425+
continue
426+
x0 = x0_dil
427+
x1 = x1_dil
414428

415429
if i in self.dn_map and len(self.dn_map[i]) > 0:
416430
pelem_dn = page_elems[self.dn_map[i][0]]
417431

418-
x0 = min(x0, pelem_dn.l)
419-
x1 = max(x1, pelem_dn.r)
432+
# Dilate towards the element below only if neither side would widen by more than the threshold
433+
x0_dil = min(x0, pelem_dn.l)
434+
x1_dil = max(x1, pelem_dn.r)
435+
if (x0 - x0_dil) > th or (x1_dil - x1) > th:
436+
continue
437+
x0 = x0_dil
438+
x1 = x1_dil
420439

421440
pelem_i.l = x0
422441
pelem_i.r = x1

tests/test_reading_order.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -135,12 +135,16 @@ def test_readingorder():
135135
if score == 0:
136136
continue
137137
# Identify special cases ...
138-
if filename in ["doc_906d54a21ef3c7bfac03f4bb613b0c79ef32fdf81b362450c79e98a96f88708a_page_000001.png",
139-
"doc_2cd17a32ee330a239e19c915738df0c27e8ec3635a60a7e16e2a0cf3868d4af3_page_000001.png",
140-
"doc_bcb3dafc35b5e7476fd1b9cd6eccf5eeef936cd5b13ad846a4943f1e7797f4e9_page_000001.png",
141-
"doc_a0edae1fa147c7bb78ebc493743a68ba4372b5ead31f2a2b146c35119462379e_page_000001.png",
142-
"doc_94ba5468fcb6277721947697048846dc0d0551296be3b45f5918ab857d21dcc7_page_000001.png",
143-
"doc_cbb4a13ffd01d9f777fdb939451d6a21cea1b869ee50d79581451e3601df9ec8_page_000001.png"]:
138+
if filename in ["doc_906d54a21ef3c7bfac03f4bb613b0c79ef32fdf81b362450c79e98a96f88708a_page_000001.png", # 0.720588
139+
"doc_2cd17a32ee330a239e19c915738df0c27e8ec3635a60a7e16e2a0cf3868d4af3_page_000001.png", # 0.64920
140+
"doc_bcb3dafc35b5e7476fd1b9cd6eccf5eeef936cd5b13ad846a4943f1e7797f4e9_page_000001.png", # 0.65
141+
"doc_a0edae1fa147c7bb78ebc493743a68ba4372b5ead31f2a2b146c35119462379e_page_000001.png", # 0.82857
142+
"doc_94ba5468fcb6277721947697048846dc0d0551296be3b45f5918ab857d21dcc7_page_000001.png", # 0.857142
143+
# "doc_cbb4a13ffd01d9f777fdb939451d6a21cea1b869ee50d79581451e3601df9ec8_page_000001.png",
144+
145+
"doc_e2b604a3fb1541b82b6af8caca05682dff0c7735e0a3a4fa7c6a68246fb60e57_page_000001.png", # 0.657142
146+
"doc_827d21de372a2c26237ee1db526460851ae71c1867761776583535f532432e32_page_000001.png", # 0.8922077
147+
"doc_b862cd0d6f06c06ee5ab7729ed4e8ce58e6964eb0f1ab98b3865b57a4808216f_page_000001.png"]: # 0.642857
144148
# print(f"{os.path.basename(filename)}: {score}")
145149
assert score>=0.60, f"reading-order score={score}>0.60"
146150
else:
@@ -264,5 +268,4 @@ def test_readingorder_multipage():
264268
pred_elements = romodel.predict_reading_order(page_elements=true_elements)
265269
for true_elem, pred_elem in zip(true_elements, pred_elements):
266270
print("true: ", str(true_elem), ", pred: ", str(pred_elem))
267-
"""
268-
271+
"""

0 commit comments

Comments
 (0)