@@ -53,6 +53,9 @@ class ReadingOrderPredictor:
def __init__(self):
    """Set the dilation configuration, then run ``self.initialise()``."""
    # Upper bound on horizontal dilation, expressed as a fraction of the
    # page width. _do_horizontal_dilation multiplies it by the page width
    # and only widens an element towards a neighbour when the growth on
    # each side does not exceed the resulting value.
    self._horizontal_dilation_threshold_norm = 0.15

    # NOTE(review): presumably toggles the use of dilated page elements
    # during prediction -- confirm against the code that reads this flag.
    self.dilated_page_element = True

    # Kept last so that both attributes above exist before initialisation.
    self.initialise()
5760
5861 def initialise (self ):
@@ -236,6 +239,7 @@ def _predict_page(self, page_elements: List[PageElement]) -> List[PageElement]:
236239 dilated_page_elements : List [PageElement ] = copy .deepcopy (
237240 page_elements
238241 ) # deep-copy
242+
239243 dilated_page_elements = self ._do_horizontal_dilation (
240244 page_elements , dilated_page_elements
241245 )
@@ -397,6 +401,11 @@ def _has_sequence_interruption(
397401 return False
398402
399403 def _do_horizontal_dilation (self , page_elems , dilated_page_elems ):
404+ # Compute the dilation threshold
405+ th = 0.0
406+ if page_elems :
407+ page_size = page_elems [0 ].page_size
408+ th = self ._horizontal_dilation_threshold_norm * page_size .width
400409
401410 for i , pelem_i in enumerate (dilated_page_elems ):
402411
@@ -409,14 +418,24 @@ def _do_horizontal_dilation(self, page_elems, dilated_page_elems):
409418 if i in self .up_map and len (self .up_map [i ]) > 0 :
410419 pelem_up = page_elems [self .up_map [i ][0 ]]
411420
412- x0 = min (x0 , pelem_up .l )
413- x1 = max (x1 , pelem_up .r )
421+ # Apply threshold for horizontal dilation
422+ x0_dil = min (x0 , pelem_up .l )
423+ x1_dil = max (x1 , pelem_up .r )
424+ if (x0 - x0_dil ) > th or (x1_dil - x1 ) > th :
425+ continue
426+ x0 = x0_dil
427+ x1 = x1_dil
414428
415429 if i in self .dn_map and len (self .dn_map [i ]) > 0 :
416430 pelem_dn = page_elems [self .dn_map [i ][0 ]]
417431
418- x0 = min (x0 , pelem_dn .l )
419- x1 = max (x1 , pelem_dn .r )
432+ # Apply threshold for horizontal dilation
433+ x0_dil = min (x0 , pelem_dn .l )
434+ x1_dil = max (x1 , pelem_dn .r )
435+ if (x0 - x0_dil ) > th or (x1_dil - x1 ) > th :
436+ continue
437+ x0 = x0_dil
438+ x1 = x1_dil
420439
421440 pelem_i .l = x0
422441 pelem_i .r = x1
0 commit comments