@@ -783,7 +783,6 @@ def __init__(
783783 def _generate_table_bbox (self ):
784784 user_provided_bboxes = self ._get_user_provided_bboxes ()
785785
786- # Ensure textlines is a list
787786 filtered_textlines = list (
788787 self ._get_filtered_textlines ()
789788 ) # Convert to list if not already
@@ -793,20 +792,21 @@ def _generate_table_bbox(self):
793792 set ()
794793 ) # Use a set for O(1) average time complexity for lookups
795794 self .table_bbox_parses = {}
796-
797795 if self .parse_details is not None :
798796 self .parse_details ["network_searches" ] = []
799797 self .parse_details ["bbox_searches" ] = []
800798 self .parse_details ["col_searches" ] = []
801799
802800 while textlines : # Continue while there are textlines to process
803- bbox_body = None
804801 bbox_body , gaps_hv = self ._get_bbox_body (user_provided_bboxes , textlines )
805802
806803 if bbox_body is None :
807804 break # Exit the loop if no more bbox_body can be generated
808805
809806 tls_in_bbox = textlines_overlapping_bbox (bbox_body , textlines )
807+ if not tls_in_bbox : # If there are no textlines in the bbox, break
808+ break
809+
810810 cols_boundaries = find_columns_boundaries (tls_in_bbox )
811811 cols_anchors = boundaries_to_split_lines (cols_boundaries )
812812
@@ -819,7 +819,6 @@ def _generate_table_bbox(self):
819819 gaps_hv ,
820820 )
821821
822- # Ensure bbox_full is hashable; convert to tuple if it's a list
823822 if isinstance (bbox_full , list ):
824823 bbox_full = tuple (bbox_full )
825824
@@ -841,8 +840,8 @@ def _generate_table_bbox(self):
841840 textlines = [tl for tl in textlines if tl not in textlines_processed ]
842841
843842 # Early exit if all textlines have been processed
844- if not textlines :
845- break # No more textlines to process, exit the loop
843+ if not textlines : # Check if there are no more textlines to process
844+ break
846845
847846 def _get_bbox_body (self , user_provided_bboxes , textlines ):
848847 if user_provided_bboxes is not None :
0 commit comments