Skip to content
This repository was archived by the owner on Apr 11, 2025. It is now read-only.

Commit eac06c7

Browse files
committed
[REF]: Fix Network parser: generate_table_bbox executing infinitely
Fixes #245. In some documents the Network parser could compile a table. It was running infinitely.
1 parent 8727534 commit eac06c7

File tree

1 file changed

+5
-6
lines changed

1 file changed

+5
-6
lines changed

camelot/parsers/network.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -783,7 +783,6 @@ def __init__(
783783
def _generate_table_bbox(self):
784784
user_provided_bboxes = self._get_user_provided_bboxes()
785785

786-
# Ensure textlines is a list
787786
filtered_textlines = list(
788787
self._get_filtered_textlines()
789788
) # Convert to list if not already
@@ -793,20 +792,21 @@ def _generate_table_bbox(self):
793792
set()
794793
) # Use a set for O(1) average time complexity for lookups
795794
self.table_bbox_parses = {}
796-
797795
if self.parse_details is not None:
798796
self.parse_details["network_searches"] = []
799797
self.parse_details["bbox_searches"] = []
800798
self.parse_details["col_searches"] = []
801799

802800
while textlines: # Continue while there are textlines to process
803-
bbox_body = None
804801
bbox_body, gaps_hv = self._get_bbox_body(user_provided_bboxes, textlines)
805802

806803
if bbox_body is None:
807804
break # Exit the loop if no more bbox_body can be generated
808805

809806
tls_in_bbox = textlines_overlapping_bbox(bbox_body, textlines)
807+
if not tls_in_bbox: # If there are no textlines in the bbox, break
808+
break
809+
810810
cols_boundaries = find_columns_boundaries(tls_in_bbox)
811811
cols_anchors = boundaries_to_split_lines(cols_boundaries)
812812

@@ -819,7 +819,6 @@ def _generate_table_bbox(self):
819819
gaps_hv,
820820
)
821821

822-
# Ensure bbox_full is hashable; convert to tuple if it's a list
823822
if isinstance(bbox_full, list):
824823
bbox_full = tuple(bbox_full)
825824

@@ -841,8 +840,8 @@ def _generate_table_bbox(self):
841840
textlines = [tl for tl in textlines if tl not in textlines_processed]
842841

843842
# Early exit if all textlines have been processed
844-
if not textlines:
845-
break # No more textlines to process, exit the loop
843+
if not textlines: # Check if there are no more textlines to process
844+
break
846845

847846
def _get_bbox_body(self, user_provided_bboxes, textlines):
848847
if user_provided_bboxes is not None:

0 commit comments

Comments
 (0)