Skip to content
This repository was archived by the owner on Apr 11, 2025. It is now read-only.

Commit 1c18a4f

Browse files
committed
[REF]: network parser generate_table_bbox -> split into mark_processed_textlines
[REF]: generate_table_bbox -> split into get_user_provided_bboxes
[REF]: generate_table_bbox -> split into get_filtered_textlines
1 parent 64a83c9 commit 1c18a4f

File tree

1 file changed

+27
-15
lines changed

1 file changed

+27
-15
lines changed

camelot/parsers/network.py

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -780,21 +780,10 @@ def __init__(
780780
)
781781

782782
def _generate_table_bbox(self):
783-
user_provided_bboxes = None
784-
if self.table_areas is not None:
785-
# User gave us table areas already. We will use their coordinates
786-
# to find column anchors.
787-
user_provided_bboxes = []
788-
for area_str in self.table_areas:
789-
user_provided_bboxes.append(bbox_from_str(area_str))
783+
user_provided_bboxes = self._get_user_provided_bboxes()
790784

791785
# Take all the textlines that are not just spaces
792-
all_textlines = [
793-
t
794-
for t in self.horizontal_text + self.vertical_text
795-
if len(t.get_text().strip()) > 0
796-
]
797-
textlines = self._apply_regions_filter(all_textlines)
786+
textlines = self._get_filtered_textlines()
798787

799788
textlines_processed = {}
800789
self.table_bbox_parses = {}
@@ -873,11 +862,34 @@ def _generate_table_bbox(self):
873862
self.parse_details["col_searches"].append(table_parse)
874863

875864
# Remember what textlines we processed, and repeat
876-
for textline in tls_in_bbox:
877-
textlines_processed[textline] = None
878865
textlines = list(
879866
filter(lambda textline: textline not in textlines_processed, textlines)
880867
)
868+
self._mark_processed_textlines(tls_in_bbox, textlines_processed, textlines)
869+
870+
def _get_filtered_textlines(self):
871+
all_textlines = [
872+
t
873+
for t in self.horizontal_text + self.vertical_text
874+
if len(t.get_text().strip()) > 0
875+
]
876+
return self._apply_regions_filter(all_textlines)
877+
878+
def _mark_processed_textlines(
879+
self, tls_in_bbox, textlines_processed, all_textlines
880+
):
881+
for textline in tls_in_bbox:
882+
textlines_processed[textline] = None
883+
all_textlines[:] = [
884+
textline
885+
for textline in all_textlines
886+
if textline not in textlines_processed
887+
]
888+
889+
def _get_user_provided_bboxes(self):
890+
if self.table_areas is not None:
891+
return [bbox_from_str(area_str) for area_str in self.table_areas]
892+
return None
881893

882894
def _generate_columns_and_rows(self, bbox, user_cols):
883895
# select elements which lie within table_bbox

0 commit comments

Comments
 (0)