Skip to content

Commit dfafcf8

Browse files
committed
refactor: streamline variable assignments and method calls in TableExtractionUseCase
1 parent 1b3b388 commit dfafcf8

File tree

1 file changed

+11
-22
lines changed

1 file changed

+11
-22
lines changed

pdf2table/usecases/table_extraction_use_case.py

Lines changed: 11 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -33,33 +33,29 @@ def __init__(
3333
visualize: bool = False,
3434
visualization_save_dir: str = None,
3535
):
36-
self._pdf_extractor = pdf_extractor
37-
self._table_detector = table_detector
38-
self._structure_recognizer = structure_recognizer
39-
self._ocr_service = ocr_service
40-
self._validation_service = TableValidationService()
36+
self.pdf_extractor = pdf_extractor
37+
self.table_detector = table_detector
38+
self.structure_recognizer = structure_recognizer
39+
self.validation_service = TableValidationService()
40+
self.grid_builder = TableGridBuilder(ocr_service)
4141
self._visualize = visualize
4242
self._visualization_save_dir = visualization_save_dir
4343

4444
def extract_tables_from_page(
4545
self, pdf_path: str, page_number: int
4646
) -> List[DetectedTable]:
4747
"""Extract all tables from a PDF page."""
48-
# Extract page image
49-
page_image = self._pdf_extractor.extract_page_image(pdf_path, page_number)
48+
page_image = self.pdf_extractor.extract_page_image(pdf_path, page_number)
5049

51-
# Detect tables
52-
detected_tables = self._table_detector.detect_tables(page_image)
50+
detected_tables = self.table_detector.detect_tables(page_image)
5351

54-
# Visualization: Table detection
5552
if self._visualize:
5653
visualize_table_detection(
5754
page_image,
5855
detected_tables,
5956
visualization_save_dir=self._visualization_save_dir,
6057
)
6158

62-
# Process each detected table
6359
structured_tables = []
6460
for idx, table in enumerate(detected_tables):
6561
try:
@@ -78,31 +74,25 @@ def _process_detected_table(
7874
self, page_image: PageImage, detected_table: DetectedTable, table_idx: int = 0
7975
) -> Optional[DetectedTable]:
8076
"""Process a single detected table to extract its structure."""
81-
# Recognize table structure
82-
detected_cells = self._structure_recognizer.recognize_structure(
77+
detected_cells = self.structure_recognizer.recognize_structure(
8378
page_image, detected_table.detection_box
8479
)
8580

86-
# Visualization: Table structure
8781
if self._visualize:
8882
visualize_table_structure(
8983
page_image,
9084
detected_cells,
9185
detected_table.detection_box,
92-
self._visualization_save_dir
86+
self._visualization_save_dir,
9387
)
9488

95-
# Validate detected structure
96-
if not self._validation_service.is_valid_table_structure(detected_cells):
89+
if not self.validation_service.is_valid_table_structure(detected_cells):
9790
return None
9891

99-
# Build table grid
100-
grid_builder = TableGridBuilder(self._ocr_service)
101-
table_grid = grid_builder.build_grid(
92+
table_grid = self.grid_builder.build_grid(
10293
detected_cells, page_image, detected_table.detection_box
10394
)
10495

105-
# Visualization: Cell grid
10696
if self._visualize and table_grid:
10797
visualize_cell_grid(
10898
table_grid,
@@ -114,6 +104,5 @@ def _process_detected_table(
114104
if not table_grid:
115105
return None
116106

117-
# Update detected table with grid
118107
detected_table.grid = table_grid
119108
return detected_table

0 commit comments

Comments (0)