44 PageImage ,
55 DetectedTable ,
66)
7+ from pdf2table .usecases .dtos import TableExtractionResponse
78from pdf2table .usecases .services .table_services import (
89 TableValidationService ,
910)
@@ -43,7 +44,7 @@ def __init__(
4344
4445 def extract_tables (
4546 self , pdf_path : str , page_number : Optional [int ] = None
46- ) -> List [ DetectedTable ] :
47+ ) -> TableExtractionResponse :
4748 """
4849 Extract all tables from a PDF document.
4950
@@ -52,26 +53,29 @@ def extract_tables(
5253 page_number: Optional page number to extract. If None, extracts from all pages.
5354
5455 Returns:
55- List of DetectedTable objects from the specified page(s)
56+ TableExtractionResponse object containing extracted tables
5657 """
57- if page_number is not None :
58- return self .extract_tables_from_page (pdf_path , page_number )
59-
60- # Extract from all pages
61- page_count = self .pdf_extractor .get_page_count (pdf_path )
62- all_tables = []
63-
64- for page_num in range (page_count ):
65- try :
66- tables = self .extract_tables_from_page (pdf_path , page_num )
67- all_tables .extend (tables )
68- except Exception as e :
69- print (f"Error processing page { page_num } : { e } " )
70- continue
71-
72- return all_tables
73-
74- def extract_tables_from_page (
58+ try :
59+ if page_number is not None :
60+ tables = self ._extract_tables_from_page (pdf_path , page_number )
61+ return TableExtractionResponse (tables , pdf_path )
62+
63+ page_count = self .pdf_extractor .get_page_count (pdf_path )
64+ all_tables = []
65+
66+ for page_num in range (page_count ):
67+ try :
68+ tables = self ._extract_tables_from_page (pdf_path , page_num )
69+ all_tables .extend (tables )
70+ except Exception as e :
71+ print (f"Error processing page { page_num } : { e } " )
72+ continue
73+
74+ return TableExtractionResponse (all_tables , pdf_path )
75+ except Exception as e :
76+ return TableExtractionResponse .error (str (e ), pdf_path )
77+
78+ def _extract_tables_from_page (
7579 self , pdf_path : str , page_number : int
7680 ) -> List [DetectedTable ]:
7781 """Extract all tables from a PDF page."""
0 commit comments