55import multiprocessing as mp
66import os
77from pathlib import Path
8+ from typing import Any
89
910from pypdf import PdfReader
1011from pypdf import PdfWriter
@@ -116,15 +117,17 @@ def _get_pages(self, pages):
116117 result .extend (range (p ["start" ], p ["end" ] + 1 ))
117118 return sorted (set (result ))
118119
119- def _save_page (self , filepath : StrByteType | Path , page , temp ):
120+ def _save_page (self , filepath : StrByteType | Path , page : int , temp : str ):
120121 """Saves specified page from PDF into a temporary directory.
121122
122123 Parameters
123124 ----------
125+ filepath : StrByteType | Path
126+ Filepath or URL of the PDF file.
124127 page : int
125128 Page number.
126- layout_kwargs : dict, optional (default: {})
127- A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs. # noqa
129+ temp : str
130+ Temporary directory into which the page is saved.
128131
129132
130133 Returns
@@ -175,10 +178,10 @@ def _save_page(self, filepath: StrByteType | Path, page, temp):
175178
176179 def parse (
177180 self ,
178- flavor = "lattice" ,
179- suppress_stdout = False ,
180- parallel = False ,
181- layout_kwargs = None ,
181+ flavor : str = "lattice" ,
182+ suppress_stdout : bool = False ,
183+ parallel : bool = False ,
184+ layout_kwargs : dict [ str , Any ] | None = None ,
182185 ** kwargs ,
183186 ):
184187 """Extract tables by calling parser.get_tables on all single page PDFs.
@@ -194,7 +197,7 @@ def parse(
194197 Process pages in parallel using all available cpu cores.
195198 layout_kwargs : dict, optional (default: None)
196199 A dict of `pdfminer.layout.LAParams
197- <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33 >`_ kwargs.
200+ <https://pdfminersix.readthedocs.io/en/latest/reference/composable.html#laparams>`_ kwargs.
198201 kwargs : dict
199202 See camelot.read_pdf kwargs.
200203
@@ -238,19 +241,22 @@ def parse(
238241
239242 return TableList (sorted (tables ))
240243
241- def _parse_page (self , page , tempdir , parser , suppress_stdout , layout_kwargs ):
244+ def _parse_page (
245+ self , page : int , tempdir : str , parser , suppress_stdout : bool , layout_kwargs
246+ ):
242247 """Extract tables by calling parser.get_tables on a single page PDF.
243248
244249 Parameters
245250 ----------
246- page : str
251+ page : int
247252 Page number to parse
248253 parser : Lattice, Stream, Network or Hybrid
249254 The parser to use.
250255 suppress_stdout : bool
251256 Suppress logs and warnings.
252257 layout_kwargs : dict
253- A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
258+ A dict of `pdfminer.layout.LAParams
259+ <https://pdfminersix.readthedocs.io/en/latest/reference/composable.html#laparams>`_ kwargs.
254260
255261 Returns
256262 -------
0 commit comments