Skip to content
This repository was archived by the owner on Apr 11, 2025. It is now read-only.

Commit 668d3c2

Browse files
committed
[IMP] add typing to handlers, update docstrings and pdfminer url
1 parent 6aebaaa commit 668d3c2

File tree

4 files changed

+22
-14
lines changed

4 files changed

+22
-14
lines changed

camelot/cli.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ def set_config(self, key, value):
3838

3939
@click.group(name="camelot")
4040
@click.version_option(version=__version__)
41-
@click.option("-q", "--quiet", is_flag=False, help="Suppress logs and warnings.")
41+
@click.option(
42+
"-q", "--quiet", is_flag=False, default=False, help="Suppress logs and warnings."
43+
)
4244
@click.option(
4345
"-p",
4446
"--pages",

camelot/handlers.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import multiprocessing as mp
66
import os
77
from pathlib import Path
8+
from typing import Any
89

910
from pypdf import PdfReader
1011
from pypdf import PdfWriter
@@ -116,15 +117,17 @@ def _get_pages(self, pages):
116117
result.extend(range(p["start"], p["end"] + 1))
117118
return sorted(set(result))
118119

119-
def _save_page(self, filepath: StrByteType | Path, page, temp):
120+
def _save_page(self, filepath: StrByteType | Path, page: int, temp: str):
120121
"""Saves specified page from PDF into a temporary directory.
121122
122123
Parameters
123124
----------
125+
filepath : str
126+
Filepath or URL of the PDF file.
124127
page : int
125128
Page number.
126-
layout_kwargs : dict, optional (default: {})
127-
A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs. # noqa
129+
temp : str
130+
Tmp directory.
128131
129132
130133
Returns
@@ -175,10 +178,10 @@ def _save_page(self, filepath: StrByteType | Path, page, temp):
175178

176179
def parse(
177180
self,
178-
flavor="lattice",
179-
suppress_stdout=False,
180-
parallel=False,
181-
layout_kwargs=None,
181+
flavor: str = "lattice",
182+
suppress_stdout: bool = False,
183+
parallel: bool = False,
184+
layout_kwargs: dict[str, Any] | None = None,
182185
**kwargs,
183186
):
184187
"""Extract tables by calling parser.get_tables on all single page PDFs.
@@ -194,7 +197,7 @@ def parse(
194197
Process pages in parallel using all available cpu cores.
195198
layout_kwargs : dict, optional (default: {})
196199
A dict of `pdfminer.layout.LAParams
197-
<https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
200+
<https://pdfminersix.readthedocs.io/en/latest/reference/composable.html#laparams>`_ kwargs.
198201
kwargs : dict
199202
See camelot.read_pdf kwargs.
200203
@@ -238,19 +241,22 @@ def parse(
238241

239242
return TableList(sorted(tables))
240243

241-
def _parse_page(self, page, tempdir, parser, suppress_stdout, layout_kwargs):
244+
def _parse_page(
245+
self, page: int, tempdir: str, parser, suppress_stdout: bool, layout_kwargs
246+
):
242247
"""Extract tables by calling parser.get_tables on a single page PDF.
243248
244249
Parameters
245250
----------
246-
page : str
251+
page : int
247252
Page number to parse
248253
parser : Lattice, Stream, Network or Hybrid
249254
The parser to use.
250255
suppress_stdout : bool
251256
Suppress logs and warnings.
252257
layout_kwargs : dict, optional (default: {})
253-
A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
258+
A dict of `pdfminer.layout.LAParams
259+
<https://pdfminersix.readthedocs.io/en/latest/reference/composable.html#laparams>`_ kwargs.
254260
255261
Returns
256262
-------

camelot/io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def read_pdf(
4646
Process pages in parallel using all available cpu cores.
4747
layout_kwargs : dict, optional (default: {})
4848
A dict of `pdfminer.layout.LAParams
49-
<https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
49+
<https://pdfminersix.readthedocs.io/en/latest/reference/composable.html#laparams>`_ kwargs.
5050
table_areas : list, optional (default: None)
5151
List of table area strings of the form x1,y1,x2,y2
5252
where (x1, y1) -> left-top and (x2, y2) -> right-bottom

tests/test_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
def get_text_from_pdf(filename):
1616
"""Method to extract text object from pdf."""
1717
# https://stackoverflow.com/questions/22898145/how-to-extract-text-and-text-coordinates-from-a-pdf-file
18-
# https://pdfminer-docs.readthedocs.io/programming.html#performing-layout-analysis
18+
# https://pdfminersix.readthedocs.io/en/latest/topic/converting_pdf_to_text.html
1919
document = open(filename, "rb")
2020
# Create resource manager
2121
rsrcmgr = PDFResourceManager()

0 commit comments

Comments
 (0)