Skip to content
This repository was archived by the owner on Apr 2, 2025. It is now read-only.

Commit 56ccc20

Browse files
committed
Update Docstrings on core.py
1 parent fbab152 commit 56ccc20

File tree

1 file changed

+84
-43
lines changed

1 file changed

+84
-43
lines changed

camelot/core.py

Lines changed: 84 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""Contains the core functions to parse tables from PDFs."""
2+
13
import math
24
import os
35
import sqlite3
@@ -30,6 +32,7 @@
3032

3133
class TextAlignment:
3234
"""Represents a list of textlines sharing an alignment on a coordinate.
35+
3336
The alignment can be left/right/middle or top/bottom/center.
3437
(PDF coordinate space)
3538
@@ -59,7 +62,7 @@ def __init__(self, coord, textline, align):
5962
self.textlines = [textline]
6063
self.align = align
6164

62-
def __repr__(self):
65+
def __repr__(self): # noqa D105
6366
text_inside = " | ".join(
6467
map(lambda x: x.get_text(), self.textlines[:2])
6568
).replace("\n", "")
@@ -79,8 +82,9 @@ def register_aligned_textline(self, textline, coord):
7982

8083

8184
class TextEdge(TextAlignment):
82-
"""Defines a text edge coordinates relative to a left-bottom
83-
origin. (PDF coordinate space)
85+
"""Defines a text edge coordinates relative to a left-bottom origin.
86+
87+
(PDF coordinate space)
8488
An edge is an alignment bounded over a segment.
8589
8690
Parameters
@@ -108,7 +112,7 @@ def __init__(self, coord, textline, align):
108112
self.y1 = textline.y1
109113
self.is_valid = False
110114

111-
def __repr__(self):
115+
def __repr__(self): # noqa D105
112116
x = round(self.coord, 2)
113117
y0 = round(self.y0, 2)
114118
y1 = round(self.y1, 2)
@@ -117,7 +121,9 @@ def __repr__(self):
117121
)
118122

119123
def update_coords(self, x, textline, edge_tol=50):
120-
"""Updates the text edge's x and bottom y coordinates and sets
124+
"""Update text edge coordinates.
125+
126+
Update the text edge's x and bottom y coordinates and set
121127
the is_valid attribute.
122128
"""
123129
if math.isclose(self.y0, textline.y0, abs_tol=edge_tol):
@@ -146,7 +152,7 @@ def _update_alignment(self, alignment, coord, textline):
146152
return NotImplemented
147153

148154
def _register_textline(self, textline):
149-
"""Updates an existing text edge in the current dict."""
155+
"""Update an existing text edge in the current dict."""
150156
coords = get_textline_coords(textline)
151157
for alignment_id, alignment_array in self._text_alignments.items():
152158
coord = coords[alignment_id]
@@ -180,7 +186,9 @@ def _register_textline(self, textline):
180186

181187

182188
class TextEdges(TextAlignments):
183-
"""Defines a dict of left, right and middle text edges found on
189+
"""Defines a dict of text edges on the PDF page.
190+
191+
The dict contains the left, right and middle text edges found on
184192
the PDF page. The dict has three keys based on the alignments,
185193
and each key's value is a list of camelot.core.TextEdge objects.
186194
"""
@@ -194,25 +202,24 @@ def _create_new_text_alignment(self, coord, textline, align):
194202
return TextEdge(coord, textline, align)
195203

196204
def add(self, coord, textline, align):
197-
"""Adds a new text edge to the current dict."""
205+
"""Add a new text edge to the current dict."""
198206
te = self._create_new_text_alignment(coord, textline, align)
199207
self._text_alignments[align].append(te)
200208

201209
def _update_alignment(self, alignment, coord, textline):
202210
alignment.update_coords(coord, textline, self.edge_tol)
203211

204212
def generate(self, textlines):
205-
"""Generates the text edges dict based on horizontal text
206-
rows.
207-
"""
213+
"""Generate the text edges dict based on horizontal text rows."""
208214
for tl in textlines:
209215
if len(tl.get_text().strip()) > 1: # TODO: hacky
210216
self._register_textline(tl)
211217

212218
def get_relevant(self):
213-
"""Returns the list of relevant text edges (all share the same
214-
alignment) based on which list intersects horizontal text rows
215-
the most.
219+
"""Return the list of relevant text edges.
220+
221+
The returned edges all share the same alignment and are selected
222+
based on which list intersects horizontal text rows the most.
216223
"""
217224
intersections_sum = {
218225
"left": sum(
@@ -239,8 +246,9 @@ def get_relevant(self):
239246
)
240247

241248
def get_table_areas(self, textlines, relevant_textedges):
242-
"""Returns a dict of interesting table areas on the PDF page
243-
calculated using relevant text edges.
249+
"""Return a dict of interesting table areas on the PDF page.
250+
251+
The table areas are calculated using relevant text edges.
244252
"""
245253

246254
def pad(area, average_row_height):
@@ -312,7 +320,9 @@ def pad(area, average_row_height):
312320

313321

314322
class Cell:
315-
"""Defines a cell in a table with coordinates relative to a
323+
"""Defines a cell in a table.
324+
325+
The coordinates are relative to a
316326
left-bottom origin. (PDF coordinate space)
317327
318328
Parameters
@@ -370,19 +380,19 @@ def __init__(self, x1, y1, x2, y2):
370380
self.vspan = False
371381
self._text = ""
372382

373-
def __repr__(self):
383+
def __repr__(self): # noqa D105
374384
x1 = round(self.x1)
375385
y1 = round(self.y1)
376386
x2 = round(self.x2)
377387
y2 = round(self.y2)
378388
return f"<Cell x1={x1} y1={y1} x2={x2} y2={y2}>"
379389

380390
@property
381-
def text(self):
391+
def text(self): # noqa D102
382392
return self._text
383393

384394
@text.setter
385-
def text(self, t):
395+
def text(self, t): # noqa D105
386396
self._text = "".join([self._text, t])
387397

388398
@property
@@ -392,8 +402,9 @@ def bound(self):
392402

393403

394404
class Table:
395-
"""Defines a table with coordinates relative to a left-bottom
396-
origin. (PDF coordinate space)
405+
"""Defines a table with coordinates relative to a left-bottom origin.
406+
407+
(PDF coordinate space)
397408
398409
Parameters
399410
----------
@@ -443,9 +454,28 @@ def __init__(self, cols, rows):
443454
self._image_path = None # Temporary file to hold an image of the pdf
444455

445456
def __repr__(self):
457+
"""Return a string representation of the class.
458+
459+
Returns
460+
-------
461+
str
462+
Short description containing the class name and the table shape.
463+
"""
446464
return f"<{self.__class__.__name__} shape={self.shape}>"
447465

448466
def __lt__(self, other):
467+
"""Return True if this table is ordered before the other table.
468+
469+
Parameters
470+
----------
471+
other : camelot.core.Table
472+
The table to compare against (its page and order are read).
473+
474+
Returns
475+
-------
476+
bool
477+
True if this table precedes ``other``.
478+
"""
449479
if self.page == other.page:
450480
if self.order < other.order:
451481
return True
@@ -462,7 +492,9 @@ def data(self):
462492

463493
@property
464494
def parsing_report(self):
465-
"""Returns a parsing report with %accuracy, %whitespace,
495+
"""Return a parsing report.
496+
497+
The report contains % accuracy, % whitespace,
466498
table number on page and page number.
467499
"""
468500
# pretty?
@@ -475,7 +507,7 @@ def parsing_report(self):
475507
return report
476508

477509
def record_metadata(self, parser):
478-
"""Record data about the origin of the table"""
510+
"""Record data about the origin of the table."""
479511
self.flavor = parser.id
480512
self.filename = parser.filename
481513
self.debug_info = parser.debug_info
@@ -489,7 +521,7 @@ def record_metadata(self, parser):
489521
self.pdf_size = (parser.pdf_width, parser.pdf_height)
490522

491523
def get_pdf_image(self):
492-
"""Compute pdf image and cache it"""
524+
"""Compute pdf image and cache it."""
493525
if self._image is None:
494526
if self._image_path is None:
495527
self._image_path = build_file_path_in_temp_dir(
@@ -501,14 +533,16 @@ def get_pdf_image(self):
501533
return self._image
502534

503535
def set_all_edges(self):
504-
"""Sets all table edges to True."""
536+
"""Set all table edges to True."""
505537
for row in self.cells:
506538
for cell in row:
507539
cell.left = cell.right = cell.top = cell.bottom = True
508540
return self
509541

510542
def set_edges(self, vertical, horizontal, joint_tol=2):
511-
"""Sets a cell's edges to True depending on whether the cell's
543+
"""Set the edges of the table's cells from detected lines.
544+
545+
Set a cell's edges to True depending on whether the cell's
512546
coordinates overlap with the line's coordinates within a
513547
tolerance.
514548
@@ -518,7 +552,8 @@ def set_edges(self, vertical, horizontal, joint_tol=2):
518552
List of detected vertical lines.
519553
horizontal : list
520554
List of detected horizontal lines.
521-
555+
joint_tol : int, optional
556+
Tolerance within which a cell's coordinates are treated as overlapping a line's coordinates, by default 2
522557
"""
523558

524559
def find_close_point(over, coord, joint_tol):
@@ -584,7 +619,9 @@ def set_border(self):
584619
return self
585620

586621
def set_span(self):
587-
"""Sets a cell's hspan or vspan attribute to True depending
622+
"""Set a cell's hspan or vspan attribute.
623+
624+
Set the cell's hspan or vspan attribute to True depending
588625
on whether the cell spans horizontally or vertically.
589626
"""
590627
for row in self.cells:
@@ -616,13 +653,15 @@ def set_span(self):
616653

617654
def copy_spanning_text(self, copy_text=None):
618655
"""Copies over text in empty spanning cells.
656+
619657
Parameters
620658
----------
621659
copy_text : list, optional (default: None)
622660
{'h', 'v'}
623661
Select one or more strings from above and pass them as a list
624662
to specify the direction in which text should be copied over
625663
when a cell spans multiple rows or columns.
664+
626665
Returns
627666
-------
628667
t : camelot.core.Table
@@ -643,7 +682,7 @@ def copy_spanning_text(self, copy_text=None):
643682
return self
644683

645684
def to_csv(self, path, **kwargs):
646-
"""Writes Table to a comma-separated values (csv) file.
685+
"""Write Table(s) to a comma-separated values (csv) file.
647686
648687
For kwargs, check :meth:`pandas.DataFrame.to_csv`.
649688
@@ -658,7 +697,7 @@ def to_csv(self, path, **kwargs):
658697
self.df.to_csv(path, **kw)
659698

660699
def to_json(self, path, **kwargs):
661-
"""Writes Table to a JSON file.
700+
"""Write Table(s) to a JSON file.
662701
663702
For kwargs, check :meth:`pandas.DataFrame.to_json`.
664703
@@ -675,7 +714,7 @@ def to_json(self, path, **kwargs):
675714
f.write(json_string)
676715

677716
def to_excel(self, path, **kwargs):
678-
"""Writes Table to an Excel file.
717+
"""Write Table(s) to an Excel file.
679718
680719
For kwargs, check :meth:`pandas.DataFrame.to_excel`.
681720
@@ -695,7 +734,7 @@ def to_excel(self, path, **kwargs):
695734
writer.save()
696735

697736
def to_html(self, path, **kwargs):
698-
"""Writes Table to an HTML file.
737+
"""Write Table(s) to an HTML file.
699738
700739
For kwargs, check :meth:`pandas.DataFrame.to_html`.
701740
@@ -710,7 +749,7 @@ def to_html(self, path, **kwargs):
710749
f.write(html_string)
711750

712751
def to_markdown(self, path, **kwargs):
713-
"""Writes Table to a Markdown file.
752+
"""Write Table(s) to a Markdown file.
714753
715754
For kwargs, check :meth:`pandas.DataFrame.to_markdown`.
716755
@@ -725,7 +764,7 @@ def to_markdown(self, path, **kwargs):
725764
f.write(md_string)
726765

727766
def to_sqlite(self, path, **kwargs):
728-
"""Writes Table to sqlite database.
767+
"""Write Table(s) to sqlite database.
729768
730769
For kwargs, check :meth:`pandas.DataFrame.to_sql`.
731770
@@ -745,8 +784,9 @@ def to_sqlite(self, path, **kwargs):
745784

746785

747786
class TableList:
748-
"""Defines a list of camelot.core.Table objects. Each table can
749-
be accessed using its index.
787+
"""Defines a list of camelot.core.Table objects.
788+
789+
Each table can be accessed using its index.
750790
751791
Attributes
752792
----------
@@ -755,19 +795,19 @@ class TableList:
755795
756796
"""
757797

758-
def __init__(self, tables):
798+
def __init__(self, tables): # noqa D105
759799
self._tables = tables
760800

761-
def __repr__(self):
801+
def __repr__(self): # noqa D105
762802
return f"<{self.__class__.__name__} n={self.n}>"
763803

764-
def __len__(self):
804+
def __len__(self): # noqa D105
765805
return len(self._tables)
766806

767-
def __getitem__(self, idx):
807+
def __getitem__(self, idx): # noqa D105
768808
return self._tables[idx]
769809

770-
def __iter__(self):
810+
def __iter__(self): # noqa D105
771811
yield from self._tables
772812

773813
@staticmethod
@@ -776,6 +816,7 @@ def _format_func(table, f):
776816

777817
@property
778818
def n(self):
819+
"""The number of tables in the list."""
779820
return len(self)
780821

781822
def _write_file(self, f=None, **kwargs):
@@ -801,7 +842,7 @@ def _compress_dir(self, **kwargs):
801842
z.write(filepath, os.path.basename(filepath))
802843

803844
def export(self, path, f="csv", compress=False):
804-
"""Exports the list of tables to specified file format.
845+
"""Export the list of tables to specified file format.
805846
806847
Parameters
807848
----------

0 commit comments

Comments
 (0)