Skip to content
This repository was archived by the owner on Apr 11, 2025. It is now read-only.

Commit dd44691

Browse files
committed
[REF]: Find_closest_tls
1 parent ff9a501 commit dd44691

File tree

1 file changed

+55
-6
lines changed

1 file changed

+55
-6
lines changed

camelot/parsers/network.py

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
"""Implementation of network table parser."""
22

3+
from __future__ import annotations
4+
35
import copy
46
import math
57

68
import numpy as np
9+
from pdfminer.layout import LTTextLineHorizontal
10+
from pdfminer.layout import LTTextLineVertical
711

812
from ..core import ALL_ALIGNMENTS
913
from ..core import HORIZONTAL_ALIGNMENTS
@@ -26,6 +30,28 @@
2630
MINIMUM_TEXTLINES_IN_TABLE = 6
2731

2832

33+
class TextLine:
    """A placeholder class to represent a text line with bounding box attributes.

    Parameters
    ----------
    x0 : float
        The x-coordinate of the left edge of the text line.
    y0 : float
        The y-coordinate of the bottom edge of the text line.
    x1 : float
        The x-coordinate of the right edge of the text line.
    y1 : float
        The y-coordinate of the top edge of the text line.

    Attributes
    ----------
    x0 : float
        The x-coordinate of the left edge of the text line.
    x1 : float
        The x-coordinate of the right edge of the text line.
    y0 : float
        The y-coordinate of the bottom edge of the text line.
    y1 : float
        The y-coordinate of the top edge of the text line.
    """

    def __init__(self, x0: float, y0: float, x1: float, y1: float):
        # Arguments arrive as (left, bottom, right, top); each is stored
        # unchanged under the attribute of the same name.
        self.x0 = x0
        self.y0 = y0
        self.x1 = x1
        self.y1 = y1

    def __repr__(self) -> str:
        """Return a debug representation listing the four coordinates."""
        return (
            f"{type(self).__name__}("
            f"x0={self.x0!r}, y0={self.y0!r}, x1={self.x1!r}, y1={self.y1!r})"
        )
53+
54+
2955
def column_spread(left, right, col_anchors):
3056
"""Get the number of columns crossed by a segment [left, right]."""
3157
index_left = 0
@@ -38,19 +64,41 @@ def column_spread(left, right, col_anchors):
3864
return index_right - index_left
3965

4066

41-
def find_closest_tls(bbox, tls):
42-
"""Search for tls that are the closest but outside in all 4 directions."""
43-
left, right, top, bottom = None, None, None, None
67+
def find_closest_tls( # noqa: C901
68+
bbox: list[float], tls: list[LTTextLineHorizontal | LTTextLineVertical]
69+
) -> dict[str, LTTextLineHorizontal | LTTextLineVertical | None]:
70+
"""Search for textlines that are closest to the bounding box but outside in all four directions.
71+
72+
Parameters
73+
----------
74+
bbox : list of float
75+
A list containing the coordinates of the bounding box in the order
76+
[left, bottom, right, top].
77+
tls : list of TextLine
78+
A list of textline objects to search for the closest lines.
79+
80+
Returns
81+
-------
82+
dict
83+
A dictionary with keys "left", "right", "top", and "bottom",
84+
each mapping to the closest textline object in that direction or None if not found.
85+
"""
86+
left: LTTextLineHorizontal | LTTextLineVertical | None = None
87+
right: LTTextLineHorizontal | LTTextLineVertical | None = None
88+
top: LTTextLineHorizontal | LTTextLineVertical | None = None
89+
bottom: LTTextLineHorizontal | LTTextLineVertical | None = None
90+
4491
(bbox_left, bbox_bottom, bbox_right, bbox_top) = bbox
92+
4593
for textline in tls:
4694
if textline.x1 < bbox_left:
47-
# Left: check it overlaps horizontally
95+
# Left: check if it overlaps horizontally
4896
if textline.y0 > bbox_top or textline.y1 < bbox_bottom:
4997
continue
5098
if left is None or left.x1 < textline.x1:
5199
left = textline
52100
elif bbox_right < textline.x0:
53-
# Right: check it overlaps horizontally
101+
# Right: check if it overlaps horizontally
54102
if textline.y0 > bbox_top or textline.y1 < bbox_bottom:
55103
continue
56104
if right is None or right.x0 > textline.x0:
@@ -67,6 +115,7 @@ def find_closest_tls(bbox, tls):
67115
# Top
68116
if top is None or top.y0 > textline.y0:
69117
top = textline
118+
70119
return {
71120
"left": left,
72121
"right": right,
@@ -547,7 +596,7 @@ def __init__(
547596
row_tol=2,
548597
column_tol=0,
549598
debug=False,
550-
**kwargs
599+
**kwargs,
551600
):
552601
super().__init__(
553602
"network",

0 commit comments

Comments
 (0)