11"""Implementation of network table parser."""
22
3+ from __future__ import annotations
4+
35import copy
46import math
57
68import numpy as np
9+ from pdfminer .layout import LTTextLineHorizontal
10+ from pdfminer .layout import LTTextLineVertical
711
812from ..core import ALL_ALIGNMENTS
913from ..core import HORIZONTAL_ALIGNMENTS
2630MINIMUM_TEXTLINES_IN_TABLE = 6
2731
2832
class TextLine:
    """A placeholder class to represent a text line by its bounding box.

    The constructor takes the coordinates positionally in the order
    ``(x0, y0, x1, y1)``, i.e. the bottom-left corner followed by the
    top-right corner.

    Attributes
    ----------
    x0 : float
        The x-coordinate of the left edge of the text line.
    y0 : float
        The y-coordinate of the bottom edge of the text line.
    x1 : float
        The x-coordinate of the right edge of the text line.
    y1 : float
        The y-coordinate of the top edge of the text line.
    """

    def __init__(self, x0: float, y0: float, x1: float, y1: float):
        self.x0 = x0
        self.y0 = y0
        self.x1 = x1
        self.y1 = y1

    def __repr__(self) -> str:
        """Return a constructor-like representation, useful when debugging."""
        return (
            f"{type(self).__name__}"
            f"(x0={self.x0!r}, y0={self.y0!r}, x1={self.x1!r}, y1={self.y1!r})"
        )
53+
54+
2955def column_spread (left , right , col_anchors ):
3056 """Get the number of columns crossed by a segment [left, right]."""
3157 index_left = 0
@@ -38,19 +64,41 @@ def column_spread(left, right, col_anchors):
3864 return index_right - index_left
3965
4066
41- def find_closest_tls (bbox , tls ):
42- """Search for tls that are the closest but outside in all 4 directions."""
43- left , right , top , bottom = None , None , None , None
67+ def find_closest_tls ( # noqa: C901
68+ bbox : list [float ], tls : list [LTTextLineHorizontal | LTTextLineVertical ]
69+ ) -> dict [str , LTTextLineHorizontal | LTTextLineVertical | None ]:
70+ """Search for textlines that are closest to the bounding box but outside in all four directions.
71+
72+ Parameters
73+ ----------
74+ bbox : list of float
75+ A list containing the coordinates of the bounding box in the order
76+ [left, bottom, right, top].
77+ tls : list of TextLine
78+ A list of textline objects to search for the closest lines.
79+
80+ Returns
81+ -------
82+ dict
83+ A dictionary with keys "left", "right", "top", and "bottom",
84+ each mapping to the closest textline object in that direction or None if not found.
85+ """
86+ left : LTTextLineHorizontal | LTTextLineVertical | None = None
87+ right : LTTextLineHorizontal | LTTextLineVertical | None = None
88+ top : LTTextLineHorizontal | LTTextLineVertical | None = None
89+ bottom : LTTextLineHorizontal | LTTextLineVertical | None = None
90+
4491 (bbox_left , bbox_bottom , bbox_right , bbox_top ) = bbox
92+
4593 for textline in tls :
4694 if textline .x1 < bbox_left :
47- # Left: check it overlaps horizontally
95+ # Left: check if it overlaps horizontally
4896 if textline .y0 > bbox_top or textline .y1 < bbox_bottom :
4997 continue
5098 if left is None or left .x1 < textline .x1 :
5199 left = textline
52100 elif bbox_right < textline .x0 :
53- # Right: check it overlaps horizontally
101+ # Right: check if it overlaps horizontally
54102 if textline .y0 > bbox_top or textline .y1 < bbox_bottom :
55103 continue
56104 if right is None or right .x0 > textline .x0 :
@@ -67,6 +115,7 @@ def find_closest_tls(bbox, tls):
67115 # Top
68116 if top is None or top .y0 > textline .y0 :
69117 top = textline
118+
70119 return {
71120 "left" : left ,
72121 "right" : right ,
@@ -547,7 +596,7 @@ def __init__(
547596 row_tol = 2 ,
548597 column_tol = 0 ,
549598 debug = False ,
550- ** kwargs
599+ ** kwargs ,
551600 ):
552601 super ().__init__ (
553602 "network" ,
0 commit comments