1- from typing import List , Union
1+ from typing import List
22
33from unstructured_inference .inference .elements import TextRegion
44
55
class Column:
    """Class to capture a column of text in the layout. Will update the midpoint of the
    column as layout elements are added to help with new element comparisons.

    ``layout_elements`` holds the elements assigned to the column and ``x_midpoint`` is
    the arithmetic mean of their ``x_midpoint`` values (0 while the column is empty).
    """

    def __init__(self, layout_elements: "List[TextRegion] | None" = None):
        """Creates a column from the given elements, or an empty column if none are given.

        A list passed by the caller is stored as-is (not copied), so later calls to
        ``add_element`` append to that same list.
        """
        # A ``None`` sentinel replaces the former ``[]`` default: a default list is
        # created once at definition time, so every argument-less Column would have
        # shared (and appended to) the very same list.
        self.layout_elements = [] if layout_elements is None else layout_elements
        self._refresh_midpoint()

    def add_element(self, layout_element: "TextRegion"):
        """Adds an element to the column and updates the midpoint."""
        self.layout_elements.append(layout_element)
        self._refresh_midpoint()

    def _refresh_midpoint(self):
        """Recomputes ``x_midpoint`` as the mean of the elements' ``x_midpoint`` values."""
        count = len(self.layout_elements)
        if count > 0:
            self.x_midpoint = sum(el.x_midpoint for el in self.layout_elements) / count
        else:
            # An empty column has no meaningful midpoint; 0 is the placeholder value.
            self.x_midpoint = 0
24-
25-
266def order_layout (
277 layout : List [TextRegion ],
288 column_tol_factor : float = 0.2 ,
@@ -47,53 +27,7 @@ def order_layout(
4727 if len (layout ) == 0 :
4828 return []
4929
50- width = calculate_width (layout )
51- column_tolerance = column_tol_factor * width
52- full_page_min_width = full_page_threshold_factor * width
53-
5430 layout .sort (key = lambda element : element .y1 )
55-
56- sorted_layout = []
57- columns : List [Column ] = []
58- for layout_element in layout :
59- if layout_element .width > full_page_min_width :
60- sorted_layout .extend (sorted_layout_from_columns (columns ))
61- columns = []
62- sorted_layout .append (layout_element )
63-
64- else :
65- added_to_column = False
66- for column in columns :
67- difference = abs (layout_element .x_midpoint - column .x_midpoint )
68- if difference < column_tolerance :
69- column .add_element (layout_element )
70- added_to_column = True
71- break
72-
73- if not added_to_column :
74- columns .append (Column (layout_elements = [layout_element ]))
75-
76- sorted_layout .extend (sorted_layout_from_columns (columns ))
77- return sorted_layout
78-
79-
def sorted_layout_from_columns(columns: List["Column"]) -> List["TextRegion"]:
    """Flattens a list of columns into a single reading-order list of elements.

    Columns are ordered left to right by ``x_midpoint`` and the elements inside each
    column top to bottom by ``y1``. Both sorts happen in place, so ``columns`` and each
    column's ``layout_elements`` list are reordered as a side effect.
    """
    ordered = []
    # Sorting an empty list is a no-op, so no explicit emptiness check is required.
    columns.sort(key=lambda col: col.x_midpoint)
    for col in columns:
        col.layout_elements.sort(key=lambda el: el.y1)
        ordered.extend(col.layout_elements)
    return ordered
91-
92-
def calculate_width(layout) -> Union[float, int]:
    """Returns the horizontal extent covered by the elements in the layout.

    The extent is the largest ``x2`` minus the smallest ``x1`` over all elements. It is
    used for computing the full page threshold and column tolerance.
    """
    left_edge = min(region.x1 for region in layout)
    right_edge = max(region.x2 for region in layout)
    return right_edge - left_edge
31+ # NOTE(alan): Temporarily revert to original logic pending fixing the new logic
32+ # See code prior to this commit for new logic.
33+ return layout
0 commit comments