@@ -92,13 +92,13 @@ def process(self):
92
92
assert_file_grp_cardinality (self .input_file_grp , 1 )
93
93
assert_file_grp_cardinality (self .output_file_grp , 1 )
94
94
95
- for ( n , input_file ) in enumerate (self .input_files ):
95
+ for n , input_file in enumerate (self .input_files ):
96
96
LOG .info ("INPUT FILE %i / %s" , n , input_file .pageId or input_file .ID )
97
97
file_id = make_file_id (input_file , self .output_file_grp )
98
98
99
99
pcgts = page_from_file (self .workspace .download_file (input_file ))
100
100
self .add_metadata (pcgts )
101
- page_id = pcgts .pcGtsId or input_file .pageId or input_file .ID # (PageType has no id)
101
+ page_id = pcgts .pcGtsId or input_file .pageId or input_file .ID
102
102
page = pcgts .get_Page ()
103
103
104
104
page_image , page_coords , page_image_info = self .workspace .image_from_page (
@@ -127,7 +127,7 @@ def process(self):
127
127
page .get_SeparatorRegion () +
128
128
page .get_UnknownRegion () +
129
129
page .get_CustomRegion ())
130
- regions = page .get_AllRegions (classes = ['Text' ], order = 'reading-order' )
130
+ regions = page .get_AllRegions (classes = ['Text' ])
131
131
if not regions :
132
132
LOG .warning ('Page "%s" contains no text regions' , page_id )
133
133
elif level == 'page' :
@@ -199,6 +199,8 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
199
199
LOG .debug ('unmasking area of text region "%s" for "%s"' ,
200
200
segment .id , page_id )
201
201
segment_polygon = coordinates_of_segment (segment , parent_image , parent_coords )
202
+ segment_polygon = make_valid (Polygon (segment_polygon )).buffer (margin )
203
+ segment_polygon = np .array (segment_polygon .exterior , np .int )[:- 1 ]
202
204
ignore_bin [draw .polygon (segment_polygon [:, 1 ],
203
205
segment_polygon [:, 0 ],
204
206
parent_bin .shape )] = False
@@ -275,9 +277,8 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
275
277
fits = (fits_bg [i ] > 0.6 ) & (fits_fg [i ] > 0.9 )
276
278
if not fits .any ():
277
279
j = np .argmax (fits_bg [i ] * fits_fg [i ])
278
- LOG .debug ("best fit '%s' for new line %d covers only %.1f%% bg / %.1f%% fg" ,
279
- lines [j ].id ,
280
- i , fits_bg [i ,j ] * 100 , fits_fg [i ,j ] * 100 )
280
+ LOG .debug ("best fit '%s' for new line %d fits only %.1f%% bg / %.1f%% fg" ,
281
+ lines [j ].id , i , fits_bg [i ,j ] * 100 , fits_fg [i ,j ] * 100 )
281
282
continue
282
283
covers = covers_bg [i ] * covers_fg [i ] * fits
283
284
j = np .argmax (covers )
0 commit comments