@@ -183,7 +183,6 @@ def process(self):
183
183
def _process_segment (self , parent , parent_image , parent_coords , page_id , zoom , lines , ignore ):
184
184
LOG = getLogger ('processor.OcropyResegment' )
185
185
threshold = self .parameter ['min_fraction' ]
186
- margin = self .parameter ['extend_margins' ]
187
186
method = self .parameter ['method' ]
188
187
# prepare line segmentation
189
188
parent_array = pil2array (parent_image )
@@ -206,32 +205,34 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
206
205
line_labels = np .zeros_like (parent_bin , bool )
207
206
line_labels = np .tile (line_labels [np .newaxis ], (len (lines ), 1 , 1 ))
208
207
line_polygons = []
209
- for i , segment in enumerate (lines ):
210
- if self .parameter ['baseline_only' ] and segment .Baseline :
211
- segment_baseline = baseline_of_segment (segment , parent_coords )
212
- segment_polygon = polygon_from_baseline (segment_baseline , 30 / zoom )
208
+ for i , line in enumerate (lines ):
209
+ if self .parameter ['baseline_only' ] and line .Baseline :
210
+ line_base = baseline_of_segment (line , parent_coords )
211
+ line_poly = polygon_from_baseline (line_base , 30 / zoom )
213
212
else :
214
- segment_polygon = coordinates_of_segment (segment , parent_image , parent_coords )
215
- segment_polygon = make_valid (Polygon (segment_polygon )).buffer (margin )
216
- line_polygons .append (prep (segment_polygon ))
217
- segment_polygon = np .array (segment_polygon .exterior .coords , int )[:- 1 ]
218
- # draw.polygon: If any segment_polygon lies outside of parent
213
+ line_poly = coordinates_of_segment (line , parent_image , parent_coords )
214
+ line_poly = make_valid (Polygon (line_poly ))
215
+ line_polygons .append (line_poly )
216
+ line_polygons = list (map (prep , line_polygons ))
217
+ for i , line_polygon in enumerate (line_polygons ):
218
+ polygon = np .array (line_polygon .context .exterior .coords , int )[:- 1 ]
219
+ # draw.polygon: If any line_polygon lies outside of parent
219
220
# (causing negative/above-max indices), either fully or partially,
220
221
# then this will silently ignore them. The caller does not need
221
222
# to concern herself with this.
222
- segment_y , segment_x = draw .polygon (segment_polygon [:, 1 ],
223
- segment_polygon [:, 0 ],
224
- parent_bin .shape )
225
- line_labels [i , segment_y , segment_x ] = True
223
+ line_y , line_x = draw .polygon (polygon [:, 1 ],
224
+ polygon [:, 0 ],
225
+ parent_bin .shape )
226
+ line_labels [i , line_y , line_x ] = True
226
227
# only text region(s) may contain new text lines
227
- for i , segment in enumerate (set (line .parent_object_ for line in lines )):
228
+ for i , region in enumerate (set (line .parent_object_ for line in lines )):
228
229
LOG .debug ('unmasking area of text region "%s" for "%s"' ,
229
- segment .id , page_id if fullpage else parent .id )
230
- segment_polygon = coordinates_of_segment (segment , parent_image , parent_coords )
231
- segment_polygon = make_valid (Polygon (segment_polygon )). buffer ( margin )
232
- segment_polygon = np .array (segment_polygon .exterior .coords , int )[:- 1 ]
233
- ignore_bin [draw .polygon (segment_polygon [:, 1 ],
234
- segment_polygon [:, 0 ],
230
+ region .id , page_id if fullpage else parent .id )
231
+ region_polygon = coordinates_of_segment (region , parent_image , parent_coords )
232
+ region_polygon = make_valid (Polygon (region_polygon ) )
233
+ region_polygon = np .array (region_polygon .exterior .coords , int )[:- 1 ]
234
+ ignore_bin [draw .polygon (region_polygon [:, 1 ],
235
+ region_polygon [:, 0 ],
235
236
parent_bin .shape )] = False
236
237
# mask/ignore overlapping neighbours
237
238
for i , segment in enumerate (ignore ):
@@ -295,11 +296,10 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
295
296
new_line_polygons , new_line_labels = masks2polygons (
296
297
new_line_labels , new_baselines , parent_bin , '%s "%s"' % (tag , parent .id ),
297
298
min_area = 640 / zoom / zoom )
298
- DSAVE ('line_labels' , [np .mean ( line_labels , axis = 0 ), parent_bin ])
299
+ DSAVE ('line_labels' , [np .argmax ( np . insert ( line_labels , 0 , 0 , axis = 0 ) , axis = 0 ), parent_bin ])
299
300
DSAVE ('new_line_labels' , [new_line_labels , parent_bin ])
300
- new_line_polygons , new_baselines = list (zip (* [
301
- (make_valid (Polygon (line_poly )), LineString (baseline ))
302
- for _ , line_poly , baseline in new_line_polygons ])) or ([], [])
301
+ new_line_polygons , new_baselines = list (zip (* [(Polygon (poly ), LineString (base ))
302
+ for _ , poly , base in new_line_polygons ])) or ([], [])
303
303
# polygons for intersecting pairs
304
304
intersections = dict ()
305
305
# ratio of overlap between intersection and new line
@@ -375,7 +375,6 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
375
375
keep1 [ind1 ] = False
376
376
#keep2[ind2] = False
377
377
# validate assignments retain enough area and do not lose unassigned matches
378
- line_polygons = [poly .context .buffer (- margin ) for poly in line_polygons ]
379
378
for j , line in enumerate (lines ):
380
379
new_lines = np .nonzero (assignments == j )[0 ]
381
380
if not np .prod (new_lines .shape ):
@@ -404,9 +403,8 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
404
403
# combine all assigned new lines to single outline polygon
405
404
if len (new_lines ) > 1 :
406
405
LOG .debug ("joining %d new line polygons for '%s'" , len (new_lines ), line .id )
407
- new_polygon = join_polygons ([intersections [(i , j )]
406
+ new_polygon = join_polygons ([new_line_polygons [ i ] # intersections[(i, j)]
408
407
for i in new_lines ], loc = line .id , scale = scale )
409
- line_polygons [j ] = new_polygon
410
408
new_baseline = join_baselines ([new_polygon .intersection (new_baselines [i ])
411
409
for i in new_lines ], loc = line .id )
412
410
# convert back to absolute (page) coordinates:
@@ -422,14 +420,15 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
422
420
new_baseline = coordinates_for_segment (new_baseline .coords ,
423
421
parent_image , parent_coords )
424
422
line .set_Baseline (BaselineType (points = points_from_polygon (new_baseline )))
423
+ line_polygons [j ] = prep (new_polygon )
425
424
# now also ensure the assigned lines do not overlap other existing lines
426
425
for i in new_lines :
427
426
for otherj in np .nonzero (fits_fg [i ] > 0.1 )[0 ]:
428
427
if j == otherj :
429
428
continue
430
429
otherline = lines [otherj ]
431
430
LOG .debug ("subtracting new '%s' from overlapping '%s'" , line .id , otherline .id )
432
- other_polygon = diff_polygons (line_polygons [otherj ], new_polygon )
431
+ other_polygon = diff_polygons (line_polygons [otherj ]. context , new_polygon )
433
432
if other_polygon .is_empty :
434
433
continue
435
434
# convert back to absolute (page) coordinates:
0 commit comments