6
6
from shapely .geometry import Polygon , asPolygon
7
7
from shapely .prepared import prep
8
8
from shapely .ops import unary_union
9
+ import alphashape
9
10
10
11
from ocrd_modelfactory import page_from_file
11
12
from ocrd_models .ocrd_page import (
@@ -177,59 +178,58 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
177
178
tag = 'region'
178
179
fullpage = False
179
180
report = check_region (parent_bin , zoom )
181
+ if report :
182
+ LOG .warning ('Invalid %s "%s": %s' , tag , parent .id , report )
183
+ return
184
+ # get existing line labels:
185
+ line_labels = np .zeros_like (parent_bin , np .bool )
186
+ line_labels = np .tile (line_labels [np .newaxis ], (len (lines ), 1 , 1 ))
187
+ line_polygons = []
188
+ for i , segment in enumerate (lines ):
189
+ segment_polygon = coordinates_of_segment (segment , parent_image , parent_coords )
190
+ segment_polygon = make_valid (Polygon (segment_polygon )).buffer (margin )
191
+ line_polygons .append (prep (segment_polygon ))
192
+ segment_polygon = np .array (segment_polygon .exterior , np .int )[:- 1 ]
193
+ # draw.polygon: If any segment_polygon lies outside of parent
194
+ # (causing negative/above-max indices), either fully or partially,
195
+ # then this will silently ignore them. The caller does not need
196
+ # to concern herself with this.
197
+ segment_y , segment_x = draw .polygon (segment_polygon [:, 1 ],
198
+ segment_polygon [:, 0 ],
199
+ parent_bin .shape )
200
+ line_labels [i , segment_y , segment_x ] = True
201
+ # only text region(s) may contain new text lines
202
+ for i , segment in enumerate (set (line .parent_object_ for line in lines )):
203
+ LOG .debug ('unmasking area of text region "%s" for "%s"' ,
204
+ segment .id , parent .id )
205
+ segment_polygon = coordinates_of_segment (segment , parent_image , parent_coords )
206
+ segment_polygon = make_valid (Polygon (segment_polygon )).buffer (margin )
207
+ segment_polygon = np .array (segment_polygon .exterior , np .int )[:- 1 ]
208
+ ignore_bin [draw .polygon (segment_polygon [:, 1 ],
209
+ segment_polygon [:, 0 ],
210
+ parent_bin .shape )] = False
211
+ # mask/ignore overlapping neighbours
212
+ for i , segment in enumerate (ignore ):
213
+ LOG .debug ('masking area of %s "%s" for "%s"' ,
214
+ type (segment ).__name__ [:- 4 ], segment .id , parent .id )
215
+ segment_polygon = coordinates_of_segment (segment , parent_image , parent_coords )
216
+ ignore_bin [draw .polygon (segment_polygon [:, 1 ],
217
+ segment_polygon [:, 0 ],
218
+ parent_bin .shape )] = True
180
219
try :
181
- if report :
182
- raise Exception (report )
183
- # get existing line labels:
184
- line_labels = np .zeros_like (parent_bin , np .bool )
185
- line_labels = np .tile (np .expand_dims (line_labels , - 1 ), (1 ,1 ,len (lines )))
186
- line_polygons = []
187
- for i , segment in enumerate (lines ):
188
- segment_polygon = coordinates_of_segment (segment , parent_image , parent_coords )
189
- segment_polygon = make_valid (Polygon (segment_polygon )).buffer (margin )
190
- line_polygons .append (prep (segment_polygon ))
191
- segment_polygon = np .array (segment_polygon .exterior , np .int )[:- 1 ]
192
- # draw.polygon: If any segment_polygon lies outside of parent
193
- # (causing negative/above-max indices), either fully or partially,
194
- # then this will silently ignore them. The caller does not need
195
- # to concern herself with this.
196
- segment_y , segment_x = draw .polygon (segment_polygon [:, 1 ],
197
- segment_polygon [:, 0 ],
198
- parent_bin .shape )
199
- line_labels [segment_y , segment_x , i ] = True
200
- # only text region(s) may contain new text lines
201
- for i , segment in enumerate (set (line .parent_object_ for line in lines )):
202
- LOG .debug ('unmasking area of text region "%s" for "%s"' ,
203
- segment .id , page_id )
204
- segment_polygon = coordinates_of_segment (segment , parent_image , parent_coords )
205
- segment_polygon = make_valid (Polygon (segment_polygon )).buffer (margin )
206
- segment_polygon = np .array (segment_polygon .exterior , np .int )[:- 1 ]
207
- ignore_bin [draw .polygon (segment_polygon [:, 1 ],
208
- segment_polygon [:, 0 ],
209
- parent_bin .shape )] = False
210
- # mask/ignore overlapping neighbours
211
- for i , segment in enumerate (ignore ):
212
- LOG .debug ('masking area of %s "%s" for "%s"' ,
213
- type (segment ).__name__ [:- 4 ], segment .id , page_id )
214
- segment_polygon = coordinates_of_segment (segment , parent_image , parent_coords )
215
- ignore_bin [draw .polygon (segment_polygon [:, 1 ],
216
- segment_polygon [:, 0 ],
217
- parent_bin .shape )] = True
218
220
new_line_labels , _ , _ , _ , _ , scale = compute_segmentation (
219
221
parent_bin , seps = ignore_bin , zoom = zoom , fullpage = fullpage ,
220
222
maxseps = 0 , maxcolseps = len (ignore ), maximages = 0 )
221
223
except Exception as err :
222
- LOG .warning ('Cannot line-segment %s "%s": %s' ,
223
- tag , page_id if fullpage else parent .id , err )
224
+ LOG .warning ('Cannot line-segment %s "%s": %s' , tag , parent .id , err )
224
225
return
225
226
LOG .info ("Found %d new line labels for %d existing lines on %s '%s'" ,
226
- new_line_labels .max (), len (lines ), tag , page_id if fullpage else parent .id )
227
+ new_line_labels .max (), len (lines ), tag , parent .id )
227
228
# polygonalize and prepare comparison
228
229
new_line_polygons , new_line_labels = masks2polygons (
229
- new_line_labels , parent_bin ,
230
- '%s "%s"' % (tag , page_id if fullpage else parent .id ),
230
+ new_line_labels , parent_bin , '%s "%s"' % (tag , parent .id ),
231
231
min_area = 640 / zoom / zoom )
232
- # DSAVE('line_labels', [np.mean(line_labels, axis=2 ), parent_bin])
232
+ # DSAVE('line_labels', [np.mean(line_labels, axis=0 ), parent_bin])
233
233
# DSAVE('new_line_labels', [new_line_labels, parent_bin], disabled=False)
234
234
new_line_polygons = [make_valid (Polygon (line_poly ))
235
235
for line_label , line_poly in new_line_polygons ]
@@ -250,7 +250,7 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
250
250
if not inter :
251
251
continue
252
252
new_line_mask = (new_line_labels == i + 1 ) & parent_bin
253
- line_mask = line_labels [:,:, j ] & parent_bin
253
+ line_mask = line_labels [j ] & parent_bin
254
254
inter_mask = new_line_mask & line_mask
255
255
if (not np .count_nonzero (inter_mask ) or
256
256
not np .count_nonzero (new_line_mask ) or
@@ -314,15 +314,14 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
314
314
LOG .debug ("new lines for '%s' would loose %d non-matching segments totalling %.1f%% bg" ,
315
315
line .id , np .count_nonzero (looses ), covers * 100 )
316
316
continue
317
- line_count = np .count_nonzero (line_labels [:,:, j ] & parent_bin )
317
+ line_count = np .count_nonzero (line_labels [j ] & parent_bin )
318
318
new_count = covers * line_count
319
319
LOG .debug ('Black pixels before/after resegment of line "%s": %d/%d' ,
320
320
line .id , line_count , new_count )
321
321
# combine all assigned new lines to single outline polygon
322
322
if len (new_lines ) > 1 :
323
323
LOG .debug ("joining %d new line polygons for '%s'" , len (new_lines ), line .id )
324
- new_polygon = join_polygons ([intersections [(i , j )] for i in new_lines ],
325
- contract = scale // 2 )
324
+ new_polygon = join_polygons ([intersections [(i , j )] for i in new_lines ], loc = line .id )
326
325
line_polygons [j ] = new_polygon
327
326
# convert back to absolute (page) coordinates:
328
327
line_polygon = coordinates_for_segment (new_polygon .exterior .coords [:- 1 ],
@@ -341,6 +340,8 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
341
340
otherline = lines [otherj ]
342
341
LOG .debug ("subtracting new '%s' from overlapping '%s'" , line .id , otherline .id )
343
342
other_polygon = diff_polygons (line_polygons [otherj ], new_polygon )
343
+ if other_polygon .is_empty :
344
+ continue
344
345
# convert back to absolute (page) coordinates:
345
346
other_polygon = coordinates_for_segment (other_polygon .exterior .coords [:- 1 ],
346
347
parent_image , parent_coords )
@@ -359,19 +360,36 @@ def diff_polygons(poly1, poly2):
359
360
poly = make_valid (poly )
360
361
return poly
361
362
362
- def join_polygons (polygons , contract = 2 ):
363
- # construct convex hull
364
- compoundp = unary_union (polygons )
365
- jointp = compoundp .convex_hull
366
- # FIXME: calculate true alpha shape
367
- # make hull slightly concave by dilation and reconstruction
368
- for step in range (int (contract )+ 1 ):
369
- nextp = jointp .buffer (- 1 )
370
- if (nextp .type == 'MultiPolygon' or
371
- nextp .union (compoundp ).type == 'MultiPolygon' ):
372
- break
373
- jointp = nextp
374
- jointp = jointp .union (compoundp )
363
+ def join_polygons (polygons , loc = '' ):
364
+ """construct concave hull (alpha shape) from input polygons"""
365
+ # compoundp = unary_union(polygons)
366
+ # jointp = compoundp.convex_hull
367
+ LOG = getLogger ('processor.OcropyResegment' )
368
+ if len (polygons ) == 1 :
369
+ return polygons [0 ]
370
+ # get equidistant list of points along hull
371
+ # (otherwise alphashape will jump across the interior)
372
+ points = [poly .exterior .interpolate (dist ).coords [0 ] # .xy
373
+ for poly in polygons
374
+ for dist in np .arange (0 , poly .length , 5.0 )]
375
+ #alpha = alphashape.optimizealpha(points) # too slow
376
+ alpha = 0.05
377
+ jointp = alphashape .alphashape (points , alpha )
378
+ tries = 0
379
+ # from descartes import PolygonPatch
380
+ # import matplotlib.pyplot as plt
381
+ while jointp .type in ['MultiPolygon' , 'GeometryCollection' ]:
382
+ # plt.figure()
383
+ # plt.gca().scatter(*zip(*points))
384
+ # for geom in jointp.geoms:
385
+ # plt.gca().add_patch(PolygonPatch(geom, alpha=0.2))
386
+ # plt.show()
387
+ alpha *= 0.7
388
+ tries += 1
389
+ if tries > 10 :
390
+ LOG .warning ("cannot find alpha for concave hull on '%s'" , loc )
391
+ alpha = 0
392
+ jointp = alphashape .alphashape (points , alpha )
375
393
if jointp .minimum_clearance < 1.0 :
376
394
# follow-up calculations will necessarily be integer;
377
395
# so anticipate rounding here and then ensure validity
0 commit comments