@@ -184,6 +184,7 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
184
184
LOG = getLogger ('processor.OcropyResegment' )
185
185
threshold = self .parameter ['min_fraction' ]
186
186
method = self .parameter ['method' ]
187
+ maxdist = self .parameter ['spread' ]/ zoom * 300 / 72 # in pt
187
188
# prepare line segmentation
188
189
parent_array = pil2array (parent_image )
189
190
#parent_array, _ = common.binarize(parent_array, maxskew=0) # just in case still raw
@@ -273,19 +274,19 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
273
274
new_labels [line_labels [i ]] = i + 1
274
275
continue
275
276
line_baseline = baseline_of_segment (line , parent_coords )
276
- line_polygon = polygon_from_baseline (line_baseline , scale )
277
+ line_polygon = polygon_from_baseline (line_baseline , maxdist or scale / 2 )
277
278
line_polygon = np .array (line_polygon .exterior .coords , int )[:- 1 ]
278
279
line_y , line_x = draw .polygon (line_polygon [:, 1 ],
279
280
line_polygon [:, 0 ],
280
281
parent_bin .shape )
281
282
new_labels [line_y , line_x ] = i + 1
282
283
spread_dist (lines , line_labels , new_labels , parent_bin , components , parent_coords ,
283
- scale = scale , loc = parent .id , threshold = threshold )
284
+ maxdist = maxdist or scale / 2 , loc = parent .id , threshold = threshold )
284
285
return
285
286
try :
286
287
new_line_labels , new_baselines , _ , _ , _ , scale = compute_segmentation (
287
- parent_bin , seps = ignore_bin , zoom = zoom , fullpage = fullpage ,
288
- maxseps = 0 , maxcolseps = len (ignore ), maximages = 0 )
288
+ parent_bin , seps = ignore_bin , zoom = zoom , spread_dist = maxdist or scale / 2 ,
289
+ fullpage = fullpage , maxseps = 0 , maxcolseps = len (ignore ), maximages = 0 )
289
290
except Exception as err :
290
291
LOG .error ('Cannot line-segment %s "%s": %s' ,
291
292
tag , page_id if fullpage else parent .id , err )
@@ -441,7 +442,7 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
441
442
otherline .get_Coords ().set_points (points_from_polygon (other_polygon ))
442
443
443
444
def spread_dist (lines , old_labels , new_labels , binarized , components , coords ,
444
- scale = 43 , loc = '' , threshold = 0.9 ):
445
+ maxdist = 43 , loc = '' , threshold = 0.9 ):
445
446
"""redefine line coordinates by contourizing spread of connected components propagated from new labels"""
446
447
LOG = getLogger ('processor.OcropyResegment' )
447
448
DSAVE ('seeds' , [new_labels , (components > 0 )])
@@ -452,13 +453,13 @@ def spread_dist(lines, old_labels, new_labels, binarized, components, coords,
452
453
new_labels2 = segmentation .watershed (new_labels2 , markers = new_labels , mask = (components > 0 ))
453
454
DSAVE ('propagated' , new_labels2 )
454
455
# dilate/grow labels from connected components against each other and bg
455
- new_labels = morph .spread_labels (new_labels2 , maxdist = scale * 2 )
456
+ new_labels = morph .spread_labels (new_labels2 , maxdist = maxdist )
456
457
DSAVE ('spread' , new_labels )
457
458
# now propagate again to catch smallest components like punctuation
458
459
new_labels2 = morph .propagate_labels (binarized , new_labels , conflict = 0 )
459
460
new_labels2 = segmentation .watershed (new_labels2 , markers = new_labels , mask = binarized )
460
461
DSAVE ('propagated-again' , [new_labels2 , binarized & (new_labels2 == 0 )])
461
- new_labels = morph .spread_labels (new_labels2 , maxdist = scale / 2 )
462
+ new_labels = morph .spread_labels (new_labels2 , maxdist = maxdist / 4 )
462
463
DSAVE ('spread-again' , [new_labels , binarized ])
463
464
# find polygon hull and modify line coords
464
465
for i , line in enumerate (lines ):
@@ -496,7 +497,7 @@ def spread_dist(lines, old_labels, new_labels, binarized, components, coords,
496
497
# get alpha shape
497
498
poly = join_polygons ([make_valid (Polygon (contour ))
498
499
for contour in contours ],
499
- loc = line .id , scale = scale )
500
+ loc = line .id , scale = maxdist )
500
501
poly = poly .exterior .coords [:- 1 ]
501
502
polygon = coordinates_for_segment (poly , None , coords )
502
503
polygon = polygon_for_parent (polygon , line .parent_object_ )
0 commit comments