Skip to content

Commit b6c8957

Browse files
author
Robert Sachunsky
committed
resegment: expose parameter spread (analogous to segment)
1 parent 6e95b38 commit b6c8957

File tree

2 files changed

+15
-8
lines changed

2 files changed

+15
-8
lines changed

ocrd_cis/ocrd-tool.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,12 @@
222222
"description": "share of foreground pixels that must be retained by the output polygons",
223223
"default": 0.75
224224
},
225+
"spread": {
226+
"type": "number",
227+
"format": "float",
228+
"description": "distance in points (pt) from the foreground to project textline labels into the background for polygonal contours; if zero, project half a scale/capheight",
229+
"default": 2.4
230+
},
225231
"extend_margins": {
226232
"type": "number",
227233
"format": "integer",

ocrd_cis/ocropy/resegment.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
184184
LOG = getLogger('processor.OcropyResegment')
185185
threshold = self.parameter['min_fraction']
186186
method = self.parameter['method']
187+
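maxdist = self.parameter['spread']/zoom*300/72 # 'spread' is given in pt (1/72 in); rescaled here to a pixel distance (presumably zoom = 300/DPI -- confirm)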
187188
# prepare line segmentation
188189
parent_array = pil2array(parent_image)
189190
#parent_array, _ = common.binarize(parent_array, maxskew=0) # just in case still raw
@@ -273,19 +274,19 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
273274
new_labels[line_labels[i]] = i + 1
274275
continue
275276
line_baseline = baseline_of_segment(line, parent_coords)
276-
line_polygon = polygon_from_baseline(line_baseline, scale)
277+
line_polygon = polygon_from_baseline(line_baseline, maxdist or scale/2)
277278
line_polygon = np.array(line_polygon.exterior.coords, int)[:-1]
278279
line_y, line_x = draw.polygon(line_polygon[:, 1],
279280
line_polygon[:, 0],
280281
parent_bin.shape)
281282
new_labels[line_y, line_x] = i + 1
282283
spread_dist(lines, line_labels, new_labels, parent_bin, components, parent_coords,
283-
scale=scale, loc=parent.id, threshold=threshold)
284+
maxdist=maxdist or scale/2, loc=parent.id, threshold=threshold)
284285
return
285286
try:
286287
new_line_labels, new_baselines, _, _, _, scale = compute_segmentation(
287-
parent_bin, seps=ignore_bin, zoom=zoom, fullpage=fullpage,
288-
maxseps=0, maxcolseps=len(ignore), maximages=0)
288+
parent_bin, seps=ignore_bin, zoom=zoom, spread_dist=maxdist or scale/2,
289+
fullpage=fullpage, maxseps=0, maxcolseps=len(ignore), maximages=0)
289290
except Exception as err:
290291
LOG.error('Cannot line-segment %s "%s": %s',
291292
tag, page_id if fullpage else parent.id, err)
@@ -441,7 +442,7 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
441442
otherline.get_Coords().set_points(points_from_polygon(other_polygon))
442443

443444
def spread_dist(lines, old_labels, new_labels, binarized, components, coords,
444-
scale=43, loc='', threshold=0.9):
445+
maxdist=43, loc='', threshold=0.9):
445446
"""redefine line coordinates by contourizing spread of connected components propagated from new labels"""
446447
LOG = getLogger('processor.OcropyResegment')
447448
DSAVE('seeds', [new_labels, (components>0)])
@@ -452,13 +453,13 @@ def spread_dist(lines, old_labels, new_labels, binarized, components, coords,
452453
new_labels2 = segmentation.watershed(new_labels2, markers=new_labels, mask=(components > 0))
453454
DSAVE('propagated', new_labels2)
454455
# dilate/grow labels from connected components against each other and bg
455-
new_labels = morph.spread_labels(new_labels2, maxdist=scale*2)
456+
new_labels = morph.spread_labels(new_labels2, maxdist=maxdist)
456457
DSAVE('spread', new_labels)
457458
# now propagate again to catch smallest components like punctuation
458459
new_labels2 = morph.propagate_labels(binarized, new_labels, conflict=0)
459460
new_labels2 = segmentation.watershed(new_labels2, markers=new_labels, mask=binarized)
460461
DSAVE('propagated-again', [new_labels2, binarized & (new_labels2==0)])
461-
new_labels = morph.spread_labels(new_labels2, maxdist=scale/2)
462+
new_labels = morph.spread_labels(new_labels2, maxdist=maxdist/4)
462463
DSAVE('spread-again', [new_labels, binarized])
463464
# find polygon hull and modify line coords
464465
for i, line in enumerate(lines):
@@ -496,7 +497,7 @@ def spread_dist(lines, old_labels, new_labels, binarized, components, coords,
496497
# get alpha shape
497498
poly = join_polygons([make_valid(Polygon(contour))
498499
for contour in contours],
499-
loc=line.id, scale=scale)
500+
loc=line.id, scale=maxdist)
500501
poly = poly.exterior.coords[:-1]
501502
polygon = coordinates_for_segment(poly, None, coords)
502503
polygon = polygon_for_parent(polygon, line.parent_object_)

0 commit comments

Comments
 (0)