Skip to content

Commit 8a71d8e

Browse files
author
Robert Sachunsky
committed
resegment (lineest): use new polygons instead of intersections but ignore extend_margins
1 parent d2a5279 commit 8a71d8e

File tree

2 files changed

+29
-30
lines changed

2 files changed

+29
-30
lines changed

ocrd_cis/ocrd-tool.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@
225225
"extend_margins": {
226226
"type": "number",
227227
"format": "integer",
228-
"description": "number of pixels to extend the input polygons in all directions",
228+
"description": "(ignored)",
229229
"default": 3
230230
}
231231
}

ocrd_cis/ocropy/resegment.py

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,6 @@ def process(self):
183183
def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, lines, ignore):
184184
LOG = getLogger('processor.OcropyResegment')
185185
threshold = self.parameter['min_fraction']
186-
margin = self.parameter['extend_margins']
187186
method = self.parameter['method']
188187
# prepare line segmentation
189188
parent_array = pil2array(parent_image)
@@ -206,32 +205,34 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
206205
line_labels = np.zeros_like(parent_bin, bool)
207206
line_labels = np.tile(line_labels[np.newaxis], (len(lines), 1, 1))
208207
line_polygons = []
209-
for i, segment in enumerate(lines):
210-
if self.parameter['baseline_only'] and segment.Baseline:
211-
segment_baseline = baseline_of_segment(segment, parent_coords)
212-
segment_polygon = polygon_from_baseline(segment_baseline, 30/zoom)
208+
for i, line in enumerate(lines):
209+
if self.parameter['baseline_only'] and line.Baseline:
210+
line_base = baseline_of_segment(line, parent_coords)
211+
line_poly = polygon_from_baseline(line_base, 30/zoom)
213212
else:
214-
segment_polygon = coordinates_of_segment(segment, parent_image, parent_coords)
215-
segment_polygon = make_valid(Polygon(segment_polygon)).buffer(margin)
216-
line_polygons.append(prep(segment_polygon))
217-
segment_polygon = np.array(segment_polygon.exterior.coords, int)[:-1]
218-
# draw.polygon: If any segment_polygon lies outside of parent
213+
line_poly = coordinates_of_segment(line, parent_image, parent_coords)
214+
line_poly = make_valid(Polygon(line_poly))
215+
line_polygons.append(line_poly)
216+
line_polygons = list(map(prep, line_polygons))
217+
for i, line_polygon in enumerate(line_polygons):
218+
polygon = np.array(line_polygon.context.exterior.coords, int)[:-1]
219+
# draw.polygon: If any line_polygon lies outside of parent
219220
# (causing negative/above-max indices), either fully or partially,
220221
# then this will silently ignore them. The caller does not need
221222
# to concern herself with this.
222-
segment_y, segment_x = draw.polygon(segment_polygon[:, 1],
223-
segment_polygon[:, 0],
224-
parent_bin.shape)
225-
line_labels[i, segment_y, segment_x] = True
223+
line_y, line_x = draw.polygon(polygon[:, 1],
224+
polygon[:, 0],
225+
parent_bin.shape)
226+
line_labels[i, line_y, line_x] = True
226227
# only text region(s) may contain new text lines
227-
for i, segment in enumerate(set(line.parent_object_ for line in lines)):
228+
for i, region in enumerate(set(line.parent_object_ for line in lines)):
228229
LOG.debug('unmasking area of text region "%s" for "%s"',
229-
segment.id, page_id if fullpage else parent.id)
230-
segment_polygon = coordinates_of_segment(segment, parent_image, parent_coords)
231-
segment_polygon = make_valid(Polygon(segment_polygon)).buffer(margin)
232-
segment_polygon = np.array(segment_polygon.exterior.coords, int)[:-1]
233-
ignore_bin[draw.polygon(segment_polygon[:, 1],
234-
segment_polygon[:, 0],
230+
region.id, page_id if fullpage else parent.id)
231+
region_polygon = coordinates_of_segment(region, parent_image, parent_coords)
232+
region_polygon = make_valid(Polygon(region_polygon))
233+
region_polygon = np.array(region_polygon.exterior.coords, int)[:-1]
234+
ignore_bin[draw.polygon(region_polygon[:, 1],
235+
region_polygon[:, 0],
235236
parent_bin.shape)] = False
236237
# mask/ignore overlapping neighbours
237238
for i, segment in enumerate(ignore):
@@ -295,11 +296,10 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
295296
new_line_polygons, new_line_labels = masks2polygons(
296297
new_line_labels, new_baselines, parent_bin, '%s "%s"' % (tag, parent.id),
297298
min_area=640/zoom/zoom)
298-
DSAVE('line_labels', [np.mean(line_labels, axis=0), parent_bin])
299+
DSAVE('line_labels', [np.argmax(np.insert(line_labels, 0, 0, axis=0), axis=0), parent_bin])
299300
DSAVE('new_line_labels', [new_line_labels, parent_bin])
300-
new_line_polygons, new_baselines = list(zip(*[
301-
(make_valid(Polygon(line_poly)), LineString(baseline))
302-
for _, line_poly, baseline in new_line_polygons])) or ([], [])
301+
new_line_polygons, new_baselines = list(zip(*[(Polygon(poly), LineString(base))
302+
for _, poly, base in new_line_polygons])) or ([], [])
303303
# polygons for intersecting pairs
304304
intersections = dict()
305305
# ratio of overlap between intersection and new line
@@ -375,7 +375,6 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
375375
keep1[ind1] = False
376376
#keep2[ind2] = False
377377
# validate that assignments retain enough area and do not lose unassigned matches
378-
line_polygons = [poly.context.buffer(-margin) for poly in line_polygons]
379378
for j, line in enumerate(lines):
380379
new_lines = np.nonzero(assignments == j)[0]
381380
if not np.prod(new_lines.shape):
@@ -404,9 +403,8 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
404403
# combine all assigned new lines to single outline polygon
405404
if len(new_lines) > 1:
406405
LOG.debug("joining %d new line polygons for '%s'", len(new_lines), line.id)
407-
new_polygon = join_polygons([intersections[(i, j)]
406+
new_polygon = join_polygons([new_line_polygons[i] #intersections[(i, j)]
408407
for i in new_lines], loc=line.id, scale=scale)
409-
line_polygons[j] = new_polygon
410408
new_baseline = join_baselines([new_polygon.intersection(new_baselines[i])
411409
for i in new_lines], loc=line.id)
412410
# convert back to absolute (page) coordinates:
@@ -422,14 +420,15 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
422420
new_baseline = coordinates_for_segment(new_baseline.coords,
423421
parent_image, parent_coords)
424422
line.set_Baseline(BaselineType(points=points_from_polygon(new_baseline)))
423+
line_polygons[j] = prep(new_polygon)
425424
# now also ensure the assigned lines do not overlap other existing lines
426425
for i in new_lines:
427426
for otherj in np.nonzero(fits_fg[i] > 0.1)[0]:
428427
if j == otherj:
429428
continue
430429
otherline = lines[otherj]
431430
LOG.debug("subtracting new '%s' from overlapping '%s'", line.id, otherline.id)
432-
other_polygon = diff_polygons(line_polygons[otherj], new_polygon)
431+
other_polygon = diff_polygons(line_polygons[otherj].context, new_polygon)
433432
if other_polygon.is_empty:
434433
continue
435434
# convert back to absolute (page) coordinates:

0 commit comments

Comments
 (0)