Skip to content

Commit 6bb46e2

Browse files
author
Robert Sachunsky
committed
segment: more robust intersection with parent…
- cover zero-area intersections, treating them like empty ones
- cover heterogeneous intersections, removing zero-area shapes
- cover invalid paths on the input or output side (caused by rounding) via repeated simplification
- cover invalid paths which cannot be repaired through simplification directly (by finding a new starting point in the point sequence)
- update shapely
1 parent 073d4ca commit 6bb46e2

File tree

2 files changed

+35
-3
lines changed

2 files changed

+35
-3
lines changed

ocrd_cis/ocropy/segment.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
from skimage import draw
66
from skimage.morphology import convex_hull_image
77
import cv2
8-
from shapely.geometry import Polygon
8+
from shapely.geometry import Polygon, asPolygon
99
from shapely.prepared import prep
10+
from shapely.ops import unary_union
1011

1112
from ocrd_modelfactory import page_from_file
1213
from ocrd_models.ocrd_page import (
@@ -666,16 +667,47 @@ def polygon_for_parent(polygon, parent):
666667
[parent.get_imageWidth(),0]])
667668
else:
668669
parentp = Polygon(polygon_from_points(parent.get_Coords().points))
670+
# check if clipping is necessary
669671
if childp.within(parentp):
670672
return polygon
673+
# ensure input coords have valid paths (without self-intersection)
674+
# (this can happen when shapes valid in floating point are rounded)
675+
childp = make_valid(childp)
676+
parentp = make_valid(parentp)
677+
# clip to parent
671678
interp = childp.intersection(parentp)
672-
if interp.is_empty:
679+
# post-process
680+
if interp.is_empty or interp.area == 0.0:
673681
# FIXME: we need a better strategy against this
674682
raise Exception("intersection of would-be segment with parent is empty")
683+
if interp.type == 'GeometryCollection':
684+
# heterogeneous result: filter zero-area shapes (LineString, Point)
685+
interp = unary_union([geom for geom in interp.geoms if geom.area > 0])
675686
if interp.type == 'MultiPolygon':
687+
# homogeneous result: construct convex hull to connect
688+
# FIXME: construct concave hull / alpha shape
676689
interp = interp.convex_hull
690+
if interp.minimum_clearance < 1.0:
691+
# follow-up calculations will necessarily be integer;
692+
# so anticipate rounding here and then ensure validity
693+
interp = asPolygon(np.round(interp.exterior.coords))
694+
interp = make_valid(interp)
677695
return interp.exterior.coords[:-1] # keep open
678696

697+
def make_valid(polygon):
    """Return a valid version of ``polygon``, repairing it where necessary.

    Repair is attempted in two stages:

    1. Re-arrangement: as long as the polygon is neither valid nor can be
       made valid by a single coarse simplification, rebuild it from the
       same exterior ring starting at a different vertex.
    2. Simplification: simplify with growing integer tolerance until the
       result is valid.

    Args:
        polygon: a ``shapely.geometry.Polygon`` (only ``exterior.coords``,
            ``is_valid``, ``area`` and ``simplify`` are used).

    Returns:
        The input object itself if it is already valid, otherwise the
        re-arranged and/or simplified polygon. The result may still be
        invalid if no tolerance up to the polygon's area repairs it.
    """
    # Snapshot the ring once (as a plain list, so that slicing and ``+``
    # concatenate regardless of what the coordinate sequence returns for
    # slices), and drop the closing point that merely repeats the first one:
    # otherwise every rotation would carry a duplicate vertex inside the ring.
    ring = list(polygon.exterior.coords)[:-1]
    for split in range(1, len(ring)):
        if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
            break
        # simplification may not be possible (at all) due to ordering;
        # in that case, try another starting point -- always rotating the
        # original ring, so that each starting point is tried exactly once
        polygon = Polygon(ring[-split:] + ring[:-split])
    # Always allow at least one simplification attempt, even for polygons
    # whose area is smaller than 2 (where int(area) would leave no tolerance).
    max_tolerance = max(1, int(polygon.area))
    for tolerance in range(1, max_tolerance + 1):
        if polygon.is_valid:
            break
        # simplification may require a larger tolerance
        polygon = polygon.simplify(tolerance)
    return polygon
710+
679711
def page_get_reading_order(ro, rogroup):
680712
"""Add all elements from the given reading order group to the given dictionary.
681713

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
'scipy',
4444
'numpy>=1.17.0',
4545
'pillow>=7.1.2',
46-
'shapely',
46+
'shapely>=1.7.1',
4747
'scikit-image',
4848
'opencv-python-headless',
4949
'matplotlib>3.0.0',

0 commit comments

Comments
 (0)