Skip to content

Commit c361dbb

Browse files
authored
Merge pull request #61 from bertsky/segment-improve-sep-line-detection
segment: valid polygons, slightly improved h/v-line seps
2 parents e429da4 + 00570e3 commit c361dbb

File tree

4 files changed

+38
-6
lines changed

4 files changed

+38
-6
lines changed

ocrd_cis/ocrd-tool.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"git_url": "https://github.com/cisocrgroup/ocrd_cis",
3-
"version": "0.1.1",
3+
"version": "0.1.2",
44
"tools": {
55
"ocrd-cis-ocropy-binarize": {
66
"executable": "ocrd-cis-ocropy-binarize",

ocrd_cis/ocropy/common.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ def compute_hlines(binary, scale,
561561
DSAVE('hlines5_selected', horiz+0.6*binary)
562562
# 6- dilate vertically a little
563563
# to get a smooth contour without gaps
564-
horiz = morph.r_dilation(horiz, (d0,d1))
564+
horiz = morph.r_dilation(horiz, (d0,odd(scale)))
565565
DSAVE('hlines6_v-dilated', horiz+0.6*binary)
566566
return horiz > 0
567567

@@ -630,7 +630,7 @@ def compute_separators_morph(binary, scale,
630630
DSAVE('colseps5_selected', vert+0.6*binary)
631631
# 6- dilate horizontally a little
632632
# to get a smooth contour without gaps
633-
vert = morph.r_dilation(vert, (d0,d1))
633+
vert = morph.r_dilation(vert, (odd(scale),d1))
634634
DSAVE('colseps6_h-dilated', vert+0.6*binary)
635635
return vert > 0
636636

ocrd_cis/ocropy/segment.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
from skimage import draw
66
from skimage.morphology import convex_hull_image
77
import cv2
8-
from shapely.geometry import Polygon
8+
from shapely.geometry import Polygon, asPolygon
99
from shapely.prepared import prep
10+
from shapely.ops import unary_union
1011

1112
from ocrd_modelfactory import page_from_file
1213
from ocrd_models.ocrd_page import (
@@ -666,16 +667,47 @@ def polygon_for_parent(polygon, parent):
666667
[parent.get_imageWidth(),0]])
667668
else:
668669
parentp = Polygon(polygon_from_points(parent.get_Coords().points))
670+
# check if clipping is necessary
669671
if childp.within(parentp):
670672
return polygon
673+
# ensure input coords have valid paths (without self-intersection)
674+
# (this can happen when shapes valid in floating point are rounded)
675+
childp = make_valid(childp)
676+
parentp = make_valid(parentp)
677+
# clip to parent
671678
interp = childp.intersection(parentp)
672-
if interp.is_empty:
679+
# post-process
680+
if interp.is_empty or interp.area == 0.0:
673681
# FIXME: we need a better strategy against this
674682
raise Exception("intersection of would-be segment with parent is empty")
683+
if interp.type == 'GeometryCollection':
684+
# heterogeneous result: filter zero-area shapes (LineString, Point)
685+
interp = unary_union([geom for geom in interp.geoms if geom.area > 0])
675686
if interp.type == 'MultiPolygon':
687+
# homogeneous result: construct convex hull to connect
688+
# FIXME: construct concave hull / alpha shape
676689
interp = interp.convex_hull
690+
if interp.minimum_clearance < 1.0:
691+
# follow-up calculations will necessarily be integer;
692+
# so anticipate rounding here and then ensure validity
693+
interp = asPolygon(np.round(interp.exterior.coords))
694+
interp = make_valid(interp)
677695
return interp.exterior.coords[:-1] # keep open
678696

697+
def make_valid(polygon):
    """Try to repair an invalid shapely Polygon by re-ordering and simplifying.

    Coordinates that describe a valid shape in floating point can become
    self-intersecting once rounded. Two strategies are applied in turn:

    1. Rotate the starting vertex of the exterior ring until the polygon is
       valid, or until simplification with a tolerance equal to its area
       would make it valid.
    2. Simplify with integer tolerances growing from 1 until the result is
       valid.

    Args:
        polygon: a ``shapely.geometry.Polygon``. Only its exterior ring is
            used when the polygon has to be rebuilt, so interior rings are
            dropped in that case.

    Returns:
        A ``shapely.geometry.Polygon``. This is best-effort: if neither
        strategy succeeds, the last attempt is returned and may still be
        invalid (no exception is raised).
    """
    # Work on the *original* ring so that every rotation is relative to the
    # input; rotating the already-rotated polygon would accumulate offsets
    # (1, 3, 6, ...) and skip candidate starting points.
    points = list(polygon.exterior.coords)
    # The exterior ring repeats its first vertex at the end; drop that copy
    # so rotations do not splice a duplicate vertex into the middle of the
    # ring (Polygon() closes the ring again by itself).
    if len(points) > 1 and points[0] == points[-1]:
        points = points[:-1]
    for split in range(1, len(points)):
        if polygon.is_valid or polygon.simplify(polygon.area).is_valid:
            break
        # simplification may not be possible (at all) due to ordering
        # in that case, try another starting point
        polygon = Polygon(points[-split:] + points[:-split])
    # Self-intersecting rings can have an area close to zero (signed parts
    # cancel out), so always allow at least one simplification attempt and
    # include a tolerance at least as large as the area itself, i.e. the
    # tolerance probed by the loop above.
    max_tolerance = int(polygon.area + 1.5)
    for tolerance in range(1, max_tolerance + 1):
        if polygon.is_valid:
            break
        # simplification may require a larger tolerance
        polygon = polygon.simplify(tolerance)
    return polygon
710+
679711
def page_get_reading_order(ro, rogroup):
680712
"""Add all elements from the given reading order group to the given dictionary.
681713

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
'scipy',
4444
'numpy>=1.17.0',
4545
'pillow>=7.1.2',
46-
'shapely',
46+
'shapely>=1.7.1',
4747
'scikit-image',
4848
'opencv-python-headless',
4949
'python-Levenshtein',

0 commit comments

Comments
 (0)