Skip to content

Commit 3ac4b70

Browse files
author
Robert Sachunsky
committed
ocropy.lines2regions: fall back to topological partitioning
When no cut or separator-split partition can be found for the current slice, attempt to find another separator split by grouping lines according to the foreground (fg) separators they share as horizontal neighbours; both kinds of partitioning may be applied repeatedly, as long as they alternate (a topological partition is never followed directly by another topological partition).
1 parent 6d8c0d3 commit 3ac4b70

File tree

1 file changed

+115
-6
lines changed

1 file changed

+115
-6
lines changed

ocrd_cis/ocropy/common.py

Lines changed: 115 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,7 +1273,12 @@ def lines2regions(binary, llabels,
12731273
sepmask = 1-morph.keep_marked(1-sepmask, lbinary>0)
12741274
DSAVE('sepmask', [sepmask,binary])
12751275
objects = [None] + morph.find_objects(llabels)
1276-
#centers = measurements.center_of_mass(binary, llabels)
1276+
#centers = measurements.center_of_mass(binary, llabels, np.unique(llabels))
1277+
def center(obj):
    """Return the center of a line label's slice pair, or ``[0,0]`` if it is empty.

    ``obj`` is one entry of ``morph.find_objects(llabels)``. Emptiness is
    decided by ``morph.sl.empty`` and the center itself is computed by
    ``morph.sl.center`` (presumably the midpoint of the bounding slices --
    confirm against the slice helper module).
    """
    return [0, 0] if morph.sl.empty(obj) else morph.sl.center(obj)
1281+
centers = list(map(center, objects[1:]))
12771282
if scale is None:
12781283
scale = psegutils.estimate_scale(binary, zoom)
12791284
bincounts = np.bincount(lbinary.flatten())
@@ -1419,11 +1424,108 @@ def finalize():
14191424
npartitions = len(np.setdiff1d(np.unique(splitmap), [0]))
14201425
new_partition_type = 'splitmask'
14211426
if debug: LOG.debug(' %d sepmask partitions after filtering and merging', npartitions)
1427+
if partition_type != 'topological':
1428+
# try to partition spanning lines against separator-split lines
1429+
# get current slice's line labels
1430+
def find_topological():
1431+
# run only if needed (no other partition/slicing possible)
1432+
nonlocal partitions, npartitions, new_partition_type
1433+
llab = sl.cut(llabels, box)
1434+
if isinstance(mask, np.ndarray):
1435+
llab = np.where(mask, llab, 0)
1436+
obj = [sl.intersect(o, box) for o in objects]
1437+
# get current slice's foreground
1438+
bin = sl.cut(binary, box)
1439+
if isinstance(mask, np.ndarray):
1440+
bin = np.where(mask, bin, 0)
1441+
# get current slice's separator labels
1442+
seplab, nseps = morph.label(sepm)
1443+
if nseps == 0:
1444+
return
1445+
sepind = np.unique(seplab)
1446+
# (but keep only those with large fg i.e. ignore white-space seps)
1447+
seplabs, counts = np.unique(seplab * bin, return_counts=True)
1448+
kept = np.in1d(seplab.ravel(), seplabs[counts > scale * min_line])
1449+
seplab = seplab * kept.reshape(*seplab.shape)
1450+
DSAVE('seplab', seplab)
1451+
sepobj = morph.find_objects(seplab)
1452+
if not len(sepobj):
1453+
return
1454+
# get current slice's line labels
1455+
# (but keep only those with foreground)
1456+
linelabels = np.setdiff1d(np.unique(lbin), [0])
1457+
nlines = linelabels.max() + 1
1458+
# find pairs of lines above each other with a separator next to them
1459+
leftseps = np.zeros((nlines, nseps), np.bool)
1460+
rghtseps = np.zeros((nlines, nseps), np.bool)
1461+
for line in linelabels:
1462+
for i, sep in enumerate(sepobj):
1463+
if sep is None:
1464+
continue
1465+
if sl.yoverlap(sep, obj[line]) / sl.height(obj[line]) <= 0.75:
1466+
continue
1467+
sepx = np.nonzero(seplab[obj[line][0]] == i + 1)[1]
1468+
binx = np.nonzero(lbin[obj[line][0]] == line)[1]
1469+
if not binx.size:
1470+
continue
1471+
# more robust to noise: 95% instead of max(), 5% instead of min()
1472+
if sepx.max() <= np.percentile(binx, 5):
1473+
leftseps[line, i] = True
1474+
if sepx.min() >= np.percentile(binx, 95):
1475+
rghtseps[line, i] = True
1476+
# true separators have some lines on either side
1477+
trueseps = leftseps.max(axis=0) & rghtseps.max(axis=0)
1478+
if not np.any(trueseps):
1479+
return
1480+
if debug: LOG.debug("trueseps: %s", str(trueseps))
1481+
neighbours = np.zeros((nlines, nlines), np.bool)
1482+
for i in linelabels:
1483+
for j in linelabels[i+1:]:
1484+
if sl.yoverlap_rel(obj[i], obj[j]) > 0.5:
1485+
continue
1486+
# pair must have common separator on one side,
1487+
# which must also have some line on the other side
1488+
if (np.any(leftseps[i] & leftseps[j] & trueseps) or
1489+
np.any(rghtseps[i] & rghtseps[j] & trueseps)):
1490+
if debug: LOG.debug("neighbours: %d/%d", i, j)
1491+
neighbours[i,j] = True
1492+
if not np.any(neighbours):
1493+
return
1494+
# group neighbours by adjacency (i.e. put any contiguous pairs
1495+
# of such line labels into the same group)
1496+
nlabels = llab.max() + 1
1497+
splitmap = np.zeros(nlabels, dtype=np.int)
1498+
for i, j in zip(*neighbours.nonzero()):
1499+
if splitmap[i] > 0:
1500+
splitmap[j] = splitmap[i]
1501+
elif splitmap[j] > 0:
1502+
splitmap[i] = splitmap[j]
1503+
else:
1504+
splitmap[i] = i
1505+
splitmap[j] = i
1506+
nsplits = splitmap.max()
1507+
# group non-neighbours by adjacency (i.e. put any other contiguous
1508+
# non-empty line labels into the same group)
1509+
nonneighbours = (splitmap==0)[llab] * (llab > 0) * (sepm == 0)
1510+
nonneighbours, _ = morph.label(nonneighbours)
1511+
for i, j in morph.correspondences(nonneighbours, llab, False).T:
1512+
if i > 0 and j > 0:
1513+
splitmap[j] = i + nsplits
1514+
if debug: LOG.debug(' groups of adjacent lines: %s', str(splitmap))
1515+
partitions = splitmap[llab]
1516+
DSAVE('partitions', partitions)
1517+
npartitions = len(np.setdiff1d(np.unique(splitmap), [0]))
14221518
if npartitions > 1:
1423-
# sort partitions in reading order
1424-
order = morph.reading_order(partitions,rl,bt)
1425-
partitions = order[partitions]
1426-
#lpartitions = order[lpartitions]
1519+
if debug: LOG.debug(" found %d spanning partitions", npartitions)
1520+
# re-assign background to nearest partition
1521+
partitions = morph.spread_labels(np.where(llab, partitions, 0))
1522+
# re-assert mask, if any
1523+
if isinstance(mask, np.ndarray):
1524+
partitions = np.where(mask, partitions, 0)
1525+
new_partition_type = 'topological'
1526+
else:
1527+
def find_topological():
1528+
return
14271529

14281530
# try cuts via h/v projection profiles
14291531
y = np.mean(lbin>0, axis=1)
@@ -1617,6 +1719,11 @@ def finalize():
16171719
partitionscores = y_partitionscores
16181720
lim = len(y)
16191721

1722+
if not np.any(gaps) and npartitions == 1:
1723+
# no slices and no partitions, but separators exist
1724+
# so try to fall back to more elaborate partitioning
1725+
find_topological() # partitions, npartitions, new_partition_type
1726+
16201727
# now that we have a decision on direction (x/y)
16211728
# as well as scores for its gaps, decide whether
16221729
# to prefer cuts at annotated separators (partitions) instead
@@ -1630,7 +1737,9 @@ def finalize():
16301737
sum(map(sl.height if prefer_vertical else sl.width,
16311738
(morph.find_objects(partitions)))) > np.max(
16321739
partitionscores, initial=0))):
1633-
# continue on each partition by suppressing the others
1740+
# continue on each partition by suppressing the others, respectively
1741+
order = morph.reading_order(partitions,rl,bt)
1742+
partitions = order[partitions]
16341743
LOG.debug('cutting by %d partitions on %s', npartitions, box)
16351744
if debug:
16361745
# show current cut/box inside full image

0 commit comments

Comments
 (0)