Skip to content

Commit 6d8c0d3

Browse files
author
Robert Sachunsky
committed
ocropy.lines2regions: improve splitting by separators
When trying to partition slices by separators: (1) also treat pre-existing regions like separators, and (2) fix the condition on the smallest allowed partitions (insignificant but complete lines).
1 parent 64083c7 commit 6d8c0d3

File tree

1 file changed

+37
-34
lines changed

1 file changed

+37
-34
lines changed

ocrd_cis/ocropy/common.py

Lines changed: 37 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,13 +1281,13 @@ def lines2regions(binary, llabels,
12811281
LOG.debug('combining lines to regions')
12821282
relabel = np.zeros(np.amax(llabels)+1, np.int)
12831283
num_regions = 0
1284-
def recursive_x_y_cut(box, mask=None, is_partition=False, debug=False):
1284+
def recursive_x_y_cut(box, mask=None, partition_type=None, debug=False):
12851285
"""Split lbinary at horizontal or vertical gaps recursively.
12861286
12871287
- ``box`` current slice
12881288
- ``mask`` (optional) binary mask for current box to focus
12891289
line labels on (passed+sliced down recursively)
1290-
- ``is_partition`` whether ``mask`` was created by partitioning
1290+
- ``partition_type`` whether ``mask`` was created by partitioning
12911291
immediately before (without any intermediate cuts), and thus
12921292
must not be repeated in the current iteration
12931293
@@ -1366,55 +1366,58 @@ def finalize():
13661366
finalize()
13671367
return
13681368

1369-
# try cuts via annotated separators (strong integration)
1369+
# try split via annotated separators (strong integration)
13701370
# i.e. does current slice of sepmask contain true partitions?
1371-
# (at least 2 partitions which contain at least 1 significant line label each)
1371+
# (at least 2 partitions which contain at least 1 line label each,
1372+
# where each line label in that partition in the current slice
1373+
# must cover a significant part of that line label in the full image)
13721374
partitions, npartitions = None, 0
13731375
if (isinstance(sepmask, np.ndarray) and
13741376
np.count_nonzero(sepmask)):
13751377
sepm = sl.cut(sepmask, box)
13761378
if isinstance(mask, np.ndarray):
13771379
sepm = np.where(mask, sepm, 1)
1378-
if is_partition:
1379-
# sepmask already applied in current X-Y branch:
1380-
# don't try again, but provide `partitions` for next step
1381-
partitions, npartitions = 1-sepm, 1
1382-
else:
1383-
# sepmask already applied in higher X-Y branch:
1384-
# apply again in this cut like another separator
1380+
if isinstance(rlabels, np.ndarray):
1381+
# treat existing regions like separators
1382+
rlab = sl.cut(rlabels, box)
1383+
if isinstance(mask, np.ndarray):
1384+
rlab = np.where(mask, rlab, 0)
1385+
sepm = np.where(rlab, 1, sepm)
1386+
# provide `partitions` for next step
1387+
partitions, npartitions = 1-sepm, 1
1388+
new_partition_type = None
1389+
# try to find `partitions` in current step
1390+
if partition_type != 'splitmask':
1391+
# sepmask not applied yet, or already applied in higher X-Y branch:
1392+
# try to apply in this cut like another separator
13851393
partitions, npartitions = morph.label(1-sepm)
13861394
if npartitions > 1:
1387-
# delete partitions that have no significant line labels
1388-
lpartitions = [None]
1395+
# first, delete partitions that have no significant line labels
1396+
splitmap = np.zeros(len(objects)+1, dtype=np.int)
13891397
for label in range(1, npartitions+1):
1390-
linelabels = np.bincount(lbin[partitions==label], minlength=len(objects))
1391-
linelabels[0] = 0 # without bg
1398+
linecounts = np.bincount(lbin[partitions==label], minlength=len(objects))
1399+
linecounts[0] = 0 # without bg
13921400
# get significant line labels for this partition
1393-
# (but keep insignificant non-empty labels when complete)
1394-
linelabels = np.nonzero(linelabels >= min(max(1, bincounts.max()),
1395-
min_line * scale))[0]
1396-
if np.any(linelabels):
1397-
lpartitions.append(linelabels)
1401+
# (but keep insignificant non-empty labels if complete)
1402+
mincounts = np.minimum(min_line * scale, np.maximum(1, bincounts))
1403+
linelabels = np.nonzero(linecounts >= mincounts)[0]
1404+
if linelabels.size:
1405+
splitmap[linelabels] = label
13981406
if debug: LOG.debug(' sepmask partition %d: %s', label, str(linelabels))
13991407
else:
1400-
lpartitions.append(None)
14011408
partitions[partitions==label] = 0
1402-
# merge partitions that share any significant line labels
1409+
# second, merge partitions that share any significant line labels
14031410
for label1 in range(1, npartitions+1):
1404-
if lpartitions[label1] is None:
1411+
if not np.any(splitmap == label1):
14051412
continue
14061413
for label2 in range(label1+1, npartitions+1):
1407-
if lpartitions[label2] is None:
1414+
if not np.any(splitmap == label2):
14081415
continue
1409-
if np.any(np.intersect1d(lpartitions[label1],
1410-
lpartitions[label2])):
1416+
if np.any((splitmap == label1) & (splitmap == label2)):
1417+
splitmap[splitmap == label2] = label1
14111418
partitions[partitions==label2] = label1
1412-
lpartitions[label1] = np.union1d(lpartitions[label1],
1413-
lpartitions[label2])
1414-
lpartitions[label2] = [0]
1415-
# re-label and re-order surviving partitions
1416-
lpartitions = np.setdiff1d(np.unique(partitions), [0]) # without bg/sepm
1417-
npartitions = len(lpartitions)
1419+
npartitions = len(np.setdiff1d(np.unique(splitmap), [0]))
1420+
new_partition_type = 'splitmask'
14181421
if debug: LOG.debug(' %d sepmask partitions after filtering and merging', npartitions)
14191422
if npartitions > 1:
14201423
# sort partitions in reading order
@@ -1442,7 +1445,7 @@ def finalize():
14421445
llab[box[0],box[1].stop-1-i] = -10*np.log(y+1e-9)
14431446
llab[box[0].start+i,box[1]] = -10*np.log(x+1e-9)
14441447
llab[box[0].stop-1-i,box[1]] = -10*np.log(x+1e-9)
1445-
DSAVE('recursive_x_y_cut' + ('_masked' if is_partition else ''), llab)
1448+
DSAVE('recursive_x_y_cut_' + (partition_type or 'sliced'), llab)
14461449
gap_weights = list()
14471450
for is_horizontal, profile in enumerate([y, x]):
14481451
# find gaps in projection profiles
@@ -1640,7 +1643,7 @@ def finalize():
16401643
DSAVE('recursive_x_y_cut_partitions', llab2)
16411644
for label in range(1, npartitions+1):
16421645
LOG.debug('next partition %d on %s', label, box)
1643-
recursive_x_y_cut(box, mask=partitions==label, is_partition=True)
1646+
recursive_x_y_cut(box, mask=partitions==label, partition_type=new_partition_type)
16441647
return
16451648

16461649
if not np.any(gaps):

0 commit comments

Comments
 (0)