Skip to content

Commit 2ee845b

Browse files
author
Robert Sachunsky
committed
ocropy.lines2regions: fix textline assignment around existing regions
In `finalize`, if predefined region labels are present, then when re-ordering the slice's old and new zones and assigning textlines to them:
- calculate the order based on fg relationships, not bg
- make sure textlines are assigned to their majority zone
1 parent f0fcf89 commit 2ee845b

File tree

1 file changed

+16
-4
lines changed

1 file changed

+16
-4
lines changed

ocrd_cis/ocropy/common.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,7 +1317,9 @@ def finalize():
13171317
LOG.debug('new region %d for %d lines', num_regions, len(linelabels))
13181318
relabel[linelabels] = num_regions
13191319
else:
1320-
# (partial) initial segmentation exists - order existing and non-existing groups
1320+
# (partial) initial segmentation exists - order existing groups against rest,
1321+
# this must be done on full labels (bg+fg), so we first need to reconstruct
1322+
# this slice's llab/rlab
13211323
rlab = sl.cut(rlabels, box)
13221324
if isinstance(mask, np.ndarray):
13231325
rlab = np.where(mask, rlab, 0)
@@ -1329,11 +1331,21 @@ def finalize():
13291331
llab *= linelabels0[llab]
13301332
newregion = rlab.max()+1
13311333
rlab = np.where(llab, np.where(rlab, rlab, newregion), 0)
1332-
order = np.argsort(morph.reading_order(rlab,rl,bt))
1334+
order = np.argsort(morph.reading_order((lbin>0) * rlab, rl, bt))
1335+
# get region label with highest share for each line,
1336+
# then assign it to that region
1337+
llab2rlab, llabcount = dict(), dict()
1338+
for line, region, count in morph.correspondences(llab, rlab).T:
1339+
if line > 0 and region > 0 and count > llabcount.get(line, 0):
1340+
llabcount[line] = count
1341+
llab2rlab[line] = region
1342+
rlab2llab = dict()
1343+
for line, region in llab2rlab.items():
1344+
rlab2llab.setdefault(region, list()).append(line)
13331345
for region in order:
1334-
lines = np.setdiff1d(np.unique(llab * (rlab == region)), [0])
1335-
if not lines.any():
1346+
if not region in rlab2llab:
13361347
continue
1348+
lines = rlab2llab[region]
13371349
num_regions += 1
13381350
if region == newregion:
13391351
if debug:

0 commit comments

Comments
 (0)