Skip to content

Commit 8353c7f

Browse files
author
Robert Sachunsky
committed
resegment: (temporarily) enlarge parent region, too
1 parent 4cea672 commit 8353c7f

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

ocrd_cis/ocropy/resegment.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -92,13 +92,13 @@ def process(self):
9292
assert_file_grp_cardinality(self.input_file_grp, 1)
9393
assert_file_grp_cardinality(self.output_file_grp, 1)
9494

95-
for (n, input_file) in enumerate(self.input_files):
95+
for n, input_file in enumerate(self.input_files):
9696
LOG.info("INPUT FILE %i / %s", n, input_file.pageId or input_file.ID)
9797
file_id = make_file_id(input_file, self.output_file_grp)
9898

9999
pcgts = page_from_file(self.workspace.download_file(input_file))
100100
self.add_metadata(pcgts)
101-
page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID # (PageType has no id)
101+
page_id = pcgts.pcGtsId or input_file.pageId or input_file.ID
102102
page = pcgts.get_Page()
103103

104104
page_image, page_coords, page_image_info = self.workspace.image_from_page(
@@ -127,7 +127,7 @@ def process(self):
127127
page.get_SeparatorRegion() +
128128
page.get_UnknownRegion() +
129129
page.get_CustomRegion())
130-
regions = page.get_AllRegions(classes=['Text'], order='reading-order')
130+
regions = page.get_AllRegions(classes=['Text'])
131131
if not regions:
132132
LOG.warning('Page "%s" contains no text regions', page_id)
133133
elif level == 'page':
@@ -199,6 +199,8 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
199199
LOG.debug('unmasking area of text region "%s" for "%s"',
200200
segment.id, page_id)
201201
segment_polygon = coordinates_of_segment(segment, parent_image, parent_coords)
202+
segment_polygon = make_valid(Polygon(segment_polygon)).buffer(margin)
203+
segment_polygon = np.array(segment_polygon.exterior, np.int)[:-1]
202204
ignore_bin[draw.polygon(segment_polygon[:, 1],
203205
segment_polygon[:, 0],
204206
parent_bin.shape)] = False
@@ -275,9 +277,8 @@ def _process_segment(self, parent, parent_image, parent_coords, page_id, zoom, l
275277
fits = (fits_bg[i] > 0.6) & (fits_fg[i] > 0.9)
276278
if not fits.any():
277279
j = np.argmax(fits_bg[i] * fits_fg[i])
278-
LOG.debug("best fit '%s' for new line %d covers only %.1f%% bg / %.1f%% fg",
279-
lines[j].id,
280-
i, fits_bg[i,j] * 100, fits_fg[i,j] * 100)
280+
LOG.debug("best fit '%s' for new line %d fits only %.1f%% bg / %.1f%% fg",
281+
lines[j].id, i, fits_bg[i,j] * 100, fits_fg[i,j] * 100)
281282
continue
282283
covers = covers_bg[i] * covers_fg[i] * fits
283284
j = np.argmax(covers)

0 commit comments

Comments (0)