Skip to content

Commit a30ce3b

Browse files
authored
Merge pull request #77 from bertsky/fix-resegment
resegment: skip empty line polygons
2 parents c3fad1a + c2f9203 commit a30ce3b

File tree

14 files changed

+1152
-489
lines changed

14 files changed

+1152
-489
lines changed

README.md

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -228,14 +228,25 @@ ocrd-cis-ocropy-resegment \
228228

229229
Available parameters are:
230230
```sh
231-
"dpi" [number - -1]
231+
"level-of-operation" [string - "page"]
232+
PAGE XML hierarchy level to segment textlines in ('region' abides by
233+
existing text region boundaries, 'page' optimises lines in the whole
234+
page once)
235+
Possible values: ["page", "region"]
236+
"method" [string - "lineest"]
237+
source for new line polygon candidates ('lineest' for line
238+
estimation, i.e. how Ocropy would have segmented text lines;
239+
'baseline' tries to re-polygonize from the baseline annotation;
240+
'ccomps' avoids crossing connected components by majority rule)
241+
Possible values: ["lineest", "baseline", "ccomps"]
242+
"dpi" [number - 0]
232243
pixel density in dots per inch (overrides any meta-data in the
233-
images); disabled when negative
234-
"min_fraction" [number - 0.8]
235-
share of foreground pixels that must be retained by the largest label
244+
images); disabled when zero or negative
245+
"min_fraction" [number - 0.75]
246+
share of foreground pixels that must be retained by the output
247+
polygons
236248
"extend_margins" [number - 3]
237-
number of pixels to extend the input polygons horizontally and
238-
vertically before intersecting
249+
number of pixels to extend the input polygons in all directions
239250
```
240251
241252
### ocrd-cis-ocropy-segment

ocrd_cis/align/cli.py

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,28 @@
88
from ocrd.decorators import ocrd_cli_wrap_processor
99
from ocrd_utils import MIMETYPE_PAGE
1010
from ocrd_utils import getLogger
11+
from ocrd_utils import getLevelName
1112
from ocrd_utils import make_file_id
1213
from ocrd_modelfactory import page_from_file
1314
from ocrd_models.ocrd_page import to_xml
1415
from ocrd_models.ocrd_page_generateds import TextEquivType
1516
from ocrd_cis import JavaAligner
1617
from ocrd_cis import get_ocrd_tool
1718

18-
LOG_LEVEL = 'INFO'
19-
2019
@click.command()
2120
@ocrd_cli_options
2221
def ocrd_cis_align(*args, **kwargs):
23-
if 'log_level' in kwargs and kwargs['log_level']:
24-
global LOG_LEVEL
25-
LOG_LEVEL = kwargs['log_level']
2622
return ocrd_cli_wrap_processor(Aligner, *args, **kwargs)
2723

2824
class Aligner(Processor):
29-
LOG_LEVEL = 'INFO'
3025
def __init__(self, *args, **kwargs):
3126
ocrd_tool = get_ocrd_tool()
3227
kwargs['ocrd_tool'] = ocrd_tool['tools']['ocrd-cis-align']
3328
kwargs['version'] = ocrd_tool['version']
3429
super(Aligner, self).__init__(*args, **kwargs)
35-
self.log = getLogger('cis.Processor.Aligner')
30+
31+
if hasattr(self, 'workspace'):
32+
self.log = getLogger('cis.Processor.Aligner')
3633

3734
def process(self):
3835
ifgs = self.input_file_grp.split(",") # input file groups
@@ -100,14 +97,14 @@ def align_lines(self, lines):
10097
te = TextEquivType(
10198
Unicode=get_textequiv_unicode(line.region),
10299
conf=get_textequiv_conf(line.region),
103-
dataType="ocrd-cis-line-alignment",
104-
dataTypeDetails=ddt)
100+
dataType="other",
101+
dataTypeDetails="ocrd-cis-line-alignment:" + ddt)
105102
lines[0].region.add_TextEquiv(te)
106103
else:
107104
self.log.debug("len: %i, i: %i", len(lines[0].region.get_TextEquiv()), i)
108-
lines[0].region.get_TextEquiv()[i].set_dataType(
109-
"ocrd-cis-line-alignment-master-ocr")
110-
lines[0].region.get_TextEquiv()[i].set_dataTypeDetails(ddt)
105+
lines[0].region.get_TextEquiv()[i].set_dataType("other")
106+
lines[0].region.get_TextEquiv()[i].set_dataTypeDetails(
107+
"ocrd-cis-line-alignment-master-ocr:" + ddt)
111108
lines[0].region.get_TextEquiv()[i].set_index(i+1)
112109
self.align_words(lines)
113110

@@ -149,8 +146,8 @@ def te0(x):
149146
ifg = word.input_file.input_file_group
150147
self.log.debug("(empty) word alignment: [%s]", ifg)
151148
te = TextEquivType(
152-
dataType="ocrd-cis-empty-word-alginment",
153-
dataTypeDetails=ifg)
149+
dataType="other",
150+
dataTypeDetails="ocrd-cis-empty-word-alignment:" + ifg)
154151
words[0].region[0].add_TextEquiv(te)
155152
words[0].region[0].get_TextEquiv()[i].set_index(i+1)
156153
continue
@@ -165,13 +162,13 @@ def te0(x):
165162
te = TextEquivType(
166163
Unicode=_str,
167164
conf=conf,
168-
dataType="ocrd-cis-word-alignment",
169-
dataTypeDetails=ddt)
165+
dataType="other",
166+
dataTypeDetails="ocrd-cis-word-alignment:" + ddt)
170167
words[0].region[0].add_TextEquiv(te)
171168
else:
172-
words[0].region[0].get_TextEquiv()[i].set_dataType(
173-
'ocrd-cis-word-alignment-master-ocr')
174-
words[0].region[0].get_TextEquiv()[i].set_dataTypeDetails(ddt)
169+
words[0].region[0].get_TextEquiv()[i].set_dataType("other")
170+
words[0].region[0].get_TextEquiv()[i].set_dataTypeDetails(
171+
"ocrd-cis-word-alignment-master-ocr:" + ddt)
175172
words[0].region[0].get_TextEquiv()[i].set_index(i+1)
176173

177174
def find_word(self, tokens, regions, t="other"):
@@ -292,7 +289,7 @@ def run_java_aligner(self, ifs):
292289
self.log.debug("input line: %s", i)
293290
n = len(ifs)
294291
self.log.debug("starting java client")
295-
p = JavaAligner(n, LOG_LEVEL or 'INFO')
292+
p = JavaAligner(n, getLevelName(self.log.getEffectiveLevel()))
296293
return p.run("\n".join(_input))
297294

298295
class FileAlignment:

ocrd_cis/ocrd-tool.json

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,20 @@
191191
"output_file_grp": [
192192
"OCR-D-SEG-LINE"
193193
],
194-
"description": "Resegment lines with ocropy (by shrinking annotated polygons)",
194+
"description": "Resegment text lines",
195195
"parameters": {
196+
"level-of-operation": {
197+
"type": "string",
198+
"enum": ["page", "region"],
199+
"description": "PAGE XML hierarchy level to segment textlines in ('region' abides by existing text region boundaries, 'page' optimises lines in the whole page once)",
200+
"default": "page"
201+
},
202+
"method": {
203+
"type": "string",
204+
"enum": ["lineest", "baseline", "ccomps"],
205+
"description": "source for new line polygon candidates ('lineest' for line estimation, i.e. how Ocropy would have segmented text lines; 'baseline' tries to re-polygonize from the baseline annotation; 'ccomps' avoids crossing connected components by majority rule)",
206+
"default": "lineest"
207+
},
196208
"dpi": {
197209
"type": "number",
198210
"format": "float",
@@ -202,13 +214,13 @@
202214
"min_fraction": {
203215
"type": "number",
204216
"format": "float",
205-
"description": "share of foreground pixels that must be retained by the largest label",
206-
"default": 0.8
217+
"description": "share of foreground pixels that must be retained by the output polygons",
218+
"default": 0.75
207219
},
208220
"extend_margins": {
209221
"type": "number",
210222
"format": "integer",
211-
"description": "number of pixels to extend the input polygons horizontally and vertically before intersecting",
223+
"description": "number of pixels to extend the input polygons in all directions",
212224
"default": 3
213225
}
214226
}
@@ -238,9 +250,15 @@
238250
"range": {
239251
"type": "number",
240252
"format": "float",
241-
"description": "maximum vertical disposition or maximum margin (will be multiplied by mean centerline deltas to yield pixels)",
253+
"description": "maximum vertical disposition or maximum margin (will be multiplied by mean centerline deltas to yield pixels); also the mean vertical padding",
242254
"default": 4.0
243255
},
256+
"smoothness": {
257+
"type": "number",
258+
"format": "float",
259+
"description": "kernel size (relative to image height) of horizontal blur applied to foreground to find the center line; the smaller the more dynamic (0.1 would be a better default)",
260+
"default": 1.0
261+
},
244262
"max_neighbour": {
245263
"type": "number",
246264
"format": "float",

ocrd_cis/ocropy/binarize.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def process(self):
140140
if level == 'table':
141141
regions = page.get_TableRegion()
142142
else: # region
143-
regions = page.get_AllRegions(classes=['Text'])
143+
regions = page.get_AllRegions(classes=['Text'], order='reading-order')
144144
if not regions:
145145
self.logger.warning('Page "%s" contains no text regions', page_id)
146146
for region in regions:
@@ -175,6 +175,9 @@ def process(self):
175175
file_id, self.output_file_grp, out.local_filename)
176176

177177
def process_page(self, page, page_image, page_xywh, zoom, page_id, file_id):
178+
if not page_image.width or not page_image.height:
179+
self.logger.warning("Skipping page '%s' with zero size", page_id)
180+
return
178181
self.logger.info("About to binarize page '%s'", page_id)
179182
features = page_xywh['features']
180183
if 'angle' in page_xywh and page_xywh['angle']:
@@ -220,6 +223,9 @@ def process_page(self, page, page_image, page_xywh, zoom, page_id, file_id):
220223
comments=features))
221224

222225
def process_region(self, region, region_image, region_xywh, zoom, page_id, file_id):
226+
if not region_image.width or not region_image.height:
227+
self.logger.warning("Skipping region '%s' with zero size", region.id)
228+
return
223229
self.logger.info("About to binarize page '%s' region '%s'", page_id, region.id)
224230
features = region_xywh['features']
225231
if 'angle' in region_xywh and region_xywh['angle']:
@@ -267,6 +273,9 @@ def process_region(self, region, region_image, region_xywh, zoom, page_id, file_
267273
comments=features))
268274

269275
def process_line(self, line, line_image, line_xywh, zoom, page_id, region_id, file_id):
276+
if not line_image.width or not line_image.height:
277+
self.logger.warning("Skipping line '%s' with zero size", line.id)
278+
return
270279
self.logger.info("About to binarize page '%s' region '%s' line '%s'",
271280
page_id, region_id, line.id)
272281
features = line_xywh['features']

ocrd_cis/ocropy/clip.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ def process(self):
103103
else:
104104
zoom = 1
105105

106+
# FIXME: what about text regions inside table regions?
106107
regions = list(page.get_TextRegion())
107108
num_texts = len(regions)
108109
regions += (
@@ -228,19 +229,23 @@ def process_segment(self, segment, segment_mask, segment_polygon, neighbours,
228229
segment_image = image_from_polygon(parent_image, segment_polygon)
229230
segment_bbox = bbox_from_polygon(segment_polygon)
230231
for neighbour, neighbour_mask in neighbours:
232+
if not np.any(segment_mask > neighbour_mask):
233+
LOG.info('Ignoring enclosing neighbour "%s" of segment "%s" on page "%s"',
234+
neighbour.id, segment.id, page_id)
235+
continue
231236
# find connected components that (only) belong to the neighbour:
232237
intruders = segment_mask * morph.keep_marked(parent_bin, neighbour_mask > 0) # overlaps neighbour
233238
intruders = morph.remove_marked(intruders, segment_mask > neighbour_mask) # but exclusively
234239
num_intruders = np.count_nonzero(intruders)
235240
num_foreground = np.count_nonzero(segment_mask * parent_bin)
236241
if not num_intruders:
237242
continue
238-
if num_intruders / num_foreground > 1.0 - self.parameter['min_fraction']:
239-
LOG.info('Too many intruders (%d/%d) from neighbour "%s" in segment "%s" on page "%s"',
240-
num_intruders, num_foreground, neighbour.id, segment.id, page_id)
241-
continue
242-
LOG.debug('segment "%s" vs neighbour "%s": suppressing %d pixels on page "%s"',
243-
segment.id, neighbour.id, np.count_nonzero(intruders), page_id)
243+
LOG.debug('segment "%s" vs neighbour "%s": suppressing %d of %d pixels on page "%s"',
244+
segment.id, neighbour.id, num_intruders, num_foreground, page_id)
245+
# suppress in segment_mask so these intruders can stay in the neighbours
246+
# (are not removed from both sides)
247+
segment_mask -= intruders
248+
# suppress in derived image result to be annotated
244249
clip_mask = array2pil(intruders)
245250
segment_image.paste(background_image, mask=clip_mask) # suppress in raw image
246251
if segment_image.mode in ['RGB', 'L', 'RGBA', 'LA']:

0 commit comments

Comments
 (0)