cisocrgroup
diff --git a/‎README.md‎
Lines changed: 17 additions & 6 deletions b/‎README.md‎
Lines changed: 17 additions & 6 deletions
diff --git a/‎ocrd_cis/align/cli.py‎
Lines changed: 17 additions & 20 deletions b/‎ocrd_cis/align/cli.py‎
Lines changed: 17 additions & 20 deletions
diff --git a/‎ocrd_cis/ocrd-tool.json‎
Lines changed: 23 additions & 5 deletions b/‎ocrd_cis/ocrd-tool.json‎
Lines changed: 23 additions & 5 deletions
diff --git a/‎ocrd_cis/ocropy/binarize.py‎
Lines changed: 10 additions & 1 deletion b/‎ocrd_cis/ocropy/binarize.py‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎ocrd_cis/ocropy/clip.py‎
Lines changed: 11 additions & 6 deletions b/‎ocrd_cis/ocropy/clip.py‎
Lines changed: 11 additions & 6 deletions
@@ -228,14 +228,25 @@ ocrd-cis-ocropy-resegment \
 
 Available parameters are:
 ```sh
-   "dpi" [number - -1]
+   "level-of-operation" [string - "page"]
+    PAGE XML hierarchy level to segment textlines in ('region' abides by
+    existing text region boundaries, 'page' optimises lines in the whole
+    page once)
+    Possible values: ["page", "region"]
+   "method" [string - "lineest"]
+    source for new line polygon candidates ('lineest' for line
+    estimation, i.e. how Ocropy would have segmented text lines;
+    'baseline' tries to re-polygonize from the baseline annotation;
+    'ccomps' avoids crossing connected components by majority rule)
+    Possible values: ["lineest", "baseline", "ccomps"]
+   "dpi" [number - 0]
     pixel density in dots per inch (overrides any meta-data in the
-    images); disabled when negative
-   "min_fraction" [number - 0.8]
-    share of foreground pixels that must be retained by the largest label
+    images); disabled when zero or negative
+   "min_fraction" [number - 0.75]
+    share of foreground pixels that must be retained by the output
+    polygons
    "extend_margins" [number - 3]
-    number of pixels to extend the input polygons horizontally and
-    vertically before intersecting
+    number of pixels to extend the input polygons in all directions
 ```
 
 ### ocrd-cis-ocropy-segment
 
@@ -8,31 +8,28 @@
 from ocrd.decorators import ocrd_cli_wrap_processor
 from ocrd_utils import MIMETYPE_PAGE
 from ocrd_utils import getLogger
+from ocrd_utils import getLevelName
 from ocrd_utils import make_file_id
 from ocrd_modelfactory import page_from_file
 from ocrd_models.ocrd_page import to_xml
 from ocrd_models.ocrd_page_generateds import TextEquivType
 from ocrd_cis import JavaAligner
 from ocrd_cis import get_ocrd_tool
 
-LOG_LEVEL = 'INFO'
-
 @click.command()
 @ocrd_cli_options
 def ocrd_cis_align(*args, **kwargs):
-    if 'log_level' in kwargs and kwargs['log_level']:
-        global LOG_LEVEL
-        LOG_LEVEL = kwargs['log_level']
     return ocrd_cli_wrap_processor(Aligner, *args, **kwargs)
 
 class Aligner(Processor):
-    LOG_LEVEL = 'INFO'
     def __init__(self, *args, **kwargs):
         ocrd_tool = get_ocrd_tool()
         kwargs['ocrd_tool'] = ocrd_tool['tools']['ocrd-cis-align']
         kwargs['version'] = ocrd_tool['version']
         super(Aligner, self).__init__(*args, **kwargs)
-        self.log = getLogger('cis.Processor.Aligner')
+
+        if hasattr(self, 'workspace'):
+            self.log = getLogger('cis.Processor.Aligner')
 
     def process(self):
         ifgs = self.input_file_grp.split(",")  # input file groups
@@ -100,14 +97,14 @@ def align_lines(self, lines):
                 te = TextEquivType(
                     Unicode=get_textequiv_unicode(line.region),
                     conf=get_textequiv_conf(line.region),
-                    dataType="ocrd-cis-line-alignment",
-                    dataTypeDetails=ddt)
+                    dataType="other",
+                    dataTypeDetails="ocrd-cis-line-alignment:" + ddt)
                 lines[0].region.add_TextEquiv(te)
             else:
                 self.log.debug("len: %i, i: %i", len(lines[0].region.get_TextEquiv()), i)
-                lines[0].region.get_TextEquiv()[i].set_dataType(
-                    "ocrd-cis-line-alignment-master-ocr")
-            lines[0].region.get_TextEquiv()[i].set_dataTypeDetails(ddt)
+                lines[0].region.get_TextEquiv()[i].set_dataType("other")
+                lines[0].region.get_TextEquiv()[i].set_dataTypeDetails(
+                    "ocrd-cis-line-alignment-master-ocr:" + ddt)
             lines[0].region.get_TextEquiv()[i].set_index(i+1)
         self.align_words(lines)
 
@@ -149,8 +146,8 @@ def te0(x):
                 ifg = word.input_file.input_file_group
                 self.log.debug("(empty) word alignment: [%s]", ifg)
                 te = TextEquivType(
-                    dataType="ocrd-cis-empty-word-alginment",
-                    dataTypeDetails=ifg)
+                    dataType="other",
+                    dataTypeDetails="ocrd-cis-empty-word-alignment:" + ifg)
                 words[0].region[0].add_TextEquiv(te)
                 words[0].region[0].get_TextEquiv()[i].set_index(i+1)
                 continue
@@ -165,13 +162,13 @@ def te0(x):
                 te = TextEquivType(
                     Unicode=_str,
                     conf=conf,
-                    dataType="ocrd-cis-word-alignment",
-                    dataTypeDetails=ddt)
+                    dataType="other",
+                    dataTypeDetails="ocrd-cis-word-alignment:" + ddt)
                 words[0].region[0].add_TextEquiv(te)
             else:
-                words[0].region[0].get_TextEquiv()[i].set_dataType(
-                    'ocrd-cis-word-alignment-master-ocr')
-                words[0].region[0].get_TextEquiv()[i].set_dataTypeDetails(ddt)
+                words[0].region[0].get_TextEquiv()[i].set_dataType("other")
+                words[0].region[0].get_TextEquiv()[i].set_dataTypeDetails(
+                    "ocrd-cis-word-alignment-master-ocr:" + ddt)
             words[0].region[0].get_TextEquiv()[i].set_index(i+1)
 
     def find_word(self, tokens, regions, t="other"):
@@ -292,7 +289,7 @@ def run_java_aligner(self, ifs):
             self.log.debug("input line: %s", i)
         n = len(ifs)
         self.log.debug("starting java client")
-        p = JavaAligner(n, LOG_LEVEL or 'INFO')
+        p = JavaAligner(n, getLevelName(self.log.getEffectiveLevel()))
         return p.run("\n".join(_input))
 
 class FileAlignment:
 
@@ -191,8 +191,20 @@
 			"output_file_grp": [
 				"OCR-D-SEG-LINE"
 			],
-			"description": "Resegment lines with ocropy (by shrinking annotated polygons)",
+			"description": "Resegment text lines",
 			"parameters": {
+				"level-of-operation": {
+					"type": "string",
+					"enum": ["page", "region"],
+					"description": "PAGE XML hierarchy level to segment textlines in ('region' abides by existing text region boundaries, 'page' optimises lines in the whole page once)",
+					"default": "page"
+				},
+				"method": {
+					"type": "string",
+					"enum": ["lineest", "baseline", "ccomps"],
+					"description": "source for new line polygon candidates ('lineest' for line estimation, i.e. how Ocropy would have segmented text lines; 'baseline' tries to re-polygonize from the baseline annotation; 'ccomps' avoids crossing connected components by majority rule)",
+					"default": "lineest"
+				},
 				"dpi": {
 					"type": "number",
 					"format": "float",
@@ -202,13 +214,13 @@
 				"min_fraction": {
 					"type": "number",
 					"format": "float",
-					"description": "share of foreground pixels that must be retained by the largest label",
-					"default": 0.8
+					"description": "share of foreground pixels that must be retained by the output polygons",
+					"default": 0.75
 				},
 				"extend_margins": {
 					"type": "number",
 					"format": "integer",
-					"description": "number of pixels to extend the input polygons horizontally and vertically before intersecting",
+					"description": "number of pixels to extend the input polygons in all directions",
 					"default": 3
 				}
 			}
@@ -238,9 +250,15 @@
 				"range": {
 					"type": "number",
 					"format": "float",
-					"description": "maximum vertical disposition or maximum margin (will be multiplied by mean centerline deltas to yield pixels)",
+					"description": "maximum vertical disposition or maximum margin (will be multiplied by mean centerline deltas to yield pixels); also the mean vertical padding",
 					"default": 4.0
 				},
+				"smoothness": {
+					"type": "number",
+					"format": "float",
+					"description": "kernel size (relative to image height) of horizontal blur applied to foreground to find the center line; the smaller the more dynamic (0.1 would be a better default)",
+					"default": 1.0
+				},
 				"max_neighbour": {
 					"type": "number",
 					"format": "float",
 
@@ -140,7 +140,7 @@ def process(self):
                 if level == 'table':
                     regions = page.get_TableRegion()
                 else: # region
-                    regions = page.get_AllRegions(classes=['Text'])
+                    regions = page.get_AllRegions(classes=['Text'], order='reading-order')
                 if not regions:
                     self.logger.warning('Page "%s" contains no text regions', page_id)
                 for region in regions:
@@ -175,6 +175,9 @@ def process(self):
                              file_id, self.output_file_grp, out.local_filename)
 
     def process_page(self, page, page_image, page_xywh, zoom, page_id, file_id):
+        if not page_image.width or not page_image.height:
+            self.logger.warning("Skipping page '%s' with zero size", page_id)
+            return
         self.logger.info("About to binarize page '%s'", page_id)
         features = page_xywh['features']
         if 'angle' in page_xywh and page_xywh['angle']:
@@ -220,6 +223,9 @@ def process_page(self, page, page_image, page_xywh, zoom, page_id, file_id):
             comments=features))
 
     def process_region(self, region, region_image, region_xywh, zoom, page_id, file_id):
+        if not region_image.width or not region_image.height:
+            self.logger.warning("Skipping region '%s' with zero size", region.id)
+            return
         self.logger.info("About to binarize page '%s' region '%s'", page_id, region.id)
         features = region_xywh['features']
         if 'angle' in region_xywh and region_xywh['angle']:
@@ -267,6 +273,9 @@ def process_region(self, region, region_image, region_xywh, zoom, page_id, file_
             comments=features))
 
     def process_line(self, line, line_image, line_xywh, zoom, page_id, region_id, file_id):
+        if not line_image.width or not line_image.height:
+            self.logger.warning("Skipping line '%s' with zero size", line.id)
+            return
         self.logger.info("About to binarize page '%s' region '%s' line '%s'",
                          page_id, region_id, line.id)
         features = line_xywh['features']
 
@@ -103,6 +103,7 @@ def process(self):
             else:
                 zoom = 1
 
+            # FIXME: what about text regions inside table regions?
             regions = list(page.get_TextRegion())
             num_texts = len(regions)
             regions += (
@@ -228,19 +229,23 @@ def process_segment(self, segment, segment_mask, segment_polygon, neighbours,
         segment_image = image_from_polygon(parent_image, segment_polygon)
         segment_bbox = bbox_from_polygon(segment_polygon)
         for neighbour, neighbour_mask in neighbours:
+            if not np.any(segment_mask > neighbour_mask):
+                LOG.info('Ignoring enclosing neighbour "%s" of segment "%s" on page "%s"',
+                         neighbour.id, segment.id, page_id)
+                continue
             # find connected components that (only) belong to the neighbour:
             intruders = segment_mask * morph.keep_marked(parent_bin, neighbour_mask > 0) # overlaps neighbour
             intruders = morph.remove_marked(intruders, segment_mask > neighbour_mask) # but exclusively
             num_intruders = np.count_nonzero(intruders)
             num_foreground = np.count_nonzero(segment_mask * parent_bin)
             if not num_intruders:
                 continue
-            if num_intruders / num_foreground > 1.0 - self.parameter['min_fraction']:
-                LOG.info('Too many intruders (%d/%d) from neighbour "%s" in segment "%s" on page "%s"',
-                         num_intruders, num_foreground, neighbour.id, segment.id, page_id)
-                continue
-            LOG.debug('segment "%s" vs neighbour "%s": suppressing %d pixels on page "%s"',
-                      segment.id, neighbour.id, np.count_nonzero(intruders), page_id)
+            LOG.debug('segment "%s" vs neighbour "%s": suppressing %d of %d pixels on page "%s"',
+                      segment.id, neighbour.id, num_intruders, num_foreground, page_id)
+            # suppress in segment_mask so these intruders can stay in the neighbours
+            # (are not removed from both sides)
+            segment_mask -= intruders
+            # suppress in derived image result to be annotated
             clip_mask = array2pil(intruders)
             segment_image.paste(background_image, mask=clip_mask) # suppress in raw image
             if segment_image.mode in ['RGB', 'L', 'RGBA', 'LA']: