Merge pull request #62 from scalableminds/fix_downsampling

valentin-pinkau · web-flow · commit 4f70c96a1b63 · 2019-02-04T17:05:53.000+01:00
Fix downsampling for non-uint8 data
diff --git a/tests/test_downsampling.py b/tests/test_downsampling.py
@@ -8,6 +8,7 @@
 )
 import wkw
 from wkcuber.utils import WkwDatasetInfo, open_wkw
+from wkcuber.downsampling import _mode, non_linear_filter_3d
 import shutil
 
 WKW_CUBE_SIZE = 1024
@@ -26,14 +27,50 @@ def test_downsample_cube():
     buffer = np.zeros((CUBE_EDGE_LEN,) * 3, dtype=np.uint8)
     buffer[:, :, :] = np.arange(0, CUBE_EDGE_LEN)
 
-    output = downsample_cube(buffer, (2, 2, 2), InterpolationModes.MEDIAN)
+    output = downsample_cube(buffer, (2, 2, 2), InterpolationModes.MODE)
 
     assert output.shape == (CUBE_EDGE_LEN // 2,) * 3
     assert buffer[0, 0, 0] == 0
     assert buffer[0, 0, 1] == 1
     assert np.all(output[:, :, :] == np.arange(0, CUBE_EDGE_LEN, 2))
 
 
+def test_downsample_mode():
+    # Column-wise mode along axis 0; columns without a repeated value expect their smallest entry.
+    a = np.array([[1, 3, 4, 2, 2, 7], [5, 2, 2, 1, 4, 1], [3, 3, 2, 2, 1, 1]])
+
+    result = _mode(a)
+    expected_result = np.array([1, 3, 2, 2, 1, 1])
+
+    assert np.all(result == expected_result)
+
+
+def test_downsample_median():
+    # NOTE(review): this checks np.median itself, not a wkcuber function - confirm that is intended.
+    a = np.array([[1, 3, 4, 2, 2, 7], [5, 2, 2, 1, 4, 1], [3, 3, 2, 2, 1, 1]])
+
+    result = np.median(a, axis=0)
+    expected_result = np.array([3, 3, 2, 2, 2, 1])
+
+    assert np.all(result == expected_result)
+
+
+def test_non_linear_filter_reshape():
+    a = np.array([[[1, 3], [1, 4]], [[4, 2], [3, 1]]], dtype=np.uint8)
+
+    a_filtered = non_linear_filter_3d(a, [2, 2, 2], _mode)
+    assert a_filtered.dtype == np.uint8
+    expected_result = [1]
+    assert np.all(expected_result == a_filtered)
+    # Second case: uint32 input with factors [2, 2, 1]; the input dtype must again be preserved.
+    a = np.array([[[1, 3], [1, 4]], [[4, 3], [3, 1]]], np.uint32)
+
+    a_filtered = non_linear_filter_3d(a, [2, 2, 1], _mode)
+    assert a_filtered.dtype == np.uint32
+    expected_result = [1, 3]
+    assert np.all(expected_result == a_filtered)
+
+
 def test_cube_addresses():
     addresses = cube_addresses(source_info)
     assert len(addresses) == 5 * 5 * 1
diff --git a/wkcuber/__main__.py b/wkcuber/__main__.py
@@ -94,7 +94,6 @@ def create_parser():
         args.layer_name,
         Mag(1),
         Mag(args.max_mag),
-        args.dtype,
         "default",
         DEFAULT_EDGE_LEN,
         args.jobs,
diff --git a/wkcuber/downsampling.py b/wkcuber/downsampling.py
@@ -5,7 +5,6 @@
 from argparse import ArgumentParser
 from math import floor, log2
 from os import path, listdir
-from scipy.stats import mode
 from scipy.ndimage.interpolation import zoom
 from itertools import product
 from functools import lru_cache
@@ -61,20 +60,13 @@ def create_parser():
         default="default",
     )
 
-    parser.add_argument(
-        "--dtype",
-        "-d",
-        help="Target datatype (e.g. uint8, uint16, uint32)",
-        default="uint8",
-    )
-
     parser.add_argument(
         "--from_mag",
         "--from",
         "-f",
         help="Resolution to base downsampling on",
-        type=int,
-        default=1,
+        type=str,
+        default="1",
     )
 
     # Either provide the maximum resolution to be downsampled OR a specific, anisotropic magnification.
@@ -193,6 +185,7 @@ def downsample_cube_job(
 
         with open_wkw(source_wkw_info) as source_wkw:
             num_channels = source_wkw.header.num_channels
+            source_dtype = source_wkw.header.voxel_type
             with open_wkw(
                 target_wkw_info,
                 pool_get_lock(),
@@ -203,7 +196,7 @@ def downsample_cube_job(
                     source_wkw.header.file_len * source_wkw.header.block_len
                 )
                 shape = (num_channels,) + (wkw_cubelength,) * 3
-                file_buffer = np.zeros(shape, target_wkw_info.dtype)
+                file_buffer = np.zeros(shape, source_dtype)
                 tile_length = cube_edge_len
                 tile_count_per_dim = wkw_cubelength // tile_length
                 assert (
@@ -331,7 +324,50 @@ def _median(x):
 
 
 def _mode(x):
-    return mode(x, axis=0, nan_policy="omit")[0][0]
+    """
+    Return the most frequent value along axis 0 of x (the smallest one on ties). Adapted from https://stackoverflow.com/a/35674754
+    """
+    # The reduction axis is fixed to 0; no input validation is performed
+    ndim = x.ndim
+    axis = 0
+    # Sort along the axis so that equal values become adjacent
+    sort = np.sort(x, axis=axis)
+    # Axis permutation (a full reversal for axis 0, which moves the reduction axis last) and the shape of one padding slice
+    transpose = np.roll(np.arange(ndim)[::-1], axis)
+    shape = list(sort.shape)
+    shape[axis] = 1
+    # Flag positions whose sorted value equals its predecessor, padded with False at both ends of the axis, then flatten
+    strides = (
+        np.concatenate(
+            [
+                np.zeros(shape=shape, dtype="bool"),
+                np.diff(sort, axis=axis) == 0,
+                np.zeros(shape=shape, dtype="bool"),
+            ],
+            axis=axis,
+        )
+        .transpose(transpose)
+        .ravel()
+    )
+    # Turn the flags into run lengths: each False position receives the number of True flags since the previous False
+    counts = np.cumsum(strides)
+    counts[~strides] = np.concatenate([[0], np.diff(counts[~strides])])
+    counts[strides] = 0
+    # Shape of the padded counts in permuted axis order, and a slice that drops the leading padding entry along the axis
+    shape = np.array(sort.shape)
+    shape[axis] += 1
+    shape = shape[transpose]
+    slices = [slice(None)] * ndim
+    slices[axis] = slice(1, None)
+    # Undo the flattening; after the +1 the last element of each run of equal values holds that run's length (others hold 1)
+    counts = counts.reshape(shape).transpose(transpose)[tuple(slices)] + 1
+
+    # Pick the sorted value at the first maximum count for every position of the remaining axes (only modal values are returned)
+    slices = [slice(None, i) for i in sort.shape]
+    del slices[axis]
+    index = np.ogrid[slices]
+    index.insert(axis, np.argmax(counts, axis=axis))
+    return sort[tuple(index)]
 
 
 def downsample_cube(cube_buffer, factors, interpolation_mode):
@@ -358,7 +394,6 @@ def downsample_mag(
     layer_name,
     source_mag: Mag,
     target_mag: Mag,
-    dtype="uint8",
     interpolation_mode="default",
     cube_edge_len=DEFAULT_EDGE_LEN,
     jobs=1,
@@ -373,12 +408,11 @@ def downsample_mag(
     else:
         interpolation_mode = InterpolationModes[interpolation_mode.upper()]
 
-    source_wkw_info = WkwDatasetInfo(
-        path, layer_name, dtype, source_mag.to_layer_name()
-    )
-    target_wkw_info = WkwDatasetInfo(
-        path, layer_name, dtype, target_mag.to_layer_name()
-    )
+    source_wkw_info = WkwDatasetInfo(path, layer_name, None, source_mag.to_layer_name())
+    with open_wkw(source_wkw_info) as source:
+        target_wkw_info = WkwDatasetInfo(
+            path, layer_name, source.header.voxel_type, target_mag.to_layer_name()
+        )
     downsample(
         source_wkw_info,
         target_wkw_info,
@@ -396,7 +430,6 @@ def downsample_mags(
     layer_name,
     from_mag: Mag,
     max_mag: Mag,
-    dtype,
     interpolation_mode,
     cube_edge_len,
     jobs,
@@ -410,7 +443,6 @@ def downsample_mags(
             layer_name,
             source_mag,
             target_mag,
-            dtype,
             interpolation_mode,
             cube_edge_len,
             jobs,
@@ -435,7 +467,6 @@ def downsample_mags(
             args.layer_name,
             from_mag,
             anisotropic_target_mag,
-            args.dtype,
             args.interpolation_mode,
             args.buffer_cube_size,
             args.jobs,
@@ -447,7 +478,6 @@ def downsample_mags(
             args.layer_name,
             from_mag,
             max_mag,
-            args.dtype,
             args.interpolation_mode,
             args.buffer_cube_size,
             args.jobs,
diff --git a/wkcuber/utils.py b/wkcuber/utils.py
@@ -21,7 +21,10 @@
 
 
 def _open_wkw(info, **kwargs):
-    header = wkw.Header(np.dtype(info.dtype), **kwargs)
+    if info.dtype is not None:
+        header = wkw.Header(np.dtype(info.dtype), **kwargs)
+    else:
+        header = None
     ds = wkw.Dataset.open(
         path.join(info.dataset_path, info.layer_name, str(info.mag)), header
     )