use fast mode implementation

valentin-pinkau · valentin-pinkau · commit bfbe3c72e2c4 · 2019-02-01T14:33:02.000+01:00
diff --git a/tests/test_downsampling.py b/tests/test_downsampling.py
@@ -8,6 +8,7 @@
 )
 import wkw
 from wkcuber.utils import WkwDatasetInfo, open_wkw
+from wkcuber.downsampling import _mode
 import shutil
 
 WKW_CUBE_SIZE = 1024
@@ -26,13 +27,24 @@ def test_downsample_cube():
     buffer = np.zeros((CUBE_EDGE_LEN,) * 3, dtype=np.uint8)
     buffer[:, :, :] = np.arange(0, CUBE_EDGE_LEN)
 
-    output = downsample_cube(buffer, (2, 2, 2), InterpolationModes.MEDIAN)
+    output = downsample_cube(buffer, (2, 2, 2), InterpolationModes.MODE)
 
     assert output.shape == (CUBE_EDGE_LEN // 2,) * 3
     assert buffer[0, 0, 0] == 0
     assert buffer[0, 0, 1] == 1
     assert np.all(output[:, :, :] == np.arange(0, CUBE_EDGE_LEN, 2))
 
+def test_downsample_mode():
+    """Check that _mode yields the per-column mode of a small 2D array."""
+    columns = np.array(
+        [[1, 3, 4, 2, 2, 7],
+         [5, 2, 2, 1, 4, 1],
+         [3, 3, 2, 2, 1, 1]]
+    )
+    wanted = np.array([1, 3, 2, 2, 1, 1])
+
+    assert np.all(_mode(columns) == wanted)
+
 
 def test_cube_addresses():
     addresses = cube_addresses(source_info)
diff --git a/wkcuber/downsampling.py b/wkcuber/downsampling.py
@@ -5,7 +5,6 @@
 from argparse import ArgumentParser
 from math import floor, log2
 from os import path, listdir
-from scipy.stats import mode
 from scipy.ndimage.interpolation import zoom
 from itertools import product
 from functools import lru_cache
@@ -73,8 +72,8 @@ def create_parser():
         "--from",
         "-f",
         help="Resolution to base downsampling on",
-        type=int,
-        default=1,
+        type=str,
+        default='1',
     )
 
     # Either provide the maximum resolution to be downsampled OR a specific, anisotropic magnification.
@@ -331,7 +330,42 @@ def _median(x):
 
 
 def _mode(x):
-    return mode(x, axis=0, nan_policy="omit")[0][0]
+    """
+    Return the most frequent value of x along axis 0.
+
+    Fast mode implementation from: https://stackoverflow.com/a/35674754
+    """
+    ndim = x.ndim
+    axis = 0
+    # Sort so that equal values form contiguous runs along the axis
+    sort = np.sort(x, axis=axis)
+    # Create array to transpose along the axis and get padding shape
+    transpose = np.roll(np.arange(ndim)[::-1], axis)
+    shape = list(sort.shape)
+    shape[axis] = 1
+    # Create a boolean array along strides of unique values
+    strides = np.concatenate([np.zeros(shape=shape, dtype='bool'),
+                                 np.diff(sort, axis=axis) == 0,
+                                 np.zeros(shape=shape, dtype='bool')],
+                                axis=axis).transpose(transpose).ravel()
+    # Count the stride lengths
+    counts = np.cumsum(strides)
+    counts[~strides] = np.concatenate([[0], np.diff(counts[~strides])])
+    counts[strides] = 0
+    # Get shape of padded counts and slice to return to the original shape
+    shape = np.array(sort.shape)
+    shape[axis] += 1
+    shape = shape[transpose]
+    slices = [slice(None)] * ndim
+    slices[axis] = slice(1, None)
+    # Index with tuples: NumPy no longer accepts lists of slices/arrays
+    counts = counts.reshape(shape).transpose(transpose)[tuple(slices)] + 1
+    # Find maximum counts (first hit, i.e. the smallest value on ties)
+    slices = [slice(None, i) for i in sort.shape]
+    del slices[axis]
+    index = list(np.ogrid[tuple(slices)])
+    index.insert(axis, np.argmax(counts, axis=axis))
+    return sort[tuple(index)]
 
 
 def downsample_cube(cube_buffer, factors, interpolation_mode):