Merge pull request #107 from bioimage-io/prepost

FynnBe · web-flow · commit 6f80afe98712 · 2021-09-17T14:58:43.000+02:00
Update pre-and-postprocessing
diff --git a/bioimageio/core/prediction_pipeline/_postprocessing.py b/bioimageio/core/prediction_pipeline/_postprocessing.py
@@ -3,20 +3,61 @@
 import xarray as xr
 from bioimageio.core.resource_io.nodes import Postprocessing
 
-from ._preprocessing import binarize, chain
+from . import _preprocessing as ops
 from ._types import Transform
 
 
-def sigmoid(tensor: xr.DataArray, **kwargs):
-    if kwargs:
-        raise NotImplementedError(f"Passed kwargs for sigmoid {kwargs}")
-    return 1 / (1 + xr.ufuncs.exp(-tensor))
+# TODO how do we implement reference_tensor?
 
 
-KNOWN_POSTPROCESSING = {"binarize": binarize, "sigmoid": sigmoid}
+def scale_range(
+    tensor: xr.DataArray,
+    *,
+    reference_tensor=None,
+    mode="per_sample",
+    axes=None,
+    min_percentile=0.0,
+    max_percentile=100.0,
+) -> xr.DataArray:
 
+    # TODO if reference tensor is passed, we need to use it to compute quantiles instead of 'tensor'
+    if reference_tensor is None:
+        tensor_ = tensor
+    else:
+        raise NotImplementedError
 
-def make_postprocessing(spec: List[Postprocessing]) -> Transform:
+    # valid modes according to spec: "per_sample", "per_dataset"
+    # TODO implement per_dataset
+    if mode != "per_sample":
+        raise NotImplementedError(f"Unsupported mode for scale_range: {mode}")
+
+    if axes:
+        axes = tuple(axes)
+        v_lower = tensor_.quantile(min_percentile / 100.0, dim=axes)
+        v_upper = tensor_.quantile(max_percentile / 100.0, dim=axes)
+    else:
+        v_lower = tensor_.quantile(min_percentile / 100.0)
+        v_upper = tensor_.quantile(max_percentile / 100.0)
+
+    return ops.ensure_dtype((tensor - v_lower) / v_upper, dtype="float32")
+
+
+# TODO scale the tensor s.t. it matches the mean and variance of the reference tensor
+def scale_mean_variance(tensor: xr.DataArray, *, reference_tensor, mode="per_sample"):
+    raise NotImplementedError
+
+
+KNOWN_POSTPROCESSING = {
+    "binarize": ops.binarize,
+    "clip": ops.clip,
+    "scale_linear": ops.scale_linear,
+    "scale_range": ops.scale_range,
+    "sigmoid": ops.sigmoid,
+    "zero_mean_unit_variance": ops.zero_mean_unit_variance,
+}
+
+
+def make_postprocessing(spec: List[Postprocessing], dtype: str) -> Transform:
     """
     :param preprocessing: bioimage-io spec node
     """
@@ -32,4 +73,9 @@ def make_postprocessing(spec: List[Postprocessing]) -> Transform:
 
         functions.append((fn, kwargs))
 
-    return chain(*functions)
+    # There is a difference between pre-and-postprocessing:
+    # Pre-processing always returns float32, because its output is consumed by the model.
+    # Post-processing, however, should return the dtype that is specified in the model spec.
+    functions.append((ops.ensure_dtype, {"dtype": dtype}))
+
+    return ops.chain(*functions)
diff --git a/bioimageio/core/prediction_pipeline/_prediction_pipeline.py b/bioimageio/core/prediction_pipeline/_prediction_pipeline.py
@@ -172,7 +172,7 @@ def create_prediction_pipeline(
     postprocessing: List[Transform] = []
     for out in bioimageio_model.outputs:
         postprocessing_spec = [] if out.postprocessing is missing else out.postprocessing.copy()
-        postprocessing.append(make_postprocessing(postprocessing_spec))
+        postprocessing.append(make_postprocessing(postprocessing_spec, out.data_type))
 
     return _PredictionPipelineImpl(
         name=bioimageio_model.name,
diff --git a/bioimageio/core/prediction_pipeline/_preprocessing.py b/bioimageio/core/prediction_pipeline/_preprocessing.py
@@ -72,12 +72,19 @@ def ensure_dtype(tensor: xr.DataArray, *, dtype):
     return tensor.astype(dtype)
 
 
+def sigmoid(tensor: xr.DataArray, **kwargs):
+    if kwargs:
+        raise NotImplementedError(f"Passed kwargs for sigmoid {kwargs}")
+    return 1.0 / (1.0 + xr.ufuncs.exp(-tensor))
+
+
 KNOWN_PREPROCESSING: Dict[PreprocessingName, Transform] = {
     "scale_linear": scale_linear,
     "zero_mean_unit_variance": zero_mean_unit_variance,
     "binarize": binarize,
     "clip": clip,
-    "scale_range": scale_range
+    "scale_range": scale_range,
+    "sigmoid": sigmoid
     # "__tiktorch_ensure_dtype": ensure_dtype,
 }
 
diff --git a/tests/prediction_pipeline/test_postprocessing.py b/tests/prediction_pipeline/test_postprocessing.py
@@ -0,0 +1,20 @@
+import numpy as np
+import xarray as xr
+from bioimageio.core.resource_io.nodes import Postprocessing
+from bioimageio.core.prediction_pipeline._postprocessing import make_postprocessing
+
+
+def test_binarize_postprocessing():
+    shape = (3, 32, 32)
+    axes = ("c", "y", "x")
+    np_data = np.random.rand(*shape)
+    data = xr.DataArray(np_data, dims=axes)
+
+    threshold = 0.5
+    exp = xr.DataArray(np_data > threshold, dims=axes)
+
+    for dtype in ("float32", "float64", "uint8", "uint16"):
+        binarize = make_postprocessing(spec=[Postprocessing("binarize", kwargs={"threshold": threshold})], dtype=dtype)
+        res = binarize(data)
+        assert np.dtype(res.dtype) == np.dtype(dtype)
+        xr.testing.assert_allclose(res, exp.astype(dtype))
diff --git a/tests/prediction_pipeline/test_preprocessing.py b/tests/prediction_pipeline/test_preprocessing.py
@@ -151,3 +151,16 @@ def test_scale_range_axes():
     preprocessing = make_preprocessing([scale_range_spec])
     result = preprocessing(data)
     xr.testing.assert_allclose(expected, result)
+
+
+def test_sigmoid():
+    shape = (3, 32, 32)
+    axes = ("c", "y", "x")
+    np_data = np.random.rand(*shape)
+    data = xr.DataArray(np_data, dims=axes)
+
+    sigmoid = make_preprocessing([Preprocessing("sigmoid", kwargs={})])
+    res = sigmoid(data)
+
+    exp = xr.DataArray(1.0 / (1 + np.exp(-np_data)), dims=axes)
+    xr.testing.assert_allclose(res, exp)