Skip to content

Commit 5f13387

Browse files
committed
append EnsureDtype for postprocessing
1 parent f68150c commit 5f13387

File tree

2 files changed

+56
-22
lines changed

2 files changed

+56
-22
lines changed

bioimageio/core/prediction_pipeline/_combined_processing.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from ._processing import (
1313
Binarize,
1414
Clip,
15+
EnsureDtype,
1516
Processing,
1617
ScaleLinear,
1718
ScaleMeanVariance,
@@ -62,6 +63,13 @@ def __init__(self, inputs: List[nodes.InputTensor], outputs: List[nodes.OutputTe
6263
if out.postprocessing is not missing
6364
]
6465

66+
# There is a difference between pre- and post-processing:
67+
# Pre-processing always returns float32, because its output is consumed by the model.
68+
# Post-processing, however, should return the dtype that is specified in the model spec.
69+
# todo: cast dtype for inputs before preprocessing? or check dtype?
70+
for out in outputs:
71+
self._post.append(EnsureDtype(tensor_name=out.name, dtype=out.data_type))
72+
6573
self._req_input_stats = {s: self._collect_required_stats(self._prep, s) for s in SCOPES}
6674
self._req_output_stats = {s: self._collect_required_stats(self._post, s) for s in SCOPES}
6775
if any(self._req_output_stats[s] for s in SCOPES):
@@ -84,9 +92,6 @@ def required_output_dataset_statistics(self) -> Dict[str, Set[Measure]]:
8492

8593
@property
8694
def computed_dataset_statistics(self) -> Dict[str, Dict[Measure, Any]]:
87-
if self._computed_dataset_stats is None:
88-
raise RuntimeError("Set computed dataset statistics first!")
89-
9095
return self._computed_dataset_stats
9196

9297
def apply_preprocessing(
@@ -141,12 +146,14 @@ def set_computed_dataset_statistics(self, computed: Dict[str, Dict[Measure, Any]
141146
for proc in self._prep:
142147
proc.set_computed_dataset_statistics(self.computed_dataset_statistics)
143148

149+
@classmethod
144150
def compute_sample_statistics(
145-
self, tensors: Dict[str, xr.DataArray], measures: Dict[str, Set[Measure]]
151+
cls, tensors: Dict[str, xr.DataArray], measures: Dict[str, Set[Measure]]
146152
) -> Dict[str, Dict[Measure, Any]]:
147-
return {tname: self._compute_tensor_statistics(tensors[tname], ms) for tname, ms in measures.items()}
153+
return {tname: cls._compute_tensor_statistics(tensors[tname], ms) for tname, ms in measures.items()}
148154

149-
def _compute_tensor_statistics(self, tensor: xr.DataArray, measures: Set[Measure]) -> Dict[Measure, Any]:
155+
@staticmethod
156+
def _compute_tensor_statistics(tensor: xr.DataArray, measures: Set[Measure]) -> Dict[Measure, Any]:
150157
ret = {}
151158
for measure in measures:
152159
if isinstance(measure, Mean):

bioimageio/core/prediction_pipeline/_processing.py

Lines changed: 43 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
1-
from dataclasses import dataclass, field
2-
from typing import Any, Dict, Literal, Optional, Sequence, Set, get_args
1+
from dataclasses import dataclass, field, fields
2+
from typing import Any, Dict, Literal, Optional, Sequence, Set, Union, get_args
33

44
import numpy as np
55
import xarray as xr
66

77
from bioimageio.core.statistical_measures import Mean, Measure, Percentile, Std
88

99

10-
def ensure_dtype(tensor: xr.DataArray, *, dtype) -> xr.DataArray:
11-
"""
12-
Convert array to a given datatype
13-
"""
14-
return tensor.astype(dtype)
15-
16-
1710
@dataclass
1811
class Processing:
1912
"""base class for all Pre- and Postprocessing transformations"""
@@ -37,7 +30,7 @@ def get_required_sample_statistics(self) -> Dict[str, Set[Measure]]:
3730

3831
def set_computed_dataset_statistics(self, computed: Dict[str, Dict[Measure, Any]]):
3932
"""helper to set computed statistics and check if they match the requirements"""
40-
for tensor_name, req_measures in self.get_required_dataset_statistics():
33+
for tensor_name, req_measures in self.get_required_dataset_statistics().items():
4134
comp_measures = computed.get(tensor_name, {})
4235
for req_measure in req_measures:
4336
if req_measure not in comp_measures:
@@ -46,7 +39,7 @@ def set_computed_dataset_statistics(self, computed: Dict[str, Dict[Measure, Any]
4639

4740
def set_computed_sample_statistics(self, computed: Dict[str, Dict[Measure, Any]]):
4841
"""helper to set computed statistics and check if they match the requirements"""
49-
for tensor_name, req_measures in self.get_required_sample_statistics():
42+
for tensor_name, req_measures in self.get_required_sample_statistics().items():
5043
comp_measures = computed.get(tensor_name, {})
5144
for req_measure in req_measures:
5245
if req_measure not in comp_measures:
@@ -69,15 +62,35 @@ def get_computed_sample_statistics(self, tensor_name: str, measure: Measure):
6962

7063
return ret
7164

65+
def __call__(self, tensor: xr.DataArray) -> xr.DataArray:
66+
return self.apply(tensor)
67+
7268
def apply(self, tensor: xr.DataArray) -> xr.DataArray:
7369
"""apply processing to named tensors"""
7470
raise NotImplementedError
7571

7672
def __post_init__(self):
7773
"""validate common kwargs by their annotations"""
78-
if hasattr(self, "mode"):
79-
if self.mode not in get_args(self.mode):
80-
raise NotImplementedError(f"Unsupported mode {self.mode} for {self.__class__.__name__}: {self.mode}")
74+
self.computed_dataset_statistics = {}
75+
self.computed_sample_statistics = {}
76+
77+
for f in fields(self):
78+
if f.name == "mode":
79+
assert hasattr(self, "mode")
80+
if self.mode not in get_args(f.type):
81+
raise NotImplementedError(
82+
f"Unsupported mode {self.mode} for {self.__class__.__name__}: {self.mode}"
83+
)
84+
85+
86+
#
87+
# helpers
88+
#
89+
def ensure_dtype(tensor: xr.DataArray, *, dtype) -> xr.DataArray:
90+
"""
91+
Convert array to a given datatype
92+
"""
93+
return tensor.astype(dtype)
8194

8295

8396
#
@@ -102,12 +115,20 @@ def apply(self, tensor: xr.DataArray) -> xr.DataArray:
102115
return ensure_dtype(tensor.clip(min=self.min, max=self.max), dtype="float32")
103116

104117

118+
@dataclass
119+
class EnsureDtype(Processing):
120+
dtype: str
121+
122+
def apply(self, tensor: xr.DataArray) -> xr.DataArray:
123+
return ensure_dtype(tensor, dtype=self.dtype)
124+
125+
105126
@dataclass
106127
class ScaleLinear(Processing):
107128
"""scale the tensor with a fixed multiplicative and additive factor"""
108129

109-
gain: float
110-
offset: float
130+
gain: Union[float, Sequence[float]]
131+
offset: Union[float, Sequence[float]]
111132
axes: Optional[Sequence[str]] = None
112133

113134
def apply(self, tensor: xr.DataArray) -> xr.DataArray:
@@ -121,6 +142,12 @@ def apply(self, tensor: xr.DataArray) -> xr.DataArray:
121142

122143
return ensure_dtype(tensor * gain + offset, dtype="float32")
123144

145+
def __post_init__(self):
146+
super().__post_init__()
147+
if self.axes is None:
148+
assert isinstance(self.gain, (int, float))
149+
assert isinstance(self.offset, (int, float))
150+
124151

125152
@dataclass
126153
class ScaleMeanVariance(Processing):

0 commit comments

Comments
 (0)