Ensure float32 once as the last preprocessing step instead of in each processing op

FynnBe · FynnBe · commit 6359d66437c7 · 2022-10-17T19:57:48.000+02:00
diff --git a/bioimageio/core/prediction_pipeline/_combined_processing.py b/bioimageio/core/prediction_pipeline/_combined_processing.py
@@ -59,14 +59,14 @@ def from_tensor_specs(cls, tensor_specs: List[Union[nodes.InputTensor, nodes.Out
         combine_tensors = {}
         for ts in tensor_specs:
             # There is a difference between pre-and postprocessing:
-            # Preprocessing always returns float32, because its output is consumed by the model.
-            # Postprocessing, however, should return the dtype that is specified in the model spec.
-            # todo: cast dtype for inputs before preprocessing? or check dtype?
+            # After preprocessing we ensure float32, because the output is consumed by the model.
+            # After postprocessing we ensure the dtype that is specified in the model spec.
             assert ts.name not in combine_tensors
             if isinstance(ts, nodes.InputTensor):
                 # todo: assert nodes.InputTensor.dtype with assert_dtype_before?
+                # todo: in the long run we do not want to limit model inputs to float32...
                 combine_tensors[ts.name] = TensorProcessingInfo(
-                    [Processing(p.name, kwargs=p.kwargs) for p in ts.preprocessing]
+                    [Processing(p.name, kwargs=p.kwargs) for p in ts.preprocessing], ensure_dtype_after="float32"
                 )
             elif isinstance(ts, nodes.OutputTensor):
                 combine_tensors[ts.name] = TensorProcessingInfo(
diff --git a/bioimageio/core/prediction_pipeline/_processing.py b/bioimageio/core/prediction_pipeline/_processing.py
@@ -92,16 +92,6 @@ def __post_init__(self):
                     raise NotImplementedError(f"Unsupported mode {self.mode} for {self.__class__.__name__}")
 
 
-#
-# helpers
-#
-def ensure_dtype(tensor: xr.DataArray, *, dtype) -> xr.DataArray:
-    """
-    Convert array to a given datatype
-    """
-    return tensor.astype(dtype)
-
-
 #
 # Pre- and Postprocessing implementations
 #
@@ -129,12 +119,12 @@ def apply(self, tensor: xr.DataArray) -> xr.DataArray:
 
 @dataclass
 class Binarize(Processing):
-    """'output = tensor > threshold' (note: returns float array)."""
+    """'output = tensor > threshold'."""
 
     threshold: float = MISSING  # make dataclass inheritance work for py<3.10 by using an explicit MISSING value.
 
     def apply(self, tensor: xr.DataArray) -> xr.DataArray:
-        return ensure_dtype(tensor > self.threshold, dtype="float32")
+        return tensor > self.threshold
 
 
 @dataclass
@@ -145,7 +135,7 @@ class Clip(Processing):
     max: float = MISSING
 
     def apply(self, tensor: xr.DataArray) -> xr.DataArray:
-        return ensure_dtype(tensor.clip(min=self.min, max=self.max), dtype="float32")
+        return tensor.clip(min=self.min, max=self.max)
 
 
 @dataclass
@@ -155,7 +145,7 @@ class EnsureDtype(Processing):
     dtype: str = MISSING
 
     def apply(self, tensor: xr.DataArray) -> xr.DataArray:
-        return ensure_dtype(tensor, dtype=self.dtype)
+        return tensor.astype(self.dtype)
 
 
 @dataclass
@@ -175,7 +165,7 @@ def apply(self, tensor: xr.DataArray) -> xr.DataArray:
             gain = self.gain
             offset = self.offset
 
-        return ensure_dtype(tensor * gain + offset, dtype="float32")
+        return tensor * gain + offset
 
     def __post_init__(self):
         super().__post_init__()
@@ -210,8 +200,7 @@ def apply(self, tensor: xr.DataArray) -> xr.DataArray:
         ref_mean = self.get_computed_measure(self.reference_tensor, Mean(axes), mode=self.mode)
         ref_std = self.get_computed_measure(self.reference_tensor, Std(axes), mode=self.mode)
 
-        tensor = (tensor - mean) / (std + self.eps) * (ref_std + self.eps) + ref_mean
-        return ensure_dtype(tensor, dtype="float32")
+        return (tensor - mean) / (std + self.eps) * (ref_std + self.eps) + ref_mean
 
 
 @dataclass
@@ -236,7 +225,7 @@ def apply(self, tensor: xr.DataArray) -> xr.DataArray:
         v_lower = self.get_computed_measure(ref_name, Percentile(self.min_percentile, axes=axes))
         v_upper = self.get_computed_measure(ref_name, Percentile(self.max_percentile, axes=axes))
 
-        return ensure_dtype((tensor - v_lower) / (v_upper - v_lower + self.eps), dtype="float32")
+        return (tensor - v_lower) / (v_upper - v_lower + self.eps)
 
     def __post_init__(self):
         super().__post_init__()
@@ -281,8 +270,7 @@ def apply(self, tensor: xr.DataArray) -> xr.DataArray:
         else:
             raise ValueError(self.mode)
 
-        tensor = (tensor - mean) / (std + self.eps)
-        return ensure_dtype(tensor, dtype="float32")
+        return (tensor - mean) / (std + self.eps)
 
 
 _KnownProcessing = TypedDict(