|
1 | | -import warnings |
2 | | -from collections import defaultdict |
3 | | -from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Type |
4 | | - |
5 | | -import xarray as xr |
| 1 | +from typing import List, Optional, Sequence, Union |
6 | 2 |
|
7 | 3 | from bioimageio.core.resource_io import nodes |
8 | | -from bioimageio.core.statistical_measures import Mean, Measure, Percentile, Std |
9 | | -from bioimageio.spec.model.raw_nodes import PostprocessingName, PreprocessingName |
10 | | -from ._processing import ( |
11 | | - Binarize, |
12 | | - Clip, |
13 | | - EnsureDtype, |
14 | | - Processing, |
15 | | - ScaleLinear, |
16 | | - ScaleMeanVariance, |
17 | | - ScaleRange, |
18 | | - Sigmoid, |
19 | | - ZeroMeanUnitVariance, |
20 | | -) |
| 4 | +from ._processing import EnsureDtype, KNOWN_PROCESSING, Processing |
| 5 | +from ._utils import ComputedMeasures, PER_DATASET, PER_SAMPLE, RequiredMeasures, Sample |
21 | 6 |
|
22 | 7 | try: |
23 | 8 | from typing import Literal |
24 | 9 | except ImportError: |
25 | 10 | from typing_extensions import Literal # type: ignore |
26 | 11 |
|
27 | | -KNOWN_PREPROCESSING: Dict[PreprocessingName, Type[Processing]] = { |
28 | | - "binarize": Binarize, |
29 | | - "clip": Clip, |
30 | | - "scale_linear": ScaleLinear, |
31 | | - "scale_range": ScaleRange, |
32 | | - "sigmoid": Sigmoid, |
33 | | - "zero_mean_unit_variance": ZeroMeanUnitVariance, |
34 | | -} |
35 | | - |
36 | | -KNOWN_POSTPROCESSING: Dict[PostprocessingName, Type[Processing]] = { |
37 | | - "binarize": Binarize, |
38 | | - "clip": Clip, |
39 | | - "scale_linear": ScaleLinear, |
40 | | - "scale_mean_variance": ScaleMeanVariance, |
41 | | - "scale_range": ScaleRange, |
42 | | - "sigmoid": Sigmoid, |
43 | | - "zero_mean_unit_variance": ZeroMeanUnitVariance, |
44 | | -} |
45 | | - |
46 | | - |
47 | | -Scope = Literal["sample", "dataset"] |
48 | | -SAMPLE: Literal["sample"] = "sample" |
49 | | -DATASET: Literal["dataset"] = "dataset" |
50 | | -SCOPES: Set[Scope] = {SAMPLE, DATASET} |
51 | | - |
52 | 12 |
|
class CombinedProcessing:
    """Combine all pre- or postprocessing steps declared by a model's tensor specs.

    Constructed from either a list of input tensor specs (yielding the model's
    preprocessing pipeline) or a list of output tensor specs (postprocessing).
    Mixing input and output specs in one instance is rejected, because the two
    pipelines have different dtype contracts (see note in ``__init__``).
    """

    def __init__(self, tensor_specs: Union[List[nodes.InputTensor], List[nodes.OutputTensor]]):
        """Build the ordered list of processing steps for *tensor_specs*.

        Raises:
            ValueError: if *tensor_specs* mixes input and output tensors.
            NotImplementedError: for unknown spec node types, or when a
                postprocessing step requires per-dataset statistics of an
                output tensor (not yet supported).
        """
        PRE: Literal["pre"] = "pre"
        POST: Literal["post"] = "post"
        proc_prefix: Optional[Literal["pre", "post"]] = None
        self._procs: List[Processing] = []
        for t in tensor_specs:
            if isinstance(t, nodes.InputTensor):
                steps = t.preprocessing or []
                if proc_prefix is not None and proc_prefix != PRE:
                    raise ValueError(f"Invalid mixed input/output tensor specs: {tensor_specs}")

                proc_prefix = PRE
            elif isinstance(t, nodes.OutputTensor):
                steps = t.postprocessing or []
                if proc_prefix is not None and proc_prefix != POST:
                    raise ValueError(f"Invalid mixed input/output tensor specs: {tensor_specs}")

                proc_prefix = POST
            else:
                raise NotImplementedError(t)

            for step in steps:
                # KNOWN_PROCESSING maps ("pre"|"post") -> step name -> Processing subclass.
                self._procs.append(KNOWN_PROCESSING[proc_prefix][step.name](tensor_name=t.name, **step.kwargs))

        # There is a difference between pre- and postprocessing:
        # Preprocessing always returns float32, because its output is consumed by the model.
        # Postprocessing, however, should return the dtype that is specified in the model spec.
        # todo: cast dtype for inputs before preprocessing? or check dtype?
        if proc_prefix == POST:
            for t in tensor_specs:
                self._procs.append(EnsureDtype(tensor_name=t.name, dtype=t.data_type))

        # Statistics (e.g. mean/percentile) the steps need, per mode and tensor.
        self.required_measures: RequiredMeasures = self._collect_required_measures(self._procs)
        if proc_prefix == POST and self.required_measures[PER_DATASET]:
            raise NotImplementedError("computing statistics for output tensors per dataset is not yet implemented")

        self.tensor_names = [t.name for t in tensor_specs]

    def apply(self, sample: Sample, computed_measures: ComputedMeasures) -> None:
        """Apply all processing steps to *sample* in place.

        Each step first receives *computed_measures* (the statistics listed in
        ``required_measures``), then transforms its tensor within *sample*.
        """
        for proc in self._procs:
            proc.set_computed_measures(computed_measures)
            sample[proc.tensor_name] = proc.apply(sample[proc.tensor_name])

    @staticmethod
    def _collect_required_measures(proc: Sequence[Processing]) -> RequiredMeasures:
        """Union the measures required by all steps, keyed by mode then tensor name."""
        ret: RequiredMeasures = {PER_SAMPLE: {}, PER_DATASET: {}}
        for p in proc:
            for mode, ms_per_mode in p.get_required_measures().items():
                for tn, ms_per_tn in ms_per_mode.items():
                    # setdefault replaces the manual "if tn not in ..." membership check
                    ret[mode].setdefault(tn, set()).update(ms_per_tn)

        return ret
0 commit comments