|
1 | | -import warnings |
2 | | -from collections import defaultdict |
3 | | -from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Type |
4 | | - |
5 | | -import xarray as xr |
| 1 | +from typing import List, Optional, Sequence, Union |
6 | 2 |
|
7 | 3 | from bioimageio.core.resource_io import nodes |
8 | | -from bioimageio.core.statistical_measures import Mean, Measure, Percentile, Std |
9 | | -from bioimageio.spec.model.raw_nodes import PostprocessingName, PreprocessingName |
10 | | -from ._processing import ( |
11 | | - Binarize, |
12 | | - Clip, |
13 | | - EnsureDtype, |
14 | | - Processing, |
15 | | - ScaleLinear, |
16 | | - ScaleMeanVariance, |
17 | | - ScaleRange, |
18 | | - Sigmoid, |
19 | | - ZeroMeanUnitVariance, |
20 | | -) |
| 4 | +from ._processing import EnsureDtype, KNOWN_PROCESSING, Processing |
| 5 | +from ._utils import ComputedMeasures, PER_DATASET, PER_SAMPLE, RequiredMeasures, Sample |
21 | 6 |
|
22 | 7 | try: |
23 | 8 | from typing import Literal |
24 | 9 | except ImportError: |
25 | 10 | from typing_extensions import Literal # type: ignore |
26 | 11 |
|
27 | | -KNOWN_PREPROCESSING: Dict[PreprocessingName, Type[Processing]] = { |
28 | | - "binarize": Binarize, |
29 | | - "clip": Clip, |
30 | | - "scale_linear": ScaleLinear, |
31 | | - "scale_range": ScaleRange, |
32 | | - "sigmoid": Sigmoid, |
33 | | - "zero_mean_unit_variance": ZeroMeanUnitVariance, |
34 | | -} |
35 | | - |
36 | | -KNOWN_POSTPROCESSING: Dict[PostprocessingName, Type[Processing]] = { |
37 | | - "binarize": Binarize, |
38 | | - "clip": Clip, |
39 | | - "scale_linear": ScaleLinear, |
40 | | - "scale_mean_variance": ScaleMeanVariance, |
41 | | - "scale_range": ScaleRange, |
42 | | - "sigmoid": Sigmoid, |
43 | | - "zero_mean_unit_variance": ZeroMeanUnitVariance, |
44 | | -} |
45 | | - |
46 | | - |
47 | | -Scope = Literal["sample", "dataset"] |
48 | | -SAMPLE: Literal["sample"] = "sample" |
49 | | -DATASET: Literal["dataset"] = "dataset" |
50 | | -SCOPES: Set[Scope] = {SAMPLE, DATASET} |
51 | | - |
52 | 12 |
|
class CombinedProcessing:
    """Combine all pre- or postprocessing steps declared by a model's tensor specs.

    Constructed from either a list of input tensor specs (yielding the model's
    preprocessing pipeline) or a list of output tensor specs (postprocessing).
    Mixing input and output specs in one instance is rejected, because the two
    pipelines have different dtype contracts (see note in ``__init__``).
    """

    def __init__(self, tensor_specs: Union[List[nodes.InputTensor], List[nodes.OutputTensor]]):
        """Build the ordered list of processing steps for *tensor_specs*.

        Raises:
            ValueError: if *tensor_specs* mixes input and output tensors.
            NotImplementedError: for unknown spec node types, or when a
                postprocessing step requires per-dataset statistics of an
                output tensor (not yet supported).
        """
        PRE: Literal["pre"] = "pre"
        POST: Literal["post"] = "post"
        proc_prefix: Optional[Literal["pre", "post"]] = None
        self._procs: List[Processing] = []
        for t in tensor_specs:
            if isinstance(t, nodes.InputTensor):
                steps = t.preprocessing or []
                if proc_prefix is not None and proc_prefix != PRE:
                    raise ValueError(f"Invalid mixed input/output tensor specs: {tensor_specs}")

                proc_prefix = PRE
            elif isinstance(t, nodes.OutputTensor):
                steps = t.postprocessing or []
                if proc_prefix is not None and proc_prefix != POST:
                    raise ValueError(f"Invalid mixed input/output tensor specs: {tensor_specs}")

                proc_prefix = POST
            else:
                raise NotImplementedError(t)

            for step in steps:
                # KNOWN_PROCESSING maps ("pre"|"post") -> step name -> Processing subclass.
                self._procs.append(KNOWN_PROCESSING[proc_prefix][step.name](tensor_name=t.name, **step.kwargs))

        # There is a difference between pre- and postprocessing:
        # Preprocessing always returns float32, because its output is consumed by the model.
        # Postprocessing, however, should return the dtype that is specified in the model spec.
        # todo: cast dtype for inputs before preprocessing? or check dtype?
        if proc_prefix == POST:
            for t in tensor_specs:
                self._procs.append(EnsureDtype(tensor_name=t.name, dtype=t.data_type))

        # Statistics (e.g. mean/percentile) the steps need, per mode and tensor.
        self.required_measures: RequiredMeasures = self._collect_required_measures(self._procs)
        if proc_prefix == POST and self.required_measures[PER_DATASET]:
            raise NotImplementedError("computing statistics for output tensors per dataset is not yet implemented")

        self.tensor_names = [t.name for t in tensor_specs]

    def apply(self, sample: Sample, computed_measures: ComputedMeasures) -> None:
        """Apply all processing steps to *sample* in place.

        Each step first receives *computed_measures* (the statistics listed in
        ``required_measures``), then transforms its tensor within *sample*.
        """
        for proc in self._procs:
            proc.set_computed_measures(computed_measures)
            sample[proc.tensor_name] = proc.apply(sample[proc.tensor_name])

    @staticmethod
    def _collect_required_measures(proc: Sequence[Processing]) -> RequiredMeasures:
        """Union the measures required by all steps, keyed by mode then tensor name."""
        ret: RequiredMeasures = {PER_SAMPLE: {}, PER_DATASET: {}}
        for p in proc:
            for mode, ms_per_mode in p.get_required_measures().items():
                for tn, ms_per_tn in ms_per_mode.items():
                    # setdefault replaces the manual "if tn not in ..." membership check
                    ret[mode].setdefault(tn, set()).update(ms_per_tn)

        return ret
0 commit comments