"""
Copyright (c) 2018-2022 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from functools import partial

import numpy as np

from .base_custom_evaluator import BaseCustomEvaluator
from .base_models import (
    BaseDLSDKModel, BaseCascadeModel, BaseOpenVINOModel,
    create_model, create_encoder
)
from ...adapters import create_adapter
from ...config import ConfigError
from ...utils import contains_all, extract_image_representations, parse_partial_shape
from ...dataset import DataProvider
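

# MultiviewDataProvider extends the standard DataProvider so that each dataset item is read once
# per camera view: the same file name is resolved inside every configured view subdirectory and
# the per-view data is returned together as a single multi-view sample.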
class MultiviewDataProvider(DataProvider):
    def __init__(self, data_reader, annotation_provider=None, tag='', dataset_config=None, data_list=None,
                 subset=None, batch=None, subdirs=None):
        super().__init__(data_reader, annotation_provider, tag, dataset_config, data_list, subset, batch)
        self.subdirs = subdirs

    def __getitem__(self, item):
        if self.batch is None or self.batch <= 0:
            self.batch = 1
        if self.size <= item * self.batch:
            raise IndexError
        batch_annotation = []
        batch_start = item * self.batch
        batch_end = min(self.size, batch_start + self.batch)
        batch_input_ids = self.subset[batch_start:batch_end] if self.subset else range(batch_start, batch_end)
        batch_identifiers = [self._data_list[idx] for idx in batch_input_ids]
        batch_input = [self.read_data(identifier=identifier) for identifier in batch_identifiers]
        if self.annotation_provider:
            batch_annotation = [self.annotation_provider[idx] for idx in batch_identifiers]

        return batch_input_ids, batch_annotation, batch_input, batch_identifiers
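
    # Resolve the identifier inside every view subdirectory and read all views at once;
    # the list of per-view paths is stored as the identifier of the returned data.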
    def read_data(self, identifier):
        multi_idx = [f'{subdir}/{identifier}' for subdir in self.subdirs]
        data = self.data_reader(identifier=multi_idx)
        data.identifier = multi_idx
        return data
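

# The evaluator couples the multi-view data provider with an encoder-decoder cascade for action
# recognition; the optional 'view_subdirs' entry of the configuration lists the per-view folders
# passed down to the data provider.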
class MultiViewActionRecognitionEvaluator(BaseCustomEvaluator):
    def __init__(self, dataset_config, launcher, model, orig_config, view_subdirs=None):
        super().__init__(dataset_config, launcher, orig_config)
        self.model = model
        if hasattr(self.model.decoder, 'adapter'):
            self.adapter_type = self.model.decoder.adapter.__provider__
        self.view_subdirs = view_subdirs

    def select_dataset(self, dataset_tag):
        super().select_dataset(dataset_tag)
        self.dataset = MultiviewDataProvider(
            self.dataset.data_reader,
            self.dataset.annotation_provider,
            self.dataset.tag,
            self.dataset.dataset_config,
            batch=self.dataset.batch,
            subset=self.dataset.subset,
            subdirs=self.view_subdirs)

    @classmethod
    def from_configs(cls, config, delayed_model_loading=False, orig_config=None):
        dataset_config, launcher, _ = cls.get_dataset_and_launcher_info(config)
        model = SequentialModel(
            config.get('network_info', {}), launcher, config.get('_models', []), config.get('_model_is_blob'),
            delayed_model_loading
        )
        view_subdirs = config.get('view_subdirs', [])
        return cls(dataset_config, launcher, model, orig_config, view_subdirs)

    def _process(self, output_callback, calculate_metrics, progress_reporter, metric_config, csv_file):
        for batch_id, (batch_input_ids, batch_annotation, batch_inputs, batch_identifiers) in enumerate(self.dataset):
            batch_inputs = self.preprocessor.process(batch_inputs, batch_annotation)
            batch_inputs_extr, _ = extract_image_representations(batch_inputs)
            encoder_callback = None
            if output_callback:
                encoder_callback = partial(output_callback, metrics_result=None,
                                           element_identifiers=batch_identifiers, dataset_indices=batch_input_ids)
            batch_raw_prediction, batch_prediction = self.model.predict(
                batch_identifiers, batch_inputs_extr, encoder_callback=encoder_callback
            )
            metrics_result = self._get_metrics_result(batch_input_ids, batch_annotation, batch_prediction,
                                                      calculate_metrics)
            if output_callback:
                output_callback(batch_raw_prediction[0], metrics_result=metrics_result,
                                element_identifiers=batch_identifiers, dataset_indices=batch_input_ids)
            self._update_progress(progress_reporter, metric_config, batch_id, len(batch_prediction), csv_file)
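

# SequentialModel is the encoder-decoder cascade itself: the encoder network embeds each view and
# the decoder network consumes the collected embeddings to produce the final prediction. Both
# parts may run via either the 'dlsdk' or the 'openvino' launcher (see the mappings below).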
class SequentialModel(BaseCascadeModel):
    def __init__(self, network_info, launcher, models_args, is_blob, delayed_model_loading=False):
        super().__init__(network_info, launcher)
        parts = ['encoder', 'decoder']
        network_info = self.fill_part_with_model(network_info, parts, models_args, is_blob, delayed_model_loading)
        if not contains_all(network_info, parts) and not delayed_model_loading:
            raise ConfigError('network_info should contain encoder and decoder fields')
        self.num_processing_frames = network_info['decoder'].get('num_processing_frames', 16)
        self.processing_frames_buffer = []
        self._encoder_mapping = {
            'dlsdk': EncoderDLSDKModel,
            'openvino': EncoderOpenVINO,
        }
        self._decoder_mapping = {
            'dlsdk': DecoderDLSDKModel,
            'openvino': DecoderOpenVINOModel,
        }
        self.encoder = create_encoder(network_info['encoder'], launcher, self._encoder_mapping, delayed_model_loading)
        self.decoder = create_model(network_info['decoder'], launcher, self._decoder_mapping, 'decoder',
                                    delayed_model_loading)
        self._part_by_name = {'encoder': self.encoder, 'decoder': self.decoder}
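
    # Encode each view independently, forwarding raw encoder output to the callback when one is
    # provided, then let the decoder classify from the collected per-view embeddings.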
    def predict(self, identifiers, input_data, encoder_callback=None):
        raw_outputs = []
        predictions = []
        if len(np.shape(input_data)) == 5:
            input_data = input_data[0]
        encoder_preds = []
        for data in input_data:
            encoder_prediction = self.encoder.predict(identifiers, [data])
            if isinstance(encoder_prediction, tuple):
                encoder_prediction, raw_encoder_prediction = encoder_prediction
            else:
                raw_encoder_prediction = encoder_prediction
            if encoder_callback:
                encoder_callback(raw_encoder_prediction)
            encoder_preds.append(encoder_prediction[self.encoder.output_blob])
        raw_output, prediction = self.decoder.predict(identifiers, encoder_preds)
        raw_outputs.append(raw_output)
        predictions.append(prediction)

        return raw_outputs, predictions
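

# Encoder executed through the 'dlsdk' launcher: inputs are transposed from NHWC to NCHW and,
# for non-dynamic inputs, reshaped to the network's expected input shape.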
class EncoderDLSDKModel(BaseDLSDKModel):
    def predict(self, identifiers, input_data):
        input_dict = self.fit_to_input(input_data)
        if not self.is_dynamic and self.dynamic_inputs:
            self._reshape_input({key: data.shape for key, data in input_dict.items()})
        return self.exec_network.infer(input_dict)

    def fit_to_input(self, input_data):
        input_data = np.transpose(input_data, (0, 3, 1, 2))
        has_info = hasattr(self.exec_network, 'input_info')
        if has_info:
            input_info = self.exec_network.input_info[self.input_blob].input_data
        else:
            input_info = self.exec_network.inputs[self.input_blob]
        if (hasattr(input_info, 'is_dynamic') and not input_info.is_dynamic) or input_info.shape:
            input_data = input_data.reshape(input_info.shape)

        return {self.input_blob: np.array(input_data)}
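

# Encoder executed through the 'openvino' launcher: the same NHWC to NCHW handling, with static
# shapes taken from the model's partial shape information.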
class EncoderOpenVINO(BaseOpenVINOModel):
    def predict(self, identifiers, input_data):
        input_dict = self.fit_to_input(input_data)
        if not self.is_dynamic and self.dynamic_inputs:
            self._reshape_input({key: data.shape for key, data in input_dict.items()})
        return self.infer(input_dict, raw_results=True)

    def fit_to_input(self, input_data):
        input_data = np.transpose(input_data, (0, 3, 1, 2))
        input_info = self.inputs[self.input_blob]
        if not input_info.get_partial_shape().is_dynamic:
            input_data = input_data.reshape(parse_partial_shape(input_info.shape))

        return {self.input_blob: np.array(input_data)}
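

# Decoder executed through the 'dlsdk' launcher: per-view embeddings are distributed across the
# decoder inputs and the raw output is converted into predictions by the configured adapter
# ('classification' by default).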
class DecoderDLSDKModel(BaseDLSDKModel):
    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
        self.adapter = create_adapter(network_info.get('adapter', 'classification'))
        super().__init__(network_info, launcher, suffix, delayed_model_loading)
        self.adapter.output_blob = self.output_blob

    def predict(self, identifiers, input_data):
        input_dict = self.fit_to_input(input_data)
        if not self.is_dynamic and self.dynamic_inputs:
            self._reshape_input({key: data.shape for key, data in input_dict.items()})
        raw_result = self.exec_network.infer(input_dict)
        result = self.adapter.process([raw_result], identifiers, [{}])

        return raw_result, result

    def fit_to_input(self, input_data):
        has_info = hasattr(self.exec_network, 'input_info')
        inputs = {}
        input_info = (
            self.exec_network.input_info
            if has_info else self.exec_network.inputs
        )
        for input_name, data in zip(input_info, input_data):
            info = input_info[input_name].input_data if has_info else input_info[input_name]
            if not info.is_dynamic:
                data = np.reshape(data, info.shape)
            inputs[input_name] = data
        return inputs
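

# Decoder executed through the 'openvino' launcher: same logic, but raw per-node results are
# returned alongside the adapter output so the evaluator can hand them to the output callback.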
class DecoderOpenVINOModel(BaseOpenVINOModel):
    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
        self.adapter = create_adapter(network_info.get('adapter', 'classification'))
        super().__init__(network_info, launcher, suffix, delayed_model_loading)
        self.adapter.output_blob = self.output_blob

    def predict(self, identifiers, input_data):
        input_dict = self.fit_to_input(input_data)
        if not self.is_dynamic and self.dynamic_inputs:
            self._reshape_input({key: data.shape for key, data in input_dict.items()})
        raw_result, raw_node_result = self.infer(input_dict, raw_results=True)
        result = self.adapter.process([raw_result], identifiers, [{}])

        return raw_node_result, result

    def fit_to_input(self, input_data):
        inputs = {}
        for (input_name, input_info), data in zip(self.inputs.items(), input_data):
            if not input_info.get_partial_shape().is_dynamic:
                data = np.reshape(data, input_info.shape)
            inputs[input_name] = data
        return inputs
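

# A minimal sketch of the configuration this evaluator consumes, assuming the usual custom
# evaluator layout (evaluations -> module_config); the keys mirror what from_configs and
# SequentialModel read, while the module path, model file names and view folder names are
# hypothetical placeholders rather than values taken from this change.
#
#   evaluations:
#     - name: multi_view_action_recognition
#       module: custom_evaluators.multi_view_action_recognition_evaluator   # hypothetical path
#       module_config:
#         network_info:
#           encoder:
#             model: encoder.xml                                            # hypothetical file
#           decoder:
#             model: decoder.xml                                            # hypothetical file
#             adapter: classification
#             num_processing_frames: 16
#         view_subdirs: [view_0, view_1, view_2]                            # hypothetical folders
#         # launcher and dataset sections follow the usual custom-evaluator layout (omitted here)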