Skip to content

Commit d2f8211

Browse files
authored
AC: multiview action recognition (#3098)
* AC: multiview action recognition * update fit to input for decoder * minor fixes
1 parent ff1fa5a commit d2f8211

File tree

5 files changed

+353
-3
lines changed

5 files changed

+353
-3
lines changed

tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616

1717
from .adapter import Adapter, AdapterField, create_adapter
1818

19-
from .action_recognition import ActionDetection
19+
20+
from .action_recognition import ActionDetection, ActionRecognitionWithNoAction
2021
from .background_matting import ImageBackgroundMattingAdapter
2122
from .text_detection import (
2223
TextDetectionAdapter,
@@ -272,5 +273,7 @@
272273

273274
'MaskToBinaryClassification',
274275

276+
'ActionRecognitionWithNoAction',
277+
275278
'ImageBackgroundMattingAdapter',
276279
]

tools/accuracy_checker/openvino/tools/accuracy_checker/adapters/action_recognition.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,12 @@
1919

2020
from ..adapters import Adapter
2121
from ..config import ConfigValidator, StringField, NumberField, BoolField, ListField
22-
from ..representation import DetectionPrediction, ActionDetectionPrediction, ContainerPrediction
22+
from ..representation import (
23+
DetectionPrediction,
24+
ActionDetectionPrediction,
25+
ContainerPrediction,
26+
ClassificationPrediction
27+
)
2328
from ..utils import contains_all
2429

2530

@@ -319,3 +324,51 @@ def find_layer(regex, output_name, all_outputs):
319324
self.add_conf_outs = add_conf_with_bias
320325
return
321326
return
327+
328+
class ActionRecognitionWithNoAction(Adapter):
    """
    Adapter producing classification predictions from action scores, where a
    dedicated "no action" class competes with the remaining classes through a
    score threshold.

    For every sample:
      * if the "no action" score reaches ``no_action_threshold``, all other
        class scores are zeroed;
      * otherwise the "no action" score itself is zeroed.
    """
    __provider__ = 'action_recognition_with_condition'

    @classmethod
    def parameters(cls):
        """Return the base adapter parameters extended with the adapter-specific ones."""
        params = super().parameters()
        params.update({
            'no_action_id': NumberField(
                optional=True, default=0, min_value=0, value_type=int, description='no_action label id'),
            'no_action_threshold': NumberField(
                optional=True, default=0.5, min_value=0, max_value=1, value_type=float,
                description='threshold for selection no action'),
            'action_output': StringField(optional=True, description='output layer name with action scores')
        })
        return params

    def configure(self):
        """Read the adapter options from the config and reset the output verification flag."""
        self.no_action_id = self.get_value_from_config('no_action_id')
        self.no_action_threshold = self.get_value_from_config('no_action_threshold')
        self.action_output = self.get_value_from_config('action_output')
        # the output name is resolved lazily on the first `process` call
        self.output_verified = False

    def process(self, raw, identifiers, frame_meta):
        """
        Convert raw model outputs to a list of ClassificationPrediction.

        Args:
            raw: raw model outputs, forwarded to the inherited `_extract_predictions`.
            identifiers: identifiers of the samples in the batch.
            frame_meta: metadata forwarded to `_extract_predictions`.
        Returns:
            list with one ClassificationPrediction per identifier.
        """
        if not self.output_verified:
            self.select_output_blob(raw)
        outputs = self._extract_predictions(raw, frame_meta)
        predictions = []
        for identifier, action_out in zip(identifiers, outputs[self.action_output]):
            # NOTE: scores are modified in place, so `action_out` (part of the extracted
            # outputs) is modified as well, exactly as the original `else` branch did.
            scores = action_out
            if scores[self.no_action_id] >= self.no_action_threshold:
                # "no action" wins: suppress every other class.
                # The previous chained assignment `scores = action_out[mask] = 0.` bound
                # `scores` to the float 0. instead of the masked score vector.
                mask = np.ones_like(scores, dtype=bool)
                mask[self.no_action_id] = False
                scores[mask] = 0.
            else:
                # "no action" is below the threshold: exclude it from the competition
                scores[self.no_action_id] = 0.
            predictions.append(ClassificationPrediction(identifier, scores))
        return predictions

    def select_output_blob(self, outputs):
        """
        Resolve the name of the output with action scores.

        Uses the configured `action_output` when it is provided (validated with
        `check_output_name`), otherwise falls back to the output selected by the
        base class.
        """
        self.output_verified = True
        if self.action_output:
            # fixed: the original read the non-existent attribute `self.action_out`
            self.action_output = self.check_output_name(self.action_output, outputs)
            return
        super().select_output_blob(outputs)
        self.action_output = self.output_blob

tools/accuracy_checker/openvino/tools/accuracy_checker/annotation_converters/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@
128128
from .mvtec import MVTecDatasetConverter
129129
from .gan_annotation_converter import GANAnnotationConverter
130130
from .kitti_converter import KITTIConverter
131+
from .smartlab_action_recognition import SmartLabActionRecognition
131132

132133
__all__ = [
133134
'BaseFormatConverter',
@@ -255,5 +256,6 @@
255256
'SpeakerReIdentificationDatasetConverter',
256257
'MVTecDatasetConverter',
257258
'GANAnnotationConverter',
258-
'KITTIConverter'
259+
'KITTIConverter',
260+
'SmartLabActionRecognition',
259261
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
"""
2+
Copyright (c) 2018-2022 Intel Corporation
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
"""
16+
17+
from .format_converter import BaseFormatConverter, ConverterReturn
18+
from ..config import NumberField, PathField
19+
from ..representation import ClassificationAnnotation
20+
from ..utils import read_txt, get_path
21+
22+
23+
class SmartLabActionRecognition(BaseFormatConverter):
    """
    Annotation converter which reads `<identifier> <label>` pairs from the
    `streams_<stream>.txt` file located in the labels directory.
    """
    __provider__ = 'smartlab_action_recognition'

    @classmethod
    def parameters(cls):
        """Return the base converter parameters extended with `stream` and `labels_dir`."""
        converter_params = super().parameters()
        converter_params.update({
            'stream': NumberField(optional=False, description='currently used stream id', value_type=int),
            'labels_dir': PathField(is_directory=True, optional=False, description='directory with label files')
        })
        return converter_params

    def configure(self):
        """Read the config values and resolve the path to the selected stream description file."""
        self.stream_id = self.get_value_from_config('stream')
        self.labels_dir = self.get_value_from_config('labels_dir')
        stream_file_name = 'streams_{}.txt'.format(self.stream_id)
        self.stream_file = get_path(self.labels_dir / stream_file_name)

    def convert(self, check_content=False, progress_callback=None, progress_interval=100, **kwargs):
        """
        Build a ClassificationAnnotation for every line of the stream file.

        Each line is split on whitespace into an identifier and an integer label.
        `progress_callback`, when given, receives the completion percentage every
        `progress_interval` lines. `check_content` is accepted but not used here.
        """
        lines = read_txt(self.stream_file)
        total = len(lines)
        annotations = []
        for line_id, line in enumerate(lines):
            identifier, label = line.split()
            annotations.append(ClassificationAnnotation(identifier, int(label)))
            if not progress_callback or line_id % progress_interval != 0:
                continue
            progress_callback(line_id * 100 / total)
        return ConverterReturn(annotations, self.get_meta(), None)

    def get_meta(self):
        """Return dataset meta with the fixed label map."""
        label_names = ('no_action', 'noise_action', 'adjust_rider')
        return {'label_map': dict(enumerate(label_names))}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
"""
2+
Copyright (c) 2018-2022 Intel Corporation
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
"""
16+
17+
from functools import partial
18+
import numpy as np
19+
20+
from .base_custom_evaluator import BaseCustomEvaluator
21+
from .base_models import (
22+
BaseDLSDKModel, BaseCascadeModel, BaseOpenVINOModel,
23+
create_model, create_encoder
24+
)
25+
from ...adapters import create_adapter
26+
from ...config import ConfigError
27+
from ...utils import contains_all, extract_image_representations, parse_partial_shape
28+
from ...dataset import DataProvider
29+
30+
class MultiviewDataProvider(DataProvider):
    """
    Data provider which reads, for every dataset identifier, the matching data
    from each of the configured view sub-directories.
    """

    def __init__(self,
                 data_reader, annotation_provider=None, tag='', dataset_config=None, data_list=None, subset=None,
                 batch=None, subdirs=None
                 ):
        """
        Args:
            subdirs: iterable of sub-directory names, one per view; it is iterated
                in `read_data`, so it has to be provided before any data is read.
            All other arguments are forwarded to DataProvider unchanged.
        """
        super().__init__(data_reader, annotation_provider, tag, dataset_config, data_list, subset, batch)
        self.subdirs = subdirs

    def __getitem__(self, item):
        """
        Return the batch number `item`.

        Returns:
            tuple (batch_input_ids, batch_annotation, batch_input, batch_identifiers);
            `batch_annotation` is an empty list when there is no annotation provider.
        Raises:
            IndexError: when `item` points past the end of the dataset.
        """
        # NOTE(review): mixes `batch` and `_batch`; presumably a property of the base class
        # and its backing field - confirm against DataProvider.
        if self.batch is None or self._batch <= 0:
            self.batch = 1
        if self.size <= item * self.batch:
            raise IndexError
        batch_annotation = []
        batch_start = item * self.batch
        batch_end = min(self.size, batch_start + self.batch)
        batch_input_ids = self.subset[batch_start:batch_end] if self.subset else range(batch_start, batch_end)
        batch_identifiers = [self._data_list[idx] for idx in batch_input_ids]
        batch_input = [self.read_data(identifier=identifier) for identifier in batch_identifiers]
        if self.annotation_provider:
            batch_annotation = [self.annotation_provider[idx] for idx in batch_identifiers]

        return batch_input_ids, batch_annotation, batch_input, batch_identifiers

    def read_data(self, identifier):
        """Read `identifier` from every view sub-directory with a single data reader call."""
        multi_idx = [f'{subdir}/{identifier}' for subdir in self.subdirs]
        data = self.data_reader(identifier=multi_idx)
        # fixed typo: the original assigned to the misspelled attribute `identfier`,
        # leaving `identifier` untouched
        data.identifier = multi_idx
        return data
59+
60+
61+
class MultiViewActionRecognitionEvaluator(BaseCustomEvaluator):
    """Custom evaluator which feeds several views of a sample to an encoder-decoder model."""

    def __init__(self, dataset_config, launcher, model, orig_config, view_subdirs=None):
        """Store the model and view sub-directories; remember the decoder adapter type if there is one."""
        super().__init__(dataset_config, launcher, orig_config)
        self.model = model
        decoder = self.model.decoder
        if hasattr(decoder, 'adapter'):
            self.adapter_type = decoder.adapter.__provider__
        self.view_subdirs = view_subdirs

    def select_dataset(self, dataset_tag):
        """Select the dataset via the base class, then wrap it into a MultiviewDataProvider."""
        super().select_dataset(dataset_tag)
        selected = self.dataset
        self.dataset = MultiviewDataProvider(
            selected.data_reader,
            selected.annotation_provider,
            selected.tag,
            selected.dataset_config,
            batch=selected.batch,
            subset=selected.subset,
            subdirs=self.view_subdirs,
        )

    @classmethod
    def from_configs(cls, config, delayed_model_loading=False, orig_config=None):
        """Create the evaluator, its SequentialModel and the view list from the config."""
        dataset_config, launcher, _ = cls.get_dataset_and_launcher_info(config)
        network_info = config.get('network_info', {})
        models_args = config.get('_models', [])
        is_blob = config.get('_model_is_blob')
        model = SequentialModel(network_info, launcher, models_args, is_blob, delayed_model_loading)
        view_subdirs = config.get('view_subdirs', [])
        return cls(dataset_config, launcher, model, orig_config, view_subdirs)

    def _process(self, output_callback, calculate_metrics, progress_reporter, metric_config, csv_file):
        """
        Run the model over the whole dataset batch by batch.

        When `output_callback` is given, it is called for encoder outputs (through a
        partial with `metrics_result=None`) and once per batch for the first raw
        decoder output together with the metrics result.
        """
        for batch_id, batch in enumerate(self.dataset):
            batch_input_ids, batch_annotation, batch_inputs, batch_identifiers = batch
            batch_inputs = self.preprocessor.process(batch_inputs, batch_annotation)
            batch_inputs_extr, _ = extract_image_representations(batch_inputs)

            if output_callback:
                encoder_callback = partial(
                    output_callback, metrics_result=None, element_identifiers=batch_identifiers,
                    dataset_indices=batch_input_ids
                )
            else:
                encoder_callback = None

            batch_raw_prediction, batch_prediction = self.model.predict(
                batch_identifiers, batch_inputs_extr, encoder_callback=encoder_callback
            )
            metrics_result = self._get_metrics_result(
                batch_input_ids, batch_annotation, batch_prediction, calculate_metrics
            )
            if output_callback:
                output_callback(
                    batch_raw_prediction[0], metrics_result=metrics_result,
                    element_identifiers=batch_identifiers, dataset_indices=batch_input_ids
                )
            self._update_progress(progress_reporter, metric_config, batch_id, len(batch_prediction), csv_file)
107+
108+
109+
class SequentialModel(BaseCascadeModel):
    """Cascade of an encoder applied to every view and a decoder applied to all encoder outputs."""

    def __init__(self, network_info, launcher, models_args, is_blob, delayed_model_loading=False):
        """
        Create the encoder and decoder parts from `network_info`.

        Raises:
            ConfigError: when encoder or decoder is not described and model loading is not delayed.
        """
        super().__init__(network_info, launcher)
        parts = ['encoder', 'decoder']
        network_info = self.fill_part_with_model(network_info, parts, models_args, is_blob, delayed_model_loading)
        if not (contains_all(network_info, parts) or delayed_model_loading):
            raise ConfigError('network_info should contain encoder and decoder fields')
        decoder_info = network_info['decoder']
        self.num_processing_frames = decoder_info.get('num_processing_frames', 16)
        self.processing_frames_buffer = []
        # framework name -> class implementing the corresponding cascade part
        self._encoder_mapping = dict(dlsdk=EncoderDLSDKModel, openvino=EncoderOpenVINO)
        self._decoder_mapping = dict(dlsdk=DecoderDLSDKModel, openvino=DecoderOpenVINOModel)
        self.encoder = create_encoder(network_info['encoder'], launcher, self._encoder_mapping, delayed_model_loading)
        self.decoder = create_model(
            network_info['decoder'], launcher, self._decoder_mapping, 'decoder', delayed_model_loading
        )
        self._part_by_name = {'encoder': self.encoder, 'decoder': self.decoder}

    def predict(self, identifiers, input_data, encoder_callback=None):
        """
        Encode every element of `input_data` and decode all encoder outputs at once.

        A 5-dimensional input is reduced to its first element before iteration.
        `encoder_callback`, when given, receives the raw encoder result for each element.

        Returns:
            tuple of two single-element lists: raw decoder output and decoder prediction.
        """
        if len(np.shape(input_data)) == 5:
            input_data = input_data[0]
        encoder_preds = []
        for view in input_data:
            encoder_result = self.encoder.predict(identifiers, [view])
            if isinstance(encoder_result, tuple):
                encoder_prediction, raw_encoder_prediction = encoder_result
            else:
                encoder_prediction = raw_encoder_prediction = encoder_result
            if encoder_callback:
                encoder_callback(raw_encoder_prediction)
            encoder_preds.append(encoder_prediction[self.encoder.output_blob])
        raw_output, prediction = self.decoder.predict(identifiers, encoder_preds)

        return [raw_output], [prediction]
151+
152+
153+
class EncoderDLSDKModel(BaseDLSDKModel):
    """Encoder part executed through `exec_network.infer`."""

    def predict(self, identifiers, input_data):
        """Prepare the input, reshape the network if required and return the inference result."""
        feed = self.fit_to_input(input_data)
        needs_reshape = not self.is_dynamic and self.dynamic_inputs
        if needs_reshape:
            self._reshape_input({name: blob.shape for name, blob in feed.items()})
        return self.exec_network.infer(feed)

    def fit_to_input(self, input_data):
        """
        Move the last axis of `input_data` to position 1 and, when the input layer
        is reported as not dynamic or has a non-empty shape, reshape to that shape.

        Returns:
            dict mapping the input layer name to the prepared array.
        """
        input_data = np.transpose(input_data, (0, 3, 1, 2))
        network = self.exec_network
        if hasattr(network, 'input_info'):
            input_info = network.input_info[self.input_blob].input_data
        else:
            input_info = network.inputs[self.input_blob]
        is_static = hasattr(input_info, 'is_dynamic') and not input_info.is_dynamic
        if is_static or input_info.shape:
            input_data = input_data.reshape(input_info.shape)

        return {self.input_blob: np.array(input_data)}
171+
172+
173+
class EncoderOpenVINO(BaseOpenVINOModel):
    """Encoder part executed through the `infer` method of the base OpenVINO model."""

    def predict(self, identifiers, input_data):
        """Prepare the input, reshape the network if required and run inference with raw results."""
        feed = self.fit_to_input(input_data)
        needs_reshape = not self.is_dynamic and self.dynamic_inputs
        if needs_reshape:
            self._reshape_input({name: blob.shape for name, blob in feed.items()})
        return self.infer(feed, raw_results=True)

    def fit_to_input(self, input_data):
        """
        Move the last axis of `input_data` to position 1 and reshape it to the
        input shape when the input partial shape is not dynamic.

        Returns:
            dict mapping the input layer name to the prepared array.
        """
        input_data = np.transpose(input_data, (0, 3, 1, 2))
        input_info = self.inputs[self.input_blob]
        if not input_info.get_partial_shape().is_dynamic:
            target_shape = parse_partial_shape(input_info.shape)
            input_data = input_data.reshape(target_shape)

        return {self.input_blob: np.array(input_data)}
187+
188+
189+
class DecoderDLSDKModel(BaseDLSDKModel):
    """Decoder part executed through `exec_network.infer`, with its own output adapter."""

    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
        """Create the adapter ('classification' unless configured) and bind it to the model output."""
        # the adapter is created before the base initialization, as in the original code
        self.adapter = create_adapter(network_info.get('adapter', 'classification'))
        super().__init__(network_info, launcher, suffix, delayed_model_loading)
        self.adapter.output_blob = self.output_blob

    def predict(self, identifiers, input_data):
        """
        Run the decoder on the encoder outputs.

        Returns:
            tuple (raw_result, result): the raw inference output and the adapter predictions.
        """
        input_dict = self.fit_to_input(input_data)
        if not self.is_dynamic and self.dynamic_inputs:
            self._reshape_input({key: data.shape for key, data in input_dict.items()})
        raw_result = self.exec_network.infer(input_dict)
        result = self.adapter.process([raw_result], identifiers, [{}])

        return raw_result, result

    def fit_to_input(self, input_data):
        """
        Pair the network inputs with the elements of `input_data` in order and
        reshape each element to the shape of its input unless that input is dynamic.

        Returns:
            dict mapping input names to the prepared data.
        """
        has_info = hasattr(self.exec_network, 'input_info')
        input_info = self.exec_network.input_info if has_info else self.exec_network.inputs
        inputs = {}
        for input_name, data in zip(input_info, input_data):
            info = input_info[input_name].input_data if has_info else input_info[input_name]
            # Descriptors without an `is_dynamic` attribute are treated as static,
            # in line with the hasattr check in EncoderDLSDKModel.fit_to_input.
            if not getattr(info, 'is_dynamic', False):
                # fixed: the shape belongs to the per-input `info`, the original used
                # `input_info.shape`, i.e. an attribute of the whole inputs mapping
                data = np.reshape(data, info.shape)
            inputs[input_name] = data
        return inputs
217+
218+
219+
class DecoderOpenVINOModel(BaseOpenVINOModel):
    """Decoder part executed through the `infer` method of the base OpenVINO model, with its own output adapter."""

    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
        """Create the adapter ('classification' unless configured) and bind it to the model output."""
        adapter_config = network_info.get('adapter', 'classification')
        self.adapter = create_adapter(adapter_config)
        super().__init__(network_info, launcher, suffix, delayed_model_loading)
        self.adapter.output_blob = self.output_blob

    def predict(self, identifiers, input_data):
        """
        Run the decoder on the encoder outputs.

        Returns:
            tuple (raw_node_result, result): the second value returned by `infer`
            and the adapter predictions built from the first one.
        """
        feed = self.fit_to_input(input_data)
        needs_reshape = not self.is_dynamic and self.dynamic_inputs
        if needs_reshape:
            self._reshape_input({name: blob.shape for name, blob in feed.items()})
        raw_result, raw_node_result = self.infer(feed, raw_results=True)
        result = self.adapter.process([raw_result], identifiers, [{}])

        return raw_node_result, result

    def fit_to_input(self, input_data):
        """
        Pair the model inputs with the elements of `input_data` in order and
        reshape each element to the input shape when that shape is not dynamic.

        Returns:
            dict mapping input names to the prepared data.
        """
        inputs = {}
        for (input_name, input_info), data in zip(self.inputs.items(), input_data):
            is_static = not input_info.get_partial_shape().is_dynamic
            inputs[input_name] = np.reshape(data, input_info.shape) if is_static else data
        return inputs

0 commit comments

Comments
 (0)