|
21 | 21 |
|
22 | 22 | from ...adapters import create_adapter
|
23 | 23 | from ...config import ConfigError
|
24 |
| -from ...utils import contains_all, contains_any, read_pickle |
| 24 | +from ...utils import contains_all, contains_any, read_pickle, parse_partial_shape |
25 | 25 | from .asr_encoder_decoder_evaluator import AutomaticSpeechRecognitionEvaluator
|
26 |
| -from .base_models import BaseCascadeModel, BaseDLSDKModel, BaseONNXModel, create_model, create_encoder |
| 26 | +from .base_models import ( |
| 27 | + BaseCascadeModel, BaseDLSDKModel, BaseOpenVINOModel, BaseONNXModel, create_model, create_encoder |
| 28 | +) |
27 | 29 |
|
28 | 30 |
|
29 | 31 | class ASREvaluator(AutomaticSpeechRecognitionEvaluator):
|
@@ -59,15 +61,18 @@ def __init__(self, network_info, launcher, models_args, is_blob, adapter_info, d
|
59 | 61 | raise ConfigError('network_info should contain encoder, prediction and joint fields')
|
60 | 62 | self._encoder_mapping = {
|
61 | 63 | 'dlsdk': EncoderDLSDKModel,
|
| 64 | + 'openvino': EncoderOVMOdel, |
62 | 65 | 'onnx_runtime': EncoderONNXModel,
|
63 | 66 | 'dummy': DummyEncoder
|
64 | 67 | }
|
65 | 68 | self._prediction_mapping = {
|
66 | 69 | 'dlsdk': PredictionDLSDKModel,
|
| 70 | + 'openvino': PredictionOVModel, |
67 | 71 | 'onnx_runtime': PredictionONNXModel
|
68 | 72 | }
|
69 | 73 | self._joint_mapping = {
|
70 | 74 | 'dlsdk': JointDLSDKModel,
|
| 75 | + 'openvino': JointOVModel, |
71 | 76 | 'onnx_runtime': JointONNXModel
|
72 | 77 | }
|
73 | 78 | self.encoder = create_encoder(network_info['encoder'], launcher, self._encoder_mapping, delayed_model_loading)
|
@@ -249,27 +254,105 @@ def set_input_and_output(self):
|
249 | 254 | )
|
250 | 255 |
|
251 | 256 |
|
class CommonOVModel(BaseOpenVINOModel):
    """Shared OpenVINO wrapper for the encoder, prediction and joint ASR sub-models.

    Subclasses must assign ``default_input_layers`` and ``default_output_layers``
    before delegating to this ``__init__``; they are used as fallbacks when
    ``network_info`` has no ``inputs`` / ``outputs`` entries.
    """

    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
        """Resolve configured layer names, then run the base-class initialisation.

        Args:
            network_info: mapping describing the model; the optional ``inputs`` and
                ``outputs`` entries override the subclass defaults.
            launcher: forwarded unchanged to the base class.
            suffix: forwarded unchanged to the base class.
            delayed_model_loading: forwarded unchanged to the base class.
        """
        # Store copies: set_input_and_output() rewrites the entries in place, and the
        # source lists belong to the caller's config or to the subclass defaults.
        self.input_layers = list(network_info.get('inputs', self.default_input_layers))
        self.output_layers = list(network_info.get('outputs', self.default_output_layers))
        # A single configured layer doubles as the default blob name.
        # NOTE(review): with zero or several layers the attribute is not assigned
        # here - presumably the base class provides a None default; confirm.
        if len(self.input_layers) == 1:
            self.input_blob = self.input_layers[0]
        if len(self.output_layers) == 1:
            self.output_blob = self.output_layers[0]
        super().__init__(network_info, launcher, suffix, delayed_model_loading)

    def predict(self, identifiers, input_data, callback=None):
        """Run inference on ``input_data``.

        ``identifiers`` and ``callback`` are accepted for interface compatibility
        and are not used here.

        Returns:
            A pair ``(results, results[self.output_blob])``: everything returned by
            ``self.infer`` plus the entry selected by ``self.output_blob``.
        """
        input_data = self.fit_to_input(input_data)
        results = self.infer(input_data)
        return results, results[self.output_blob]

    def fit_to_input(self, input_data):
        """Build the feed dict for inference.

        A dict must hold one entry per model input (``KeyError`` otherwise); any
        other value is bound to ``self.input_blob``.
        """
        if isinstance(input_data, dict):
            fitted = {}
            for input_blob in self.inputs:
                fitted.update(self.fit_one_input(input_blob, input_data[input_blob]))
        else:
            fitted = self.fit_one_input(self.input_blob, input_data)
        return fitted

    def fit_one_input(self, input_blob, input_data):
        """Reshape ``input_blob`` if needed and return ``{input_blob: ndarray}``."""
        data_shape = tuple(np.shape(input_data))
        # Both sides are normalised to tuples so the check does not depend on the
        # sequence type returned by parse_partial_shape (a list never equals a tuple,
        # which would force a reshape on every call).
        if (input_blob in self.dynamic_inputs or tuple(parse_partial_shape(
                self.inputs[input_blob].get_partial_shape())) != data_shape):
            self._reshape_input({input_blob: data_shape})

        return {input_blob: np.array(input_data)}

    def set_input_and_output(self):
        """Re-sync the stored blob and layer names with the loaded model's naming.

        The first model input decides whether names carry the
        ``default_model_suffix`` prefix. Nothing happens when ``self.input_blob``
        is already set and that prefix state matches ``self.with_prefix``.
        """
        input_blob = next(iter(self.inputs))
        with_prefix = input_blob.startswith(self.default_model_suffix)
        if self.input_blob is not None and with_prefix == self.with_prefix:
            return

        prefix = self.default_model_suffix + '_'

        def rename(name):
            # The same '<suffix>_' separator is used to add and to strip the prefix,
            # so stripping never leaves a stray leading underscore.
            return prefix + name if with_prefix else name.split(prefix)[-1]

        if self.output_blob is None:
            # No configured output: fall back to the first output of the network.
            output_blob = next(iter(self.exec_network.outputs)).get_node().friendly_name
        else:
            output_blob = rename(self.output_blob)
        self.input_blob = input_blob
        self.output_blob = output_blob
        self.with_prefix = with_prefix
        for idx, inp in enumerate(self.input_layers):
            self.input_layers[idx] = rename(inp)
        for idx, out in enumerate(self.output_layers):
            self.output_layers[idx] = rename(out)
| 312 | + |
| 313 | + |
class EncoderDLSDKModel(CommonDLSDKModel):
    """Encoder sub-model for the 'dlsdk' framework key."""

    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
        # Must be assigned before the parent constructor runs.
        # NOTE(review): presumably the parent reads them as fallback layer names - confirm.
        self.default_output_layers = ['472']
        self.default_input_layers = []
        super().__init__(network_info, launcher, suffix, delayed_model_loading)
|
257 | 319 |
|
258 | 320 |
|
class EncoderOVModel(CommonOVModel):
    """Encoder sub-model for the 'openvino' framework key."""

    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
        # Fallback layer names; CommonOVModel.__init__ reads them when network_info
        # has no 'inputs' / 'outputs' entries, so they must be set first.
        self.default_input_layers = []
        self.default_output_layers = ['472/sink_port_0']
        super().__init__(network_info, launcher, suffix, delayed_model_loading)


# Backward-compatible alias for the original, misspelled class name, so existing
# references (e.g. the encoder mapping in ASREvaluator) keep working.
EncoderOVMOdel = EncoderOVModel
| 326 | + |
| 327 | + |
class PredictionDLSDKModel(CommonDLSDKModel):
    """Prediction sub-model for the 'dlsdk' framework key."""

    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
        # Must be assigned before the parent constructor runs.
        # NOTE(review): presumably the parent reads them as fallback layer names - confirm.
        self.default_output_layers = ['151', '152', '153']
        self.default_input_layers = ['input.1', '1', '2']
        super().__init__(network_info, launcher, suffix, delayed_model_loading)
|
264 | 333 |
|
265 | 334 |
|
class PredictionOVModel(CommonOVModel):
    """Prediction sub-model for the 'openvino' framework key."""

    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
        # Fallback layer names; CommonOVModel.__init__ reads them when network_info
        # has no 'inputs' / 'outputs' entries, so they must be set first.
        self.default_output_layers = ['151/sink_port_0', '152/sink_port_0', '153/sink_port_0']
        self.default_input_layers = ['input.1', '1', '2']
        super().__init__(network_info, launcher, suffix, delayed_model_loading)
| 340 | + |
| 341 | + |
class JointDLSDKModel(CommonDLSDKModel):
    """Joint sub-model for the 'dlsdk' framework key."""

    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
        # Must be assigned before the parent constructor runs.
        # NOTE(review): presumably the parent reads them as fallback layer names - confirm.
        self.default_output_layers = []
        self.default_input_layers = ['0', '1']
        super().__init__(network_info, launcher, suffix, delayed_model_loading)
|
271 | 347 |
|
272 | 348 |
|
class JointOVModel(CommonOVModel):
    """Joint sub-model for the 'openvino' framework key."""

    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
        # Fallback layer names; CommonOVModel.__init__ reads them when network_info
        # has no 'inputs' / 'outputs' entries, so they must be set first.
        # No default output is named here.
        self.default_output_layers = []
        self.default_input_layers = ['0', '1']
        super().__init__(network_info, launcher, suffix, delayed_model_loading)
| 354 | + |
| 355 | + |
273 | 356 | class CommonONNXModel(BaseONNXModel):
|
274 | 357 | def predict(self, identifiers, input_data, callback=None):
|
275 | 358 | fitted = self.fit_to_input(input_data)
|
|
0 commit comments