openvinotoolkit
diff --git a/‎tools/accuracy_checker/openvino/tools/accuracy_checker/evaluators/custom_evaluators/asr_custom_encoder_decoder_joint.py
Lines changed: 52 additions & 1 deletion b/‎tools/accuracy_checker/openvino/tools/accuracy_checker/evaluators/custom_evaluators/asr_custom_encoder_decoder_joint.py
Lines changed: 52 additions & 1 deletion
diff --git a/‎tools/accuracy_checker/openvino/tools/accuracy_checker/evaluators/custom_evaluators/asr_encoder_decoder_evaluator.py
Lines changed: 30 additions & 1 deletion b/‎tools/accuracy_checker/openvino/tools/accuracy_checker/evaluators/custom_evaluators/asr_encoder_decoder_evaluator.py
Lines changed: 30 additions & 1 deletion
diff --git a/‎tools/accuracy_checker/openvino/tools/accuracy_checker/evaluators/custom_evaluators/asr_encoder_prediction_joint_evaluator.py
Lines changed: 85 additions & 2 deletions b/‎tools/accuracy_checker/openvino/tools/accuracy_checker/evaluators/custom_evaluators/asr_encoder_prediction_joint_evaluator.py
Lines changed: 85 additions & 2 deletions
@@ -17,7 +17,7 @@
 import math
 import numpy as np
 from .asr_encoder_prediction_joint_evaluator import ASREvaluator
-from .base_models import create_model, BaseCascadeModel, BaseDLSDKModel, BaseONNXModel
+from .base_models import create_model, BaseCascadeModel, BaseDLSDKModel, BaseONNXModel, BaseOpenVINOModel
 from ...adapters import create_adapter
 from ...utils import generate_layer_name, contains_all, contains_any
 from ...config import ConfigError
@@ -261,27 +261,75 @@ def predict(self, identifiers, input_data):
         raise NotImplementedError
 
 
+class CommonOpenVINOModel(BaseOpenVINOModel):
+    """Shared OpenVINO base for the encoder, decoder and joint model wrappers.
+
+    ``select_inputs_outputs`` and ``reset`` are not defined here; they are
+    expected on the instance (presumably provided by the class this one is
+    combined with -- verify against the mixin definitions).
+    """
+
+    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
+        # Layer names and state are prepared before delegating to the base constructor.
+        self.select_inputs_outputs(network_info)
+        self.reset()
+        super().__init__(network_info, launcher, suffix, delayed_model_loading)
+
+    def set_input_and_output(self):
+        """Bring ``input_names``/``output_names`` in line with the model's prefix state.
+
+        The friendly name of the first model input decides whether layer names
+        carry the ``default_model_suffix`` prefix; stored names are regenerated
+        only when that differs from ``self.with_prefix``.
+        """
+        if self.exec_network is not None:
+            model_inputs = self.exec_network.inputs
+        else:
+            model_inputs = self.network.inputs
+        first_input_name = next(iter(model_inputs)).get_node().friendly_name
+        has_prefix = first_input_name.startswith(self.default_model_suffix)
+        if has_prefix == self.with_prefix:
+            return
+
+        prefix = self.default_model_suffix + '_'
+        self.input_names = [generate_layer_name(name, prefix, has_prefix) for name in self.input_names]
+        self.output_names = [generate_layer_name(name, prefix, has_prefix) for name in self.output_names]
+        self.with_prefix = has_prefix
+
+    def predict(self, identifiers, input_data):
+        """Not implemented in this class; always raises ``NotImplementedError``."""
+        raise NotImplementedError
+
+
+
 class DLSDKEncoder(Encoder, CommonDLSDKModel):
     def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
         self.default_inputs = ['input_0', 'input_1', 'input_2']
         self.default_outputs = ['output_0', 'output_1', 'output_2']
         super().__init__(network_info, launcher, suffix, delayed_model_loading)
 
 
+class OVEncoder(Encoder, CommonOpenVINOModel):
+    """Encoder wrapper for the OpenVINO launcher with its default layer names."""
+
+    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
+        # Defaults are assigned before delegating to the parent constructor.
+        self.default_outputs = [
+            'output_0/sink_port_0',
+            'output_1/sink_port_0',
+            'output_2/sink_port_0',
+        ]
+        self.default_inputs = [
+            'input_0',
+            'input_1',
+            'input_2',
+        ]
+        super().__init__(network_info, launcher, suffix, delayed_model_loading)
+
+
 class DLSDKDecoder(Decoder, CommonDLSDKModel):
     def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
         self.default_inputs = ['input_0', 'input_1', 'input_2']
         self.default_outputs = ['output_0', 'output_1', 'output_2']
         super().__init__(network_info, launcher, suffix, delayed_model_loading)
 
 
+class OVDecoder(Decoder, CommonOpenVINOModel):
+    """Decoder wrapper for the OpenVINO launcher with its default layer names."""
+
+    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
+        # Defaults are assigned before delegating to the parent constructor.
+        self.default_outputs = [
+            'output_0/sink_port_0',
+            'output_1/sink_port_0',
+            'output_2/sink_port_0',
+        ]
+        self.default_inputs = [
+            'input_0',
+            'input_1',
+            'input_2',
+        ]
+        super().__init__(network_info, launcher, suffix, delayed_model_loading)
+
+
 class DLSDKJoint(Joint, CommonDLSDKModel):
     def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
         self.default_inputs = ['0', '1']
         self.default_outputs = ['8']
         super().__init__(network_info, launcher, suffix, delayed_model_loading)
 
 
+class OVJoint(Joint, CommonOpenVINOModel):
+    """Joint network wrapper for the OpenVINO launcher with its default layer names."""
+
+    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
+        self.default_inputs = ['0', '1']
+        # OpenVINO default outputs in this module are named
+        # '<layer>/sink_port_<index>'; the port index was missing here.
+        self.default_outputs = ['8/sink_port_0']
+        super().__init__(network_info, launcher, suffix, delayed_model_loading)
+
+
 class CommonONNXModel(BaseONNXModel):
     def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
         self.select_inputs_outputs(network_info)
@@ -352,14 +400,17 @@ def __init__(self, network_info, adapter_config, launcher, models_args, is_blob,
             raise ConfigError('network_info should contain encoder, prediction and joint fields')
         self._decoder_mapping = {
             'dlsdk': DLSDKDecoder,
+            'openvino': OVDecoder,
             'onnx_runtime': ONNXDecoder
         }
         self._encoder_mapping = {
             'dlsdk': DLSDKEncoder,
+            'openvino': OVEncoder,
             'onnx_runtime': ONNXEncoder
         }
         self._joint_mapping = {
             'dlsdk': DLSDKJoint,
+            'openvino': OVJoint,
             'onnx_runtime': ONNXJoint
         }
         self.encoder = create_model(network_info['encoder'], launcher, self._encoder_mapping, 'encoder',
 
@@ -21,7 +21,9 @@
 
 
 from .base_custom_evaluator import BaseCustomEvaluator
-from .base_models import BaseCascadeModel, BaseDLSDKModel, BaseONNXModel, create_model, create_encoder
+from .base_models import (
+    BaseCascadeModel, BaseDLSDKModel, BaseONNXModel, BaseOpenVINOModel,
+    create_model, create_encoder)
 from ...adapters import create_adapter
 from ...config import ConfigError
 from ...utils import contains_all, contains_any, extract_image_representations, read_pickle
@@ -79,10 +81,12 @@ def __init__(self, network_info, launcher, models_args, is_blob, delayed_model_l
             raise ConfigError('network_info should contain encoder and decoder fields')
         self._decoder_mapping = {
             'dlsdk': DecoderDLSDKModel,
+            'openvino': DecoderOVModel,
             'onnx_runtime': DecoderONNXModel
         }
         self._encoder_mapping = {
             'dlsdk': EncoderDLSDKModel,
+            'openvino': EncoderOVModel,
             'onnx_runtime': EncoderONNXModel,
             'dummy': DummyEncoder
         }
@@ -138,6 +142,13 @@ def predict(self, identifiers, input_data):
         return results, results[self.output_blob]
 
 
+class EncoderOVModel(BaseOpenVINOModel):
+    """Encoder stage run through the OpenVINO base model wrapper."""
+
+    def predict(self, identifiers, input_data):
+        """Run inference and return ``(all_outputs, all_outputs[self.output_blob])``.
+
+        ``identifiers`` is accepted but not used by this method.
+        """
+        feed = self.fit_to_input(input_data)
+        outputs = self.infer(feed)
+        encoded = outputs[self.output_blob]
+        return outputs, encoded
+
+
 class DecoderDLSDKModel(BaseDLSDKModel):
     def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
         self.adapter = create_adapter(network_info.get('adapter', 'ctc_greedy_decoder'))
@@ -156,6 +167,24 @@ def set_input_and_output(self):
         self.adapter.output_blob = self.output_blob
 
 
+class DecoderOVModel(BaseOpenVINOModel):
+    """Decoder stage for OpenVINO whose raw outputs are post-processed by an adapter."""
+
+    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
+        # The adapter type comes from the config; 'ctc_greedy_decoder' is the fallback.
+        adapter_type = network_info.get('adapter', 'ctc_greedy_decoder')
+        self.adapter = create_adapter(adapter_type)
+        super().__init__(network_info, launcher, suffix, delayed_model_loading)
+        self.adapter.output_blob = self.output_blob
+
+    def set_input_and_output(self):
+        """Refresh blob names via the base class, then point the adapter at the output blob."""
+        super().set_input_and_output()
+        self.adapter.output_blob = self.output_blob
+
+    def predict(self, identifiers, input_data):
+        """Run inference and return ``(raw_outputs, adapter_result)``."""
+        raw_outputs = self.infer(self.fit_to_input(input_data))
+        decoded = self.adapter.process([raw_outputs], identifiers, [{}])
+        return raw_outputs, decoded
+
+
 class EncoderONNXModel(BaseONNXModel):
     def predict(self, identifiers, input_data):
         results = self.inference_session.run((self.output_blob.name, ), self.fit_to_input(input_data))
 
@@ -21,9 +21,11 @@
 
 from ...adapters import create_adapter
 from ...config import ConfigError
-from ...utils import contains_all, contains_any, read_pickle
+from ...utils import contains_all, contains_any, read_pickle, parse_partial_shape
 from .asr_encoder_decoder_evaluator import AutomaticSpeechRecognitionEvaluator
-from .base_models import BaseCascadeModel, BaseDLSDKModel, BaseONNXModel, create_model, create_encoder
+from .base_models import (
+    BaseCascadeModel, BaseDLSDKModel, BaseOpenVINOModel, BaseONNXModel, create_model, create_encoder
+)
 
 
 class ASREvaluator(AutomaticSpeechRecognitionEvaluator):
@@ -59,15 +61,18 @@ def __init__(self, network_info, launcher, models_args, is_blob, adapter_info, d
             raise ConfigError('network_info should contain encoder, prediction and joint fields')
         self._encoder_mapping = {
             'dlsdk': EncoderDLSDKModel,
+            'openvino': EncoderOVMOdel,
             'onnx_runtime': EncoderONNXModel,
             'dummy': DummyEncoder
         }
         self._prediction_mapping = {
             'dlsdk': PredictionDLSDKModel,
+            'openvino': PredictionOVModel,
             'onnx_runtime': PredictionONNXModel
         }
         self._joint_mapping = {
             'dlsdk': JointDLSDKModel,
+            'openvino': JointOVModel,
             'onnx_runtime': JointONNXModel
         }
         self.encoder = create_encoder(network_info['encoder'], launcher, self._encoder_mapping, delayed_model_loading)
@@ -249,27 +254,105 @@ def set_input_and_output(self):
                 )
 
 
+class CommonOVModel(BaseOpenVINOModel):
+    """Shared OpenVINO base for the encoder, prediction and joint stage models.
+
+    Subclasses set ``default_input_layers`` and ``default_output_layers``
+    before calling this constructor; the config keys ``inputs`` / ``outputs``
+    override them.
+    """
+
+    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
+        self.input_layers = network_info.get('inputs', self.default_input_layers)
+        self.output_layers = network_info.get('outputs', self.default_output_layers)
+        # A single configured layer doubles as the input/output blob name.
+        if len(self.input_layers) == 1:
+            self.input_blob = self.input_layers[0]
+        if len(self.output_layers) == 1:
+            self.output_blob = self.output_layers[0]
+        super().__init__(network_info, launcher, suffix, delayed_model_loading)
+
+    def predict(self, identifiers, input_data, callback=None):
+        """Run inference and return ``(all_outputs, all_outputs[self.output_blob])``.
+
+        ``identifiers`` and ``callback`` are accepted but not used here.
+        """
+        input_data = self.fit_to_input(input_data)
+        results = self.infer(input_data)
+        return results, results[self.output_blob]
+
+    def fit_to_input(self, input_data):
+        """Build the feed dict for inference.
+
+        A dict is fitted input by input, looked up by every model input name;
+        any other value is fitted to ``self.input_blob``.
+        """
+        if isinstance(input_data, dict):
+            fitted = {}
+            for input_blob in self.inputs:
+                fitted.update(self.fit_one_input(input_blob, input_data[input_blob]))
+        else:
+            fitted = self.fit_one_input(self.input_blob, input_data)
+        return fitted
+
+    def fit_one_input(self, input_blob, input_data):
+        """Return ``{input_blob: array}``, reshaping the model input first when needed.
+
+        The input is reshaped when it is listed in ``dynamic_inputs`` or when
+        its parsed partial shape differs from the shape of ``input_data``.
+        """
+        if (input_blob in self.dynamic_inputs or parse_partial_shape(
+            self.inputs[input_blob].get_partial_shape()) != np.shape(input_data)):
+            self._reshape_input({input_blob: np.shape(input_data)})
+
+        return {input_blob: np.array(input_data)}
+
+    def _toggle_layer_prefix(self, layer_name, with_prefix):
+        """Add or strip the ``<default_model_suffix>_`` prefix on one layer name."""
+        prefix = self.default_model_suffix + '_'
+        if with_prefix:
+            return prefix + layer_name
+        # Split on the prefix *with* its separator so no leading underscore remains.
+        return layer_name.split(prefix)[-1]
+
+    def set_input_and_output(self):
+        """Resolve blob names and align stored layer names with the model's prefix state.
+
+        Runs only when no input blob is set yet or when the first model input's
+        prefix state differs from ``self.with_prefix``.
+        """
+        input_blob = next(iter(self.inputs))
+        with_prefix = input_blob.startswith(self.default_model_suffix)
+        if self.input_blob is not None and with_prefix == self.with_prefix:
+            return
+
+        if self.output_blob is None:
+            # Fall back to the uncompiled network when no compiled model exists yet.
+            model = self.exec_network if self.exec_network is not None else self.network
+            output_blob = next(iter(model.outputs)).get_node().friendly_name
+        else:
+            output_blob = self._toggle_layer_prefix(self.output_blob, with_prefix)
+        self.input_blob = input_blob
+        self.output_blob = output_blob
+        self.with_prefix = with_prefix
+        # Update the lists in place, as before, so existing references stay in sync.
+        for idx, inp in enumerate(self.input_layers):
+            self.input_layers[idx] = self._toggle_layer_prefix(inp, with_prefix)
+        for idx, out in enumerate(self.output_layers):
+            self.output_layers[idx] = self._toggle_layer_prefix(out, with_prefix)
+
+
 class EncoderDLSDKModel(CommonDLSDKModel):
     def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
         self.default_input_layers = []
         self.default_output_layers = ['472']
         super().__init__(network_info, launcher, suffix, delayed_model_loading)
 
 
+class EncoderOVModel(CommonOVModel):
+    """Encoder stage for the OpenVINO launcher with its default layer names."""
+
+    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
+        self.default_input_layers = []
+        self.default_output_layers = ['472/sink_port_0']
+        super().__init__(network_info, launcher, suffix, delayed_model_loading)
+
+
+# Backward-compatible alias for the original, miscased class name.
+EncoderOVMOdel = EncoderOVModel
+
+
 class PredictionDLSDKModel(CommonDLSDKModel):
     def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
         self.default_input_layers = ['input.1', '1', '2']
         self.default_output_layers = ['151', '152', '153']
         super().__init__(network_info, launcher, suffix, delayed_model_loading)
 
 
+class PredictionOVModel(CommonOVModel):
+    """Prediction stage for the OpenVINO launcher with its default layer names."""
+
+    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
+        # Defaults are assigned before delegating to the parent constructor.
+        self.default_output_layers = [
+            '151/sink_port_0',
+            '152/sink_port_0',
+            '153/sink_port_0',
+        ]
+        self.default_input_layers = [
+            'input.1',
+            '1',
+            '2',
+        ]
+        super().__init__(network_info, launcher, suffix, delayed_model_loading)
+
+
 class JointDLSDKModel(CommonDLSDKModel):
     def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
         self.default_input_layers = ['0', '1']
         self.default_output_layers = []
         super().__init__(network_info, launcher, suffix, delayed_model_loading)
 
 
+class JointOVModel(CommonOVModel):
+    """Joint stage for the OpenVINO launcher; no default output layer is listed."""
+
+    def __init__(self, network_info, launcher, suffix=None, delayed_model_loading=False):
+        # Defaults are assigned before delegating to the parent constructor.
+        self.default_output_layers = []
+        self.default_input_layers = [
+            '0',
+            '1',
+        ]
+        super().__init__(network_info, launcher, suffix, delayed_model_loading)
+
+
 class CommonONNXModel(BaseONNXModel):
     def predict(self, identifiers, input_data, callback=None):
         fitted = self.fit_to_input(input_data)