@@ -104,6 +104,7 @@ def run_benchmark(path_to_model: str, shape: Optional[List[int]] = None, verbose
    # >> output_names = [output.name for output in sess.get_outputs()]
    # >> for data_item in val_loader:
    # >>     sess.run(output_names, input_feed=transform_fn(data_item))

    input_name = model.graph.input[0].name

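For reference, the commented-out onnxruntime loop above, expanded into a runnable sketch; path_to_model, val_loader, and transform_fn come from the surrounding run_benchmark example, and the provider choice is an assumption:

import onnxruntime as ort

# A sketch of the reference validation loop from the comment above;
# path_to_model, val_loader and transform_fn are assumed from the example.
sess = ort.InferenceSession(path_to_model, providers=["CPUExecutionProvider"])
output_names = [output.name for output in sess.get_outputs()]
for data_item in val_loader:
    sess.run(output_names, input_feed=transform_fn(data_item))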
@@ -0,0 +1,147 @@
{
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 3
                }
            }
        ],
        "dump_intermediate_model": true
    },
    "engine": {
        "datasets": [
            {
                "metrics": [
                    {
                        "type": "wer"
                    }
                ],
                "name": "LibriSpeech_test_clean_wav",
                "data_source": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/librispeech/test/LibriSpeech/test-clean.wav",
                "annotation_conversion": {
                    "converter": "librispeech",
                    "data_dir": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/librispeech/test/LibriSpeech/test-clean.wav"
                },
                "preprocessing": [
                    {
                        "int16mode": true,
                        "type": "audio_normalization"
                    },
                    {
                        "duration": "512 samples",
                        "overlap": "192 samples",
                        "type": "clip_audio"
                    },
                    {
                        "base": 512,
                        "type": "hanning_window"
                    },
                    {
                        "fftbase": 512,
                        "magnitude_squared": true,
                        "skip_channels": true,
                        "type": "audio_spectrogram"
                    },
                    {
                        "base": 257,
                        "filterbank_channel_count": 40,
                        "lower_frequency_limit": 20,
                        "sample_rate": 16000,
                        "type": "audio_triangle_filtering",
                        "upper_frequency_limit": 4000
                    },
                    {
                        "filterbank_channel_count": 40,
                        "numceps": 26,
                        "type": "audio_dct"
                    },
                    {
                        "context": 9,
                        "numceps": 26,
                        "type": "clip_cepstrum"
                    },
                    {
                        "step": 16,
                        "type": "pack_cepstrum"
                    }
                ],
                "reader": "wav_reader"
            }
        ],
        "launchers": [
            {
                "adapter": {
                    "beam_size": 32,
                    "lm_alpha": 0.75,
                    "lm_beta": 1.05,
                    "lm_file": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/model_attributes/mozilla-deepspeech-0.6.1/lm.binary",
                    "lm_oov_score": -1000,
                    "lm_vocabulary_length": 4463723,
                    "lm_vocabulary_offset": 941235601,
                    "logarithmic_prob": false,
                    "probability_out": "logits",
                    "type": "ctc_beam_search_decoder_with_lm"
                },
                "framework": "dlsdk",
                "inputs": [
                    {
                        "layout": "NHWC",
                        "name": "input_node",
                        "type": "INPUT"
                    },
                    {
                        "name": "previous_state_c",
                        "type": "LSTM_INPUT",
                        "value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/BlockLSTM/TensorIterator.2"
                    },
                    {
                        "name": "previous_state_h",
                        "type": "LSTM_INPUT",
                        "value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/BlockLSTM/TensorIterator.1"
                    }
                ]
            },
            {
                "adapter": {
                    "beam_size": 32,
                    "lm_alpha": 0.75,
                    "lm_beta": 1.05,
                    "lm_file": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/model_attributes/mozilla-deepspeech-0.6.1/lm.binary",
                    "lm_oov_score": -1000,
                    "lm_vocabulary_length": 4463723,
                    "lm_vocabulary_offset": 941235601,
                    "logarithmic_prob": false,
                    "probability_out": "logits",
                    "type": "ctc_beam_search_decoder_with_lm"
                },
                "framework": "openvino",
                "inputs": [
                    {
                        "layout": "NHWC",
                        "name": "input_node",
                        "type": "INPUT"
                    },
                    {
                        "name": "previous_state_c",
                        "type": "LSTM_INPUT",
                        "value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd:0"
                    },
                    {
                        "name": "previous_state_h",
                        "type": "LSTM_INPUT",
                        "value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd_1:0"
                    }
                ]
            }
        ]
    },
    "model": {
        "model": "/mnt/omz/cv_bench_cache/ww18_weekly_23.0.0-10862-40bf400b189-API2.0/mozilla-deepspeech-0.6.1/tf/tf_frozen/FP16/1/dldt/mozilla-deepspeech-0.6.1.xml",
        "model_name": "mozilla-deepspeech-0.6.1",
        "weights": "/mnt/omz/cv_bench_cache/ww18_weekly_23.0.0-10862-40bf400b189-API2.0/mozilla-deepspeech-0.6.1/tf/tf_frozen/FP16/1/dldt/mozilla-deepspeech-0.6.1.bin"
    }
}
@@ -0,0 +1,87 @@
import os
import subprocess

import openvino.runtime as ov
from openvino.tools.accuracy_checker.evaluators.quantization_model_evaluator import create_model_evaluator
from openvino.tools.pot.configs.config import Config

import nncf
from examples.post_training_quantization.openvino.tiny_gpt2.wrapper import NNCFOVWrappedModel

model_name = "mozilla-deepspeech-0.6.1"
cache_dir = os.path.dirname(__file__)
dataset_config = os.path.join(cache_dir, "accuracy_checker.json")

command = f"omz_downloader --name {model_name} --cache_dir {cache_dir}"
cmd_output = subprocess.call(command, shell=True) # nosec

model_dir = os.path.join(cache_dir, model_name)
if not os.path.exists(model_dir):
    command = f"omz_converter --name {model_name} -o {os.path.join(cache_dir, model_name)}"
    cmd_output = subprocess.call(command, shell=True)  # nosec

xml_path = os.path.join(model_dir, f"public/{model_name}/FP16/{model_name}.xml")
ov_model = ov.Core().read_model(xml_path)

config = Config.read_config(dataset_config)
config.configure_params()
accuracy_checker_config = config.engine

model_evaluator = create_model_evaluator(accuracy_checker_config)
model_evaluator.load_network([{"model": ov_model}])
model_evaluator.select_dataset("")


def sequence_transform_fn(data_item):
    """
    Quantization transform function. Extracts and preprocesses sequential input data
    from the dataloader for quantization.

    :param data_item: Data item produced by the DataLoader during iteration.
    :return: An iterable over the preprocessed elements of the given data item.
    """
    return data_item


def custom_forward(self, model, data_item):
    """
    Runs the model over one sequential data item, feeding the LSTM state outputs of each
    step back in as the state inputs of the next step. None is passed as the previous
    model outputs on the first step, so the launcher fills the state inputs with defaults.

    :param model: Compiled model used for statistics collection.
    :param data_item: Data item produced by the DataLoader during iteration.
    :return: Statistics collected over all steps of the sequence.
    """

    def iter_through_sequence():
        _, batch_annotation, batch_input, _ = data_item
        filled_inputs, _, _ = model_evaluator._get_batch_input(batch_input, batch_annotation)
        for filled_input in filled_inputs:
            input_data = {}
            for name, value in filled_input.items():
                input_data[model_evaluator.launcher.input_to_tensor_name[name]] = value
            yield input_data

    model_outputs = None
    for model_inputs in iter_through_sequence():
        state_inputs = model_evaluator.launcher._fill_lstm_inputs(model_outputs)
        model_inputs.update(state_inputs)
        model_outputs = model(model_inputs)
        self.collect_statistics_callback(model_outputs)
    return self.collected_statistics


def set_model_fn(self, ov_model):
    self._ov_model = ov.Core().compile_model(ov_model, device_name="CPU")


dataset = nncf.CustomInferenceDataset(model_evaluator.dataset, sequence_transform_fn, custom_forward)

# Wrap the model so nncf.quantize can drive the custom sequential forward;
# set_model_fn is passed as a keyword so it lands in the wrapper's **kwargs.
wrapped_model = NNCFOVWrappedModel(ov_model, custom_forward, set_model_fn=set_model_fn)

quantized_model = nncf.quantize(wrapped_model, dataset, subset_size=3)
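The essence of custom_forward above is state threading: each step's LSTM state outputs become the next step's state inputs. Below is a minimal sketch of that loop outside accuracy-checker, where model is any callable mapping an input dict to an output dict, chunks and zero_state are hypothetical, and the state names mirror the LSTM_INPUT entries in accuracy_checker.json:

# Hypothetical skeleton of the state-threading loop from custom_forward.
STATE_MAP = {
    "previous_state_c": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd:0",
    "previous_state_h": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd_1:0",
}


def run_sequence(model, chunks, zero_state):
    outputs = None
    for chunk in chunks:
        # On the first step use zeros; afterwards feed back the previous outputs.
        state = zero_state if outputs is None else {name: outputs[src] for name, src in STATE_MAP.items()}
        outputs = model({"input_node": chunk, **state})
    return outputs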
Empty file.
81 changes: 81 additions & 0 deletions examples/post_training_quantization/openvino/tiny_gpt2/main.py
@@ -0,0 +1,81 @@
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer

import nncf

GENERATION_LENGTH = 20


model_id = "hf-internal-testing/tiny-random-gpt2"
# model_id = "hf-internal-testing/tiny-random-GPTNeoModel"
# model_id = "hf-internal-testing/tiny-random-GPTNeoXForCausalLM"

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokens = tokenizer("This is a sample input", return_tensors="pt")

model_with_pkv = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True)


def set_ov_model_in_hf_model(hf_model, ov_model):
    hf_model.model = ov_model
    hf_model.request = ov_model.create_infer_request()


def get_custom_forward(ov_model, callback_fn):
    hf_model = model_with_pkv
    set_ov_model_in_hf_model(hf_model, ov_model)

    def _callback_fn(info):
        # Pair each output port of the model with the tensor produced for it.
        outputs = {k: v for k, v in zip(info["infer_request"].model_outputs, info["infer_request"].outputs)}
        callback_fn(outputs)

    hf_model.request.set_callback(_callback_fn, {"infer_request": hf_model.request})

    def custom_forward(dataitem):
        hf_model.generate(**dataitem, min_length=GENERATION_LENGTH, max_length=GENERATION_LENGTH, num_beams=1)

    return custom_forward


def transform_fn(data_item):
    return data_item


dataset = nncf.CustomInferenceDataset([tokens] * 10, transform_fn, get_custom_forward)


# Fix duplicated friendly names in the OV model:
names = set()
for op in model_with_pkv.model.get_ops():
    friendly_name = op.get_friendly_name()
    while friendly_name in names:
        friendly_name += "_"
    names.add(friendly_name)
    op.set_friendly_name(friendly_name)

quantized_model = nncf.quantize(model_with_pkv.model, dataset, subset_size=3)

model_with_pkv.model = quantized_model
model_with_pkv.request = None
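A quick sanity check one could run after swapping in the quantized model (a sketch, not part of the example; it assumes the Optimum wrapper lazily recreates its infer request once request has been reset to None):

# Hypothetical smoke test: generate with the quantized model and decode the result.
output_ids = model_with_pkv.generate(**tokens, max_length=GENERATION_LENGTH, num_beams=1)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))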
54 changes: 54 additions & 0 deletions examples/post_training_quantization/openvino/tiny_gpt2/wrapper.py
@@ -0,0 +1,54 @@
from collections import defaultdict
from typing import Any

import numpy as np
import openvino.runtime as ov


class NNCFOVWrappedModel:
    def __init__(self, ov_model, custom_forward, **kwargs) -> None:
        self._ov_model = ov_model
        self._original_model_outputs_names = {op.node.friendly_name for op in ov_model.outputs}
        self._custom_forward = custom_forward
        self._collected_statistics = defaultdict(list)
        self._stack_axis = 0
        self._ov_statistics_model = None
        self._kwargs = kwargs

    def __getattr__(self, __name: str) -> Any:
        return object.__getattribute__(self._ov_model, __name)

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        if self._ov_statistics_model is None:
            raise RuntimeError("The statistics model is not set; call set_statistics_ov_model() first.")
        return self._custom_forward(self, self._ov_statistics_model, *args, **kwds)

    def set_statistics_ov_model(self, ov_model):
        self._ov_statistics_model = ov.Core().compile_model(ov_model, device_name="CPU")

    @property
    def collected_statistics(self):
        aggregated_statistics = {}
        for friendly_name, values in self._collected_statistics.items():
            aggregated_statistics[friendly_name] = np.stack(values, axis=self._stack_axis)
        return aggregated_statistics

    def collect_statistics_callback(self, *args):
        # Save every output that is not an original model output to
        # self._collected_statistics and pass the original outputs through.
        if len(args) == 1:
            outputs = args[0]
            assert isinstance(outputs, dict)
        else:
            assert len(args) == 2
            outputs = {k: v for k, v in zip(*args)}
        original_model_output = {}
        for op, value in outputs.items():
            if op.node.friendly_name in self._original_model_outputs_names:
                original_model_output[op] = value
                continue
            if not isinstance(value, np.ndarray):
                value = value.data
            self._collected_statistics[op.node.friendly_name].append(value)
        if len(args) == 1:
            return original_model_output
        return zip(*[(k, v) for k, v in original_model_output.items()])
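A minimal usage sketch of the wrapper (assuming an ov_model and a custom_forward with the signature used in the DeepSpeech example; statistics_model and data_item are hypothetical stand-ins):

# Hypothetical driver code for NNCFOVWrappedModel.
wrapped = NNCFOVWrappedModel(ov_model, custom_forward)
wrapped.set_statistics_ov_model(statistics_model)  # model extended with extra statistic outputs
stats = wrapped(data_item)  # runs custom_forward and returns the aggregated statistics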
1 change: 1 addition & 0 deletions nncf/__init__.py
@@ -14,6 +14,7 @@
from nncf.common.logging.logger import disable_logging
from nncf.common.logging.logger import set_log_level
from nncf.config import NNCFConfig
+from nncf.data import CustomInferenceDataset
from nncf.data import Dataset
from nncf.parameters import DropType
from nncf.parameters import ModelType
2 changes: 1 addition & 1 deletion nncf/common/factory.py
@@ -35,7 +35,7 @@ def create(model: TModel) -> NNCFGraph:
            from nncf.onnx.graph.nncf_graph_builder import GraphConverter

            return GraphConverter.create_nncf_graph(model)
-        if model_backend == BackendType.OPENVINO:
+        if model_backend in [BackendType.OPENVINO, BackendType.OPTIMUM]:
            from nncf.openvino.graph.nncf_graph_builder import GraphConverter

            return GraphConverter.create_nncf_graph(model)