@@ -104,6 +104,7 @@ def run_benchmark(path_to_model: str, shape: Optional[List[int]] = None, verbose
    # >> output_names = [output.name for output in sess.get_outputs()]
    # >> for data_item in val_loader:
    # >>     sess.run(output_names, input_feed=transform_fn(data_item))

    input_name = model.graph.input[0].name

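For reference, the commented-out onnxruntime loop above, expanded into a runnable sketch; path_to_model, val_loader, and transform_fn come from the surrounding run_benchmark example, and the provider choice is an assumption:

import onnxruntime as ort

# A sketch of the reference validation loop from the comment above;
# path_to_model, val_loader and transform_fn are assumed from the example.
sess = ort.InferenceSession(path_to_model, providers=["CPUExecutionProvider"])
output_names = [output.name for output in sess.get_outputs()]
for data_item in val_loader:
    sess.run(output_names, input_feed=transform_fn(data_item))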
@@ -0,0 +1,147 @@
{
    "compression": {
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 3
                }
            }
        ],
        "dump_intermediate_model": true
    },
    "engine": {
        "datasets": [
            {
                "metrics": [
                    {
                        "type": "wer"
                    }
                ],
                "name": "LibriSpeech_test_clean_wav",
                "data_source": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/librispeech/test/LibriSpeech/test-clean.wav",
                "annotation_conversion": {
                    "converter": "librispeech",
                    "data_dir": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/librispeech/test/LibriSpeech/test-clean.wav"
                },
                "preprocessing": [
                    {
                        "int16mode": true,
                        "type": "audio_normalization"
                    },
                    {
                        "duration": "512 samples",
                        "overlap": "192 samples",
                        "type": "clip_audio"
                    },
                    {
                        "base": 512,
                        "type": "hanning_window"
                    },
                    {
                        "fftbase": 512,
                        "magnitude_squared": true,
                        "skip_channels": true,
                        "type": "audio_spectrogram"
                    },
                    {
                        "base": 257,
                        "filterbank_channel_count": 40,
                        "lower_frequency_limit": 20,
                        "sample_rate": 16000,
                        "type": "audio_triangle_filtering",
                        "upper_frequency_limit": 4000
                    },
                    {
                        "filterbank_channel_count": 40,
                        "numceps": 26,
                        "type": "audio_dct"
                    },
                    {
                        "context": 9,
                        "numceps": 26,
                        "type": "clip_cepstrum"
                    },
                    {
                        "step": 16,
                        "type": "pack_cepstrum"
                    }
                ],
                "reader": "wav_reader"
            }
        ],
        "launchers": [
            {
                "adapter": {
                    "beam_size": 32,
                    "lm_alpha": 0.75,
                    "lm_beta": 1.05,
                    "lm_file": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/model_attributes/mozilla-deepspeech-0.6.1/lm.binary",
                    "lm_oov_score": -1000,
                    "lm_vocabulary_length": 4463723,
                    "lm_vocabulary_offset": 941235601,
                    "logarithmic_prob": false,
                    "probability_out": "logits",
                    "type": "ctc_beam_search_decoder_with_lm"
                },
                "framework": "dlsdk",
                "inputs": [
                    {
                        "layout": "NHWC",
                        "name": "input_node",
                        "type": "INPUT"
                    },
                    {
                        "name": "previous_state_c",
                        "type": "LSTM_INPUT",
                        "value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/BlockLSTM/TensorIterator.2"
                    },
                    {
                        "name": "previous_state_h",
                        "type": "LSTM_INPUT",
                        "value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/BlockLSTM/TensorIterator.1"
                    }
                ]
            },
            {
                "adapter": {
                    "beam_size": 32,
                    "lm_alpha": 0.75,
                    "lm_beta": 1.05,
                    "lm_file": "/mnt/omz_new/nn_icv_cv_externalN/omz-validation-datasets/model_attributes/mozilla-deepspeech-0.6.1/lm.binary",
                    "lm_oov_score": -1000,
                    "lm_vocabulary_length": 4463723,
                    "lm_vocabulary_offset": 941235601,
                    "logarithmic_prob": false,
                    "probability_out": "logits",
                    "type": "ctc_beam_search_decoder_with_lm"
                },
                "framework": "openvino",
                "inputs": [
                    {
                        "layout": "NHWC",
                        "name": "input_node",
                        "type": "INPUT"
                    },
                    {
                        "name": "previous_state_c",
                        "type": "LSTM_INPUT",
                        "value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd:0"
                    },
                    {
                        "name": "previous_state_h",
                        "type": "LSTM_INPUT",
                        "value": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd_1:0"
                    }
                ]
            }
        ]
    },
    "model": {
        "model": "/mnt/omz/cv_bench_cache/ww18_weekly_23.0.0-10862-40bf400b189-API2.0/mozilla-deepspeech-0.6.1/tf/tf_frozen/FP16/1/dldt/mozilla-deepspeech-0.6.1.xml",
        "model_name": "mozilla-deepspeech-0.6.1",
        "weights": "/mnt/omz/cv_bench_cache/ww18_weekly_23.0.0-10862-40bf400b189-API2.0/mozilla-deepspeech-0.6.1/tf/tf_frozen/FP16/1/dldt/mozilla-deepspeech-0.6.1.bin"
    }
}
@@ -0,0 +1,87 @@
import os
import subprocess

import openvino.runtime as ov
from openvino.tools.accuracy_checker.evaluators.quantization_model_evaluator import create_model_evaluator
from openvino.tools.pot.configs.config import Config

import nncf
from examples.post_training_quantization.openvino.tiny_gpt2.wrapper import NNCFOVWrappedModel

model_name = "mozilla-deepspeech-0.6.1"
cache_dir = os.path.dirname(__file__)
dataset_config = os.path.join(cache_dir, "accuracy_checker.json")

command = f"omz_downloader --name {model_name} --cache_dir {cache_dir}"
cmd_output = subprocess.call(command, shell=True) # nosec

model_dir = os.path.join(cache_dir, model_name)
if not os.path.exists(model_dir):
    command = f"omz_converter --name {model_name} -o {os.path.join(cache_dir, model_name)}"
    cmd_output = subprocess.call(command, shell=True)  # nosec

xml_path = os.path.join(model_dir, f"public/{model_name}/FP16/{model_name}.xml")
ov_model = ov.Core().read_model(xml_path)

config = Config.read_config(dataset_config)
config.configure_params()
accuracy_checker_config = config.engine

model_evaluator = create_model_evaluator(accuracy_checker_config)
model_evaluator.load_network([{"model": ov_model}])
model_evaluator.select_dataset("")


def sequence_transform_fn(data_item):
    """
    Quantization transform function. Extracts and preprocesses sequential input data
    from the dataloader for quantization.

    :param data_item: Data item produced by the DataLoader during iteration.
    :return: An iterable over the preprocessed elements of the given data item.
    """
    return data_item


def custom_forward(self, model, data_item):
    """
    Runs the model over one sequential data item, feeding the LSTM state outputs of each
    step back in as the state inputs of the next step. None is passed as the previous
    model outputs on the first step, so the launcher fills the state inputs with defaults.

    :param model: Compiled model used for statistics collection.
    :param data_item: Data item produced by the DataLoader during iteration.
    :return: Statistics collected over all steps of the sequence.
    """

    def iter_through_sequence():
        _, batch_annotation, batch_input, _ = data_item
        filled_inputs, _, _ = model_evaluator._get_batch_input(batch_input, batch_annotation)
        for filled_input in filled_inputs:
            input_data = {}
            for name, value in filled_input.items():
                input_data[model_evaluator.launcher.input_to_tensor_name[name]] = value
            yield input_data

    model_outputs = None
    for model_inputs in iter_through_sequence():
        state_inputs = model_evaluator.launcher._fill_lstm_inputs(model_outputs)
        model_inputs.update(state_inputs)
        model_outputs = model(model_inputs)
        self.collect_statistics_callback(model_outputs)
    return self.collected_statistics


def set_model_fn(self, ov_model):
    self._ov_model = ov.Core().compile_model(ov_model, device_name="CPU")


dataset = nncf.CustomInferenceDataset(model_evaluator.dataset, sequence_transform_fn, custom_forward)

# Wrap the model so nncf.quantize can drive the custom sequential forward;
# set_model_fn is passed as a keyword so it lands in the wrapper's **kwargs.
wrapped_model = NNCFOVWrappedModel(ov_model, custom_forward, set_model_fn=set_model_fn)

quantized_model = nncf.quantize(wrapped_model, dataset, subset_size=3)
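The essence of custom_forward above is state threading: each step's LSTM state outputs become the next step's state inputs. Below is a minimal sketch of that loop outside accuracy-checker, where model is any callable mapping an input dict to an output dict, chunks and zero_state are hypothetical, and the state names mirror the LSTM_INPUT entries in accuracy_checker.json:

# Hypothetical skeleton of the state-threading loop from custom_forward.
STATE_MAP = {
    "previous_state_c": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd:0",
    "previous_state_h": "cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/GatherNd_1:0",
}


def run_sequence(model, chunks, zero_state):
    outputs = None
    for chunk in chunks:
        # On the first step use zeros; afterwards feed back the previous outputs.
        state = zero_state if outputs is None else {name: outputs[src] for name, src in STATE_MAP.items()}
        outputs = model({"input_node": chunk, **state})
    return outputs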
Empty file.
81 changes: 81 additions & 0 deletions examples/post_training_quantization/openvino/tiny_gpt2/main.py
@@ -0,0 +1,81 @@
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer

import nncf

GENERATION_LENGTH = 20


model_id = "hf-internal-testing/tiny-random-gpt2"
# model_id = "hf-internal-testing/tiny-random-GPTNeoModel"
# model_id = "hf-internal-testing/tiny-random-GPTNeoXForCausalLM"

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokens = tokenizer("This is a sample input", return_tensors="pt")

model_with_pkv = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True)


def set_ov_model_in_hf_model(hf_model, ov_model):
    hf_model.model = ov_model
    hf_model.request = ov_model.create_infer_request()


def get_custom_forward(ov_model, callback_fn):
    hf_model = model_with_pkv
    set_ov_model_in_hf_model(hf_model, ov_model)

    def _callback_fn(info):
        # Pair each output port of the model with the tensor produced for it.
        outputs = {k: v for k, v in zip(info["infer_request"].model_outputs, info["infer_request"].outputs)}
        callback_fn(outputs)

    hf_model.request.set_callback(_callback_fn, {"infer_request": hf_model.request})

    def custom_forward(dataitem):
        hf_model.generate(**dataitem, min_length=GENERATION_LENGTH, max_length=GENERATION_LENGTH, num_beams=1)

    return custom_forward


def transform_fn(data_item):
    return data_item


dataset = nncf.CustomInferenceDataset([tokens] * 10, transform_fn, get_custom_forward)


# Fix duplicated friendly names in the OV model:
names = set()
for op in model_with_pkv.model.get_ops():
    friendly_name = op.get_friendly_name()
    while friendly_name in names:
        friendly_name += "_"
    names.add(friendly_name)
    op.set_friendly_name(friendly_name)

quantized_model = nncf.quantize(model_with_pkv.model, dataset, subset_size=3)

model_with_pkv.model = quantized_model
model_with_pkv.request = None
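A quick sanity check one could run after swapping in the quantized model (a sketch, not part of the example; it assumes the Optimum wrapper lazily recreates its infer request once request has been reset to None):

# Hypothetical smoke test: generate with the quantized model and decode the result.
output_ids = model_with_pkv.generate(**tokens, max_length=GENERATION_LENGTH, num_beams=1)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))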
54 changes: 54 additions & 0 deletions examples/post_training_quantization/openvino/tiny_gpt2/wrapper.py
@@ -0,0 +1,54 @@
from collections import defaultdict
from typing import Any

import numpy as np
import openvino.runtime as ov


class NNCFOVWrappedModel:
    def __init__(self, ov_model, custom_forward, **kwargs) -> None:
        self._ov_model = ov_model
        self._original_model_outputs_names = {op.node.friendly_name for op in ov_model.outputs}
        self._custom_forward = custom_forward
        self._collected_statistics = defaultdict(list)
        self._stack_axis = 0
        self._ov_statistics_model = None
        self._kwargs = kwargs

    def __getattr__(self, __name: str) -> Any:
        return object.__getattribute__(self._ov_model, __name)

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        if self._ov_statistics_model is None:
            raise RuntimeError("The statistics model is not set; call set_statistics_ov_model() first.")
        return self._custom_forward(self, self._ov_statistics_model, *args, **kwds)

    def set_statistics_ov_model(self, ov_model):
        self._ov_statistics_model = ov.Core().compile_model(ov_model, device_name="CPU")

    @property
    def collected_statistics(self):
        aggregated_statistics = {}
        for friendly_name, values in self._collected_statistics.items():
            aggregated_statistics[friendly_name] = np.stack(values, axis=self._stack_axis)
        return aggregated_statistics

    def collect_statistics_callback(self, *args):
        # Save every output that is not an original model output to
        # self._collected_statistics and pass the original outputs through.
        if len(args) == 1:
            outputs = args[0]
            assert isinstance(outputs, dict)
        else:
            assert len(args) == 2
            outputs = {k: v for k, v in zip(*args)}
        original_model_output = {}
        for op, value in outputs.items():
            if op.node.friendly_name in self._original_model_outputs_names:
                original_model_output[op] = value
                continue
            if not isinstance(value, np.ndarray):
                value = value.data
            self._collected_statistics[op.node.friendly_name].append(value)
        if len(args) == 1:
            return original_model_output
        return zip(*[(k, v) for k, v in original_model_output.items()])
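A minimal usage sketch of the wrapper (assuming an ov_model and a custom_forward with the signature used in the DeepSpeech example; statistics_model and data_item are hypothetical stand-ins):

# Hypothetical driver code for NNCFOVWrappedModel.
wrapped = NNCFOVWrappedModel(ov_model, custom_forward)
wrapped.set_statistics_ov_model(statistics_model)  # model extended with extra statistic outputs
stats = wrapped(data_item)  # runs custom_forward and returns the aggregated statistics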
1 change: 1 addition & 0 deletions nncf/__init__.py
@@ -14,6 +14,7 @@
from nncf.common.logging.logger import disable_logging
from nncf.common.logging.logger import set_log_level
from nncf.config import NNCFConfig
+from nncf.data import CustomInferenceDataset
from nncf.data import Dataset
from nncf.parameters import DropType
from nncf.parameters import ModelType
2 changes: 1 addition & 1 deletion nncf/common/factory.py
@@ -35,7 +35,7 @@ def create(model: TModel) -> NNCFGraph:
            from nncf.onnx.graph.nncf_graph_builder import GraphConverter

            return GraphConverter.create_nncf_graph(model)
-        if model_backend == BackendType.OPENVINO:
+        if model_backend in [BackendType.OPENVINO, BackendType.OPTIMUM]:
            from nncf.openvino.graph.nncf_graph_builder import GraphConverter

            return GraphConverter.create_nncf_graph(model)