From 5d2784d9a76cbf2dd5ad5ec5e116b9e9ceccfa1f Mon Sep 17 00:00:00 2001
From: Aleksandr Suslov
Date: Wed, 5 Feb 2025 10:47:24 +0400
Subject: [PATCH 01/18] added init integration of quantization

---
 backends/openvino/__init__.py               |   3 +-
 backends/openvino/quantizer/__init__.py     |   3 +
 backends/openvino/quantizer/quantizer.py    | 309 ++++++++++++++++++
 backends/openvino/requirements.txt          |   1 +
 examples/openvino/CMakeLists.txt            |   1 +
 examples/openvino/aot/README.md             |   7 +
 .../openvino/aot/aot_openvino_compiler.py   |  71 +++-
 examples/openvino/openvino_build_example.sh |   1 +
 8 files changed, 392 insertions(+), 4 deletions(-)
 create mode 100644 backends/openvino/quantizer/__init__.py
 create mode 100644 backends/openvino/quantizer/quantizer.py

diff --git a/backends/openvino/__init__.py b/backends/openvino/__init__.py
index dac275d3f12..4a69f6b75ff 100644
--- a/backends/openvino/__init__.py
+++ b/backends/openvino/__init__.py
@@ -1,4 +1,5 @@
 from .partitioner import OpenvinoPartitioner
 from .preprocess import OpenvinoBackend
+from .quantizer.quantizer import OpenVINOQuantizer
 
-__all__ = [OpenvinoBackend, OpenvinoPartitioner]
+__all__ = [OpenvinoBackend, OpenvinoPartitioner, OpenVINOQuantizer]
diff --git a/backends/openvino/quantizer/__init__.py b/backends/openvino/quantizer/__init__.py
new file mode 100644
index 00000000000..03ea98e2c5b
--- /dev/null
+++ b/backends/openvino/quantizer/__init__.py
@@ -0,0 +1,3 @@
+from .quantizer import OpenVINOQuantizer
+
+__all__ = [OpenVINOQuantizer]
diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py
new file mode 100644
index 00000000000..58fde3e23f1
--- /dev/null
+++ b/backends/openvino/quantizer/quantizer.py
@@ -0,0 +1,309 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +from collections import defaultdict +from typing import Dict, List, Optional, Tuple, Union + +import torch.fx +from torch.ao.quantization.observer import HistogramObserver +from torch.ao.quantization.observer import PerChannelMinMaxObserver +from torch.ao.quantization.quantizer.quantizer import EdgeOrNode +from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation +from torch.ao.quantization.quantizer.quantizer import QuantizationSpec +from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase +from torch.ao.quantization.quantizer.quantizer import Quantizer +from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec + +import nncf +from nncf.common.graph.graph import NNCFGraph +from nncf.common.logging import nncf_logger +from nncf.common.quantization.quantizer_propagation.solver import QuantizerPropagationRule +from nncf.common.quantization.quantizer_setup import QuantizationPointBase +from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup +from nncf.common.quantization.structs import QuantizationPreset +from nncf.common.quantization.structs import QuantizationScheme +from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter +from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name +from nncf.experimental.torch.fx.transformations import fold_constant_except_qdq +from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode +from nncf.parameters import TargetDevice +from nncf.quantization.advanced_parameters import FP8QuantizationParameters +from nncf.quantization.advanced_parameters import OverflowFix +from nncf.quantization.advanced_parameters import QuantizationParameters +from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization +from nncf.scopes import IgnoredScope +from nncf.torch.model_graph_manager import get_weight_tensor_port_ids + +QUANT_ANNOTATION_KEY = "quantization_annotation" + + +class OpenVINOQuantizer(Quantizer): + """ + Implementation of the Torch AO quantizer which annotates models with quantization annotations + optimally for the inference via OpenVINO. + """ + + def __init__( + self, + *, + mode: Optional[QuantizationMode] = None, + preset: Optional[QuantizationPreset] = None, + target_device: TargetDevice = TargetDevice.ANY, + model_type: Optional[ModelType] = None, + ignored_scope: Optional[IgnoredScope] = None, + overflow_fix: Optional[OverflowFix] = None, + quantize_outputs: bool = False, + activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, + weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, + quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE, + ): + """ + :param mode: Defines optimization mode for the algorithm. None by default. + :param preset: A preset controls the quantization mode (symmetric and asymmetric). + It can take the following values: + - `performance`: Symmetric quantization of weights and activations. + - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + Default value is None. In this case, `mixed` preset is used for `transformer` + model type otherwise `performance`. + :param target_device: A target device the specificity of which will be taken + into account while compressing in order to obtain the best performance + for this type of device, defaults to TargetDevice.ANY. 
:param model_type: Model type is needed to specify additional patterns
+            in the model. Currently, only the `transformer` type is supported.
+        :param ignored_scope: An ignored scope that defines the list of model control
+            flow graph nodes to be ignored during quantization.
+        :param overflow_fix: This option controls whether to apply the overflow issue
+            fix for the 8-bit quantization.
+        :param quantize_outputs: Whether to insert additional quantizers right before
+            each of the model outputs.
+        :param activations_quantization_params: Quantization parameters for model
+            activations.
+        :param weights_quantization_params: Quantization parameters for model weights.
+        :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers.
+            MERGE_ALL_IN_ONE by default.
+        """
+        self._min_max_algo = MinMaxQuantization(
+            mode=mode,
+            preset=preset,
+            target_device=target_device,
+            model_type=model_type,
+            ignored_scope=ignored_scope,
+            overflow_fix=overflow_fix,
+            quantize_outputs=quantize_outputs,
+            activations_quantization_params=activations_quantization_params,
+            weights_quantization_params=weights_quantization_params,
+            quantizer_propagation_rule=quantizer_propagation_rule,
+        )
+
+    def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
+        self._min_max_algo._set_backend_entity(model)
+        return self._min_max_algo.find_quantization_setup(model, nncf_graph)
+
+    def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
+        nncf_graph = GraphConverter.create_nncf_graph(model)
+        quantization_setup = self.get_quantization_setup(model, nncf_graph)
+
+        graph = model.graph
+        node_vs_torch_annotation = defaultdict(QuantizationAnnotation)
+
+        for qp in quantization_setup.quantization_points.values():
+            edge_or_node, annotation = self._get_edge_or_node_and_annotation(
+                graph, nncf_graph, qp, node_vs_torch_annotation
+            )
+            qspec = self._get_torch_ao_qspec_from_qp(qp)
+            self._fill_torch_ao_annotation(edge_or_node, qspec, annotation)
+
+        for quantizer_ids in quantization_setup.unified_scale_groups.values():
+
+            root_quantizer_id = self._get_unified_scales_root_quantizer_id(
+                nncf_graph, quantizer_ids, quantization_setup
+            )
+            root_qp = quantization_setup.quantization_points[root_quantizer_id]
+
+            if any(root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig for q_id in quantizer_ids):
+                qps = [quantization_setup.quantization_points[q_id] for q_id in quantizer_ids]
+                msg = (
+                    "Different quantization configs are set to one unified scale group:"
+                    f"{[(qp.insertion_point.__dict__, str(qp.qconfig)) for qp in qps]}"
+                )
+                raise nncf.InternalError(msg)
+
+            root_target_node = get_graph_node_by_name(graph, root_qp.insertion_point.target_node_name)
+            root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph)
+
+            for quantizer_id in quantizer_ids:
+                if quantizer_id == root_quantizer_id:
+                    continue
+
+                qspec = SharedQuantizationSpec(root_edge_or_node)
+                qp = quantization_setup.quantization_points[quantizer_id]
+                edge_or_node, annotation = self._get_edge_or_node_and_annotation(
+                    graph, nncf_graph, qp, node_vs_torch_annotation
+                )
+                self._fill_torch_ao_annotation(edge_or_node, qspec, annotation)
+
+        for node, annotation in node_vs_torch_annotation.items():
+            assert QUANT_ANNOTATION_KEY not in node.meta
+            node.meta[QUANT_ANNOTATION_KEY] = annotation
+
+    @staticmethod
+    def _get_unified_scales_root_quantizer_id(
+        nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: 
SingleConfigQuantizerSetup + ) -> int: + """ + Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` + in the given NNCFGraph. This is required by the `_get_obs_or_fq_map` function. + Refer to: https://github.com/pytorch/pytorch/blob/main/torch/ao/quantization/pt2e/prepare.py#L291 + + :param nncf_graph: The NNCFGraph instance. + :param quantizer_ids: The list of quantizer IDs to evaluate. + :param quantizer_setup: The instance of SingleConfigQuantizerSetup. + :return: The ID of the earliest quantizer node in terms of `nncf_node.node_id`. + """ + nncf_node_quantizer_id = None + root_quantizer_id = None + for quantizer_id in quantizer_ids: + target_node_name = quantizer_setup.quantization_points[quantizer_id].insertion_point.target_node_name + nncf_node = nncf_graph.get_node_by_name(target_node_name) + if nncf_node_quantizer_id is None or nncf_node.node_id < nncf_node_quantizer_id: + root_quantizer_id = quantizer_id + nncf_node_quantizer_id = nncf_node.node_id + return root_quantizer_id + + @staticmethod + def _get_edge_or_node_and_annotation( + graph: torch.fx.Graph, + nncf_graph: NNCFGraph, + qp: QuantizationPointBase, + node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation], + ) -> Tuple[EdgeOrNode, QuantizationAnnotation]: + """ + Retrieves the edge or node and its corresponding QuantizationAnnotation based on the given graph, + quantization point, and node-to-annotation mapping. + + :param graph: torch.fx.Graph instance. + :param nncf_graph: NNCFGraph instance. + :param qp: QuantizationPointBase instance. + :param node_vs_torch_annotation: A dictionary mapping torch.fx.GraphNode objects to their respective + QuantizationAnnotations. + :return: A tuple containing the EdgeOrNode and its associated QuantizationAnnotation. + """ + target_node = get_graph_node_by_name(graph, qp.insertion_point.target_node_name) + annotation = node_vs_torch_annotation[target_node] + edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) + return edge_or_node, annotation + + @staticmethod + def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nncf_graph: NNCFGraph) -> EdgeOrNode: + """ + Returns the edge or node based on the given target node and quantization point. + + :param target_node: Target node instance. + :param qp: QuantizationPointBase instance. + :param graph: NNCFGraph instance. + :return: The corresponding EdgeOrNode derived from the target node and quantization point. + """ + ip = qp.insertion_point + if qp.is_weight_quantization_point(): + nncf_node = nncf_graph.get_node_by_name(target_node.name) + weights_ports_ids = get_weight_tensor_port_ids(nncf_node, nncf_graph) + if len(weights_ports_ids) > 1: + # TODO(dlyakhov): support quantization for nodes with several weights + nncf_logger.warning( + f"Quantization of the weighted node {target_node.name}" + " is not yet supported by the OpenVINOQuantizer." + f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." + f" Quantizable weights are located on ports: {weights_ports_ids}." 
+ ) + weight_node = target_node.all_input_nodes[weights_ports_ids[0]] + return (weight_node, target_node) + + if ip.input_port_id is None: + return target_node + + node = target_node.all_input_nodes[ip.input_port_id] + return (node, target_node) + + @staticmethod + def _fill_torch_ao_annotation( + edge_or_node: EdgeOrNode, + qspec: QuantizationSpecBase, + annotation_to_update: QuantizationAnnotation, + ) -> None: + """ + Helper method to update the annotation_to_update based on the specified edge_or_node and qspec. + + :param edge_or_node: The target EdgeOrNode to be used for the update. + :param qspec: An instance of QuantizationSpecBase representing the quantization specification to apply. + :param annotation_to_update: The annotation to update based on the edge_or_node and qspec. + """ + if isinstance(edge_or_node, torch.fx.Node): + annotation_to_update.output_qspec = qspec + else: + annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec + + @staticmethod + def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: + """ + Retrieves the quantization configuration from the given quantization point and + converts it into a QuantizationSpec. + + :param qp: An instance of QuantizationPointBase. + :return: A QuantizationSpec retrieved and converted from the quantization point. + """ + # Eps value is copied from nncf/torch/quantization/layers.py + extra_args = {"eps": 1e-16} + qconfig = qp.qconfig + is_weight = qp.is_weight_quantization_point() + + if qconfig.per_channel: + torch_qscheme = ( + torch.per_channel_symmetric + if qconfig.mode is QuantizationScheme.SYMMETRIC + else torch.per_channel_affine + ) + else: + torch_qscheme = ( + torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine + ) + if is_weight: + observer = PerChannelMinMaxObserver + quant_min = -128 + quant_max = 127 + dtype = torch.int8 + channel_axis = 0 + else: + observer = ( + HistogramObserver + if torch_qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine] + else PerChannelMinMaxObserver + ) + quant_min = 0 + quant_max = 255 + dtype = torch.int8 if qconfig.signedness_to_force else torch.uint8 + channel_axis = 1 # channel dim for activations + return QuantizationSpec( + dtype=dtype, + observer_or_fake_quant_ctr=observer.with_args(**extra_args), + quant_min=quant_min, + quant_max=quant_max, + qscheme=torch_qscheme, + ch_axis=channel_axis, + is_dynamic=False, + ) + + def validate(self, model: torch.fx.GraphModule) -> None: + pass + + def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: + fold_constant_except_qdq(model) + return model diff --git a/backends/openvino/requirements.txt b/backends/openvino/requirements.txt index 7c3de886e27..f00257127a3 100644 --- a/backends/openvino/requirements.txt +++ b/backends/openvino/requirements.txt @@ -6,3 +6,4 @@ tokenizers transformers piq pillow +nncf @ https://github.com/openvinotoolkit/nncf.git diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 4a1917fa3af..10638a7b5f7 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -55,6 +55,7 @@ target_include_directories(openvino_portable_ops_lib PUBLIC ${_common_include_di # Build Executor Runner add_executable(openvino_executor_runner ${_openvino_executor_runner__srcs}) + target_include_directories( openvino_executor_runner PUBLIC ${_common_include_directories} ${EXECUTORCH_ROOT}/cmake-openvino-out/third-party/gflags/include ) diff --git 
a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md
index 6c59f1dad41..46e476a8408 100644
--- a/examples/openvino/aot/README.md
+++ b/examples/openvino/aot/README.md
@@ -31,10 +31,17 @@ python aot_openvino_compiler.py --suite --model --inp
 - `[1, 3, 224, 224]` (Zsh users: wrap in quotes)
 - `(1, 3, 224, 224)`
 
+- **`--quantize`** (optional):
+  Enable model quantization. Default is False.
+
+- **`--dataset`** (optional):
+  Path to the calibration dataset. For the experiment, tiny-imagenet is used; download it from http://cs231n.stanford.edu/tiny-imagenet-200.zip and pass the path to it here. TODO: decide in what form datasets should be supported.
+
 - **`--device`** (optional):
   Target device for the compiled model. Default is `CPU`.
   Examples: `CPU`, `GPU`
 
+
 ## **Examples**
 ### Export a TIMM VGG16 model for the CPU
diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py
index 4674fbbd755..cc31e011e38 100644
--- a/examples/openvino/aot/aot_openvino_compiler.py
+++ b/examples/openvino/aot/aot_openvino_compiler.py
@@ -4,10 +4,15 @@
 # except in compliance with the License. See the license file in the root
 # directory of this source tree for more details.
 
+import nncf.experimental
+import nncf.experimental.torch
 import executorch
+import nncf
 import timm
 import torch
+import torchvision.datasets as datasets
 import torchvision.models as torchvision_models
+import torchvision.transforms as transforms
 from transformers import AutoModel
 from executorch.exir.backend.backend_details import CompileSpec
 from executorch.backends.openvino.preprocess import OpenvinoBackend
@@ -16,6 +21,12 @@ from torch.export import export, ExportedProgram
 from torch.export.exported_program import ExportedProgram
 import argparse
+from executorch.backends.openvino import OpenVINOQuantizer
+from torch.ao.quantization.quantize_pt2e import (
+    convert_pt2e,
+    prepare_pt2e,
+)
+
 
 # Function to load a model based on the selected suite
 def load_model(suite: str, model_name: str):
@@ -30,7 +41,48 @@ def load_model(suite: str, model_name: str):
     else:
         raise ValueError(f"Unsupported model suite: {suite}")
 
-def main(suite: str, model_name: str, input_shape, device: str):
+
+def load_calibration_dataset(dataset_path: str):
+    val_dir = f"{dataset_path}/val"
+
+    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+
+    val_dataset = datasets.ImageFolder(
+        val_dir,
+        transforms.Compose(
+            [
+                transforms.Resize(64),  # for tiny imagenet
+                transforms.ToTensor(),
+                normalize,
+            ]
+        ),
+    )
+
+    calibration_dataset = torch.utils.data.DataLoader(
+        val_dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True
+    )
+
+    return calibration_dataset
+
+
+def quantize_model(model: torch.fx.GraphModule, calibration_dataset: torch.utils.data.DataLoader, subset_size=300):
+    quantizer = OpenVINOQuantizer()
+
+    print("PTQ: Annotate the model...")
+    annotated_model = prepare_pt2e(model, quantizer)
+
+    print("PTQ: Calibrate the model...")
+    for idx, data in enumerate(calibration_dataset):
+        if idx >= subset_size:
+            break
+        annotated_model(data[0])
+
+    print("PTQ: Convert the quantized model...")
+    quantized_model = convert_pt2e(annotated_model)
+    return quantized_model
+
+
+def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str):
     # Ensure input_shape is a tuple
     if isinstance(input_shape, list):
         input_shape = tuple(input_shape)
@@ -44,9 +96,19 @@ def main(suite: str, model_name: str, 
input_shape, device: str): # Provide input example_args = (torch.randn(*input_shape), ) - # Export to aten dialect using torch.export + # Export the model to the aten dialect aten_dialect: ExportedProgram = export(model, example_args) + if quantize: + # Quantize model + if not dataset_path: + raise ValueError("Quantization requires a calibration dataset.") + calibration_dataset = load_calibration_dataset(dataset_path) + + captured_model = aten_dialect.module() + quantized_model = quantize_model(captured_model, calibration_dataset) + aten_dialect: ExportedProgram = export(quantized_model, example_args) + # Convert to edge dialect edge_program: EdgeProgramManager = to_edge(aten_dialect) to_be_lowered_module = edge_program.exported_program() @@ -71,10 +133,13 @@ def main(suite: str, model_name: str, input_shape, device: str): parser.add_argument("--model", type=str, required=True, help="Model name to be loaded.") parser.add_argument("--input_shape", type=eval, required=True, help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).") + parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") + parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") parser.add_argument("--device", type=str, default="CPU", help="Target device for compiling the model (e.g., CPU, GPU). Default is CPU.") args = parser.parse_args() # Run the main function with parsed arguments - main(args.suite, args.model, args.input_shape, args.device) + with nncf.torch.disable_patching(): + main(args.suite, args.model, args.input_shape, args.quantize, args.dataset, args.device) diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index ee16658941d..52c508d8ee2 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -34,6 +34,7 @@ main() { local example_dir=examples/openvino local example_build_dir="${build_dir}/${example_dir}" local cmake_prefix_path="${PWD}/${build_dir}/lib/cmake/ExecuTorch;${PWD}/${build_dir}/third-party/gflags;" + rm -rf "${example_build_dir}" ## OpenVINO original From 61488d5a9d77ebf86658392c8ee0e24b6eb9f550 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 7 Feb 2025 18:12:06 +0100 Subject: [PATCH 02/18] deit3_small_patch16_224_in21ft1k --- backends/openvino/quantizer/quantizer.py | 2 ++ .../openvino/aot/aot_openvino_compiler.py | 25 +++++++++++-------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 58fde3e23f1..aefa91f7455 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -15,6 +15,7 @@ import torch.fx from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.observer import PerChannelMinMaxObserver +from torch.ao.quantization.observer import MinMaxObserver from torch.ao.quantization.quantizer.quantizer import EdgeOrNode from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation from torch.ao.quantization.quantizer.quantizer import QuantizationSpec @@ -276,6 +277,7 @@ def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine ) if is_weight: + observer = PerChannelMinMaxObserver if qconfig.per_channel else MinMaxObserver observer = PerChannelMinMaxObserver quant_min = -128 
quant_max = 127 diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index cc31e011e38..dabf1c964fa 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -65,20 +65,17 @@ def load_calibration_dataset(dataset_path: str): return calibration_dataset -def quantize_model(model: torch.fx.GraphModule, calibration_dataset: torch.utils.data.DataLoader, subset_size=300): - quantizer = OpenVINOQuantizer() +def quantize_model(model: torch.fx.GraphModule, example_args, subset_size=300): + quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) print("PTQ: Annotate the model...") annotated_model = prepare_pt2e(model, quantizer) print("PTQ: Calibrate the model...") - for idx, data in enumerate(calibration_dataset): - if idx >= subset_size: - break - annotated_model(data[0]) + annotated_model(*example_args) print("PTQ: Convert the quantized model...") - quantized_model = convert_pt2e(annotated_model) + quantized_model = convert_pt2e(annotated_model, fold_quantize=False) return quantized_model @@ -106,7 +103,9 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: calibration_dataset = load_calibration_dataset(dataset_path) captured_model = aten_dialect.module() - quantized_model = quantize_model(captured_model, calibration_dataset) + visualize_fx_model(captured_model, f"{model_name}_fp32.svg") + quantized_model = quantize_model(captured_model, example_args) + visualize_fx_model(quantized_model, f"{model_name}_int8.svg") aten_dialect: ExportedProgram = export(quantized_model, example_args) # Convert to edge dialect @@ -121,9 +120,15 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) # Serialize and save it to a file - with open(f"{model_name}.pte", "wb") as file: + model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" + with open(model_name, "wb") as file: exec_prog.write_to_file(file) - print(f"Model exported and saved as {model_name}.pte on {device}.") + print(f"Model exported and saved as {model_name} on {device}.") + +from torch.fx.passes.graph_drawer import FxGraphDrawer +def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): + g = FxGraphDrawer(model, output_svg_path) + g.get_dot_graph().write_svg(output_svg_path) if __name__ == "__main__": # Argument parser for dynamic inputs From 42155a1d433d87428781099b9c1ba276e7aebb55 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 7 Feb 2025 18:28:57 +0100 Subject: [PATCH 03/18] Resnet-like model checked --- examples/openvino/aot/aot_openvino_compiler.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index dabf1c964fa..a062af4d001 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -66,7 +66,8 @@ def load_calibration_dataset(dataset_path: str): def quantize_model(model: torch.fx.GraphModule, example_args, subset_size=300): - quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) + #quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) + quantizer = OpenVINOQuantizer() print("PTQ: Annotate the model...") annotated_model = prepare_pt2e(model, 
quantizer) @@ -100,12 +101,12 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: # Quantize model if not dataset_path: raise ValueError("Quantization requires a calibration dataset.") - calibration_dataset = load_calibration_dataset(dataset_path) + #calibration_dataset = load_calibration_dataset(dataset_path) captured_model = aten_dialect.module() - visualize_fx_model(captured_model, f"{model_name}_fp32.svg") + #visualize_fx_model(captured_model, f"{model_name}_fp32.svg") quantized_model = quantize_model(captured_model, example_args) - visualize_fx_model(quantized_model, f"{model_name}_int8.svg") + #visualize_fx_model(quantized_model, f"{model_name}_int8.svg") aten_dialect: ExportedProgram = export(quantized_model, example_args) # Convert to edge dialect From 7c66314296db63523872df6407bfbc271d4d8e4c Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Mon, 10 Feb 2025 19:39:26 +0100 Subject: [PATCH 04/18] WIP --- backends/openvino/quantizer/quantizer.py | 87 +++++------- .../openvino/aot/aot_openvino_compiler.py | 132 ++++++++++++++---- .../openvino_executor_runner.cpp | 1 + 3 files changed, 142 insertions(+), 78 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index aefa91f7455..b5f43251426 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -10,12 +10,11 @@ # limitations under the License. from collections import defaultdict -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple import torch.fx from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.observer import PerChannelMinMaxObserver -from torch.ao.quantization.observer import MinMaxObserver from torch.ao.quantization.quantizer.quantizer import EdgeOrNode from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation from torch.ao.quantization.quantizer.quantizer import QuantizationSpec @@ -24,25 +23,11 @@ from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec import nncf +import nncf.common.quantization as q +import nncf.experimental.torch.fx as nncf_fx +import nncf.parameters as p +import nncf.quantization.advanced_parameters as advanced_p from nncf.common.graph.graph import NNCFGraph -from nncf.common.logging import nncf_logger -from nncf.common.quantization.quantizer_propagation.solver import QuantizerPropagationRule -from nncf.common.quantization.quantizer_setup import QuantizationPointBase -from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup -from nncf.common.quantization.structs import QuantizationPreset -from nncf.common.quantization.structs import QuantizationScheme -from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter -from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name -from nncf.experimental.torch.fx.transformations import fold_constant_except_qdq -from nncf.parameters import ModelType -from nncf.parameters import QuantizationMode -from nncf.parameters import TargetDevice -from nncf.quantization.advanced_parameters import FP8QuantizationParameters -from nncf.quantization.advanced_parameters import OverflowFix -from nncf.quantization.advanced_parameters import QuantizationParameters -from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization -from nncf.scopes import IgnoredScope -from nncf.torch.model_graph_manager import get_weight_tensor_port_ids QUANT_ANNOTATION_KEY = 
"quantization_annotation" @@ -56,16 +41,15 @@ class OpenVINOQuantizer(Quantizer): def __init__( self, *, - mode: Optional[QuantizationMode] = None, - preset: Optional[QuantizationPreset] = None, - target_device: TargetDevice = TargetDevice.ANY, - model_type: Optional[ModelType] = None, - ignored_scope: Optional[IgnoredScope] = None, - overflow_fix: Optional[OverflowFix] = None, + mode: Optional[p.QuantizationMode] = None, + preset: Optional[q.structs.QuantizationPreset] = None, + target_device: p.TargetDevice = p.TargetDevice.ANY, + transformer_model: bool = False, + ignored_scope: Optional[nncf.IgnoredScope] = None, + overflow_fix: Optional[advanced_p.OverflowFix] = None, quantize_outputs: bool = False, - activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, - weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, - quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE, + activations_quantization_params: Optional[advanced_p.QuantizationParameters] = None, + weights_quantization_params: Optional[advanced_p.QuantizationParameters] = None, ): """ :param mode: Defines optimization mode for the algorithm. None by default. @@ -89,29 +73,28 @@ def __init__( :param activations_quantization_params: Quantization parameters for model activations. :param weights_quantization_params: Quantization parameters for model weights. - :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers. - MERGE_ALL_IN_ONE by default. """ - self._min_max_algo = MinMaxQuantization( + self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( mode=mode, preset=preset, target_device=target_device, - model_type=model_type, + model_type=p.ModelType.TRANSFORMER if transformer_model else None, ignored_scope=ignored_scope, overflow_fix=overflow_fix, quantize_outputs=quantize_outputs, activations_quantization_params=activations_quantization_params, weights_quantization_params=weights_quantization_params, - quantizer_propagation_rule=quantizer_propagation_rule, ) - def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup: + def get_nncf_quantization_setup( + self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph + ) -> q.quantizer_setup.SingleConfigQuantizerSetup: self._min_max_algo._set_backend_entity(model) return self._min_max_algo.find_quantization_setup(model, nncf_graph) def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: - nncf_graph = GraphConverter.create_nncf_graph(model) - quantization_setup = self.get_quantization_setup(model, nncf_graph) + nncf_graph = nncf_fx.nncf_graph_builder.GraphConverter.create_nncf_graph(model) + quantization_setup = self.get_nncf_quantization_setup(model, nncf_graph) graph = model.graph node_vs_torch_annotation = defaultdict(QuantizationAnnotation) @@ -138,7 +121,9 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: ) raise nncf.InternalError(msg) - root_target_node = get_graph_node_by_name(graph, root_qp.insertion_point.target_node_name) + root_target_node = nncf_fx.node_utils.get_graph_node_by_name( + graph, root_qp.insertion_point.target_node_name + ) root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph) for quantizer_id in quantizer_ids: @@ -155,10 +140,11 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: for node, 
annotation in node_vs_torch_annotation.items(): assert QUANT_ANNOTATION_KEY not in node.meta node.meta[QUANT_ANNOTATION_KEY] = annotation + return model @staticmethod def _get_unified_scales_root_quantizer_id( - nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: SingleConfigQuantizerSetup + nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: q.quantizer_setup.SingleConfigQuantizerSetup ) -> int: """ Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` @@ -184,7 +170,7 @@ def _get_unified_scales_root_quantizer_id( def _get_edge_or_node_and_annotation( graph: torch.fx.Graph, nncf_graph: NNCFGraph, - qp: QuantizationPointBase, + qp: q.quantizer_setup.QuantizationPointBase, node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation], ) -> Tuple[EdgeOrNode, QuantizationAnnotation]: """ @@ -198,13 +184,15 @@ def _get_edge_or_node_and_annotation( QuantizationAnnotations. :return: A tuple containing the EdgeOrNode and its associated QuantizationAnnotation. """ - target_node = get_graph_node_by_name(graph, qp.insertion_point.target_node_name) + target_node = nncf_fx.node_utils.get_graph_node_by_name(graph, qp.insertion_point.target_node_name) annotation = node_vs_torch_annotation[target_node] edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) return edge_or_node, annotation @staticmethod - def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nncf_graph: NNCFGraph) -> EdgeOrNode: + def _get_edge_or_node( + target_node: torch.fx.Node, qp: q.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph + ) -> EdgeOrNode: """ Returns the edge or node based on the given target node and quantization point. @@ -216,10 +204,10 @@ def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nnc ip = qp.insertion_point if qp.is_weight_quantization_point(): nncf_node = nncf_graph.get_node_by_name(target_node.name) - weights_ports_ids = get_weight_tensor_port_ids(nncf_node, nncf_graph) + weights_ports_ids = nncf.torch.model_graph_manager.get_weight_tensor_port_ids(nncf_node, nncf_graph) if len(weights_ports_ids) > 1: # TODO(dlyakhov): support quantization for nodes with several weights - nncf_logger.warning( + nncf.common.logging.nncf_logger.warning( f"Quantization of the weighted node {target_node.name}" " is not yet supported by the OpenVINOQuantizer." f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." @@ -253,7 +241,7 @@ def _fill_torch_ao_annotation( annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec @staticmethod - def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: + def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) -> QuantizationSpec: """ Retrieves the quantization configuration from the given quantization point and converts it into a QuantizationSpec. 
@@ -269,15 +257,16 @@ def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: if qconfig.per_channel: torch_qscheme = ( torch.per_channel_symmetric - if qconfig.mode is QuantizationScheme.SYMMETRIC + if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC else torch.per_channel_affine ) else: torch_qscheme = ( - torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine + torch.per_tensor_symmetric + if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC + else torch.per_tensor_affine ) if is_weight: - observer = PerChannelMinMaxObserver if qconfig.per_channel else MinMaxObserver observer = PerChannelMinMaxObserver quant_min = -128 quant_max = 127 @@ -307,5 +296,5 @@ def validate(self, model: torch.fx.GraphModule) -> None: pass def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: - fold_constant_except_qdq(model) + nncf_fx.transformations.fold_constant_except_qdq(model) return model diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index a062af4d001..928757c32e2 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -22,11 +22,15 @@ from torch.export.exported_program import ExportedProgram import argparse from executorch.backends.openvino import OpenVINOQuantizer +#from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer +from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e from torch.ao.quantization.quantize_pt2e import ( convert_pt2e, prepare_pt2e, ) - +from sklearn.metrics import accuracy_score +from timm.data import resolve_data_config +from timm.data.transforms_factory import create_transform # Function to load a model based on the selected suite def load_model(suite: str, model_name: str): @@ -42,20 +46,17 @@ def load_model(suite: str, model_name: str): raise ValueError(f"Unsupported model suite: {suite}") -def load_calibration_dataset(dataset_path: str): +def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module): val_dir = f"{dataset_path}/val" - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + if suite == "torchvision": + transform = torchvision_models.get_model_weights(model.name).transforms() + else: + transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) val_dataset = datasets.ImageFolder( val_dir, - transforms.Compose( - [ - transforms.Resize(64), # for tiny imagenet - transforms.ToTensor(), - normalize, - ] - ), + transform=transform ) calibration_dataset = torch.utils.data.DataLoader( @@ -65,21 +66,6 @@ def load_calibration_dataset(dataset_path: str): return calibration_dataset -def quantize_model(model: torch.fx.GraphModule, example_args, subset_size=300): - #quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) - quantizer = OpenVINOQuantizer() - - print("PTQ: Annotate the model...") - annotated_model = prepare_pt2e(model, quantizer) - - print("PTQ: Calibrate the model...") - annotated_model(*example_args) - - print("PTQ: Convert the quantized model...") - quantized_model = convert_pt2e(annotated_model, fold_quantize=False) - return quantized_model - - def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str): # Ensure input_shape is a tuple if isinstance(input_shape, list): @@ -98,15 +84,24 @@ 
def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: aten_dialect: ExportedProgram = export(model, example_args) if quantize: + if suite == "huggingface": + raise ValueError("Quantization of {suite} models did not support yet.") + # Quantize model if not dataset_path: raise ValueError("Quantization requires a calibration dataset.") - #calibration_dataset = load_calibration_dataset(dataset_path) + calibration_dataset = load_calibration_dataset(dataset_path, suite, model) captured_model = aten_dialect.module() #visualize_fx_model(captured_model, f"{model_name}_fp32.svg") - quantized_model = quantize_model(captured_model, example_args) - #visualize_fx_model(quantized_model, f"{model_name}_int8.svg") + quantizer = OpenVINOQuantizer() + + print("PTQ: Quantize the model") + def transform(x): + return x[0] + + quantized_model = quantize_pt2e(captured_model, quantizer, calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False) + aten_dialect: ExportedProgram = export(quantized_model, example_args) # Convert to edge dialect @@ -121,16 +116,95 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) # Serialize and save it to a file - model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" + model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" with open(model_name, "wb") as file: exec_prog.write_to_file(file) print(f"Model exported and saved as {model_name} on {device}.") + if quantize: + print("Start validation of the quantized model:") + + # 1: Dump inputs + import os + import shutil + + dest_path = "tmp_inputs" + out_path = "tmp_outputs" + targets, input_files = [], [] + for d in [dest_path, out_path]: + if os.path.exists(d): + shutil.rmtree(d) + os.makedirs(d) + input_list = "" + for idx, data in enumerate(calibration_dataset): + feature, target = data + targets.append(target) + file_name = f"{dest_path}/input_{idx}_0.raw" + input_list += file_name + " " + if not isinstance(feature, torch.Tensor): + feature = torch.tensor(feature) + feature.detach().numpy().tofile(file_name) + input_files.append(file_name) + + inp_list_file = os.path.join(dest_path, "in_list.txt") + with open(inp_list_file, "w") as f: + input_list = input_list.strip() + "\n" + f.write(input_list) + + # 2: Run the executor + print("Run openvino_executor_runner...") + import subprocess + breakpoint() + subprocess.run(["../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", + f"--model_path={model_name}", + f"--input_list_path={inp_list_file}", + f"--output_folder_path={out_path}", + #f"--num_iter={len(input_files)}" + ]) + + # 3: load the outputs and compare with the targets + import numpy as np + predictions = [] + for i in range(len(input_files)): + predictions.append( + np.fromfile( + os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32 + ) + ) + + k_val = [1, 5] + acc_top1 = accuracy_score(predictions, targets) + print(f"acc@1: {acc_top1}") + + from torch.fx.passes.graph_drawer import FxGraphDrawer def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): g = FxGraphDrawer(model, output_svg_path) g.get_dot_graph().write_svg(output_svg_path) +def generate_inputs(dest_path: str, file_name: str, inputs=None, input_list=None): + input_list_file = None + input_files = [] + + # Prepare input list + if input_list is not None: + input_list_file = f"{dest_path}/{file_name}" + with 
open(input_list_file, "w") as f: + f.write(input_list) + f.flush() + + # Prepare input data + if inputs is not None: + for idx, data in enumerate(inputs): + for i, d in enumerate(data): + file_name = f"{dest_path}/input_{idx}_{i}.raw" + if not isinstance(d, torch.Tensor): + d = torch.tensor(d) + d.detach().numpy().tofile(file_name) + input_files.append(file_name) + + return input_list_file, input_files + if __name__ == "__main__": # Argument parser for dynamic inputs parser = argparse.ArgumentParser(description="Export models with executorch.") diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 7615b63649a..b0d3a9004c2 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -179,6 +179,7 @@ int main(int argc, char** argv) { std::string file_path; while (std::getline(input_list, file_path)) { auto input_files = split(file_path, " "); + ET_LOG(Info, "INPUT_FILES.SIZE: %ld", input_files.size()); if (input_files.size() == 0) { break; } From c1fa9e25851b5819dea18b0070c9ab46cc2e0c3a Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 10:31:15 +0100 Subject: [PATCH 05/18] Formating --- backends/openvino/quantizer/quantizer.py | 15 +- .../openvino/aot/aot_openvino_compiler.py | 128 ++++++++---------- .../openvino_executor_runner.cpp | 2 + 3 files changed, 63 insertions(+), 82 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index b5f43251426..63da8325e4f 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -1,13 +1,8 @@ -# Copyright (c) 2025 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Copyright (c) Intel Corporation +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. from collections import defaultdict from typing import Dict, List, Optional, Tuple diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 928757c32e2..91df971403c 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -4,33 +4,31 @@ # except in compliance with the License. See the license file in the root # directory of this source tree for more details. 
-import nncf.experimental -import nncf.experimental.torch +import argparse + import executorch -import nncf import timm import torch import torchvision.datasets as datasets import torchvision.models as torchvision_models -import torchvision.transforms as transforms -from transformers import AutoModel -from executorch.exir.backend.backend_details import CompileSpec -from executorch.backends.openvino.preprocess import OpenvinoBackend -from executorch.backends.openvino.partitioner import OpenvinoPartitioner -from executorch.exir import EdgeProgramManager, to_edge -from torch.export import export, ExportedProgram -from torch.export.exported_program import ExportedProgram -import argparse from executorch.backends.openvino import OpenVINOQuantizer -#from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer -from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e -from torch.ao.quantization.quantize_pt2e import ( - convert_pt2e, - prepare_pt2e, -) +from executorch.backends.openvino.partitioner import OpenvinoPartitioner +from executorch.exir import EdgeProgramManager +from executorch.exir import to_edge +from executorch.exir.backend.backend_details import CompileSpec from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform +from torch.export import ExportedProgram +from torch.export import export +from torch.export.exported_program import ExportedProgram +from transformers import AutoModel + +import nncf +import nncf.experimental +import nncf.experimental.torch +from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e + # Function to load a model based on the selected suite def load_model(suite: str, model_name: str): @@ -54,10 +52,7 @@ def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Modu else: transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) - val_dataset = datasets.ImageFolder( - val_dir, - transform=transform - ) + val_dataset = datasets.ImageFolder(val_dir, transform=transform) calibration_dataset = torch.utils.data.DataLoader( val_dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True @@ -78,7 +73,7 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: model = model.eval() # Provide input - example_args = (torch.randn(*input_shape), ) + example_args = (torch.randn(*input_shape),) # Export the model to the aten dialect aten_dialect: ExportedProgram = export(model, example_args) @@ -93,14 +88,19 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: calibration_dataset = load_calibration_dataset(dataset_path, suite, model) captured_model = aten_dialect.module() - #visualize_fx_model(captured_model, f"{model_name}_fp32.svg") quantizer = OpenVINOQuantizer() print("PTQ: Quantize the model") + def transform(x): return x[0] - quantized_model = quantize_pt2e(captured_model, quantizer, calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False) + quantized_model = quantize_pt2e( + captured_model, + quantizer, + calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), + fold_quantize=False, + ) aten_dialect: ExportedProgram = export(quantized_model, example_args) @@ -154,69 +154,53 @@ def transform(x): # 2: Run the executor print("Run openvino_executor_runner...") import subprocess - breakpoint() - 
subprocess.run(["../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", - f"--model_path={model_name}", - f"--input_list_path={inp_list_file}", - f"--output_folder_path={out_path}", - #f"--num_iter={len(input_files)}" - ]) + + subprocess.run( + [ + "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", + f"--model_path={model_name}", + f"--input_list_path={inp_list_file}", + f"--output_folder_path={out_path}", + # f"--num_iter={len(input_files)}" + ] + ) # 3: load the outputs and compare with the targets import numpy as np + predictions = [] for i in range(len(input_files)): - predictions.append( - np.fromfile( - os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32 - ) - ) + predictions.append(np.fromfile(os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32)) - k_val = [1, 5] acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") -from torch.fx.passes.graph_drawer import FxGraphDrawer -def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): - g = FxGraphDrawer(model, output_svg_path) - g.get_dot_graph().write_svg(output_svg_path) - -def generate_inputs(dest_path: str, file_name: str, inputs=None, input_list=None): - input_list_file = None - input_files = [] - - # Prepare input list - if input_list is not None: - input_list_file = f"{dest_path}/{file_name}" - with open(input_list_file, "w") as f: - f.write(input_list) - f.flush() - - # Prepare input data - if inputs is not None: - for idx, data in enumerate(inputs): - for i, d in enumerate(data): - file_name = f"{dest_path}/input_{idx}_{i}.raw" - if not isinstance(d, torch.Tensor): - d = torch.tensor(d) - d.detach().numpy().tofile(file_name) - input_files.append(file_name) - - return input_list_file, input_files - if __name__ == "__main__": # Argument parser for dynamic inputs parser = argparse.ArgumentParser(description="Export models with executorch.") - parser.add_argument("--suite", type=str, required=True, choices=["timm", "torchvision", "huggingface"], - help="Select the model suite (timm, torchvision, huggingface).") + parser.add_argument( + "--suite", + type=str, + required=True, + choices=["timm", "torchvision", "huggingface"], + help="Select the model suite (timm, torchvision, huggingface).", + ) parser.add_argument("--model", type=str, required=True, help="Model name to be loaded.") - parser.add_argument("--input_shape", type=eval, required=True, - help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).") + parser.add_argument( + "--input_shape", + type=eval, + required=True, + help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", + ) parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") - parser.add_argument("--device", type=str, default="CPU", - help="Target device for compiling the model (e.g., CPU, GPU). Default is CPU.") + parser.add_argument( + "--device", + type=str, + default="CPU", + help="Target device for compiling the model (e.g., CPU, GPU). 
Default is CPU.", + ) args = parser.parse_args() diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index b0d3a9004c2..41268751b2f 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -180,6 +180,7 @@ int main(int argc, char** argv) { while (std::getline(input_list, file_path)) { auto input_files = split(file_path, " "); ET_LOG(Info, "INPUT_FILES.SIZE: %ld", input_files.size()); + ET_LOG(Info, "NUM_INPUTS: %ld", num_inputs); if (input_files.size() == 0) { break; } @@ -189,6 +190,7 @@ int main(int argc, char** argv) { method_meta.input_tensor_meta(input_index); auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); + ET_LOG(Info, "READ FILE %s", std::string(input_files[input_index])); std::ifstream fin(input_files[input_index], std::ios::binary); fin.seekg(0, fin.end); size_t file_size = fin.tellg(); From e2415afba91eaf52eda3b9f8a1e20c739f3183f8 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 12:10:03 +0100 Subject: [PATCH 06/18] openvino_executor_runner.cpp can run on several inputs --- .../openvino/aot/aot_openvino_compiler.py | 75 ++-- .../openvino_executor_runner.cpp | 321 ++++++++++-------- 2 files changed, 214 insertions(+), 182 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 91df971403c..64f2ca2b955 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -5,8 +5,13 @@ # directory of this source tree for more details. import argparse +import os +import shutil +import subprocess +from pathlib import Path import executorch +import numpy as np import timm import torch import torchvision.datasets as datasets @@ -19,9 +24,9 @@ from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform -from torch.export import ExportedProgram from torch.export import export from torch.export.exported_program import ExportedProgram +from torch.fx.passes.graph_drawer import FxGraphDrawer from transformers import AutoModel import nncf @@ -36,12 +41,14 @@ def load_model(suite: str, model_name: str): return timm.create_model(model_name, pretrained=True) elif suite == "torchvision": if not hasattr(torchvision_models, model_name): - raise ValueError(f"Model {model_name} not found in torchvision.") + msg = f"Model {model_name} not found in torchvision." 
+ raise ValueError(msg) return getattr(torchvision_models, model_name)(pretrained=True) elif suite == "huggingface": return AutoModel.from_pretrained(model_name) else: - raise ValueError(f"Unsupported model suite: {suite}") + msg = f"Unsupported model suite: {suite}" + raise ValueError(msg) def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module): @@ -61,12 +68,32 @@ def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Modu return calibration_dataset +def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): + g = FxGraphDrawer(model, output_svg_path) + g.get_dot_graph().write_svg(output_svg_path) + + +def dump_inputs(calibration_dataset, dest_path): + input_files, targets = [], [] + for idx, data in enumerate(calibration_dataset): + feature, target = data + targets.append(target) + file_name = f"{dest_path}/input_{idx}_0.raw" + if not isinstance(feature, torch.Tensor): + feature = torch.tensor(feature) + feature.detach().numpy().tofile(file_name) + input_files.append(file_name) + + return input_files, targets + + def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str): # Ensure input_shape is a tuple if isinstance(input_shape, list): input_shape = tuple(input_shape) elif not isinstance(input_shape, tuple): - raise ValueError("Input shape must be a list or tuple.") + msg = "Input shape must be a list or tuple." + raise ValueError(msg) # Load the selected model model = load_model(suite, model_name) @@ -80,11 +107,13 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: if quantize: if suite == "huggingface": - raise ValueError("Quantization of {suite} models did not support yet.") + msg = f"Quantization of {suite} models did not support yet." + raise ValueError(msg) # Quantize model if not dataset_path: - raise ValueError("Quantization requires a calibration dataset.") + msg = "Quantization requires a calibration dataset." 
+ raise ValueError(msg) calibration_dataset = load_calibration_dataset(dataset_path, suite, model) captured_model = aten_dialect.module() @@ -101,6 +130,7 @@ def transform(x): calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False, ) + visualize_fx_model(quantized_model, f"{model_name}_int8.svg") aten_dialect: ExportedProgram = export(quantized_model, example_args) @@ -123,37 +153,21 @@ def transform(x): if quantize: print("Start validation of the quantized model:") - # 1: Dump inputs - import os - import shutil - - dest_path = "tmp_inputs" - out_path = "tmp_outputs" - targets, input_files = [], [] + dest_path = Path("tmp_inputs") + out_path = Path("tmp_outputs") for d in [dest_path, out_path]: if os.path.exists(d): shutil.rmtree(d) os.makedirs(d) - input_list = "" - for idx, data in enumerate(calibration_dataset): - feature, target = data - targets.append(target) - file_name = f"{dest_path}/input_{idx}_0.raw" - input_list += file_name + " " - if not isinstance(feature, torch.Tensor): - feature = torch.tensor(feature) - feature.detach().numpy().tofile(file_name) - input_files.append(file_name) - - inp_list_file = os.path.join(dest_path, "in_list.txt") + + input_files, targets = dump_inputs(calibration_dataset, dest_path) + inp_list_file = dest_path / "in_list.txt" with open(inp_list_file, "w") as f: - input_list = input_list.strip() + "\n" - f.write(input_list) + f.write("\n".join(input_files) + "\n") # 2: Run the executor print("Run openvino_executor_runner...") - import subprocess subprocess.run( [ @@ -161,16 +175,15 @@ def transform(x): f"--model_path={model_name}", f"--input_list_path={inp_list_file}", f"--output_folder_path={out_path}", - # f"--num_iter={len(input_files)}" ] ) # 3: load the outputs and compare with the targets - import numpy as np predictions = [] for i in range(len(input_files)): - predictions.append(np.fromfile(os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32)) + tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) + predictions.append(torch.tensor(np.argmax(tensor))) acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 41268751b2f..f9a85c03a53 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -25,22 +26,16 @@ // Define a fixed-size memory pool for the method allocator (4 MB) static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB -// Define command-line flags for model path, the number of iterations, input list path, and output folder path +// Define command-line flags for model path, the number of iterations, input +// list path, and output folder path +DEFINE_string(model_path, "", + "Path to the model serialized in flatbuffer format (required)."); +DEFINE_int32(num_iter, 1, "Number of inference iterations (default is 1)."); +DEFINE_string(input_list_path, "", + "Path to the input list file which includes the list of raw " + "input tensor files (optional)."); DEFINE_string( - model_path, - "", - "Path to the model serialized in flatbuffer format (required)."); -DEFINE_int32( - num_iter, - 1, - "Number of inference iterations (default is 1)."); -DEFINE_string( - input_list_path, - "", - "Path to the input list file which includes the list of raw input 
tensor files (optional)."); -DEFINE_string( - output_folder_path, - "", + output_folder_path, "", "Path to the output folder to save raw output tensor files (optional)."); using executorch::extension::FileDataLoader; @@ -57,7 +52,119 @@ using executorch::runtime::Result; using executorch::runtime::Span; using executorch::runtime::TensorInfo; -int main(int argc, char** argv) { +std::pair benchmark_method(Result &method, + int num_iterations) { + Error status = Error::Ok; + auto before_exec = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < num_iterations; ++i) { + status = method->execute(); + } + auto after_exec = std::chrono::high_resolution_clock::now(); + double elapsed_time = std::chrono::duration_cast( + after_exec - before_exec) + .count() / + 1000.0; + return std::make_pair(elapsed_time, status); +} + +void dump_outputs(Result &method, const char *output_folder_path, + size_t index = 0) { + std::vector outputs(method->outputs_size()); + Error status = Error::Ok; + status = method->get_outputs(outputs.data(), outputs.size()); + ET_CHECK(status == Error::Ok); + for (size_t output_index = 0; output_index < method->outputs_size(); + output_index++) { + auto output_tensor = outputs[output_index].toTensor(); + auto output_file_name = std::string(output_folder_path) + "/output_" + + std::to_string(index) + "_" + + std::to_string(output_index) + ".raw"; + std::ofstream fout(output_file_name.c_str(), std::ios::binary); + fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); + fout.close(); + ET_LOG(Info, "Write outputs to file %s", output_file_name.c_str()); + } +} + +struct ProcessInputsResult { + double total_time; + size_t num_iter; + Error status; +}; + +ProcessInputsResult process_inputs(Result &method, + const char *input_list_path, + const char *output_folder_path) { + std::vector inputs(method->inputs_size()); + ET_LOG(Info, "%zu inputs: ", inputs.size()); + double total_time_elapsed = 0.; + size_t idx = 0; + + Error status = Error::Ok; + status = method->get_inputs(inputs.data(), inputs.size()); + ET_CHECK(status == Error::Ok); + + auto split = [](std::string s, std::string delimiter) { + size_t pos_start = 0, pos_end, delim_len = delimiter.length(); + std::string token; + std::vector res; + + while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { + token = s.substr(pos_start, pos_end - pos_start); + pos_start = pos_end + delim_len; + res.push_back(token); + } + res.push_back(s.substr(pos_start)); + return res; + }; + + // Read raw input tensor file names from input list file and + // iterate each raw input tensor file to read values + std::ifstream input_list(input_list_path); + if (input_list.is_open()) { + size_t num_inputs = method->inputs_size(); + std::string file_path; + while (std::getline(input_list, file_path)) { + auto input_files = split(file_path, " "); + if (input_files.size() == 0) { + break; + } + for (int input_index = 0; input_index < num_inputs; ++input_index) { + MethodMeta method_meta = method->method_meta(); + Result tensor_meta = + method_meta.input_tensor_meta(input_index); + auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); + + ET_LOG(Info, "Read inputs from file %s", + input_files[input_index].c_str()); + std::ifstream fin(input_files[input_index], std::ios::binary); + fin.seekg(0, fin.end); + size_t file_size = fin.tellg(); + + ET_CHECK_MSG( + file_size == tensor_meta->nbytes(), + "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", + input_index, file_size, tensor_meta->nbytes()); + + fin.seekg(0, fin.beg); + fin.read(static_cast(input_data_ptr), file_size); + fin.close(); + } + double time_elapsed; + std::tie(time_elapsed, status) = benchmark_method(method, 1); + if (status != Error::Ok) { + return {total_time_elapsed, idx, status}; + } + total_time_elapsed += time_elapsed; + dump_outputs(method, output_folder_path, idx++); + } + } else { + ET_CHECK_MSG(false, "Failed to read input list file: %s", input_list_path); + } + return {total_time_elapsed, idx, status}; +} + +int main(int argc, char **argv) { // Initialize the runtime environment executorch::runtime::runtime_init(); @@ -68,22 +175,21 @@ int main(int argc, char** argv) { if (FLAGS_model_path.empty()) { std::cerr << "Error: --model_path is required." << std::endl; std::cerr << "Usage: " << argv[0] - << " --model_path= --num_iter=" << std::endl; + << " --model_path= --num_iter=" + << std::endl; return 1; } // Retrieve the model path and number of iterations - const char* model_path = FLAGS_model_path.c_str(); + const char *model_path = FLAGS_model_path.c_str(); int num_iterations = FLAGS_num_iter; std::cout << "Model path: " << model_path << std::endl; std::cout << "Number of iterations: " << num_iterations << std::endl; // Load the model using FileDataLoader Result loader = FileDataLoader::from(model_path); - ET_CHECK_MSG( - loader.ok(), - "FileDataLoader::from() failed: 0x%" PRIx32, - static_cast(loader.error())); + ET_CHECK_MSG(loader.ok(), "FileDataLoader::from() failed: 0x%" PRIx32, + static_cast(loader.error())); // Load the program from the loaded model Result program = Program::load(&loader.get()); @@ -93,8 +199,9 @@ int main(int argc, char** argv) { } ET_LOG(Info, "Model file %s is loaded.", model_path); - // Retrieve the method name from the program (assumes the first method is used) - const char* method_name = nullptr; + // Retrieve the method name from the program (assumes the first method is + // used) + const char *method_name = nullptr; { const auto method_name_result = program->get_method_name(0); ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); @@ -104,11 +211,8 @@ int main(int argc, char** argv) { // Retrieve metadata about the method Result method_meta = program->method_meta(method_name); - ET_CHECK_MSG( - method_meta.ok(), - "Failed to get method_meta for %s: 0x%" PRIx32, - method_name, - static_cast(method_meta.error())); + ET_CHECK_MSG(method_meta.ok(), "Failed to get method_meta for %s: 0x%" PRIx32, + method_name, static_cast(method_meta.error())); // Set up a memory allocator for the method MemoryAllocator method_allocator{ @@ -133,138 +237,53 @@ int main(int argc, char** argv) { // Load the method into the program Result method = program->load_method(method_name, &memory_manager); - ET_CHECK_MSG( - method.ok(), - "Loading of method %s failed with status 0x%" PRIx32, - method_name, - static_cast(method.error())); + ET_CHECK_MSG(method.ok(), + "Loading of method %s failed with status 0x%" PRIx32, + method_name, static_cast(method.error())); ET_LOG(Info, "Method loaded."); // Prepare the input tensors for the method auto inputs = prepare_input_tensors(*method); - ET_CHECK_MSG( - inputs.ok(), - "Could not prepare inputs: 0x%" PRIx32, - static_cast(inputs.error())); + ET_CHECK_MSG(inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, + static_cast(inputs.error())); + + double elapsed_time; + Error status = Error::Ok; // If the input path list is provided, read input tensors from the files 
- if (!(FLAGS_input_list_path.empty())) { - const char* input_list_path = FLAGS_input_list_path.c_str(); - ET_LOG(Info, "Loading input tensors from the list provided in %s.", input_list_path); - Error status = Error::Ok; - std::vector inputs(method->inputs_size()); - ET_LOG(Info, "%zu inputs: ", inputs.size()); - status = method->get_inputs(inputs.data(), inputs.size()); - ET_CHECK(status == Error::Ok); - - auto split = [](std::string s, std::string delimiter) { - size_t pos_start = 0, pos_end, delim_len = delimiter.length(); - std::string token; - std::vector res; - - while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { - token = s.substr(pos_start, pos_end - pos_start); - pos_start = pos_end + delim_len; - res.push_back(token); - } - res.push_back(s.substr(pos_start)); - return res; - }; - - // Read raw input tensor file names from input list file and - // iterate each raw input tensor file to read values - std::ifstream input_list(input_list_path); - if (input_list.is_open()) { - size_t num_inputs = method->inputs_size(); - std::string file_path; - while (std::getline(input_list, file_path)) { - auto input_files = split(file_path, " "); - ET_LOG(Info, "INPUT_FILES.SIZE: %ld", input_files.size()); - ET_LOG(Info, "NUM_INPUTS: %ld", num_inputs); - if (input_files.size() == 0) { - break; - } - for (int input_index = 0; input_index < num_inputs; ++input_index) { - MethodMeta method_meta = method->method_meta(); - Result tensor_meta = - method_meta.input_tensor_meta(input_index); - auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); - - ET_LOG(Info, "READ FILE %s", std::string(input_files[input_index])); - std::ifstream fin(input_files[input_index], std::ios::binary); - fin.seekg(0, fin.end); - size_t file_size = fin.tellg(); - - ET_CHECK_MSG( - file_size == tensor_meta->nbytes(), - "Input(%d) size mismatch. file bytes: %zu, tensor bytes: %zu", - input_index, - file_size, - tensor_meta->nbytes()); - - fin.seekg(0, fin.beg); - fin.read( - static_cast(input_data_ptr), - file_size); - fin.close(); - } - } - } else { - ET_CHECK_MSG(false, - "Failed to read input list file: %s", - input_list_path); + if (!(FLAGS_input_list_path.empty()) and + !(FLAGS_output_folder_path.empty())) { + const char *input_list_path = FLAGS_input_list_path.c_str(); + ET_LOG(Info, "Loading input tensors from the list provided in %s.", + input_list_path); + const char *output_folder_path = FLAGS_output_folder_path.c_str(); + auto res = process_inputs(method, input_list_path, output_folder_path); + elapsed_time = res.total_time; + status = res.status; + num_iterations = res.num_iter; + } else { + + // Measure execution time for inference + std::tie(elapsed_time, status) = benchmark_method(method, num_iterations); + // Retrieve and print the method outputs + ET_LOG(Info, "%zu Number of outputs: ", method->outputs_size()); + + // If output folder path is provided, save output tensors + // into raw tensor files. 
+ if (!(FLAGS_output_folder_path.empty())) { + const char *output_folder_path = FLAGS_output_folder_path.c_str(); + ET_LOG(Info, "Saving output tensors into the output folder: %s.", + output_folder_path); + dump_outputs(method, output_folder_path); } } - ET_LOG(Info, "Inputs prepared."); - - // Measure execution time for inference - auto before_exec = std::chrono::high_resolution_clock::now(); - Error status = Error::Ok; - for (int i = 0; i < num_iterations; ++i) { - status = method->execute(); - } - auto after_exec = std::chrono::high_resolution_clock::now(); - double elapsed_time = std::chrono::duration_cast( - after_exec - before_exec) - .count() / 1000.0; - // Log execution time and average time per iteration - ET_LOG( - Info, - "%d inference took %f ms, avg %f ms", - num_iterations, - elapsed_time, - elapsed_time / static_cast(num_iterations)); - ET_CHECK_MSG( - status == Error::Ok, - "Execution of method %s failed with status 0x%" PRIx32, - method_name, - static_cast(status)); + ET_LOG(Info, "%d inference took %f ms, avg %f ms", num_iterations, + elapsed_time, elapsed_time / static_cast(num_iterations)); + ET_CHECK_MSG(status == Error::Ok, + "Execution of method %s failed with status 0x%" PRIx32, + method_name, static_cast(status)); ET_LOG(Info, "Model executed successfully."); - // Retrieve and print the method outputs - std::vector outputs(method->outputs_size()); - ET_LOG(Info, "%zu Number of outputs: ", outputs.size()); - status = method->get_outputs(outputs.data(), outputs.size()); - ET_CHECK(status == Error::Ok); - - // If output folder path is provided, save output tensors - // into raw tensor files. - if (!(FLAGS_output_folder_path.empty())) { - const char* output_folder_path = FLAGS_output_folder_path.c_str(); - ET_LOG(Info, "Saving output tensors into the output folder: %s.", output_folder_path); - for (size_t output_index = 0; output_index < method->outputs_size(); - output_index++) { - auto output_tensor = outputs[output_index].toTensor(); - auto output_file_name = std::string(output_folder_path) + "/output_" + - std::to_string(output_index) + ".raw"; - std::ofstream fout(output_file_name.c_str(), std::ios::binary); - fout.write( - output_tensor.const_data_ptr(), output_tensor.nbytes()); - fout.close(); - } - } - return 0; } - From 8cbb1175902efab402d814805e4348b9c817b1f1 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 14:28:25 +0100 Subject: [PATCH 07/18] Validate option / minor --- .../openvino/aot/aot_openvino_compiler.py | 33 ++++++++++++------- .../openvino_executor_runner.cpp | 3 -- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 64f2ca2b955..3bdaf947a69 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -51,11 +51,11 @@ def load_model(suite: str, model_name: str): raise ValueError(msg) -def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module): +def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module, model_name: str): val_dir = f"{dataset_path}/val" if suite == "torchvision": - transform = torchvision_models.get_model_weights(model.name).transforms() + transform = torchvision_models.get_model_weights(model_name).DEFAULT.transforms() else: transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) @@ -87,7 +87,7 @@ def dump_inputs(calibration_dataset, dest_path): return input_files, targets 
-def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str): +def main(suite: str, model_name: str, input_shape, quantize: bool, validate: bool, dataset_path: str, device: str): # Ensure input_shape is a tuple if isinstance(input_shape, list): input_shape = tuple(input_shape) @@ -95,6 +95,8 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: msg = "Input shape must be a list or tuple." raise ValueError(msg) + calibration_dataset = None + # Load the selected model model = load_model(suite, model_name) model = model.eval() @@ -114,7 +116,7 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: if not dataset_path: msg = "Quantization requires a calibration dataset." raise ValueError(msg) - calibration_dataset = load_calibration_dataset(dataset_path, suite, model) + calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) captured_model = aten_dialect.module() quantizer = OpenVINOQuantizer() @@ -146,12 +148,15 @@ def transform(x): exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) # Serialize and save it to a file - model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" - with open(model_name, "wb") as file: + model_file_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" + with open(model_file_name, "wb") as file: exec_prog.write_to_file(file) - print(f"Model exported and saved as {model_name} on {device}.") + print(f"Model exported and saved as {model_file_name} on {device}.") + + if validate: + if calibration_dataset is None: + calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) - if quantize: print("Start validation of the quantized model:") # 1: Dump inputs dest_path = Path("tmp_inputs") @@ -172,18 +177,17 @@ def transform(x): subprocess.run( [ "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", - f"--model_path={model_name}", + f"--model_path={model_file_name}", f"--input_list_path={inp_list_file}", f"--output_folder_path={out_path}", ] ) # 3: load the outputs and compare with the targets - predictions = [] for i in range(len(input_files)): tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) - predictions.append(torch.tensor(np.argmax(tensor))) + predictions.append(torch.argmax(torch.tensor(tensor))) acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") @@ -207,6 +211,11 @@ def transform(x): help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", ) parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") + parser.add_argument( + "--validate", + action="store_true", + help="Enable model validation. 
--dataset argument is requred for the validation.", + ) parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") parser.add_argument( "--device", @@ -219,4 +228,4 @@ def transform(x): # Run the main function with parsed arguments with nncf.torch.disable_patching(): - main(args.suite, args.model, args.input_shape, args.quantize, args.dataset, args.device) + main(args.suite, args.model, args.input_shape, args.quantize, args.validate, args.dataset, args.device) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index f9a85c03a53..36c957bc433 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -82,7 +82,6 @@ void dump_outputs(Result &method, const char *output_folder_path, std::ofstream fout(output_file_name.c_str(), std::ios::binary); fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); fout.close(); - ET_LOG(Info, "Write outputs to file %s", output_file_name.c_str()); } } @@ -135,8 +134,6 @@ ProcessInputsResult process_inputs(Result &method, method_meta.input_tensor_meta(input_index); auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); - ET_LOG(Info, "Read inputs from file %s", - input_files[input_index].c_str()); std::ifstream fin(input_files[input_index], std::ios::binary); fin.seekg(0, fin.end); size_t file_size = fin.tellg(); From 4b60fb4934d39c683f323e2bd526d422bf39fcd5 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 14:56:18 +0100 Subject: [PATCH 08/18] Input shape from the input dataset --- .../openvino/aot/aot_openvino_compiler.py | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 3bdaf947a69..e4ef955b40c 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -88,19 +88,20 @@ def dump_inputs(calibration_dataset, dest_path): def main(suite: str, model_name: str, input_shape, quantize: bool, validate: bool, dataset_path: str, device: str): - # Ensure input_shape is a tuple - if isinstance(input_shape, list): - input_shape = tuple(input_shape) - elif not isinstance(input_shape, tuple): - msg = "Input shape must be a list or tuple." - raise ValueError(msg) - - calibration_dataset = None - # Load the selected model model = load_model(suite, model_name) model = model.eval() + if dataset_path: + calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) + input_shape = tuple(next(iter(calibration_dataset))[0].shape) + print(f"Input shape retrieved from the model config: {input_shape}") + # Ensure input_shape is a tuple + elif isinstance(input_shape, list): + input_shape = tuple(input_shape) + else: + msg = "Input shape must be a list or tuple." + raise ValueError(msg) # Provide input example_args = (torch.randn(*input_shape),) @@ -116,7 +117,6 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, validate: boo if not dataset_path: msg = "Quantization requires a calibration dataset." 
raise ValueError(msg) - calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) captured_model = aten_dialect.module() quantizer = OpenVINOQuantizer() @@ -154,8 +154,13 @@ def transform(x): print(f"Model exported and saved as {model_file_name} on {device}.") if validate: - if calibration_dataset is None: - calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) + if suite == "huggingface": + msg = f"Validation of {suite} models did not support yet." + raise ValueError(msg) + + if not dataset_path: + msg = "Validateion requires a calibration dataset." + raise ValueError(msg) print("Start validation of the quantized model:") # 1: Dump inputs @@ -207,7 +212,6 @@ def transform(x): parser.add_argument( "--input_shape", type=eval, - required=True, help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", ) parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") From e0cd6448ef57210b2e91f5aa93393b0860371e48 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 15:23:27 +0100 Subject: [PATCH 09/18] --batch_size --- .../openvino/aot/aot_openvino_compiler.py | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index e4ef955b40c..dba47c0dde3 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -51,7 +51,7 @@ def load_model(suite: str, model_name: str): raise ValueError(msg) -def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module, model_name: str): +def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, model: torch.nn.Module, model_name: str): val_dir = f"{dataset_path}/val" if suite == "torchvision": @@ -62,7 +62,7 @@ def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Modu val_dataset = datasets.ImageFolder(val_dir, transform=transform) calibration_dataset = torch.utils.data.DataLoader( - val_dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True + val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True ) return calibration_dataset @@ -77,7 +77,7 @@ def dump_inputs(calibration_dataset, dest_path): input_files, targets = [], [] for idx, data in enumerate(calibration_dataset): feature, target = data - targets.append(target) + targets.extend(target) file_name = f"{dest_path}/input_{idx}_0.raw" if not isinstance(feature, torch.Tensor): feature = torch.tensor(feature) @@ -87,13 +87,22 @@ def dump_inputs(calibration_dataset, dest_path): return input_files, targets -def main(suite: str, model_name: str, input_shape, quantize: bool, validate: bool, dataset_path: str, device: str): +def main( + suite: str, + model_name: str, + input_shape, + quantize: bool, + validate: bool, + dataset_path: str, + device: str, + batch_size: int, +): # Load the selected model model = load_model(suite, model_name) model = model.eval() if dataset_path: - calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) + calibration_dataset = load_calibration_dataset(dataset_path, batch_size, suite, model, model_name) input_shape = tuple(next(iter(calibration_dataset))[0].shape) print(f"Input shape retrieved from the model config: {input_shape}") # Ensure input_shape is a tuple @@ -192,7 +201,7 @@ def transform(x): predictions = [] for i in 
range(len(input_files)): tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) - predictions.append(torch.argmax(torch.tensor(tensor))) + predictions.extend(torch.tensor(tensor).reshape(-1, 1000).argmax(-1)) acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") @@ -214,6 +223,13 @@ def transform(x): type=eval, help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", ) + parser.add_argument( + "--batch_size", + type=int, + default=1, + help="Batch size for the validation. Default batch_size == 1." + " The dataset length must be evenly divisible by the batch size.", + ) parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") parser.add_argument( "--validate", @@ -232,4 +248,13 @@ def transform(x): # Run the main function with parsed arguments with nncf.torch.disable_patching(): - main(args.suite, args.model, args.input_shape, args.quantize, args.validate, args.dataset, args.device) + main( + args.suite, + args.model, + args.input_shape, + args.quantize, + args.validate, + args.dataset, + args.device, + args.batch_size, + ) From 2a04ee6a6d27357c71086761e02be2ef66904076 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 16:20:53 +0100 Subject: [PATCH 10/18] Adapt subset size to keep +- 300 pics for calibration --- examples/openvino/aot/aot_openvino_compiler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index dba47c0dde3..909eabe3677 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -135,9 +135,12 @@ def main( def transform(x): return x[0] + default_subset_size = 300 + batch_size = calibration_dataset.batch_size quantized_model = quantize_pt2e( captured_model, quantizer, + subset_size=(default_subset_size // batch_size) + int(default_subset_size % batch_size > 0), calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False, ) From db7dc1318c9e54a64ebc54e7f5b5cd1d945e42ac Mon Sep 17 00:00:00 2001 From: Daniil Lyakhov Date: Tue, 11 Feb 2025 16:35:59 +0100 Subject: [PATCH 11/18] Apply suggestions from code review Co-authored-by: Alexander Suslov --- examples/openvino/aot/aot_openvino_compiler.py | 4 +--- examples/openvino/openvino_build_example.sh | 8 +++----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 909eabe3677..cf41ff318fd 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -30,8 +30,6 @@ from transformers import AutoModel import nncf -import nncf.experimental -import nncf.experimental.torch from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e @@ -239,7 +237,7 @@ def transform(x): action="store_true", help="Enable model validation. 
--dataset argument is requred for the validation.", ) - parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") + parser.add_argument("--dataset", type=str, help="Path to the validation dataset.") parser.add_argument( "--device", type=str, diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index 52c508d8ee2..a490ff30154 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -34,7 +34,6 @@ main() { local example_dir=examples/openvino local example_build_dir="${build_dir}/${example_dir}" local cmake_prefix_path="${PWD}/${build_dir}/lib/cmake/ExecuTorch;${PWD}/${build_dir}/third-party/gflags;" - rm -rf "${example_build_dir}" ## OpenVINO original @@ -43,11 +42,10 @@ main() { -B"${example_build_dir}" \ $EXECUTORCH_ROOT/$example_dir +<<<<<<< HEAD:examples/openvino/openvino_build_example.sh cmake --build "${example_build_dir}" -j$(nproc) - - # Switch back to the original directory - cd - > /dev/null - +======= + cmake --build "${example_build_dir}" -j5 # Print a success message echo "Build successfully completed." } From de3f50b5d33f79acd28b37be64f7b40de7e04278 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 17:17:06 +0100 Subject: [PATCH 12/18] Comments --- examples/openvino/aot/README.md | 46 ++++-- .../openvino/aot/aot_openvino_compiler.py | 146 +++++++++++------- 2 files changed, 125 insertions(+), 67 deletions(-) diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md index 46e476a8408..5fd97dba21e 100644 --- a/examples/openvino/aot/README.md +++ b/examples/openvino/aot/README.md @@ -11,34 +11,41 @@ python aot_openvino_compiler.py --suite --model --inp ``` ### **Arguments** -- **`--suite`** (required): - Specifies the model suite to use. +- **`--suite`** (required): + Specifies the model suite to use. Supported values: - `timm` (e.g., VGG16, ResNet50) - `torchvision` (e.g., resnet18, mobilenet_v2) - `huggingface` (e.g., bert-base-uncased) -- **`--model`** (required): - Name of the model to export. +- **`--model`** (required): + Name of the model to export. Examples: - For `timm`: `vgg16`, `resnet50` - For `torchvision`: `resnet18`, `mobilenet_v2` - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` -- **`--input_shape`** (required): - Input shape for the model. Provide this as a **list** or **tuple**. +- **`--input_shape`**: + Input shape for the model. Provide this as a **list** or **tuple**. Examples: - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) - `(1, 3, 224, 224)` +- **`--batch_size`** : + Batch size for the validation. Default batch_size == 1. + The dataset length must be evenly divisible by the batch size. + - **`--quantize`** (optional): Enable model quantization: Default is False. +- **`--quantize`** (optional): + Enable model validation. --dataset argument is requred for the validation. + - **`--dataset`** (optional): - Path to the calibration dataset. TODO: It is necessary to think in what form to support the dataset. For the experiment, tiny-imagenet is used, which can be downloaded from here http://cs231n.stanford.edu/tiny-imagenet-200.zip and specify the path to it. + Path to the imagenet-like calibration dataset. -- **`--device`** (optional): - Target device for the compiled model. Default is `CPU`. +- **`--device`** (optional) + Target device for the compiled model. Default is `CPU`. 
Examples: `CPU`, `GPU` @@ -58,22 +65,31 @@ python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_sha ```bash python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU ``` +### Export and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset +``` + +### Export, quantize and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize +``` ## **Notes** -1. **Input Shape in Zsh**: +1. **Input Shape in Zsh**: If you are using Zsh, wrap `--input_shape` in quotes or use a tuple: ```bash --input_shape '[1, 3, 224, 224]' --input_shape "(1, 3, 224, 224)" ``` -2. **Model Compatibility**: +2. **Model Compatibility**: Ensure the specified `model_name` exists in the selected `suite`. Use the corresponding library's documentation to verify model availability. -3. **Output File**: +3. **Output File**: The exported model will be saved as `.pte` in the current directory. -4. **Dependencies**: +4. **Dependencies**: - Python 3.8+ - PyTorch - Executorch @@ -82,14 +98,14 @@ python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased -- - Transformers (`pip install transformers`) ## **Error Handling** -- **Model Not Found**: +- **Model Not Found**: If the script raises an error such as: ```bash ValueError: Model not found ``` Verify that the model name is correct for the chosen suite. -- **Unsupported Input Shape**: +- **Unsupported Input Shape**: Ensure `--input_shape` is provided as a valid list or tuple. 
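For readers who want the quantization step without the CLI wrapper, the path that `aot_openvino_compiler.py` drives can be condensed into a short sketch. This is illustrative only: the helper name `quantize_for_openvino`, the `calibration_loader` argument, and the `executorch.backends.openvino.quantizer` import path are assumptions, while the individual calls mirror the ones used in the example script.

```python
import nncf
import torch
from executorch.backends.openvino.quantizer import OpenVINOQuantizer  # assumed install path
from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e
from torch.export import export


def quantize_for_openvino(model: torch.nn.Module, example_args, calibration_loader):
    # Export of an NNCF-patched model is not supported, so patching is disabled.
    with nncf.torch.disable_patching():
        # Capture the eager model into an FX graph via torch.export.
        captured_model = export(model.eval(), example_args).module()
        quantizer = OpenVINOQuantizer()
        # quantize_pt2e annotates the graph, calibrates it on the dataset and
        # keeps quantize/dequantize nodes unfolded for the later lowering step.
        return quantize_pt2e(
            captured_model,
            quantizer,
            calibration_dataset=nncf.Dataset(calibration_loader, transform_func=lambda data: data[0]),
            fold_quantize=False,
        )
```

The returned graph module is then re-exported and lowered to a `.pte` file in the same way as the FP32 path.
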
diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index cf41ff318fd..4f45fc1d426 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -8,6 +8,7 @@ import os import shutil import subprocess +from itertools import islice from pathlib import Path import executorch @@ -24,6 +25,8 @@ from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform +from torch.ao.quantization.quantize_pt2e import convert_pt2e +from torch.ao.quantization.quantize_pt2e import prepare_pt2e from torch.export import export from torch.export.exported_program import ExportedProgram from torch.fx.passes.graph_drawer import FxGraphDrawer @@ -54,8 +57,11 @@ def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, mod if suite == "torchvision": transform = torchvision_models.get_model_weights(model_name).DEFAULT.transforms() - else: + elif suite == "timm": transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) + else: + msg = f"Validation is not supported yet for the suite {suite}" + raise ValueError(msg) val_dataset = datasets.ImageFolder(val_dir, transform=transform) @@ -85,6 +91,76 @@ def dump_inputs(calibration_dataset, dest_path): return input_files, targets +def quantize_model( + captured_model: torch.fx.GraphModule, calibration_dataset: torch.utils.data.DataLoader, use_nncf: bool +) -> torch.fx.GraphModule: + quantizer = OpenVINOQuantizer() + + print("PTQ: Quantize the model") + default_subset_size = 300 + batch_size = calibration_dataset.batch_size + subset_size = (default_subset_size // batch_size) + int(default_subset_size % batch_size > 0) + + def transform(x): + return x[0] + + if use_nncf: + + quantized_model = quantize_pt2e( + captured_model, + quantizer, + subset_size=subset_size, + calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), + fold_quantize=False, + ) + else: + annotated_model = prepare_pt2e(captured_model, quantizer) + + print("PTQ: Calibrate the model...") + for data in islice(calibration_dataset, subset_size): + annotated_model(transform(data)) + + print("PTQ: Convert the quantized model...") + quantized_model = convert_pt2e(annotated_model, fold_quantize=False) + + return quantized_model + + +def validate_model(model_file_name: str, calibration_dataset: torch.utils.data.DataLoader) -> float: + # 1: Dump inputs + dest_path = Path("tmp_inputs") + out_path = Path("tmp_outputs") + for d in [dest_path, out_path]: + if os.path.exists(d): + shutil.rmtree(d) + os.makedirs(d) + + input_files, targets = dump_inputs(calibration_dataset, dest_path) + inp_list_file = dest_path / "in_list.txt" + with open(inp_list_file, "w") as f: + f.write("\n".join(input_files) + "\n") + + # 2: Run the executor + print("Run openvino_executor_runner...") + + subprocess.run( + [ + "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", + f"--model_path={model_file_name}", + f"--input_list_path={inp_list_file}", + f"--output_folder_path={out_path}", + ] + ) + + # 3: load the outputs and compare with the targets + predictions = [] + for i in range(len(input_files)): + tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) + predictions.extend(torch.tensor(tensor).reshape(-1, 1000).argmax(-1)) + + return accuracy_score(predictions, targets) + + def main( suite: str, model_name: str, @@ -94,6 +170,7 @@ def main( 
dataset_path: str, device: str, batch_size: int, + quantization_flow: str, ): # Load the selected model model = load_model(suite, model_name) @@ -104,7 +181,7 @@ def main( input_shape = tuple(next(iter(calibration_dataset))[0].shape) print(f"Input shape retrieved from the model config: {input_shape}") # Ensure input_shape is a tuple - elif isinstance(input_shape, list): + elif isinstance(input_shape, (list, tuple)): input_shape = tuple(input_shape) else: msg = "Input shape must be a list or tuple." @@ -124,23 +201,8 @@ def main( if not dataset_path: msg = "Quantization requires a calibration dataset." raise ValueError(msg) - - captured_model = aten_dialect.module() - quantizer = OpenVINOQuantizer() - - print("PTQ: Quantize the model") - - def transform(x): - return x[0] - - default_subset_size = 300 - batch_size = calibration_dataset.batch_size - quantized_model = quantize_pt2e( - captured_model, - quantizer, - subset_size=(default_subset_size // batch_size) + int(default_subset_size % batch_size > 0), - calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), - fold_quantize=False, + quantized_model = quantize_model( + aten_dialect.module(), calibration_dataset, use_nncf=quantization_flow == "nncf" ) visualize_fx_model(quantized_model, f"{model_name}_int8.svg") @@ -172,39 +234,8 @@ def transform(x): msg = "Validateion requires a calibration dataset." raise ValueError(msg) - print("Start validation of the quantized model:") - # 1: Dump inputs - dest_path = Path("tmp_inputs") - out_path = Path("tmp_outputs") - for d in [dest_path, out_path]: - if os.path.exists(d): - shutil.rmtree(d) - os.makedirs(d) - - input_files, targets = dump_inputs(calibration_dataset, dest_path) - inp_list_file = dest_path / "in_list.txt" - with open(inp_list_file, "w") as f: - f.write("\n".join(input_files) + "\n") - - # 2: Run the executor - print("Run openvino_executor_runner...") - - subprocess.run( - [ - "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", - f"--model_path={model_file_name}", - f"--input_list_path={inp_list_file}", - f"--output_folder_path={out_path}", - ] - ) - - # 3: load the outputs and compare with the targets - predictions = [] - for i in range(len(input_files)): - tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) - predictions.extend(torch.tensor(tensor).reshape(-1, 1000).argmax(-1)) - - acc_top1 = accuracy_score(predictions, targets) + print("Start validation of the model:") + acc_top1 = validate_model(model_file_name, calibration_dataset) print(f"acc@1: {acc_top1}") @@ -244,10 +275,20 @@ def transform(x): default="CPU", help="Target device for compiling the model (e.g., CPU, GPU). Default is CPU.", ) + parser.add_argument( + "--quantization_flow", + type=str, + choices=["pt2e", "nncf"], + default="nncf", + help="Select the quantization flow (nncf or pt2e):" + " pt2e is the default torch.ao quantization flow, while" + " nncf is a custom method with additional algorithms to improve model performance.", + ) args = parser.parse_args() # Run the main function with parsed arguments + # Disable nncf patching as export of the patched model is not supported. 
with nncf.torch.disable_patching(): main( args.suite, @@ -258,4 +299,5 @@ def transform(x): args.dataset, args.device, args.batch_size, + args.quantization_flow, ) From 17fe62f001fd731be97b5242d8f41893c144944a Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 18:02:54 +0100 Subject: [PATCH 13/18] OpenVINOQuantizer: constructor arguments have been refined --- backends/openvino/quantizer/quantizer.py | 71 +++++++++++------------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 63da8325e4f..8ce1ce6dda1 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -5,6 +5,7 @@ # directory of this source tree for more details. from collections import defaultdict +from enum import Enum from typing import Dict, List, Optional, Tuple import torch.fx @@ -20,13 +21,25 @@ import nncf import nncf.common.quantization as q import nncf.experimental.torch.fx as nncf_fx -import nncf.parameters as p -import nncf.quantization.advanced_parameters as advanced_p from nncf.common.graph.graph import NNCFGraph QUANT_ANNOTATION_KEY = "quantization_annotation" +class QuantizationMode(Enum): + """ + Defines special quantization modes. + + - INT8_SYM: INT8 symmetric quantization for both activations and weights. + - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. + - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + """ + + INT8_SYM = "int8_sym" + INT8_MIXED = "int8_mixed" + INT8_TRANSFORMER = "int8_transformer" + + class OpenVINOQuantizer(Quantizer): """ Implementation of the Torch AO quantizer which annotates models with quantization annotations @@ -36,49 +49,31 @@ class OpenVINOQuantizer(Quantizer): def __init__( self, *, - mode: Optional[p.QuantizationMode] = None, - preset: Optional[q.structs.QuantizationPreset] = None, - target_device: p.TargetDevice = p.TargetDevice.ANY, - transformer_model: bool = False, + mode: Optional[QuantizationMode] = QuantizationMode.INT8_SYM, ignored_scope: Optional[nncf.IgnoredScope] = None, - overflow_fix: Optional[advanced_p.OverflowFix] = None, - quantize_outputs: bool = False, - activations_quantization_params: Optional[advanced_p.QuantizationParameters] = None, - weights_quantization_params: Optional[advanced_p.QuantizationParameters] = None, + **kwargs, ): """ - :param mode: Defines optimization mode for the algorithm. None by default. - :param preset: A preset controls the quantization mode (symmetric and asymmetric). - It can take the following values: - - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. - Default value is None. In this case, `mixed` preset is used for `transformer` - model type otherwise `performance`. - :param target_device: A target device the specificity of which will be taken - into account while compressing in order to obtain the best performance - for this type of device, defaults to TargetDevice.ANY. - :param model_type: Model type is needed to specify additional patterns - in the model. Supported only `transformer` now. + :param mode: Defines special quantization modes. + - INT8_SYM: INT8 symmetric quantization for both activations and weights. + - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. 
+ - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + Default value is INT8_SYM. :param ignored_scope: An ignored scope that defined the list of model control flow graph nodes to be ignored during quantization. - :param overflow_fix: This option controls whether to apply the overflow issue - fix for the 8-bit quantization. - :param quantize_outputs: Whether to insert additional quantizers right before - each of the model outputs. - :param activations_quantization_params: Quantization parameters for model - activations. - :param weights_quantization_params: Quantization parameters for model weights. + :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm. """ + if mode == QuantizationMode.INT8_SYM: + preset = q.structs.QuantizationPreset.PERFORMANCE + model_type = None + elif mode == QuantizationMode.INT8_MIXED: + preset = q.structs.QuantizationPreset.MIXED + model_type = None + else: + preset = None + model_type = nncf.parameters.ModelType.TRANSFORMER self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( - mode=mode, - preset=preset, - target_device=target_device, - model_type=p.ModelType.TRANSFORMER if transformer_model else None, - ignored_scope=ignored_scope, - overflow_fix=overflow_fix, - quantize_outputs=quantize_outputs, - activations_quantization_params=activations_quantization_params, - weights_quantization_params=weights_quantization_params, + preset=preset, model_type=model_type, ignored_scope=ignored_scope, **kwargs ) def get_nncf_quantization_setup( From c7e07586a7d639d4f252efdbc77d4768f7ef5278 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 12 Feb 2025 15:04:16 +0100 Subject: [PATCH 14/18] set_ignored_scope | readme updates --- backends/openvino/quantizer/quantizer.py | 56 +++++++++++++++------ examples/openvino/aot/README.md | 10 ++-- examples/openvino/openvino_build_example.sh | 7 +-- 3 files changed, 52 insertions(+), 21 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 8ce1ce6dda1..480faeee635 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -19,7 +19,7 @@ from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec import nncf -import nncf.common.quantization as q +import nncf.common.quantization as quantization import nncf.experimental.torch.fx as nncf_fx from nncf.common.graph.graph import NNCFGraph @@ -50,7 +50,6 @@ def __init__( self, *, mode: Optional[QuantizationMode] = QuantizationMode.INT8_SYM, - ignored_scope: Optional[nncf.IgnoredScope] = None, **kwargs, ): """ @@ -59,26 +58,53 @@ def __init__( - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models Default value is INT8_SYM. - :param ignored_scope: An ignored scope that defined the list of model control - flow graph nodes to be ignored during quantization. :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm. 
""" if mode == QuantizationMode.INT8_SYM: - preset = q.structs.QuantizationPreset.PERFORMANCE + preset = quantization.structs.QuantizationPreset.PERFORMANCE model_type = None elif mode == QuantizationMode.INT8_MIXED: - preset = q.structs.QuantizationPreset.MIXED + preset = quantization.structs.QuantizationPreset.MIXED model_type = None else: preset = None model_type = nncf.parameters.ModelType.TRANSFORMER self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( - preset=preset, model_type=model_type, ignored_scope=ignored_scope, **kwargs + preset=preset, model_type=model_type, **kwargs + ) + + def set_ignored_scope( + self, + names: Optional[List[str]] = None, + patterns: Optional[List[str]] = None, + types: Optional[List[str]] = None, + subgraphs: Optional[List[Tuple[List[str], List[str]]]] = None, + validate: bool = True, + ) -> None: + """ + Provides an option to specify portions of model to be excluded from compression. + The ignored scope defines model sub-graphs that should be excluded from the quantization process. + + :param names: List of ignored node names. + :param patterns: List of regular expressions that define patterns for names of ignored nodes. + :param types: List of ignored operation types. + :param subgraphs: List of ignored subgraphs. + :param validate: If set to True, then a RuntimeError will be raised if any ignored scope does not match + in the model graph. + """ + self._min_max_algo.set_ignored_scope( + nncf.IgnoredScope( + names=names or [], + patterns=patterns or [], + types=types or [], + subgraphs=subgraphs or [], + validate=validate, + ) ) def get_nncf_quantization_setup( self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph - ) -> q.quantizer_setup.SingleConfigQuantizerSetup: + ) -> quantization.quantizer_setup.SingleConfigQuantizerSetup: self._min_max_algo._set_backend_entity(model) return self._min_max_algo.find_quantization_setup(model, nncf_graph) @@ -134,7 +160,9 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: @staticmethod def _get_unified_scales_root_quantizer_id( - nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: q.quantizer_setup.SingleConfigQuantizerSetup + nncf_graph: NNCFGraph, + quantizer_ids: List[int], + quantizer_setup: quantization.quantizer_setup.SingleConfigQuantizerSetup, ) -> int: """ Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` @@ -160,7 +188,7 @@ def _get_unified_scales_root_quantizer_id( def _get_edge_or_node_and_annotation( graph: torch.fx.Graph, nncf_graph: NNCFGraph, - qp: q.quantizer_setup.QuantizationPointBase, + qp: quantization.quantizer_setup.QuantizationPointBase, node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation], ) -> Tuple[EdgeOrNode, QuantizationAnnotation]: """ @@ -181,7 +209,7 @@ def _get_edge_or_node_and_annotation( @staticmethod def _get_edge_or_node( - target_node: torch.fx.Node, qp: q.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph + target_node: torch.fx.Node, qp: quantization.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph ) -> EdgeOrNode: """ Returns the edge or node based on the given target node and quantization point. 
@@ -231,7 +259,7 @@ def _fill_torch_ao_annotation( annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec @staticmethod - def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) -> QuantizationSpec: + def _get_torch_ao_qspec_from_qp(qp: quantization.quantizer_setup.QuantizationPointBase) -> QuantizationSpec: """ Retrieves the quantization configuration from the given quantization point and converts it into a QuantizationSpec. @@ -247,13 +275,13 @@ def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) -> if qconfig.per_channel: torch_qscheme = ( torch.per_channel_symmetric - if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC else torch.per_channel_affine ) else: torch_qscheme = ( torch.per_tensor_symmetric - if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC else torch.per_tensor_affine ) if is_weight: diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md index 5fd97dba21e..900a5b6cbe0 100644 --- a/examples/openvino/aot/README.md +++ b/examples/openvino/aot/README.md @@ -16,7 +16,7 @@ python aot_openvino_compiler.py --suite --model --inp Supported values: - `timm` (e.g., VGG16, ResNet50) - `torchvision` (e.g., resnet18, mobilenet_v2) - - `huggingface` (e.g., bert-base-uncased) + - `huggingface` (e.g., bert-base-uncased). NB: Quantization and validation is not supported yet. - **`--model`** (required): Name of the model to export. @@ -36,10 +36,12 @@ python aot_openvino_compiler.py --suite --model --inp The dataset length must be evenly divisible by the batch size. - **`--quantize`** (optional): - Enable model quantization: Default is False. + Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite does not supported yet. + + +- **`--validate`** (optional): + Enable model validation. --dataset argument is requred for the validation. `huggingface` suite does not supported yet. -- **`--quantize`** (optional): - Enable model validation. --dataset argument is requred for the validation. - **`--dataset`** (optional): Path to the imagenet-like calibration dataset. diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index a490ff30154..ee16658941d 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -42,10 +42,11 @@ main() { -B"${example_build_dir}" \ $EXECUTORCH_ROOT/$example_dir -<<<<<<< HEAD:examples/openvino/openvino_build_example.sh cmake --build "${example_build_dir}" -j$(nproc) -======= - cmake --build "${example_build_dir}" -j5 + + # Switch back to the original directory + cd - > /dev/null + # Print a success message echo "Build successfully completed." 
} From 19cbc69adbb6310f266b9f6bdfaeb47e6eeb18ff Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 14 Feb 2025 14:10:02 +0100 Subject: [PATCH 15/18] openvino_executor_runner.cpp: comments --- .../openvino_executor_runner.cpp | 210 +++++++++--------- 1 file changed, 108 insertions(+), 102 deletions(-) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 36c957bc433..c3922c793a3 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -52,57 +53,54 @@ using executorch::runtime::Result; using executorch::runtime::Span; using executorch::runtime::TensorInfo; -std::pair benchmark_method(Result &method, - int num_iterations) { - Error status = Error::Ok; - auto before_exec = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < num_iterations; ++i) { - status = method->execute(); - } - auto after_exec = std::chrono::high_resolution_clock::now(); - double elapsed_time = std::chrono::duration_cast( - after_exec - before_exec) - .count() / - 1000.0; - return std::make_pair(elapsed_time, status); +std::function build_set_input_tensor( + Result &method, std::vector &inputs, + const std::vector> input_paths) { + return [&inputs, &method, input_paths](size_t idx) -> void { + const MethodMeta method_meta = method->method_meta(); + for (int input_index = 0; input_index < method->inputs_size(); + ++input_index) { + + Result tensor_meta = + method_meta.input_tensor_meta(input_index); + auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); + + std::ifstream fin(input_paths[idx][input_index], std::ios::binary); + fin.seekg(0, fin.end); + size_t file_size = fin.tellg(); + + ET_CHECK_MSG( + file_size == tensor_meta->nbytes(), + "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", + input_index, file_size, tensor_meta->nbytes()); + + fin.seekg(0, fin.beg); + fin.read(static_cast(input_data_ptr), file_size); + fin.close(); + } + }; } -void dump_outputs(Result &method, const char *output_folder_path, - size_t index = 0) { - std::vector outputs(method->outputs_size()); - Error status = Error::Ok; - status = method->get_outputs(outputs.data(), outputs.size()); - ET_CHECK(status == Error::Ok); - for (size_t output_index = 0; output_index < method->outputs_size(); - output_index++) { - auto output_tensor = outputs[output_index].toTensor(); - auto output_file_name = std::string(output_folder_path) + "/output_" + - std::to_string(index) + "_" + - std::to_string(output_index) + ".raw"; - std::ofstream fout(output_file_name.c_str(), std::ios::binary); - fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); - fout.close(); - } +std::function +build_dump_outputs(std::vector &outputs, const size_t output_size, + const std::string output_folder_path) { + return [&outputs, output_folder_path, output_size](size_t idx) -> void { + for (size_t output_index = 0; output_index < output_size; output_index++) { + auto output_tensor = outputs[output_index].toTensor(); + auto output_file_name = output_folder_path + "/output_" + + std::to_string(idx) + "_" + + std::to_string(output_index) + ".raw"; + std::ofstream fout(output_file_name.c_str(), std::ios::binary); + fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); + fout.close(); + } + }; } -struct ProcessInputsResult { - double total_time; - size_t num_iter; - Error status; -}; - -ProcessInputsResult process_inputs(Result &method, - const char *input_list_path, - const char *output_folder_path) { - std::vector inputs(method->inputs_size()); - ET_LOG(Info, "%zu inputs: ", inputs.size()); - double total_time_elapsed = 0.; +std::vector> +get_inputs_paths(const char *input_list_path) { size_t idx = 0; - Error status = Error::Ok; - status = method->get_inputs(inputs.data(), inputs.size()); - ET_CHECK(status == Error::Ok); - auto split = [](std::string s, std::string delimiter) { size_t pos_start = 0, pos_end, delim_len = delimiter.length(); std::string token; @@ -120,45 +118,19 @@ ProcessInputsResult process_inputs(Result &method, // Read raw input tensor file names from input list file and // iterate each raw input tensor file to read values std::ifstream input_list(input_list_path); - if (input_list.is_open()) { - size_t num_inputs = method->inputs_size(); - std::string file_path; - while (std::getline(input_list, file_path)) { - auto input_files = split(file_path, " "); - if (input_files.size() == 0) { - break; - } - for (int input_index = 0; input_index < num_inputs; ++input_index) { - MethodMeta method_meta = method->method_meta(); - Result tensor_meta = - method_meta.input_tensor_meta(input_index); - auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); - - std::ifstream fin(input_files[input_index], std::ios::binary); - fin.seekg(0, fin.end); - size_t file_size = fin.tellg(); - - ET_CHECK_MSG( - file_size == tensor_meta->nbytes(), - "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", - input_index, file_size, tensor_meta->nbytes()); - - fin.seekg(0, fin.beg); - fin.read(static_cast(input_data_ptr), file_size); - fin.close(); - } - double time_elapsed; - std::tie(time_elapsed, status) = benchmark_method(method, 1); - if (status != Error::Ok) { - return {total_time_elapsed, idx, status}; - } - total_time_elapsed += time_elapsed; - dump_outputs(method, output_folder_path, idx++); - } - } else { + if (!input_list.is_open()) { ET_CHECK_MSG(false, "Failed to read input list file: %s", input_list_path); } - return {total_time_elapsed, idx, status}; + std::string file_path; + auto retval = std::vector>(); + while (std::getline(input_list, file_path)) { + auto input_files = split(file_path, " "); + if (input_files.size() == 0) { + break; + } + retval.push_back(input_files); + } + return retval; } int main(int argc, char **argv) { @@ -240,43 +212,77 @@ int main(int argc, char **argv) { ET_LOG(Info, "Method loaded."); // Prepare the input tensors for the method - auto inputs = prepare_input_tensors(*method); - ET_CHECK_MSG(inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, - static_cast(inputs.error())); + auto method_inputs = prepare_input_tensors(*method); + ET_CHECK_MSG(method_inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, + static_cast(method_inputs.error())); - double elapsed_time; Error status = Error::Ok; + std::vector inputs(method->inputs_size()); + ET_LOG(Info, "Number of input layers: %zu", inputs.size()); + + status = method->get_inputs(inputs.data(), inputs.size()); + ET_CHECK(status == Error::Ok); // If the input path list is provided, read input tensors from the files - if (!(FLAGS_input_list_path.empty()) and - !(FLAGS_output_folder_path.empty())) { + std::function set_input_tensor; + if (!FLAGS_input_list_path.empty()) { const char *input_list_path = FLAGS_input_list_path.c_str(); ET_LOG(Info, "Loading input tensors from the list provided in %s.", input_list_path); - const char *output_folder_path = FLAGS_output_folder_path.c_str(); - auto res = process_inputs(method, input_list_path, output_folder_path); - elapsed_time = res.total_time; - status = res.status; - num_iterations = res.num_iter; + const auto input_paths = get_inputs_paths(input_list_path); + num_iterations = input_paths.size(); + ET_LOG(Info, "Number of iters is set to the len of the inputs: %u.", + num_iterations); + + set_input_tensor = build_set_input_tensor(method, inputs, input_paths); } else { + set_input_tensor = [](size_t idx) -> void {}; + } + + ET_LOG(Info, "%zu Number of output layers: ", method->outputs_size()); + + std::vector outputs(method->outputs_size()); + status = method->get_outputs(outputs.data(), outputs.size()); + ET_CHECK(status == Error::Ok); - // Measure execution time for inference - std::tie(elapsed_time, status) = benchmark_method(method, num_iterations); + std::function dump_outputs; + if (!FLAGS_output_folder_path.empty()) { // Retrieve and print the method outputs - ET_LOG(Info, "%zu Number of outputs: ", method->outputs_size()); // If output folder path is provided, save output tensors // into raw tensor files. 
-    if (!(FLAGS_output_folder_path.empty())) {
-      const char *output_folder_path = FLAGS_output_folder_path.c_str();
-      ET_LOG(Info, "Saving output tensors into the output folder: %s.",
-             output_folder_path);
-      dump_outputs(method, output_folder_path);
+    const char *output_folder_path = FLAGS_output_folder_path.c_str();
+    ET_LOG(Info, "Saving output tensors into the output folder: %s.",
+           output_folder_path);
+    dump_outputs = build_dump_outputs(outputs, outputs.size(),
+                                      std::string(output_folder_path));
+
+  } else {
+    dump_outputs = [](size_t idx) {};
+  }
+
+  // Measure execution time for inference
+
+  double total_time_elapsed = 0.;
+  for (int i = 0; (i < num_iterations and status == Error::Ok); ++i) {
+    set_input_tensor(i);
+    auto before_exec = std::chrono::high_resolution_clock::now();
+    status = method->execute();
+    auto after_exec = std::chrono::high_resolution_clock::now();
+    if (status == Error::Ok) {
+      dump_outputs(i);
     }
+    double elapsed_time = std::chrono::duration_cast(
+                              after_exec - before_exec)
+                              .count() /
+                          1000.0;
+    total_time_elapsed += elapsed_time;
   }
+  // Log execution time and average time per iteration
   ET_LOG(Info, "%d inference took %f ms, avg %f ms", num_iterations,
-         elapsed_time, elapsed_time / static_cast(num_iterations));
+         total_time_elapsed,
+         total_time_elapsed / static_cast(num_iterations));
   ET_CHECK_MSG(status == Error::Ok,
                "Execution of method %s failed with status 0x%" PRIx32,
                method_name, static_cast(status));

From 0892b9d47760d330d06e1f2e816872f01ef2fbed Mon Sep 17 00:00:00 2001
From: Daniil Lyakhov
Date: Fri, 14 Feb 2025 15:52:53 +0100
Subject: [PATCH 16/18] Apply suggestions from code review

Co-authored-by: Yamini Nimmagadda
---
 examples/openvino/aot/aot_openvino_compiler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py
index 4f45fc1d426..25537910fe2 100644
--- a/examples/openvino/aot/aot_openvino_compiler.py
+++ b/examples/openvino/aot/aot_openvino_compiler.py
@@ -231,7 +231,7 @@ def main(
         raise ValueError(msg)

     if not dataset_path:
-        msg = "Validateion requires a calibration dataset."
+        msg = "Validation requires a calibration dataset."
         raise ValueError(msg)

     print("Start validation of the model:")
@@ -266,7 +266,7 @@ def main(
     parser.add_argument(
         "--validate",
         action="store_true",
-        help="Enable model validation. --dataset argument is requred for the validation.",
+        help="Enable model validation. --dataset argument is required for the validation.",
     )
     parser.add_argument("--dataset", type=str, help="Path to the validation dataset.")
     parser.add_argument(

From d1aa42556665eb837368e2f74faf286fc52ba562 Mon Sep 17 00:00:00 2001
From: dlyakhov
Date: Fri, 14 Feb 2025 16:09:24 +0100
Subject: [PATCH 17/18] aot_openvino_compiler.py: comments

---
 examples/openvino/aot/aot_openvino_compiler.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py
index 25537910fe2..f0844289580 100644
--- a/examples/openvino/aot/aot_openvino_compiler.py
+++ b/examples/openvino/aot/aot_openvino_compiler.py
@@ -29,7 +29,6 @@
 from torch.ao.quantization.quantize_pt2e import prepare_pt2e
 from torch.export import export
 from torch.export.exported_program import ExportedProgram
-from torch.fx.passes.graph_drawer import FxGraphDrawer
 from transformers import AutoModel

 import nncf
@@ -72,11 +71,6 @@ def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, mod
     return calibration_dataset


-def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str):
-    g = FxGraphDrawer(model, output_svg_path)
-    g.get_dot_graph().write_svg(output_svg_path)
-
-
 def dump_inputs(calibration_dataset, dest_path):
     input_files, targets = [], []
     for idx, data in enumerate(calibration_dataset):
@@ -204,7 +198,6 @@ def main(
         quantized_model = quantize_model(
             aten_dialect.module(), calibration_dataset, use_nncf=quantization_flow == "nncf"
         )
-        visualize_fx_model(quantized_model, f"{model_name}_int8.svg")

         aten_dialect: ExportedProgram = export(quantized_model, example_args)

From b9b604d8ed231355ed437fff05a0d213010f793e Mon Sep 17 00:00:00 2001
From: dlyakhov
Date: Fri, 14 Feb 2025 17:24:28 +0100
Subject: [PATCH 18/18] README

---
 examples/openvino/aot/README.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md
index 900a5b6cbe0..884ed55849f 100644
--- a/examples/openvino/aot/README.md
+++ b/examples/openvino/aot/README.md
@@ -25,7 +25,7 @@ python aot_openvino_compiler.py --suite --model --inp
   - For `torchvision`: `resnet18`, `mobilenet_v2`
   - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased`

-- **`--input_shape`**:
+- **`--input_shape`**(optional):
   Input shape for the model. Provide this as a **list** or **tuple**. Examples:
   - `[1, 3, 224, 224]` (Zsh users: wrap in quotes)
   - `(1, 3, 224, 224)`
@@ -38,11 +38,15 @@ python aot_openvino_compiler.py --suite --model --inp
 - **`--quantize`** (optional):
   Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite does not supported yet.
+- **`--quantization_flow`** (optional):
+  Specifies the way to quantize torch.fx.GraphModule.
+  Supported values:
+  - `nncf`: `nncf quantize_pt2e` API (default)
+  - `pt2e`: torch ao quantization pipeline.
 - **`--validate`** (optional):
   Enable model validation. --dataset argument is requred for the validation. `huggingface` suite does not supported yet.
-
 - **`--dataset`** (optional):
   Path to the imagenet-like calibration dataset.
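
As an illustration of the flags documented in the README change above, a hypothetical end-to-end invocation that combines quantization, the new `--quantization_flow` option, and validation could look like the sketch below. The model name, input shape, and dataset path are placeholder values taken from the README's own examples, not values mandated by these patches.

```bash
# Sketch: quantize resnet18 with the default NNCF quantize_pt2e flow,
# then validate the quantized model on an ImageNet-like dataset.
# /path/to/imagenet is a placeholder for a local ImageNet-style folder.
python aot_openvino_compiler.py \
    --suite torchvision \
    --model resnet18 \
    --input_shape "[1, 3, 224, 224]" \
    --quantize \
    --quantization_flow nncf \
    --validate \
    --dataset /path/to/imagenet
```

Passing `--quantization_flow pt2e` instead would route quantization through the Torch AO pipeline; omitting the flag keeps the documented `nncf` default.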