From 5d2784d9a76cbf2dd5ad5ec5e116b9e9ceccfa1f Mon Sep 17 00:00:00 2001
From: Aleksandr Suslov
Date: Wed, 5 Feb 2025 10:47:24 +0400
Subject: [PATCH 01/18] added init integration of quantization

---
 backends/openvino/__init__.py               |   3 +-
 backends/openvino/quantizer/__init__.py     |   3 +
 backends/openvino/quantizer/quantizer.py    | 309 ++++++++++++++++++
 backends/openvino/requirements.txt          |   1 +
 examples/openvino/CMakeLists.txt            |   1 +
 examples/openvino/aot/README.md             |   7 +
 .../openvino/aot/aot_openvino_compiler.py   |  71 +++-
 examples/openvino/openvino_build_example.sh |   1 +
 8 files changed, 392 insertions(+), 4 deletions(-)
 create mode 100644 backends/openvino/quantizer/__init__.py
 create mode 100644 backends/openvino/quantizer/quantizer.py

diff --git a/backends/openvino/__init__.py b/backends/openvino/__init__.py
index dac275d3f12..4a69f6b75ff 100644
--- a/backends/openvino/__init__.py
+++ b/backends/openvino/__init__.py
@@ -1,4 +1,5 @@
 from .partitioner import OpenvinoPartitioner
 from .preprocess import OpenvinoBackend
+from .quantizer.quantizer import OpenVINOQuantizer
 
-__all__ = [OpenvinoBackend, OpenvinoPartitioner]
+__all__ = [OpenvinoBackend, OpenvinoPartitioner, OpenVINOQuantizer]
diff --git a/backends/openvino/quantizer/__init__.py b/backends/openvino/quantizer/__init__.py
new file mode 100644
index 00000000000..03ea98e2c5b
--- /dev/null
+++ b/backends/openvino/quantizer/__init__.py
@@ -0,0 +1,3 @@
+from .quantizer import OpenVINOQuantizer
+
+__all__ = [OpenVINOQuantizer]
diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py
new file mode 100644
index 00000000000..58fde3e23f1
--- /dev/null
+++ b/backends/openvino/quantizer/quantizer.py
@@ -0,0 +1,309 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +from collections import defaultdict +from typing import Dict, List, Optional, Tuple, Union + +import torch.fx +from torch.ao.quantization.observer import HistogramObserver +from torch.ao.quantization.observer import PerChannelMinMaxObserver +from torch.ao.quantization.quantizer.quantizer import EdgeOrNode +from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation +from torch.ao.quantization.quantizer.quantizer import QuantizationSpec +from torch.ao.quantization.quantizer.quantizer import QuantizationSpecBase +from torch.ao.quantization.quantizer.quantizer import Quantizer +from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec + +import nncf +from nncf.common.graph.graph import NNCFGraph +from nncf.common.logging import nncf_logger +from nncf.common.quantization.quantizer_propagation.solver import QuantizerPropagationRule +from nncf.common.quantization.quantizer_setup import QuantizationPointBase +from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup +from nncf.common.quantization.structs import QuantizationPreset +from nncf.common.quantization.structs import QuantizationScheme +from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter +from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name +from nncf.experimental.torch.fx.transformations import fold_constant_except_qdq +from nncf.parameters import ModelType +from nncf.parameters import QuantizationMode +from nncf.parameters import TargetDevice +from nncf.quantization.advanced_parameters import FP8QuantizationParameters +from nncf.quantization.advanced_parameters import OverflowFix +from nncf.quantization.advanced_parameters import QuantizationParameters +from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization +from nncf.scopes import IgnoredScope +from nncf.torch.model_graph_manager import get_weight_tensor_port_ids + +QUANT_ANNOTATION_KEY = "quantization_annotation" + + +class OpenVINOQuantizer(Quantizer): + """ + Implementation of the Torch AO quantizer which annotates models with quantization annotations + optimally for the inference via OpenVINO. + """ + + def __init__( + self, + *, + mode: Optional[QuantizationMode] = None, + preset: Optional[QuantizationPreset] = None, + target_device: TargetDevice = TargetDevice.ANY, + model_type: Optional[ModelType] = None, + ignored_scope: Optional[IgnoredScope] = None, + overflow_fix: Optional[OverflowFix] = None, + quantize_outputs: bool = False, + activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, + weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, + quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE, + ): + """ + :param mode: Defines optimization mode for the algorithm. None by default. + :param preset: A preset controls the quantization mode (symmetric and asymmetric). + It can take the following values: + - `performance`: Symmetric quantization of weights and activations. + - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. + Default value is None. In this case, `mixed` preset is used for `transformer` + model type otherwise `performance`. + :param target_device: A target device the specificity of which will be taken + into account while compressing in order to obtain the best performance + for this type of device, defaults to TargetDevice.ANY. 
:param model_type: Model type is needed to specify additional patterns
+            in the model. Currently, only the `transformer` type is supported.
+        :param ignored_scope: An ignored scope that defines the list of model control
+            flow graph nodes to be ignored during quantization.
+        :param overflow_fix: This option controls whether to apply the overflow issue
+            fix for the 8-bit quantization.
+        :param quantize_outputs: Whether to insert additional quantizers right before
+            each of the model outputs.
+        :param activations_quantization_params: Quantization parameters for model
+            activations.
+        :param weights_quantization_params: Quantization parameters for model weights.
+        :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers.
+            MERGE_ALL_IN_ONE by default.
+        """
+        self._min_max_algo = MinMaxQuantization(
+            mode=mode,
+            preset=preset,
+            target_device=target_device,
+            model_type=model_type,
+            ignored_scope=ignored_scope,
+            overflow_fix=overflow_fix,
+            quantize_outputs=quantize_outputs,
+            activations_quantization_params=activations_quantization_params,
+            weights_quantization_params=weights_quantization_params,
+            quantizer_propagation_rule=quantizer_propagation_rule,
+        )
+
+    def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
+        self._min_max_algo._set_backend_entity(model)
+        return self._min_max_algo.find_quantization_setup(model, nncf_graph)
+
+    def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
+        nncf_graph = GraphConverter.create_nncf_graph(model)
+        quantization_setup = self.get_quantization_setup(model, nncf_graph)
+
+        graph = model.graph
+        node_vs_torch_annotation = defaultdict(QuantizationAnnotation)
+
+        for qp in quantization_setup.quantization_points.values():
+            edge_or_node, annotation = self._get_edge_or_node_and_annotation(
+                graph, nncf_graph, qp, node_vs_torch_annotation
+            )
+            qspec = self._get_torch_ao_qspec_from_qp(qp)
+            self._fill_torch_ao_annotation(edge_or_node, qspec, annotation)
+
+        for quantizer_ids in quantization_setup.unified_scale_groups.values():
+
+            root_quantizer_id = self._get_unified_scales_root_quantizer_id(
+                nncf_graph, quantizer_ids, quantization_setup
+            )
+            root_qp = quantization_setup.quantization_points[root_quantizer_id]
+
+            if any(root_qp.qconfig != quantization_setup.quantization_points[q_id].qconfig for q_id in quantizer_ids):
+                qps = [quantization_setup.quantization_points[q_id] for q_id in quantizer_ids]
+                msg = (
+                    "Different quantization configs are set to one unified scale group:"
+                    f"{[(qp.insertion_point.__dict__, str(qp.qconfig)) for qp in qps]}"
+                )
+                raise nncf.InternalError(msg)
+
+            root_target_node = get_graph_node_by_name(graph, root_qp.insertion_point.target_node_name)
+            root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph)
+
+            for quantizer_id in quantizer_ids:
+                if quantizer_id == root_quantizer_id:
+                    continue
+
+                qspec = SharedQuantizationSpec(root_edge_or_node)
+                qp = quantization_setup.quantization_points[quantizer_id]
+                edge_or_node, annotation = self._get_edge_or_node_and_annotation(
+                    graph, nncf_graph, qp, node_vs_torch_annotation
+                )
+                self._fill_torch_ao_annotation(edge_or_node, qspec, annotation)
+
+        for node, annotation in node_vs_torch_annotation.items():
+            assert QUANT_ANNOTATION_KEY not in node.meta
+            node.meta[QUANT_ANNOTATION_KEY] = annotation
+
+    @staticmethod
+    def _get_unified_scales_root_quantizer_id(
+        nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: 
SingleConfigQuantizerSetup + ) -> int: + """ + Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` + in the given NNCFGraph. This is required by the `_get_obs_or_fq_map` function. + Refer to: https://github.com/pytorch/pytorch/blob/main/torch/ao/quantization/pt2e/prepare.py#L291 + + :param nncf_graph: The NNCFGraph instance. + :param quantizer_ids: The list of quantizer IDs to evaluate. + :param quantizer_setup: The instance of SingleConfigQuantizerSetup. + :return: The ID of the earliest quantizer node in terms of `nncf_node.node_id`. + """ + nncf_node_quantizer_id = None + root_quantizer_id = None + for quantizer_id in quantizer_ids: + target_node_name = quantizer_setup.quantization_points[quantizer_id].insertion_point.target_node_name + nncf_node = nncf_graph.get_node_by_name(target_node_name) + if nncf_node_quantizer_id is None or nncf_node.node_id < nncf_node_quantizer_id: + root_quantizer_id = quantizer_id + nncf_node_quantizer_id = nncf_node.node_id + return root_quantizer_id + + @staticmethod + def _get_edge_or_node_and_annotation( + graph: torch.fx.Graph, + nncf_graph: NNCFGraph, + qp: QuantizationPointBase, + node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation], + ) -> Tuple[EdgeOrNode, QuantizationAnnotation]: + """ + Retrieves the edge or node and its corresponding QuantizationAnnotation based on the given graph, + quantization point, and node-to-annotation mapping. + + :param graph: torch.fx.Graph instance. + :param nncf_graph: NNCFGraph instance. + :param qp: QuantizationPointBase instance. + :param node_vs_torch_annotation: A dictionary mapping torch.fx.GraphNode objects to their respective + QuantizationAnnotations. + :return: A tuple containing the EdgeOrNode and its associated QuantizationAnnotation. + """ + target_node = get_graph_node_by_name(graph, qp.insertion_point.target_node_name) + annotation = node_vs_torch_annotation[target_node] + edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) + return edge_or_node, annotation + + @staticmethod + def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nncf_graph: NNCFGraph) -> EdgeOrNode: + """ + Returns the edge or node based on the given target node and quantization point. + + :param target_node: Target node instance. + :param qp: QuantizationPointBase instance. + :param graph: NNCFGraph instance. + :return: The corresponding EdgeOrNode derived from the target node and quantization point. + """ + ip = qp.insertion_point + if qp.is_weight_quantization_point(): + nncf_node = nncf_graph.get_node_by_name(target_node.name) + weights_ports_ids = get_weight_tensor_port_ids(nncf_node, nncf_graph) + if len(weights_ports_ids) > 1: + # TODO(dlyakhov): support quantization for nodes with several weights + nncf_logger.warning( + f"Quantization of the weighted node {target_node.name}" + " is not yet supported by the OpenVINOQuantizer." + f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." + f" Quantizable weights are located on ports: {weights_ports_ids}." 
+ ) + weight_node = target_node.all_input_nodes[weights_ports_ids[0]] + return (weight_node, target_node) + + if ip.input_port_id is None: + return target_node + + node = target_node.all_input_nodes[ip.input_port_id] + return (node, target_node) + + @staticmethod + def _fill_torch_ao_annotation( + edge_or_node: EdgeOrNode, + qspec: QuantizationSpecBase, + annotation_to_update: QuantizationAnnotation, + ) -> None: + """ + Helper method to update the annotation_to_update based on the specified edge_or_node and qspec. + + :param edge_or_node: The target EdgeOrNode to be used for the update. + :param qspec: An instance of QuantizationSpecBase representing the quantization specification to apply. + :param annotation_to_update: The annotation to update based on the edge_or_node and qspec. + """ + if isinstance(edge_or_node, torch.fx.Node): + annotation_to_update.output_qspec = qspec + else: + annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec + + @staticmethod + def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: + """ + Retrieves the quantization configuration from the given quantization point and + converts it into a QuantizationSpec. + + :param qp: An instance of QuantizationPointBase. + :return: A QuantizationSpec retrieved and converted from the quantization point. + """ + # Eps value is copied from nncf/torch/quantization/layers.py + extra_args = {"eps": 1e-16} + qconfig = qp.qconfig + is_weight = qp.is_weight_quantization_point() + + if qconfig.per_channel: + torch_qscheme = ( + torch.per_channel_symmetric + if qconfig.mode is QuantizationScheme.SYMMETRIC + else torch.per_channel_affine + ) + else: + torch_qscheme = ( + torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine + ) + if is_weight: + observer = PerChannelMinMaxObserver + quant_min = -128 + quant_max = 127 + dtype = torch.int8 + channel_axis = 0 + else: + observer = ( + HistogramObserver + if torch_qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine] + else PerChannelMinMaxObserver + ) + quant_min = 0 + quant_max = 255 + dtype = torch.int8 if qconfig.signedness_to_force else torch.uint8 + channel_axis = 1 # channel dim for activations + return QuantizationSpec( + dtype=dtype, + observer_or_fake_quant_ctr=observer.with_args(**extra_args), + quant_min=quant_min, + quant_max=quant_max, + qscheme=torch_qscheme, + ch_axis=channel_axis, + is_dynamic=False, + ) + + def validate(self, model: torch.fx.GraphModule) -> None: + pass + + def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: + fold_constant_except_qdq(model) + return model diff --git a/backends/openvino/requirements.txt b/backends/openvino/requirements.txt index 7c3de886e27..f00257127a3 100644 --- a/backends/openvino/requirements.txt +++ b/backends/openvino/requirements.txt @@ -6,3 +6,4 @@ tokenizers transformers piq pillow +nncf @ https://github.com/openvinotoolkit/nncf.git diff --git a/examples/openvino/CMakeLists.txt b/examples/openvino/CMakeLists.txt index 4a1917fa3af..10638a7b5f7 100644 --- a/examples/openvino/CMakeLists.txt +++ b/examples/openvino/CMakeLists.txt @@ -55,6 +55,7 @@ target_include_directories(openvino_portable_ops_lib PUBLIC ${_common_include_di # Build Executor Runner add_executable(openvino_executor_runner ${_openvino_executor_runner__srcs}) + target_include_directories( openvino_executor_runner PUBLIC ${_common_include_directories} ${EXECUTORCH_ROOT}/cmake-openvino-out/third-party/gflags/include ) diff --git 
a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md
index 6c59f1dad41..46e476a8408 100644
--- a/examples/openvino/aot/README.md
+++ b/examples/openvino/aot/README.md
@@ -31,10 +31,17 @@ python aot_openvino_compiler.py --suite --model --inp
 - `[1, 3, 224, 224]` (Zsh users: wrap in quotes)
 - `(1, 3, 224, 224)`
 
+- **`--quantize`** (optional):
+  Enable model quantization. Default is False.
+
+- **`--dataset`** (optional):
+  Path to the calibration dataset. For the experiment, tiny-imagenet is used; download it from http://cs231n.stanford.edu/tiny-imagenet-200.zip and pass the path to it here. TODO: decide in what form datasets should be supported.
+
 - **`--device`** (optional):
   Target device for the compiled model. Default is `CPU`.
   Examples: `CPU`, `GPU`
 
+
 ## **Examples**
 ### Export a TIMM VGG16 model for the CPU
diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py
index 4674fbbd755..cc31e011e38 100644
--- a/examples/openvino/aot/aot_openvino_compiler.py
+++ b/examples/openvino/aot/aot_openvino_compiler.py
@@ -4,10 +4,15 @@
 # except in compliance with the License. See the license file in the root
 # directory of this source tree for more details.
 
+import nncf.experimental
+import nncf.experimental.torch
 import executorch
+import nncf
 import timm
 import torch
+import torchvision.datasets as datasets
 import torchvision.models as torchvision_models
+import torchvision.transforms as transforms
 from transformers import AutoModel
 from executorch.exir.backend.backend_details import CompileSpec
 from executorch.backends.openvino.preprocess import OpenvinoBackend
@@ -16,6 +21,12 @@ from torch.export import export, ExportedProgram
 from torch.export.exported_program import ExportedProgram
 import argparse
+from executorch.backends.openvino import OpenVINOQuantizer
+from torch.ao.quantization.quantize_pt2e import (
+    convert_pt2e,
+    prepare_pt2e,
+)
+
 
 # Function to load a model based on the selected suite
 def load_model(suite: str, model_name: str):
@@ -30,7 +41,48 @@ def load_model(suite: str, model_name: str):
     else:
         raise ValueError(f"Unsupported model suite: {suite}")
 
-def main(suite: str, model_name: str, input_shape, device: str):
+
+def load_calibration_dataset(dataset_path: str):
+    val_dir = f"{dataset_path}/val"
+
+    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+
+    val_dataset = datasets.ImageFolder(
+        val_dir,
+        transforms.Compose(
+            [
+                transforms.Resize(64),  # for tiny imagenet
+                transforms.ToTensor(),
+                normalize,
+            ]
+        ),
+    )
+
+    calibration_dataset = torch.utils.data.DataLoader(
+        val_dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True
+    )
+
+    return calibration_dataset
+
+
+def quantize_model(model: torch.fx.GraphModule, calibration_dataset: torch.utils.data.DataLoader, subset_size=300):
+    quantizer = OpenVINOQuantizer()
+
+    print("PTQ: Annotate the model...")
+    annotated_model = prepare_pt2e(model, quantizer)
+
+    print("PTQ: Calibrate the model...")
+    for idx, data in enumerate(calibration_dataset):
+        if idx >= subset_size:
+            break
+        annotated_model(data[0])
+
+    print("PTQ: Convert the quantized model...")
+    quantized_model = convert_pt2e(annotated_model)
+    return quantized_model
+
+
+def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str):
     # Ensure input_shape is a tuple
     if isinstance(input_shape, list):
         input_shape = tuple(input_shape)
@@ -44,9 +96,19 @@ def main(suite: str, model_name: str, 
input_shape, device: str): # Provide input example_args = (torch.randn(*input_shape), ) - # Export to aten dialect using torch.export + # Export the model to the aten dialect aten_dialect: ExportedProgram = export(model, example_args) + if quantize: + # Quantize model + if not dataset_path: + raise ValueError("Quantization requires a calibration dataset.") + calibration_dataset = load_calibration_dataset(dataset_path) + + captured_model = aten_dialect.module() + quantized_model = quantize_model(captured_model, calibration_dataset) + aten_dialect: ExportedProgram = export(quantized_model, example_args) + # Convert to edge dialect edge_program: EdgeProgramManager = to_edge(aten_dialect) to_be_lowered_module = edge_program.exported_program() @@ -71,10 +133,13 @@ def main(suite: str, model_name: str, input_shape, device: str): parser.add_argument("--model", type=str, required=True, help="Model name to be loaded.") parser.add_argument("--input_shape", type=eval, required=True, help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).") + parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") + parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") parser.add_argument("--device", type=str, default="CPU", help="Target device for compiling the model (e.g., CPU, GPU). Default is CPU.") args = parser.parse_args() # Run the main function with parsed arguments - main(args.suite, args.model, args.input_shape, args.device) + with nncf.torch.disable_patching(): + main(args.suite, args.model, args.input_shape, args.quantize, args.dataset, args.device) diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index ee16658941d..52c508d8ee2 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -34,6 +34,7 @@ main() { local example_dir=examples/openvino local example_build_dir="${build_dir}/${example_dir}" local cmake_prefix_path="${PWD}/${build_dir}/lib/cmake/ExecuTorch;${PWD}/${build_dir}/third-party/gflags;" + rm -rf "${example_build_dir}" ## OpenVINO original From 61488d5a9d77ebf86658392c8ee0e24b6eb9f550 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 7 Feb 2025 18:12:06 +0100 Subject: [PATCH 02/18] deit3_small_patch16_224_in21ft1k --- backends/openvino/quantizer/quantizer.py | 2 ++ .../openvino/aot/aot_openvino_compiler.py | 25 +++++++++++-------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 58fde3e23f1..aefa91f7455 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -15,6 +15,7 @@ import torch.fx from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.observer import PerChannelMinMaxObserver +from torch.ao.quantization.observer import MinMaxObserver from torch.ao.quantization.quantizer.quantizer import EdgeOrNode from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation from torch.ao.quantization.quantizer.quantizer import QuantizationSpec @@ -276,6 +277,7 @@ def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine ) if is_weight: + observer = PerChannelMinMaxObserver if qconfig.per_channel else MinMaxObserver observer = PerChannelMinMaxObserver quant_min = -128 
quant_max = 127 diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index cc31e011e38..dabf1c964fa 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -65,20 +65,17 @@ def load_calibration_dataset(dataset_path: str): return calibration_dataset -def quantize_model(model: torch.fx.GraphModule, calibration_dataset: torch.utils.data.DataLoader, subset_size=300): - quantizer = OpenVINOQuantizer() +def quantize_model(model: torch.fx.GraphModule, example_args, subset_size=300): + quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) print("PTQ: Annotate the model...") annotated_model = prepare_pt2e(model, quantizer) print("PTQ: Calibrate the model...") - for idx, data in enumerate(calibration_dataset): - if idx >= subset_size: - break - annotated_model(data[0]) + annotated_model(*example_args) print("PTQ: Convert the quantized model...") - quantized_model = convert_pt2e(annotated_model) + quantized_model = convert_pt2e(annotated_model, fold_quantize=False) return quantized_model @@ -106,7 +103,9 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: calibration_dataset = load_calibration_dataset(dataset_path) captured_model = aten_dialect.module() - quantized_model = quantize_model(captured_model, calibration_dataset) + visualize_fx_model(captured_model, f"{model_name}_fp32.svg") + quantized_model = quantize_model(captured_model, example_args) + visualize_fx_model(quantized_model, f"{model_name}_int8.svg") aten_dialect: ExportedProgram = export(quantized_model, example_args) # Convert to edge dialect @@ -121,9 +120,15 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) # Serialize and save it to a file - with open(f"{model_name}.pte", "wb") as file: + model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" + with open(model_name, "wb") as file: exec_prog.write_to_file(file) - print(f"Model exported and saved as {model_name}.pte on {device}.") + print(f"Model exported and saved as {model_name} on {device}.") + +from torch.fx.passes.graph_drawer import FxGraphDrawer +def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): + g = FxGraphDrawer(model, output_svg_path) + g.get_dot_graph().write_svg(output_svg_path) if __name__ == "__main__": # Argument parser for dynamic inputs From 42155a1d433d87428781099b9c1ba276e7aebb55 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 7 Feb 2025 18:28:57 +0100 Subject: [PATCH 03/18] Resnet-like model checked --- examples/openvino/aot/aot_openvino_compiler.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index dabf1c964fa..a062af4d001 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -66,7 +66,8 @@ def load_calibration_dataset(dataset_path: str): def quantize_model(model: torch.fx.GraphModule, example_args, subset_size=300): - quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) + #quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) + quantizer = OpenVINOQuantizer() print("PTQ: Annotate the model...") annotated_model = prepare_pt2e(model, 
quantizer) @@ -100,12 +101,12 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: # Quantize model if not dataset_path: raise ValueError("Quantization requires a calibration dataset.") - calibration_dataset = load_calibration_dataset(dataset_path) + #calibration_dataset = load_calibration_dataset(dataset_path) captured_model = aten_dialect.module() - visualize_fx_model(captured_model, f"{model_name}_fp32.svg") + #visualize_fx_model(captured_model, f"{model_name}_fp32.svg") quantized_model = quantize_model(captured_model, example_args) - visualize_fx_model(quantized_model, f"{model_name}_int8.svg") + #visualize_fx_model(quantized_model, f"{model_name}_int8.svg") aten_dialect: ExportedProgram = export(quantized_model, example_args) # Convert to edge dialect From 7c66314296db63523872df6407bfbc271d4d8e4c Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Mon, 10 Feb 2025 19:39:26 +0100 Subject: [PATCH 04/18] WIP --- backends/openvino/quantizer/quantizer.py | 87 +++++------- .../openvino/aot/aot_openvino_compiler.py | 132 ++++++++++++++---- .../openvino_executor_runner.cpp | 1 + 3 files changed, 142 insertions(+), 78 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index aefa91f7455..b5f43251426 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -10,12 +10,11 @@ # limitations under the License. from collections import defaultdict -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple import torch.fx from torch.ao.quantization.observer import HistogramObserver from torch.ao.quantization.observer import PerChannelMinMaxObserver -from torch.ao.quantization.observer import MinMaxObserver from torch.ao.quantization.quantizer.quantizer import EdgeOrNode from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation from torch.ao.quantization.quantizer.quantizer import QuantizationSpec @@ -24,25 +23,11 @@ from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec import nncf +import nncf.common.quantization as q +import nncf.experimental.torch.fx as nncf_fx +import nncf.parameters as p +import nncf.quantization.advanced_parameters as advanced_p from nncf.common.graph.graph import NNCFGraph -from nncf.common.logging import nncf_logger -from nncf.common.quantization.quantizer_propagation.solver import QuantizerPropagationRule -from nncf.common.quantization.quantizer_setup import QuantizationPointBase -from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup -from nncf.common.quantization.structs import QuantizationPreset -from nncf.common.quantization.structs import QuantizationScheme -from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter -from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name -from nncf.experimental.torch.fx.transformations import fold_constant_except_qdq -from nncf.parameters import ModelType -from nncf.parameters import QuantizationMode -from nncf.parameters import TargetDevice -from nncf.quantization.advanced_parameters import FP8QuantizationParameters -from nncf.quantization.advanced_parameters import OverflowFix -from nncf.quantization.advanced_parameters import QuantizationParameters -from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization -from nncf.scopes import IgnoredScope -from nncf.torch.model_graph_manager import get_weight_tensor_port_ids QUANT_ANNOTATION_KEY = 
"quantization_annotation" @@ -56,16 +41,15 @@ class OpenVINOQuantizer(Quantizer): def __init__( self, *, - mode: Optional[QuantizationMode] = None, - preset: Optional[QuantizationPreset] = None, - target_device: TargetDevice = TargetDevice.ANY, - model_type: Optional[ModelType] = None, - ignored_scope: Optional[IgnoredScope] = None, - overflow_fix: Optional[OverflowFix] = None, + mode: Optional[p.QuantizationMode] = None, + preset: Optional[q.structs.QuantizationPreset] = None, + target_device: p.TargetDevice = p.TargetDevice.ANY, + transformer_model: bool = False, + ignored_scope: Optional[nncf.IgnoredScope] = None, + overflow_fix: Optional[advanced_p.OverflowFix] = None, quantize_outputs: bool = False, - activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, - weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None, - quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE, + activations_quantization_params: Optional[advanced_p.QuantizationParameters] = None, + weights_quantization_params: Optional[advanced_p.QuantizationParameters] = None, ): """ :param mode: Defines optimization mode for the algorithm. None by default. @@ -89,29 +73,28 @@ def __init__( :param activations_quantization_params: Quantization parameters for model activations. :param weights_quantization_params: Quantization parameters for model weights. - :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers. - MERGE_ALL_IN_ONE by default. """ - self._min_max_algo = MinMaxQuantization( + self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( mode=mode, preset=preset, target_device=target_device, - model_type=model_type, + model_type=p.ModelType.TRANSFORMER if transformer_model else None, ignored_scope=ignored_scope, overflow_fix=overflow_fix, quantize_outputs=quantize_outputs, activations_quantization_params=activations_quantization_params, weights_quantization_params=weights_quantization_params, - quantizer_propagation_rule=quantizer_propagation_rule, ) - def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup: + def get_nncf_quantization_setup( + self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph + ) -> q.quantizer_setup.SingleConfigQuantizerSetup: self._min_max_algo._set_backend_entity(model) return self._min_max_algo.find_quantization_setup(model, nncf_graph) def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: - nncf_graph = GraphConverter.create_nncf_graph(model) - quantization_setup = self.get_quantization_setup(model, nncf_graph) + nncf_graph = nncf_fx.nncf_graph_builder.GraphConverter.create_nncf_graph(model) + quantization_setup = self.get_nncf_quantization_setup(model, nncf_graph) graph = model.graph node_vs_torch_annotation = defaultdict(QuantizationAnnotation) @@ -138,7 +121,9 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: ) raise nncf.InternalError(msg) - root_target_node = get_graph_node_by_name(graph, root_qp.insertion_point.target_node_name) + root_target_node = nncf_fx.node_utils.get_graph_node_by_name( + graph, root_qp.insertion_point.target_node_name + ) root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph) for quantizer_id in quantizer_ids: @@ -155,10 +140,11 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: for node, 
annotation in node_vs_torch_annotation.items(): assert QUANT_ANNOTATION_KEY not in node.meta node.meta[QUANT_ANNOTATION_KEY] = annotation + return model @staticmethod def _get_unified_scales_root_quantizer_id( - nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: SingleConfigQuantizerSetup + nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: q.quantizer_setup.SingleConfigQuantizerSetup ) -> int: """ Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` @@ -184,7 +170,7 @@ def _get_unified_scales_root_quantizer_id( def _get_edge_or_node_and_annotation( graph: torch.fx.Graph, nncf_graph: NNCFGraph, - qp: QuantizationPointBase, + qp: q.quantizer_setup.QuantizationPointBase, node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation], ) -> Tuple[EdgeOrNode, QuantizationAnnotation]: """ @@ -198,13 +184,15 @@ def _get_edge_or_node_and_annotation( QuantizationAnnotations. :return: A tuple containing the EdgeOrNode and its associated QuantizationAnnotation. """ - target_node = get_graph_node_by_name(graph, qp.insertion_point.target_node_name) + target_node = nncf_fx.node_utils.get_graph_node_by_name(graph, qp.insertion_point.target_node_name) annotation = node_vs_torch_annotation[target_node] edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph) return edge_or_node, annotation @staticmethod - def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nncf_graph: NNCFGraph) -> EdgeOrNode: + def _get_edge_or_node( + target_node: torch.fx.Node, qp: q.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph + ) -> EdgeOrNode: """ Returns the edge or node based on the given target node and quantization point. @@ -216,10 +204,10 @@ def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nnc ip = qp.insertion_point if qp.is_weight_quantization_point(): nncf_node = nncf_graph.get_node_by_name(target_node.name) - weights_ports_ids = get_weight_tensor_port_ids(nncf_node, nncf_graph) + weights_ports_ids = nncf.torch.model_graph_manager.get_weight_tensor_port_ids(nncf_node, nncf_graph) if len(weights_ports_ids) > 1: # TODO(dlyakhov): support quantization for nodes with several weights - nncf_logger.warning( + nncf.common.logging.nncf_logger.warning( f"Quantization of the weighted node {target_node.name}" " is not yet supported by the OpenVINOQuantizer." f" Only the weight on port ID {weights_ports_ids[0]} will be quantized." @@ -253,7 +241,7 @@ def _fill_torch_ao_annotation( annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec @staticmethod - def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: + def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) -> QuantizationSpec: """ Retrieves the quantization configuration from the given quantization point and converts it into a QuantizationSpec. 
@@ -269,15 +257,16 @@ def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec: if qconfig.per_channel: torch_qscheme = ( torch.per_channel_symmetric - if qconfig.mode is QuantizationScheme.SYMMETRIC + if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC else torch.per_channel_affine ) else: torch_qscheme = ( - torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine + torch.per_tensor_symmetric + if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC + else torch.per_tensor_affine ) if is_weight: - observer = PerChannelMinMaxObserver if qconfig.per_channel else MinMaxObserver observer = PerChannelMinMaxObserver quant_min = -128 quant_max = 127 @@ -307,5 +296,5 @@ def validate(self, model: torch.fx.GraphModule) -> None: pass def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: - fold_constant_except_qdq(model) + nncf_fx.transformations.fold_constant_except_qdq(model) return model diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index a062af4d001..928757c32e2 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -22,11 +22,15 @@ from torch.export.exported_program import ExportedProgram import argparse from executorch.backends.openvino import OpenVINOQuantizer +#from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer +from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e from torch.ao.quantization.quantize_pt2e import ( convert_pt2e, prepare_pt2e, ) - +from sklearn.metrics import accuracy_score +from timm.data import resolve_data_config +from timm.data.transforms_factory import create_transform # Function to load a model based on the selected suite def load_model(suite: str, model_name: str): @@ -42,20 +46,17 @@ def load_model(suite: str, model_name: str): raise ValueError(f"Unsupported model suite: {suite}") -def load_calibration_dataset(dataset_path: str): +def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module): val_dir = f"{dataset_path}/val" - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + if suite == "torchvision": + transform = torchvision_models.get_model_weights(model.name).transforms() + else: + transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) val_dataset = datasets.ImageFolder( val_dir, - transforms.Compose( - [ - transforms.Resize(64), # for tiny imagenet - transforms.ToTensor(), - normalize, - ] - ), + transform=transform ) calibration_dataset = torch.utils.data.DataLoader( @@ -65,21 +66,6 @@ def load_calibration_dataset(dataset_path: str): return calibration_dataset -def quantize_model(model: torch.fx.GraphModule, example_args, subset_size=300): - #quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"])) - quantizer = OpenVINOQuantizer() - - print("PTQ: Annotate the model...") - annotated_model = prepare_pt2e(model, quantizer) - - print("PTQ: Calibrate the model...") - annotated_model(*example_args) - - print("PTQ: Convert the quantized model...") - quantized_model = convert_pt2e(annotated_model, fold_quantize=False) - return quantized_model - - def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str): # Ensure input_shape is a tuple if isinstance(input_shape, list): @@ -98,15 +84,24 @@ 
def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: aten_dialect: ExportedProgram = export(model, example_args) if quantize: + if suite == "huggingface": + raise ValueError("Quantization of {suite} models did not support yet.") + # Quantize model if not dataset_path: raise ValueError("Quantization requires a calibration dataset.") - #calibration_dataset = load_calibration_dataset(dataset_path) + calibration_dataset = load_calibration_dataset(dataset_path, suite, model) captured_model = aten_dialect.module() #visualize_fx_model(captured_model, f"{model_name}_fp32.svg") - quantized_model = quantize_model(captured_model, example_args) - #visualize_fx_model(quantized_model, f"{model_name}_int8.svg") + quantizer = OpenVINOQuantizer() + + print("PTQ: Quantize the model") + def transform(x): + return x[0] + + quantized_model = quantize_pt2e(captured_model, quantizer, calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False) + aten_dialect: ExportedProgram = export(quantized_model, example_args) # Convert to edge dialect @@ -121,16 +116,95 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) # Serialize and save it to a file - model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" + model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" with open(model_name, "wb") as file: exec_prog.write_to_file(file) print(f"Model exported and saved as {model_name} on {device}.") + if quantize: + print("Start validation of the quantized model:") + + # 1: Dump inputs + import os + import shutil + + dest_path = "tmp_inputs" + out_path = "tmp_outputs" + targets, input_files = [], [] + for d in [dest_path, out_path]: + if os.path.exists(d): + shutil.rmtree(d) + os.makedirs(d) + input_list = "" + for idx, data in enumerate(calibration_dataset): + feature, target = data + targets.append(target) + file_name = f"{dest_path}/input_{idx}_0.raw" + input_list += file_name + " " + if not isinstance(feature, torch.Tensor): + feature = torch.tensor(feature) + feature.detach().numpy().tofile(file_name) + input_files.append(file_name) + + inp_list_file = os.path.join(dest_path, "in_list.txt") + with open(inp_list_file, "w") as f: + input_list = input_list.strip() + "\n" + f.write(input_list) + + # 2: Run the executor + print("Run openvino_executor_runner...") + import subprocess + breakpoint() + subprocess.run(["../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", + f"--model_path={model_name}", + f"--input_list_path={inp_list_file}", + f"--output_folder_path={out_path}", + #f"--num_iter={len(input_files)}" + ]) + + # 3: load the outputs and compare with the targets + import numpy as np + predictions = [] + for i in range(len(input_files)): + predictions.append( + np.fromfile( + os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32 + ) + ) + + k_val = [1, 5] + acc_top1 = accuracy_score(predictions, targets) + print(f"acc@1: {acc_top1}") + + from torch.fx.passes.graph_drawer import FxGraphDrawer def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): g = FxGraphDrawer(model, output_svg_path) g.get_dot_graph().write_svg(output_svg_path) +def generate_inputs(dest_path: str, file_name: str, inputs=None, input_list=None): + input_list_file = None + input_files = [] + + # Prepare input list + if input_list is not None: + input_list_file = f"{dest_path}/{file_name}" + with 
open(input_list_file, "w") as f: + f.write(input_list) + f.flush() + + # Prepare input data + if inputs is not None: + for idx, data in enumerate(inputs): + for i, d in enumerate(data): + file_name = f"{dest_path}/input_{idx}_{i}.raw" + if not isinstance(d, torch.Tensor): + d = torch.tensor(d) + d.detach().numpy().tofile(file_name) + input_files.append(file_name) + + return input_list_file, input_files + if __name__ == "__main__": # Argument parser for dynamic inputs parser = argparse.ArgumentParser(description="Export models with executorch.") diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 7615b63649a..b0d3a9004c2 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -179,6 +179,7 @@ int main(int argc, char** argv) { std::string file_path; while (std::getline(input_list, file_path)) { auto input_files = split(file_path, " "); + ET_LOG(Info, "INPUT_FILES.SIZE: %ld", input_files.size()); if (input_files.size() == 0) { break; } From c1fa9e25851b5819dea18b0070c9ab46cc2e0c3a Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 10:31:15 +0100 Subject: [PATCH 05/18] Formating --- backends/openvino/quantizer/quantizer.py | 15 +- .../openvino/aot/aot_openvino_compiler.py | 128 ++++++++---------- .../openvino_executor_runner.cpp | 2 + 3 files changed, 63 insertions(+), 82 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index b5f43251426..63da8325e4f 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -1,13 +1,8 @@ -# Copyright (c) 2025 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Copyright (c) Intel Corporation +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. from collections import defaultdict from typing import Dict, List, Optional, Tuple diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 928757c32e2..91df971403c 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -4,33 +4,31 @@ # except in compliance with the License. See the license file in the root # directory of this source tree for more details. 
-import nncf.experimental -import nncf.experimental.torch +import argparse + import executorch -import nncf import timm import torch import torchvision.datasets as datasets import torchvision.models as torchvision_models -import torchvision.transforms as transforms -from transformers import AutoModel -from executorch.exir.backend.backend_details import CompileSpec -from executorch.backends.openvino.preprocess import OpenvinoBackend -from executorch.backends.openvino.partitioner import OpenvinoPartitioner -from executorch.exir import EdgeProgramManager, to_edge -from torch.export import export, ExportedProgram -from torch.export.exported_program import ExportedProgram -import argparse from executorch.backends.openvino import OpenVINOQuantizer -#from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer -from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e -from torch.ao.quantization.quantize_pt2e import ( - convert_pt2e, - prepare_pt2e, -) +from executorch.backends.openvino.partitioner import OpenvinoPartitioner +from executorch.exir import EdgeProgramManager +from executorch.exir import to_edge +from executorch.exir.backend.backend_details import CompileSpec from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform +from torch.export import ExportedProgram +from torch.export import export +from torch.export.exported_program import ExportedProgram +from transformers import AutoModel + +import nncf +import nncf.experimental +import nncf.experimental.torch +from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e + # Function to load a model based on the selected suite def load_model(suite: str, model_name: str): @@ -54,10 +52,7 @@ def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Modu else: transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) - val_dataset = datasets.ImageFolder( - val_dir, - transform=transform - ) + val_dataset = datasets.ImageFolder(val_dir, transform=transform) calibration_dataset = torch.utils.data.DataLoader( val_dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True @@ -78,7 +73,7 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: model = model.eval() # Provide input - example_args = (torch.randn(*input_shape), ) + example_args = (torch.randn(*input_shape),) # Export the model to the aten dialect aten_dialect: ExportedProgram = export(model, example_args) @@ -93,14 +88,19 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: calibration_dataset = load_calibration_dataset(dataset_path, suite, model) captured_model = aten_dialect.module() - #visualize_fx_model(captured_model, f"{model_name}_fp32.svg") quantizer = OpenVINOQuantizer() print("PTQ: Quantize the model") + def transform(x): return x[0] - quantized_model = quantize_pt2e(captured_model, quantizer, calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False) + quantized_model = quantize_pt2e( + captured_model, + quantizer, + calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), + fold_quantize=False, + ) aten_dialect: ExportedProgram = export(quantized_model, example_args) @@ -154,69 +154,53 @@ def transform(x): # 2: Run the executor print("Run openvino_executor_runner...") import subprocess - breakpoint() - 
subprocess.run(["../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", - f"--model_path={model_name}", - f"--input_list_path={inp_list_file}", - f"--output_folder_path={out_path}", - #f"--num_iter={len(input_files)}" - ]) + + subprocess.run( + [ + "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", + f"--model_path={model_name}", + f"--input_list_path={inp_list_file}", + f"--output_folder_path={out_path}", + # f"--num_iter={len(input_files)}" + ] + ) # 3: load the outputs and compare with the targets import numpy as np + predictions = [] for i in range(len(input_files)): - predictions.append( - np.fromfile( - os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32 - ) - ) + predictions.append(np.fromfile(os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32)) - k_val = [1, 5] acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") -from torch.fx.passes.graph_drawer import FxGraphDrawer -def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): - g = FxGraphDrawer(model, output_svg_path) - g.get_dot_graph().write_svg(output_svg_path) - -def generate_inputs(dest_path: str, file_name: str, inputs=None, input_list=None): - input_list_file = None - input_files = [] - - # Prepare input list - if input_list is not None: - input_list_file = f"{dest_path}/{file_name}" - with open(input_list_file, "w") as f: - f.write(input_list) - f.flush() - - # Prepare input data - if inputs is not None: - for idx, data in enumerate(inputs): - for i, d in enumerate(data): - file_name = f"{dest_path}/input_{idx}_{i}.raw" - if not isinstance(d, torch.Tensor): - d = torch.tensor(d) - d.detach().numpy().tofile(file_name) - input_files.append(file_name) - - return input_list_file, input_files - if __name__ == "__main__": # Argument parser for dynamic inputs parser = argparse.ArgumentParser(description="Export models with executorch.") - parser.add_argument("--suite", type=str, required=True, choices=["timm", "torchvision", "huggingface"], - help="Select the model suite (timm, torchvision, huggingface).") + parser.add_argument( + "--suite", + type=str, + required=True, + choices=["timm", "torchvision", "huggingface"], + help="Select the model suite (timm, torchvision, huggingface).", + ) parser.add_argument("--model", type=str, required=True, help="Model name to be loaded.") - parser.add_argument("--input_shape", type=eval, required=True, - help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).") + parser.add_argument( + "--input_shape", + type=eval, + required=True, + help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", + ) parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") - parser.add_argument("--device", type=str, default="CPU", - help="Target device for compiling the model (e.g., CPU, GPU). Default is CPU.") + parser.add_argument( + "--device", + type=str, + default="CPU", + help="Target device for compiling the model (e.g., CPU, GPU). 
Default is CPU.", + ) args = parser.parse_args() diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index b0d3a9004c2..41268751b2f 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -180,6 +180,7 @@ int main(int argc, char** argv) { while (std::getline(input_list, file_path)) { auto input_files = split(file_path, " "); ET_LOG(Info, "INPUT_FILES.SIZE: %ld", input_files.size()); + ET_LOG(Info, "NUM_INPUTS: %ld", num_inputs); if (input_files.size() == 0) { break; } @@ -189,6 +190,7 @@ int main(int argc, char** argv) { method_meta.input_tensor_meta(input_index); auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); + ET_LOG(Info, "READ FILE %s", std::string(input_files[input_index])); std::ifstream fin(input_files[input_index], std::ios::binary); fin.seekg(0, fin.end); size_t file_size = fin.tellg(); From e2415afba91eaf52eda3b9f8a1e20c739f3183f8 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 12:10:03 +0100 Subject: [PATCH 06/18] openvino_executor_runner.cpp can run on several inputs --- .../openvino/aot/aot_openvino_compiler.py | 75 ++-- .../openvino_executor_runner.cpp | 321 ++++++++++-------- 2 files changed, 214 insertions(+), 182 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 91df971403c..64f2ca2b955 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -5,8 +5,13 @@ # directory of this source tree for more details. import argparse +import os +import shutil +import subprocess +from pathlib import Path import executorch +import numpy as np import timm import torch import torchvision.datasets as datasets @@ -19,9 +24,9 @@ from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform -from torch.export import ExportedProgram from torch.export import export from torch.export.exported_program import ExportedProgram +from torch.fx.passes.graph_drawer import FxGraphDrawer from transformers import AutoModel import nncf @@ -36,12 +41,14 @@ def load_model(suite: str, model_name: str): return timm.create_model(model_name, pretrained=True) elif suite == "torchvision": if not hasattr(torchvision_models, model_name): - raise ValueError(f"Model {model_name} not found in torchvision.") + msg = f"Model {model_name} not found in torchvision." 
+ raise ValueError(msg) return getattr(torchvision_models, model_name)(pretrained=True) elif suite == "huggingface": return AutoModel.from_pretrained(model_name) else: - raise ValueError(f"Unsupported model suite: {suite}") + msg = f"Unsupported model suite: {suite}" + raise ValueError(msg) def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module): @@ -61,12 +68,32 @@ def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Modu return calibration_dataset +def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str): + g = FxGraphDrawer(model, output_svg_path) + g.get_dot_graph().write_svg(output_svg_path) + + +def dump_inputs(calibration_dataset, dest_path): + input_files, targets = [], [] + for idx, data in enumerate(calibration_dataset): + feature, target = data + targets.append(target) + file_name = f"{dest_path}/input_{idx}_0.raw" + if not isinstance(feature, torch.Tensor): + feature = torch.tensor(feature) + feature.detach().numpy().tofile(file_name) + input_files.append(file_name) + + return input_files, targets + + def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str): # Ensure input_shape is a tuple if isinstance(input_shape, list): input_shape = tuple(input_shape) elif not isinstance(input_shape, tuple): - raise ValueError("Input shape must be a list or tuple.") + msg = "Input shape must be a list or tuple." + raise ValueError(msg) # Load the selected model model = load_model(suite, model_name) @@ -80,11 +107,13 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: if quantize: if suite == "huggingface": - raise ValueError("Quantization of {suite} models did not support yet.") + msg = f"Quantization of {suite} models did not support yet." + raise ValueError(msg) # Quantize model if not dataset_path: - raise ValueError("Quantization requires a calibration dataset.") + msg = "Quantization requires a calibration dataset." 
+ raise ValueError(msg) calibration_dataset = load_calibration_dataset(dataset_path, suite, model) captured_model = aten_dialect.module() @@ -101,6 +130,7 @@ def transform(x): calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False, ) + visualize_fx_model(quantized_model, f"{model_name}_int8.svg") aten_dialect: ExportedProgram = export(quantized_model, example_args) @@ -123,37 +153,21 @@ def transform(x): if quantize: print("Start validation of the quantized model:") - # 1: Dump inputs - import os - import shutil - - dest_path = "tmp_inputs" - out_path = "tmp_outputs" - targets, input_files = [], [] + dest_path = Path("tmp_inputs") + out_path = Path("tmp_outputs") for d in [dest_path, out_path]: if os.path.exists(d): shutil.rmtree(d) os.makedirs(d) - input_list = "" - for idx, data in enumerate(calibration_dataset): - feature, target = data - targets.append(target) - file_name = f"{dest_path}/input_{idx}_0.raw" - input_list += file_name + " " - if not isinstance(feature, torch.Tensor): - feature = torch.tensor(feature) - feature.detach().numpy().tofile(file_name) - input_files.append(file_name) - - inp_list_file = os.path.join(dest_path, "in_list.txt") + + input_files, targets = dump_inputs(calibration_dataset, dest_path) + inp_list_file = dest_path / "in_list.txt" with open(inp_list_file, "w") as f: - input_list = input_list.strip() + "\n" - f.write(input_list) + f.write("\n".join(input_files) + "\n") # 2: Run the executor print("Run openvino_executor_runner...") - import subprocess subprocess.run( [ @@ -161,16 +175,15 @@ def transform(x): f"--model_path={model_name}", f"--input_list_path={inp_list_file}", f"--output_folder_path={out_path}", - # f"--num_iter={len(input_files)}" ] ) # 3: load the outputs and compare with the targets - import numpy as np predictions = [] for i in range(len(input_files)): - predictions.append(np.fromfile(os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32)) + tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) + predictions.append(torch.tensor(np.argmax(tensor))) acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 41268751b2f..f9a85c03a53 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -25,22 +26,16 @@ // Define a fixed-size memory pool for the method allocator (4 MB) static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB -// Define command-line flags for model path, the number of iterations, input list path, and output folder path +// Define command-line flags for model path, the number of iterations, input +// list path, and output folder path +DEFINE_string(model_path, "", + "Path to the model serialized in flatbuffer format (required)."); +DEFINE_int32(num_iter, 1, "Number of inference iterations (default is 1)."); +DEFINE_string(input_list_path, "", + "Path to the input list file which includes the list of raw " + "input tensor files (optional)."); DEFINE_string( - model_path, - "", - "Path to the model serialized in flatbuffer format (required)."); -DEFINE_int32( - num_iter, - 1, - "Number of inference iterations (default is 1)."); -DEFINE_string( - input_list_path, - "", - "Path to the input list file which includes the list of raw input 
tensor files (optional)."); -DEFINE_string( - output_folder_path, - "", + output_folder_path, "", "Path to the output folder to save raw output tensor files (optional)."); using executorch::extension::FileDataLoader; @@ -57,7 +52,119 @@ using executorch::runtime::Result; using executorch::runtime::Span; using executorch::runtime::TensorInfo; -int main(int argc, char** argv) { +std::pair benchmark_method(Result &method, + int num_iterations) { + Error status = Error::Ok; + auto before_exec = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < num_iterations; ++i) { + status = method->execute(); + } + auto after_exec = std::chrono::high_resolution_clock::now(); + double elapsed_time = std::chrono::duration_cast( + after_exec - before_exec) + .count() / + 1000.0; + return std::make_pair(elapsed_time, status); +} + +void dump_outputs(Result &method, const char *output_folder_path, + size_t index = 0) { + std::vector outputs(method->outputs_size()); + Error status = Error::Ok; + status = method->get_outputs(outputs.data(), outputs.size()); + ET_CHECK(status == Error::Ok); + for (size_t output_index = 0; output_index < method->outputs_size(); + output_index++) { + auto output_tensor = outputs[output_index].toTensor(); + auto output_file_name = std::string(output_folder_path) + "/output_" + + std::to_string(index) + "_" + + std::to_string(output_index) + ".raw"; + std::ofstream fout(output_file_name.c_str(), std::ios::binary); + fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); + fout.close(); + ET_LOG(Info, "Write outputs to file %s", output_file_name.c_str()); + } +} + +struct ProcessInputsResult { + double total_time; + size_t num_iter; + Error status; +}; + +ProcessInputsResult process_inputs(Result &method, + const char *input_list_path, + const char *output_folder_path) { + std::vector inputs(method->inputs_size()); + ET_LOG(Info, "%zu inputs: ", inputs.size()); + double total_time_elapsed = 0.; + size_t idx = 0; + + Error status = Error::Ok; + status = method->get_inputs(inputs.data(), inputs.size()); + ET_CHECK(status == Error::Ok); + + auto split = [](std::string s, std::string delimiter) { + size_t pos_start = 0, pos_end, delim_len = delimiter.length(); + std::string token; + std::vector res; + + while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { + token = s.substr(pos_start, pos_end - pos_start); + pos_start = pos_end + delim_len; + res.push_back(token); + } + res.push_back(s.substr(pos_start)); + return res; + }; + + // Read raw input tensor file names from input list file and + // iterate each raw input tensor file to read values + std::ifstream input_list(input_list_path); + if (input_list.is_open()) { + size_t num_inputs = method->inputs_size(); + std::string file_path; + while (std::getline(input_list, file_path)) { + auto input_files = split(file_path, " "); + if (input_files.size() == 0) { + break; + } + for (int input_index = 0; input_index < num_inputs; ++input_index) { + MethodMeta method_meta = method->method_meta(); + Result tensor_meta = + method_meta.input_tensor_meta(input_index); + auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); + + ET_LOG(Info, "Read inputs from file %s", + input_files[input_index].c_str()); + std::ifstream fin(input_files[input_index], std::ios::binary); + fin.seekg(0, fin.end); + size_t file_size = fin.tellg(); + + ET_CHECK_MSG( + file_size == tensor_meta->nbytes(), + "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", + input_index, file_size, tensor_meta->nbytes()); + + fin.seekg(0, fin.beg); + fin.read(static_cast(input_data_ptr), file_size); + fin.close(); + } + double time_elapsed; + std::tie(time_elapsed, status) = benchmark_method(method, 1); + if (status != Error::Ok) { + return {total_time_elapsed, idx, status}; + } + total_time_elapsed += time_elapsed; + dump_outputs(method, output_folder_path, idx++); + } + } else { + ET_CHECK_MSG(false, "Failed to read input list file: %s", input_list_path); + } + return {total_time_elapsed, idx, status}; +} + +int main(int argc, char **argv) { // Initialize the runtime environment executorch::runtime::runtime_init(); @@ -68,22 +175,21 @@ int main(int argc, char** argv) { if (FLAGS_model_path.empty()) { std::cerr << "Error: --model_path is required." << std::endl; std::cerr << "Usage: " << argv[0] - << " --model_path= --num_iter=" << std::endl; + << " --model_path= --num_iter=" + << std::endl; return 1; } // Retrieve the model path and number of iterations - const char* model_path = FLAGS_model_path.c_str(); + const char *model_path = FLAGS_model_path.c_str(); int num_iterations = FLAGS_num_iter; std::cout << "Model path: " << model_path << std::endl; std::cout << "Number of iterations: " << num_iterations << std::endl; // Load the model using FileDataLoader Result loader = FileDataLoader::from(model_path); - ET_CHECK_MSG( - loader.ok(), - "FileDataLoader::from() failed: 0x%" PRIx32, - static_cast(loader.error())); + ET_CHECK_MSG(loader.ok(), "FileDataLoader::from() failed: 0x%" PRIx32, + static_cast(loader.error())); // Load the program from the loaded model Result program = Program::load(&loader.get()); @@ -93,8 +199,9 @@ int main(int argc, char** argv) { } ET_LOG(Info, "Model file %s is loaded.", model_path); - // Retrieve the method name from the program (assumes the first method is used) - const char* method_name = nullptr; + // Retrieve the method name from the program (assumes the first method is + // used) + const char *method_name = nullptr; { const auto method_name_result = program->get_method_name(0); ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); @@ -104,11 +211,8 @@ int main(int argc, char** argv) { // Retrieve metadata about the method Result method_meta = program->method_meta(method_name); - ET_CHECK_MSG( - method_meta.ok(), - "Failed to get method_meta for %s: 0x%" PRIx32, - method_name, - static_cast(method_meta.error())); + ET_CHECK_MSG(method_meta.ok(), "Failed to get method_meta for %s: 0x%" PRIx32, + method_name, static_cast(method_meta.error())); // Set up a memory allocator for the method MemoryAllocator method_allocator{ @@ -133,138 +237,53 @@ int main(int argc, char** argv) { // Load the method into the program Result method = program->load_method(method_name, &memory_manager); - ET_CHECK_MSG( - method.ok(), - "Loading of method %s failed with status 0x%" PRIx32, - method_name, - static_cast(method.error())); + ET_CHECK_MSG(method.ok(), + "Loading of method %s failed with status 0x%" PRIx32, + method_name, static_cast(method.error())); ET_LOG(Info, "Method loaded."); // Prepare the input tensors for the method auto inputs = prepare_input_tensors(*method); - ET_CHECK_MSG( - inputs.ok(), - "Could not prepare inputs: 0x%" PRIx32, - static_cast(inputs.error())); + ET_CHECK_MSG(inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, + static_cast(inputs.error())); + + double elapsed_time; + Error status = Error::Ok; // If the input path list is provided, read input tensors from the files 
- if (!(FLAGS_input_list_path.empty())) { - const char* input_list_path = FLAGS_input_list_path.c_str(); - ET_LOG(Info, "Loading input tensors from the list provided in %s.", input_list_path); - Error status = Error::Ok; - std::vector inputs(method->inputs_size()); - ET_LOG(Info, "%zu inputs: ", inputs.size()); - status = method->get_inputs(inputs.data(), inputs.size()); - ET_CHECK(status == Error::Ok); - - auto split = [](std::string s, std::string delimiter) { - size_t pos_start = 0, pos_end, delim_len = delimiter.length(); - std::string token; - std::vector res; - - while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { - token = s.substr(pos_start, pos_end - pos_start); - pos_start = pos_end + delim_len; - res.push_back(token); - } - res.push_back(s.substr(pos_start)); - return res; - }; - - // Read raw input tensor file names from input list file and - // iterate each raw input tensor file to read values - std::ifstream input_list(input_list_path); - if (input_list.is_open()) { - size_t num_inputs = method->inputs_size(); - std::string file_path; - while (std::getline(input_list, file_path)) { - auto input_files = split(file_path, " "); - ET_LOG(Info, "INPUT_FILES.SIZE: %ld", input_files.size()); - ET_LOG(Info, "NUM_INPUTS: %ld", num_inputs); - if (input_files.size() == 0) { - break; - } - for (int input_index = 0; input_index < num_inputs; ++input_index) { - MethodMeta method_meta = method->method_meta(); - Result tensor_meta = - method_meta.input_tensor_meta(input_index); - auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); - - ET_LOG(Info, "READ FILE %s", std::string(input_files[input_index])); - std::ifstream fin(input_files[input_index], std::ios::binary); - fin.seekg(0, fin.end); - size_t file_size = fin.tellg(); - - ET_CHECK_MSG( - file_size == tensor_meta->nbytes(), - "Input(%d) size mismatch. file bytes: %zu, tensor bytes: %zu", - input_index, - file_size, - tensor_meta->nbytes()); - - fin.seekg(0, fin.beg); - fin.read( - static_cast(input_data_ptr), - file_size); - fin.close(); - } - } - } else { - ET_CHECK_MSG(false, - "Failed to read input list file: %s", - input_list_path); + if (!(FLAGS_input_list_path.empty()) and + !(FLAGS_output_folder_path.empty())) { + const char *input_list_path = FLAGS_input_list_path.c_str(); + ET_LOG(Info, "Loading input tensors from the list provided in %s.", + input_list_path); + const char *output_folder_path = FLAGS_output_folder_path.c_str(); + auto res = process_inputs(method, input_list_path, output_folder_path); + elapsed_time = res.total_time; + status = res.status; + num_iterations = res.num_iter; + } else { + + // Measure execution time for inference + std::tie(elapsed_time, status) = benchmark_method(method, num_iterations); + // Retrieve and print the method outputs + ET_LOG(Info, "%zu Number of outputs: ", method->outputs_size()); + + // If output folder path is provided, save output tensors + // into raw tensor files. 
+ if (!(FLAGS_output_folder_path.empty())) { + const char *output_folder_path = FLAGS_output_folder_path.c_str(); + ET_LOG(Info, "Saving output tensors into the output folder: %s.", + output_folder_path); + dump_outputs(method, output_folder_path); } } - ET_LOG(Info, "Inputs prepared."); - - // Measure execution time for inference - auto before_exec = std::chrono::high_resolution_clock::now(); - Error status = Error::Ok; - for (int i = 0; i < num_iterations; ++i) { - status = method->execute(); - } - auto after_exec = std::chrono::high_resolution_clock::now(); - double elapsed_time = std::chrono::duration_cast( - after_exec - before_exec) - .count() / 1000.0; - // Log execution time and average time per iteration - ET_LOG( - Info, - "%d inference took %f ms, avg %f ms", - num_iterations, - elapsed_time, - elapsed_time / static_cast(num_iterations)); - ET_CHECK_MSG( - status == Error::Ok, - "Execution of method %s failed with status 0x%" PRIx32, - method_name, - static_cast(status)); + ET_LOG(Info, "%d inference took %f ms, avg %f ms", num_iterations, + elapsed_time, elapsed_time / static_cast(num_iterations)); + ET_CHECK_MSG(status == Error::Ok, + "Execution of method %s failed with status 0x%" PRIx32, + method_name, static_cast(status)); ET_LOG(Info, "Model executed successfully."); - // Retrieve and print the method outputs - std::vector outputs(method->outputs_size()); - ET_LOG(Info, "%zu Number of outputs: ", outputs.size()); - status = method->get_outputs(outputs.data(), outputs.size()); - ET_CHECK(status == Error::Ok); - - // If output folder path is provided, save output tensors - // into raw tensor files. - if (!(FLAGS_output_folder_path.empty())) { - const char* output_folder_path = FLAGS_output_folder_path.c_str(); - ET_LOG(Info, "Saving output tensors into the output folder: %s.", output_folder_path); - for (size_t output_index = 0; output_index < method->outputs_size(); - output_index++) { - auto output_tensor = outputs[output_index].toTensor(); - auto output_file_name = std::string(output_folder_path) + "/output_" + - std::to_string(output_index) + ".raw"; - std::ofstream fout(output_file_name.c_str(), std::ios::binary); - fout.write( - output_tensor.const_data_ptr(), output_tensor.nbytes()); - fout.close(); - } - } - return 0; } - From 8cbb1175902efab402d814805e4348b9c817b1f1 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 14:28:25 +0100 Subject: [PATCH 07/18] Validate option / minor --- .../openvino/aot/aot_openvino_compiler.py | 33 ++++++++++++------- .../openvino_executor_runner.cpp | 3 -- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 64f2ca2b955..3bdaf947a69 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -51,11 +51,11 @@ def load_model(suite: str, model_name: str): raise ValueError(msg) -def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module): +def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module, model_name: str): val_dir = f"{dataset_path}/val" if suite == "torchvision": - transform = torchvision_models.get_model_weights(model.name).transforms() + transform = torchvision_models.get_model_weights(model_name).DEFAULT.transforms() else: transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) @@ -87,7 +87,7 @@ def dump_inputs(calibration_dataset, dest_path): return input_files, targets 
-def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str): +def main(suite: str, model_name: str, input_shape, quantize: bool, validate: bool, dataset_path: str, device: str): # Ensure input_shape is a tuple if isinstance(input_shape, list): input_shape = tuple(input_shape) @@ -95,6 +95,8 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: msg = "Input shape must be a list or tuple." raise ValueError(msg) + calibration_dataset = None + # Load the selected model model = load_model(suite, model_name) model = model.eval() @@ -114,7 +116,7 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: if not dataset_path: msg = "Quantization requires a calibration dataset." raise ValueError(msg) - calibration_dataset = load_calibration_dataset(dataset_path, suite, model) + calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) captured_model = aten_dialect.module() quantizer = OpenVINOQuantizer() @@ -146,12 +148,15 @@ def transform(x): exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig()) # Serialize and save it to a file - model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" - with open(model_name, "wb") as file: + model_file_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte" + with open(model_file_name, "wb") as file: exec_prog.write_to_file(file) - print(f"Model exported and saved as {model_name} on {device}.") + print(f"Model exported and saved as {model_file_name} on {device}.") + + if validate: + if calibration_dataset is None: + calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) - if quantize: print("Start validation of the quantized model:") # 1: Dump inputs dest_path = Path("tmp_inputs") @@ -172,18 +177,17 @@ def transform(x): subprocess.run( [ "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", - f"--model_path={model_name}", + f"--model_path={model_file_name}", f"--input_list_path={inp_list_file}", f"--output_folder_path={out_path}", ] ) # 3: load the outputs and compare with the targets - predictions = [] for i in range(len(input_files)): tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) - predictions.append(torch.tensor(np.argmax(tensor))) + predictions.append(torch.argmax(torch.tensor(tensor))) acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") @@ -207,6 +211,11 @@ def transform(x): help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", ) parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") + parser.add_argument( + "--validate", + action="store_true", + help="Enable model validation. 
--dataset argument is requred for the validation.", + ) parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") parser.add_argument( "--device", @@ -219,4 +228,4 @@ def transform(x): # Run the main function with parsed arguments with nncf.torch.disable_patching(): - main(args.suite, args.model, args.input_shape, args.quantize, args.dataset, args.device) + main(args.suite, args.model, args.input_shape, args.quantize, args.validate, args.dataset, args.device) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index f9a85c03a53..36c957bc433 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -82,7 +82,6 @@ void dump_outputs(Result &method, const char *output_folder_path, std::ofstream fout(output_file_name.c_str(), std::ios::binary); fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); fout.close(); - ET_LOG(Info, "Write outputs to file %s", output_file_name.c_str()); } } @@ -135,8 +134,6 @@ ProcessInputsResult process_inputs(Result &method, method_meta.input_tensor_meta(input_index); auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); - ET_LOG(Info, "Read inputs from file %s", - input_files[input_index].c_str()); std::ifstream fin(input_files[input_index], std::ios::binary); fin.seekg(0, fin.end); size_t file_size = fin.tellg(); From 4b60fb4934d39c683f323e2bd526d422bf39fcd5 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 14:56:18 +0100 Subject: [PATCH 08/18] Input shape from the input dataset --- .../openvino/aot/aot_openvino_compiler.py | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 3bdaf947a69..e4ef955b40c 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -88,19 +88,20 @@ def dump_inputs(calibration_dataset, dest_path): def main(suite: str, model_name: str, input_shape, quantize: bool, validate: bool, dataset_path: str, device: str): - # Ensure input_shape is a tuple - if isinstance(input_shape, list): - input_shape = tuple(input_shape) - elif not isinstance(input_shape, tuple): - msg = "Input shape must be a list or tuple." - raise ValueError(msg) - - calibration_dataset = None - # Load the selected model model = load_model(suite, model_name) model = model.eval() + if dataset_path: + calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) + input_shape = tuple(next(iter(calibration_dataset))[0].shape) + print(f"Input shape retrieved from the model config: {input_shape}") + # Ensure input_shape is a tuple + elif isinstance(input_shape, list): + input_shape = tuple(input_shape) + else: + msg = "Input shape must be a list or tuple." + raise ValueError(msg) # Provide input example_args = (torch.randn(*input_shape),) @@ -116,7 +117,6 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, validate: boo if not dataset_path: msg = "Quantization requires a calibration dataset." 
raise ValueError(msg) - calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) captured_model = aten_dialect.module() quantizer = OpenVINOQuantizer() @@ -154,8 +154,13 @@ def transform(x): print(f"Model exported and saved as {model_file_name} on {device}.") if validate: - if calibration_dataset is None: - calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) + if suite == "huggingface": + msg = f"Validation of {suite} models did not support yet." + raise ValueError(msg) + + if not dataset_path: + msg = "Validateion requires a calibration dataset." + raise ValueError(msg) print("Start validation of the quantized model:") # 1: Dump inputs @@ -207,7 +212,6 @@ def transform(x): parser.add_argument( "--input_shape", type=eval, - required=True, help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", ) parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") From e0cd6448ef57210b2e91f5aa93393b0860371e48 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 15:23:27 +0100 Subject: [PATCH 09/18] --batch_size --- .../openvino/aot/aot_openvino_compiler.py | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index e4ef955b40c..dba47c0dde3 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -51,7 +51,7 @@ def load_model(suite: str, model_name: str): raise ValueError(msg) -def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module, model_name: str): +def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, model: torch.nn.Module, model_name: str): val_dir = f"{dataset_path}/val" if suite == "torchvision": @@ -62,7 +62,7 @@ def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Modu val_dataset = datasets.ImageFolder(val_dir, transform=transform) calibration_dataset = torch.utils.data.DataLoader( - val_dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True + val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True ) return calibration_dataset @@ -77,7 +77,7 @@ def dump_inputs(calibration_dataset, dest_path): input_files, targets = [], [] for idx, data in enumerate(calibration_dataset): feature, target = data - targets.append(target) + targets.extend(target) file_name = f"{dest_path}/input_{idx}_0.raw" if not isinstance(feature, torch.Tensor): feature = torch.tensor(feature) @@ -87,13 +87,22 @@ def dump_inputs(calibration_dataset, dest_path): return input_files, targets -def main(suite: str, model_name: str, input_shape, quantize: bool, validate: bool, dataset_path: str, device: str): +def main( + suite: str, + model_name: str, + input_shape, + quantize: bool, + validate: bool, + dataset_path: str, + device: str, + batch_size: int, +): # Load the selected model model = load_model(suite, model_name) model = model.eval() if dataset_path: - calibration_dataset = load_calibration_dataset(dataset_path, suite, model, model_name) + calibration_dataset = load_calibration_dataset(dataset_path, batch_size, suite, model, model_name) input_shape = tuple(next(iter(calibration_dataset))[0].shape) print(f"Input shape retrieved from the model config: {input_shape}") # Ensure input_shape is a tuple @@ -192,7 +201,7 @@ def transform(x): predictions = [] for i in 
range(len(input_files)): tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) - predictions.append(torch.argmax(torch.tensor(tensor))) + predictions.extend(torch.tensor(tensor).reshape(-1, 1000).argmax(-1)) acc_top1 = accuracy_score(predictions, targets) print(f"acc@1: {acc_top1}") @@ -214,6 +223,13 @@ def transform(x): type=eval, help="Input shape for the model as a list or tuple (e.g., [1, 3, 224, 224] or (1, 3, 224, 224)).", ) + parser.add_argument( + "--batch_size", + type=int, + default=1, + help="Batch size for the validation. Default batch_size == 1." + " The dataset length must be evenly divisible by the batch size.", + ) parser.add_argument("--quantize", action="store_true", help="Enable model quantization.") parser.add_argument( "--validate", @@ -232,4 +248,13 @@ def transform(x): # Run the main function with parsed arguments with nncf.torch.disable_patching(): - main(args.suite, args.model, args.input_shape, args.quantize, args.validate, args.dataset, args.device) + main( + args.suite, + args.model, + args.input_shape, + args.quantize, + args.validate, + args.dataset, + args.device, + args.batch_size, + ) From 2a04ee6a6d27357c71086761e02be2ef66904076 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 16:20:53 +0100 Subject: [PATCH 10/18] Adapt subset size to keep +- 300 pics for calibration --- examples/openvino/aot/aot_openvino_compiler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index dba47c0dde3..909eabe3677 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -135,9 +135,12 @@ def main( def transform(x): return x[0] + default_subset_size = 300 + batch_size = calibration_dataset.batch_size quantized_model = quantize_pt2e( captured_model, quantizer, + subset_size=(default_subset_size // batch_size) + int(default_subset_size % batch_size > 0), calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False, ) From db7dc1318c9e54a64ebc54e7f5b5cd1d945e42ac Mon Sep 17 00:00:00 2001 From: Daniil Lyakhov Date: Tue, 11 Feb 2025 16:35:59 +0100 Subject: [PATCH 11/18] Apply suggestions from code review Co-authored-by: Alexander Suslov --- examples/openvino/aot/aot_openvino_compiler.py | 4 +--- examples/openvino/openvino_build_example.sh | 8 +++----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index 909eabe3677..cf41ff318fd 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -30,8 +30,6 @@ from transformers import AutoModel import nncf -import nncf.experimental -import nncf.experimental.torch from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e @@ -239,7 +237,7 @@ def transform(x): action="store_true", help="Enable model validation. 
--dataset argument is requred for the validation.", ) - parser.add_argument("--dataset", type=str, help="Path to the calibration dataset.") + parser.add_argument("--dataset", type=str, help="Path to the validation dataset.") parser.add_argument( "--device", type=str, diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index 52c508d8ee2..a490ff30154 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -34,7 +34,6 @@ main() { local example_dir=examples/openvino local example_build_dir="${build_dir}/${example_dir}" local cmake_prefix_path="${PWD}/${build_dir}/lib/cmake/ExecuTorch;${PWD}/${build_dir}/third-party/gflags;" - rm -rf "${example_build_dir}" ## OpenVINO original @@ -43,11 +42,10 @@ main() { -B"${example_build_dir}" \ $EXECUTORCH_ROOT/$example_dir +<<<<<<< HEAD:examples/openvino/openvino_build_example.sh cmake --build "${example_build_dir}" -j$(nproc) - - # Switch back to the original directory - cd - > /dev/null - +======= + cmake --build "${example_build_dir}" -j5 # Print a success message echo "Build successfully completed." } From de3f50b5d33f79acd28b37be64f7b40de7e04278 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 17:17:06 +0100 Subject: [PATCH 12/18] Comments --- examples/openvino/aot/README.md | 46 ++++-- .../openvino/aot/aot_openvino_compiler.py | 146 +++++++++++------- 2 files changed, 125 insertions(+), 67 deletions(-) diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md index 46e476a8408..5fd97dba21e 100644 --- a/examples/openvino/aot/README.md +++ b/examples/openvino/aot/README.md @@ -11,34 +11,41 @@ python aot_openvino_compiler.py --suite --model --inp ``` ### **Arguments** -- **`--suite`** (required): - Specifies the model suite to use. +- **`--suite`** (required): + Specifies the model suite to use. Supported values: - `timm` (e.g., VGG16, ResNet50) - `torchvision` (e.g., resnet18, mobilenet_v2) - `huggingface` (e.g., bert-base-uncased) -- **`--model`** (required): - Name of the model to export. +- **`--model`** (required): + Name of the model to export. Examples: - For `timm`: `vgg16`, `resnet50` - For `torchvision`: `resnet18`, `mobilenet_v2` - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased` -- **`--input_shape`** (required): - Input shape for the model. Provide this as a **list** or **tuple**. +- **`--input_shape`**: + Input shape for the model. Provide this as a **list** or **tuple**. Examples: - `[1, 3, 224, 224]` (Zsh users: wrap in quotes) - `(1, 3, 224, 224)` +- **`--batch_size`** : + Batch size for the validation. Default batch_size == 1. + The dataset length must be evenly divisible by the batch size. + - **`--quantize`** (optional): Enable model quantization: Default is False. +- **`--quantize`** (optional): + Enable model validation. --dataset argument is requred for the validation. + - **`--dataset`** (optional): - Path to the calibration dataset. TODO: It is necessary to think in what form to support the dataset. For the experiment, tiny-imagenet is used, which can be downloaded from here http://cs231n.stanford.edu/tiny-imagenet-200.zip and specify the path to it. + Path to the imagenet-like calibration dataset. -- **`--device`** (optional): - Target device for the compiled model. Default is `CPU`. +- **`--device`** (optional) + Target device for the compiled model. Default is `CPU`. 
Examples: `CPU`, `GPU` @@ -58,22 +65,31 @@ python aot_openvino_compiler.py --suite torchvision --model resnet50 --input_sha ```bash python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased --input_shape "(1, 512)" --device CPU ``` +### Export and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset +``` + +### Export, quantize and validate TIMM Resnet50d model for the CPU +```bash +python aot_openvino_compiler.py --suite timm --model vgg16 --input_shape [1, 3, 224, 224] --device CPU --validate --dataset /path/to/dataset --quantize +``` ## **Notes** -1. **Input Shape in Zsh**: +1. **Input Shape in Zsh**: If you are using Zsh, wrap `--input_shape` in quotes or use a tuple: ```bash --input_shape '[1, 3, 224, 224]' --input_shape "(1, 3, 224, 224)" ``` -2. **Model Compatibility**: +2. **Model Compatibility**: Ensure the specified `model_name` exists in the selected `suite`. Use the corresponding library's documentation to verify model availability. -3. **Output File**: +3. **Output File**: The exported model will be saved as `.pte` in the current directory. -4. **Dependencies**: +4. **Dependencies**: - Python 3.8+ - PyTorch - Executorch @@ -82,14 +98,14 @@ python aot_openvino_compiler.py --suite huggingface --model bert-base-uncased -- - Transformers (`pip install transformers`) ## **Error Handling** -- **Model Not Found**: +- **Model Not Found**: If the script raises an error such as: ```bash ValueError: Model not found ``` Verify that the model name is correct for the chosen suite. -- **Unsupported Input Shape**: +- **Unsupported Input Shape**: Ensure `--input_shape` is provided as a valid list or tuple. 
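For readers who want the quantization step without the CLI wrapper, the path that `aot_openvino_compiler.py` drives can be condensed into a short sketch. This is illustrative only: the helper name `quantize_for_openvino`, the `calibration_loader` argument, and the `executorch.backends.openvino.quantizer` import path are assumptions, while the individual calls mirror the ones used in the example script.

```python
import nncf
import torch
from executorch.backends.openvino.quantizer import OpenVINOQuantizer  # assumed install path
from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e
from torch.export import export


def quantize_for_openvino(model: torch.nn.Module, example_args, calibration_loader):
    # Export of an NNCF-patched model is not supported, so patching is disabled.
    with nncf.torch.disable_patching():
        # Capture the eager model into an FX graph via torch.export.
        captured_model = export(model.eval(), example_args).module()
        quantizer = OpenVINOQuantizer()
        # quantize_pt2e annotates the graph, calibrates it on the dataset and
        # keeps quantize/dequantize nodes unfolded for the later lowering step.
        return quantize_pt2e(
            captured_model,
            quantizer,
            calibration_dataset=nncf.Dataset(calibration_loader, transform_func=lambda data: data[0]),
            fold_quantize=False,
        )
```

The returned graph module is then re-exported and lowered to a `.pte` file in the same way as the FP32 path.
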
diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py index cf41ff318fd..4f45fc1d426 100644 --- a/examples/openvino/aot/aot_openvino_compiler.py +++ b/examples/openvino/aot/aot_openvino_compiler.py @@ -8,6 +8,7 @@ import os import shutil import subprocess +from itertools import islice from pathlib import Path import executorch @@ -24,6 +25,8 @@ from sklearn.metrics import accuracy_score from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform +from torch.ao.quantization.quantize_pt2e import convert_pt2e +from torch.ao.quantization.quantize_pt2e import prepare_pt2e from torch.export import export from torch.export.exported_program import ExportedProgram from torch.fx.passes.graph_drawer import FxGraphDrawer @@ -54,8 +57,11 @@ def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, mod if suite == "torchvision": transform = torchvision_models.get_model_weights(model_name).DEFAULT.transforms() - else: + elif suite == "timm": transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model)) + else: + msg = f"Validation is not supported yet for the suite {suite}" + raise ValueError(msg) val_dataset = datasets.ImageFolder(val_dir, transform=transform) @@ -85,6 +91,76 @@ def dump_inputs(calibration_dataset, dest_path): return input_files, targets +def quantize_model( + captured_model: torch.fx.GraphModule, calibration_dataset: torch.utils.data.DataLoader, use_nncf: bool +) -> torch.fx.GraphModule: + quantizer = OpenVINOQuantizer() + + print("PTQ: Quantize the model") + default_subset_size = 300 + batch_size = calibration_dataset.batch_size + subset_size = (default_subset_size // batch_size) + int(default_subset_size % batch_size > 0) + + def transform(x): + return x[0] + + if use_nncf: + + quantized_model = quantize_pt2e( + captured_model, + quantizer, + subset_size=subset_size, + calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), + fold_quantize=False, + ) + else: + annotated_model = prepare_pt2e(captured_model, quantizer) + + print("PTQ: Calibrate the model...") + for data in islice(calibration_dataset, subset_size): + annotated_model(transform(data)) + + print("PTQ: Convert the quantized model...") + quantized_model = convert_pt2e(annotated_model, fold_quantize=False) + + return quantized_model + + +def validate_model(model_file_name: str, calibration_dataset: torch.utils.data.DataLoader) -> float: + # 1: Dump inputs + dest_path = Path("tmp_inputs") + out_path = Path("tmp_outputs") + for d in [dest_path, out_path]: + if os.path.exists(d): + shutil.rmtree(d) + os.makedirs(d) + + input_files, targets = dump_inputs(calibration_dataset, dest_path) + inp_list_file = dest_path / "in_list.txt" + with open(inp_list_file, "w") as f: + f.write("\n".join(input_files) + "\n") + + # 2: Run the executor + print("Run openvino_executor_runner...") + + subprocess.run( + [ + "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", + f"--model_path={model_file_name}", + f"--input_list_path={inp_list_file}", + f"--output_folder_path={out_path}", + ] + ) + + # 3: load the outputs and compare with the targets + predictions = [] + for i in range(len(input_files)): + tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) + predictions.extend(torch.tensor(tensor).reshape(-1, 1000).argmax(-1)) + + return accuracy_score(predictions, targets) + + def main( suite: str, model_name: str, @@ -94,6 +170,7 @@ def main( 
dataset_path: str, device: str, batch_size: int, + quantization_flow: str, ): # Load the selected model model = load_model(suite, model_name) @@ -104,7 +181,7 @@ def main( input_shape = tuple(next(iter(calibration_dataset))[0].shape) print(f"Input shape retrieved from the model config: {input_shape}") # Ensure input_shape is a tuple - elif isinstance(input_shape, list): + elif isinstance(input_shape, (list, tuple)): input_shape = tuple(input_shape) else: msg = "Input shape must be a list or tuple." @@ -124,23 +201,8 @@ def main( if not dataset_path: msg = "Quantization requires a calibration dataset." raise ValueError(msg) - - captured_model = aten_dialect.module() - quantizer = OpenVINOQuantizer() - - print("PTQ: Quantize the model") - - def transform(x): - return x[0] - - default_subset_size = 300 - batch_size = calibration_dataset.batch_size - quantized_model = quantize_pt2e( - captured_model, - quantizer, - subset_size=(default_subset_size // batch_size) + int(default_subset_size % batch_size > 0), - calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), - fold_quantize=False, + quantized_model = quantize_model( + aten_dialect.module(), calibration_dataset, use_nncf=quantization_flow == "nncf" ) visualize_fx_model(quantized_model, f"{model_name}_int8.svg") @@ -172,39 +234,8 @@ def transform(x): msg = "Validateion requires a calibration dataset." raise ValueError(msg) - print("Start validation of the quantized model:") - # 1: Dump inputs - dest_path = Path("tmp_inputs") - out_path = Path("tmp_outputs") - for d in [dest_path, out_path]: - if os.path.exists(d): - shutil.rmtree(d) - os.makedirs(d) - - input_files, targets = dump_inputs(calibration_dataset, dest_path) - inp_list_file = dest_path / "in_list.txt" - with open(inp_list_file, "w") as f: - f.write("\n".join(input_files) + "\n") - - # 2: Run the executor - print("Run openvino_executor_runner...") - - subprocess.run( - [ - "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner", - f"--model_path={model_file_name}", - f"--input_list_path={inp_list_file}", - f"--output_folder_path={out_path}", - ] - ) - - # 3: load the outputs and compare with the targets - predictions = [] - for i in range(len(input_files)): - tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32) - predictions.extend(torch.tensor(tensor).reshape(-1, 1000).argmax(-1)) - - acc_top1 = accuracy_score(predictions, targets) + print("Start validation of the model:") + acc_top1 = validate_model(model_file_name, calibration_dataset) print(f"acc@1: {acc_top1}") @@ -244,10 +275,20 @@ def transform(x): default="CPU", help="Target device for compiling the model (e.g., CPU, GPU). Default is CPU.", ) + parser.add_argument( + "--quantization_flow", + type=str, + choices=["pt2e", "nncf"], + default="nncf", + help="Select the quantization flow (nncf or pt2e):" + " pt2e is the default torch.ao quantization flow, while" + " nncf is a custom method with additional algorithms to improve model performance.", + ) args = parser.parse_args() # Run the main function with parsed arguments + # Disable nncf patching as export of the patched model is not supported. 
with nncf.torch.disable_patching(): main( args.suite, @@ -258,4 +299,5 @@ def transform(x): args.dataset, args.device, args.batch_size, + args.quantization_flow, ) From 17fe62f001fd731be97b5242d8f41893c144944a Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Tue, 11 Feb 2025 18:02:54 +0100 Subject: [PATCH 13/18] OpenVINOQuantizer: constructor arguments have been refined --- backends/openvino/quantizer/quantizer.py | 71 +++++++++++------------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 63da8325e4f..8ce1ce6dda1 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -5,6 +5,7 @@ # directory of this source tree for more details. from collections import defaultdict +from enum import Enum from typing import Dict, List, Optional, Tuple import torch.fx @@ -20,13 +21,25 @@ import nncf import nncf.common.quantization as q import nncf.experimental.torch.fx as nncf_fx -import nncf.parameters as p -import nncf.quantization.advanced_parameters as advanced_p from nncf.common.graph.graph import NNCFGraph QUANT_ANNOTATION_KEY = "quantization_annotation" +class QuantizationMode(Enum): + """ + Defines special quantization modes. + + - INT8_SYM: INT8 symmetric quantization for both activations and weights. + - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. + - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + """ + + INT8_SYM = "int8_sym" + INT8_MIXED = "int8_mixed" + INT8_TRANSFORMER = "int8_transformer" + + class OpenVINOQuantizer(Quantizer): """ Implementation of the Torch AO quantizer which annotates models with quantization annotations @@ -36,49 +49,31 @@ class OpenVINOQuantizer(Quantizer): def __init__( self, *, - mode: Optional[p.QuantizationMode] = None, - preset: Optional[q.structs.QuantizationPreset] = None, - target_device: p.TargetDevice = p.TargetDevice.ANY, - transformer_model: bool = False, + mode: Optional[QuantizationMode] = QuantizationMode.INT8_SYM, ignored_scope: Optional[nncf.IgnoredScope] = None, - overflow_fix: Optional[advanced_p.OverflowFix] = None, - quantize_outputs: bool = False, - activations_quantization_params: Optional[advanced_p.QuantizationParameters] = None, - weights_quantization_params: Optional[advanced_p.QuantizationParameters] = None, + **kwargs, ): """ - :param mode: Defines optimization mode for the algorithm. None by default. - :param preset: A preset controls the quantization mode (symmetric and asymmetric). - It can take the following values: - - `performance`: Symmetric quantization of weights and activations. - - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations. - Default value is None. In this case, `mixed` preset is used for `transformer` - model type otherwise `performance`. - :param target_device: A target device the specificity of which will be taken - into account while compressing in order to obtain the best performance - for this type of device, defaults to TargetDevice.ANY. - :param model_type: Model type is needed to specify additional patterns - in the model. Supported only `transformer` now. + :param mode: Defines special quantization modes. + - INT8_SYM: INT8 symmetric quantization for both activations and weights. + - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. 
+ - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models + Default value is INT8_SYM. :param ignored_scope: An ignored scope that defined the list of model control flow graph nodes to be ignored during quantization. - :param overflow_fix: This option controls whether to apply the overflow issue - fix for the 8-bit quantization. - :param quantize_outputs: Whether to insert additional quantizers right before - each of the model outputs. - :param activations_quantization_params: Quantization parameters for model - activations. - :param weights_quantization_params: Quantization parameters for model weights. + :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm. """ + if mode == QuantizationMode.INT8_SYM: + preset = q.structs.QuantizationPreset.PERFORMANCE + model_type = None + elif mode == QuantizationMode.INT8_MIXED: + preset = q.structs.QuantizationPreset.MIXED + model_type = None + else: + preset = None + model_type = nncf.parameters.ModelType.TRANSFORMER self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( - mode=mode, - preset=preset, - target_device=target_device, - model_type=p.ModelType.TRANSFORMER if transformer_model else None, - ignored_scope=ignored_scope, - overflow_fix=overflow_fix, - quantize_outputs=quantize_outputs, - activations_quantization_params=activations_quantization_params, - weights_quantization_params=weights_quantization_params, + preset=preset, model_type=model_type, ignored_scope=ignored_scope, **kwargs ) def get_nncf_quantization_setup( From c7e07586a7d639d4f252efdbc77d4768f7ef5278 Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Wed, 12 Feb 2025 15:04:16 +0100 Subject: [PATCH 14/18] set_ignored_scope | readme updates --- backends/openvino/quantizer/quantizer.py | 56 +++++++++++++++------ examples/openvino/aot/README.md | 10 ++-- examples/openvino/openvino_build_example.sh | 7 +-- 3 files changed, 52 insertions(+), 21 deletions(-) diff --git a/backends/openvino/quantizer/quantizer.py b/backends/openvino/quantizer/quantizer.py index 8ce1ce6dda1..480faeee635 100644 --- a/backends/openvino/quantizer/quantizer.py +++ b/backends/openvino/quantizer/quantizer.py @@ -19,7 +19,7 @@ from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec import nncf -import nncf.common.quantization as q +import nncf.common.quantization as quantization import nncf.experimental.torch.fx as nncf_fx from nncf.common.graph.graph import NNCFGraph @@ -50,7 +50,6 @@ def __init__( self, *, mode: Optional[QuantizationMode] = QuantizationMode.INT8_SYM, - ignored_scope: Optional[nncf.IgnoredScope] = None, **kwargs, ): """ @@ -59,26 +58,53 @@ def __init__( - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights. - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models Default value is INT8_SYM. - :param ignored_scope: An ignored scope that defined the list of model control - flow graph nodes to be ignored during quantization. :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm. 
""" if mode == QuantizationMode.INT8_SYM: - preset = q.structs.QuantizationPreset.PERFORMANCE + preset = quantization.structs.QuantizationPreset.PERFORMANCE model_type = None elif mode == QuantizationMode.INT8_MIXED: - preset = q.structs.QuantizationPreset.MIXED + preset = quantization.structs.QuantizationPreset.MIXED model_type = None else: preset = None model_type = nncf.parameters.ModelType.TRANSFORMER self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization( - preset=preset, model_type=model_type, ignored_scope=ignored_scope, **kwargs + preset=preset, model_type=model_type, **kwargs + ) + + def set_ignored_scope( + self, + names: Optional[List[str]] = None, + patterns: Optional[List[str]] = None, + types: Optional[List[str]] = None, + subgraphs: Optional[List[Tuple[List[str], List[str]]]] = None, + validate: bool = True, + ) -> None: + """ + Provides an option to specify portions of model to be excluded from compression. + The ignored scope defines model sub-graphs that should be excluded from the quantization process. + + :param names: List of ignored node names. + :param patterns: List of regular expressions that define patterns for names of ignored nodes. + :param types: List of ignored operation types. + :param subgraphs: List of ignored subgraphs. + :param validate: If set to True, then a RuntimeError will be raised if any ignored scope does not match + in the model graph. + """ + self._min_max_algo.set_ignored_scope( + nncf.IgnoredScope( + names=names or [], + patterns=patterns or [], + types=types or [], + subgraphs=subgraphs or [], + validate=validate, + ) ) def get_nncf_quantization_setup( self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph - ) -> q.quantizer_setup.SingleConfigQuantizerSetup: + ) -> quantization.quantizer_setup.SingleConfigQuantizerSetup: self._min_max_algo._set_backend_entity(model) return self._min_max_algo.find_quantization_setup(model, nncf_graph) @@ -134,7 +160,9 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule: @staticmethod def _get_unified_scales_root_quantizer_id( - nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: q.quantizer_setup.SingleConfigQuantizerSetup + nncf_graph: NNCFGraph, + quantizer_ids: List[int], + quantizer_setup: quantization.quantizer_setup.SingleConfigQuantizerSetup, ) -> int: """ Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id` @@ -160,7 +188,7 @@ def _get_unified_scales_root_quantizer_id( def _get_edge_or_node_and_annotation( graph: torch.fx.Graph, nncf_graph: NNCFGraph, - qp: q.quantizer_setup.QuantizationPointBase, + qp: quantization.quantizer_setup.QuantizationPointBase, node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation], ) -> Tuple[EdgeOrNode, QuantizationAnnotation]: """ @@ -181,7 +209,7 @@ def _get_edge_or_node_and_annotation( @staticmethod def _get_edge_or_node( - target_node: torch.fx.Node, qp: q.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph + target_node: torch.fx.Node, qp: quantization.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph ) -> EdgeOrNode: """ Returns the edge or node based on the given target node and quantization point. 
@@ -231,7 +259,7 @@ def _fill_torch_ao_annotation( annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec @staticmethod - def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) -> QuantizationSpec: + def _get_torch_ao_qspec_from_qp(qp: quantization.quantizer_setup.QuantizationPointBase) -> QuantizationSpec: """ Retrieves the quantization configuration from the given quantization point and converts it into a QuantizationSpec. @@ -247,13 +275,13 @@ def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) -> if qconfig.per_channel: torch_qscheme = ( torch.per_channel_symmetric - if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC else torch.per_channel_affine ) else: torch_qscheme = ( torch.per_tensor_symmetric - if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC + if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC else torch.per_tensor_affine ) if is_weight: diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md index 5fd97dba21e..900a5b6cbe0 100644 --- a/examples/openvino/aot/README.md +++ b/examples/openvino/aot/README.md @@ -16,7 +16,7 @@ python aot_openvino_compiler.py --suite --model --inp Supported values: - `timm` (e.g., VGG16, ResNet50) - `torchvision` (e.g., resnet18, mobilenet_v2) - - `huggingface` (e.g., bert-base-uncased) + - `huggingface` (e.g., bert-base-uncased). NB: Quantization and validation is not supported yet. - **`--model`** (required): Name of the model to export. @@ -36,10 +36,12 @@ python aot_openvino_compiler.py --suite --model --inp The dataset length must be evenly divisible by the batch size. - **`--quantize`** (optional): - Enable model quantization: Default is False. + Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite does not supported yet. + + +- **`--validate`** (optional): + Enable model validation. --dataset argument is requred for the validation. `huggingface` suite does not supported yet. -- **`--quantize`** (optional): - Enable model validation. --dataset argument is requred for the validation. - **`--dataset`** (optional): Path to the imagenet-like calibration dataset. diff --git a/examples/openvino/openvino_build_example.sh b/examples/openvino/openvino_build_example.sh index a490ff30154..ee16658941d 100755 --- a/examples/openvino/openvino_build_example.sh +++ b/examples/openvino/openvino_build_example.sh @@ -42,10 +42,11 @@ main() { -B"${example_build_dir}" \ $EXECUTORCH_ROOT/$example_dir -<<<<<<< HEAD:examples/openvino/openvino_build_example.sh cmake --build "${example_build_dir}" -j$(nproc) -======= - cmake --build "${example_build_dir}" -j5 + + # Switch back to the original directory + cd - > /dev/null + # Print a success message echo "Build successfully completed." 
} From 19cbc69adbb6310f266b9f6bdfaeb47e6eeb18ff Mon Sep 17 00:00:00 2001 From: dlyakhov Date: Fri, 14 Feb 2025 14:10:02 +0100 Subject: [PATCH 15/18] openvino_executor_runner.cpp: comments --- .../openvino_executor_runner.cpp | 210 +++++++++--------- 1 file changed, 108 insertions(+), 102 deletions(-) diff --git a/examples/openvino/executor_runner/openvino_executor_runner.cpp b/examples/openvino/executor_runner/openvino_executor_runner.cpp index 36c957bc433..c3922c793a3 100644 --- a/examples/openvino/executor_runner/openvino_executor_runner.cpp +++ b/examples/openvino/executor_runner/openvino_executor_runner.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -52,57 +53,54 @@ using executorch::runtime::Result; using executorch::runtime::Span; using executorch::runtime::TensorInfo; -std::pair benchmark_method(Result &method, - int num_iterations) { - Error status = Error::Ok; - auto before_exec = std::chrono::high_resolution_clock::now(); - for (int i = 0; i < num_iterations; ++i) { - status = method->execute(); - } - auto after_exec = std::chrono::high_resolution_clock::now(); - double elapsed_time = std::chrono::duration_cast( - after_exec - before_exec) - .count() / - 1000.0; - return std::make_pair(elapsed_time, status); +std::function build_set_input_tensor( + Result &method, std::vector &inputs, + const std::vector> input_paths) { + return [&inputs, &method, input_paths](size_t idx) -> void { + const MethodMeta method_meta = method->method_meta(); + for (int input_index = 0; input_index < method->inputs_size(); + ++input_index) { + + Result tensor_meta = + method_meta.input_tensor_meta(input_index); + auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); + + std::ifstream fin(input_paths[idx][input_index], std::ios::binary); + fin.seekg(0, fin.end); + size_t file_size = fin.tellg(); + + ET_CHECK_MSG( + file_size == tensor_meta->nbytes(), + "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", + input_index, file_size, tensor_meta->nbytes()); + + fin.seekg(0, fin.beg); + fin.read(static_cast(input_data_ptr), file_size); + fin.close(); + } + }; } -void dump_outputs(Result &method, const char *output_folder_path, - size_t index = 0) { - std::vector outputs(method->outputs_size()); - Error status = Error::Ok; - status = method->get_outputs(outputs.data(), outputs.size()); - ET_CHECK(status == Error::Ok); - for (size_t output_index = 0; output_index < method->outputs_size(); - output_index++) { - auto output_tensor = outputs[output_index].toTensor(); - auto output_file_name = std::string(output_folder_path) + "/output_" + - std::to_string(index) + "_" + - std::to_string(output_index) + ".raw"; - std::ofstream fout(output_file_name.c_str(), std::ios::binary); - fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); - fout.close(); - } +std::function +build_dump_outputs(std::vector &outputs, const size_t output_size, + const std::string output_folder_path) { + return [&outputs, output_folder_path, output_size](size_t idx) -> void { + for (size_t output_index = 0; output_index < output_size; output_index++) { + auto output_tensor = outputs[output_index].toTensor(); + auto output_file_name = output_folder_path + "/output_" + + std::to_string(idx) + "_" + + std::to_string(output_index) + ".raw"; + std::ofstream fout(output_file_name.c_str(), std::ios::binary); + fout.write(output_tensor.const_data_ptr(), output_tensor.nbytes()); + fout.close(); + } + }; } -struct ProcessInputsResult { - double total_time; - size_t num_iter; - Error status; -}; - -ProcessInputsResult process_inputs(Result &method, - const char *input_list_path, - const char *output_folder_path) { - std::vector inputs(method->inputs_size()); - ET_LOG(Info, "%zu inputs: ", inputs.size()); - double total_time_elapsed = 0.; +std::vector> +get_inputs_paths(const char *input_list_path) { size_t idx = 0; - Error status = Error::Ok; - status = method->get_inputs(inputs.data(), inputs.size()); - ET_CHECK(status == Error::Ok); - auto split = [](std::string s, std::string delimiter) { size_t pos_start = 0, pos_end, delim_len = delimiter.length(); std::string token; @@ -120,45 +118,19 @@ ProcessInputsResult process_inputs(Result &method, // Read raw input tensor file names from input list file and // iterate each raw input tensor file to read values std::ifstream input_list(input_list_path); - if (input_list.is_open()) { - size_t num_inputs = method->inputs_size(); - std::string file_path; - while (std::getline(input_list, file_path)) { - auto input_files = split(file_path, " "); - if (input_files.size() == 0) { - break; - } - for (int input_index = 0; input_index < num_inputs; ++input_index) { - MethodMeta method_meta = method->method_meta(); - Result tensor_meta = - method_meta.input_tensor_meta(input_index); - auto input_data_ptr = inputs[input_index].toTensor().data_ptr(); - - std::ifstream fin(input_files[input_index], std::ios::binary); - fin.seekg(0, fin.end); - size_t file_size = fin.tellg(); - - ET_CHECK_MSG( - file_size == tensor_meta->nbytes(), - "Input(%d) size mismatch. 
file bytes: %zu, tensor bytes: %zu", - input_index, file_size, tensor_meta->nbytes()); - - fin.seekg(0, fin.beg); - fin.read(static_cast(input_data_ptr), file_size); - fin.close(); - } - double time_elapsed; - std::tie(time_elapsed, status) = benchmark_method(method, 1); - if (status != Error::Ok) { - return {total_time_elapsed, idx, status}; - } - total_time_elapsed += time_elapsed; - dump_outputs(method, output_folder_path, idx++); - } - } else { + if (!input_list.is_open()) { ET_CHECK_MSG(false, "Failed to read input list file: %s", input_list_path); } - return {total_time_elapsed, idx, status}; + std::string file_path; + auto retval = std::vector>(); + while (std::getline(input_list, file_path)) { + auto input_files = split(file_path, " "); + if (input_files.size() == 0) { + break; + } + retval.push_back(input_files); + } + return retval; } int main(int argc, char **argv) { @@ -240,43 +212,77 @@ int main(int argc, char **argv) { ET_LOG(Info, "Method loaded."); // Prepare the input tensors for the method - auto inputs = prepare_input_tensors(*method); - ET_CHECK_MSG(inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, - static_cast(inputs.error())); + auto method_inputs = prepare_input_tensors(*method); + ET_CHECK_MSG(method_inputs.ok(), "Could not prepare inputs: 0x%" PRIx32, + static_cast(method_inputs.error())); - double elapsed_time; Error status = Error::Ok; + std::vector inputs(method->inputs_size()); + ET_LOG(Info, "Number of input layers: %zu", inputs.size()); + + status = method->get_inputs(inputs.data(), inputs.size()); + ET_CHECK(status == Error::Ok); // If the input path list is provided, read input tensors from the files - if (!(FLAGS_input_list_path.empty()) and - !(FLAGS_output_folder_path.empty())) { + std::function set_input_tensor; + if (!FLAGS_input_list_path.empty()) { const char *input_list_path = FLAGS_input_list_path.c_str(); ET_LOG(Info, "Loading input tensors from the list provided in %s.", input_list_path); - const char *output_folder_path = FLAGS_output_folder_path.c_str(); - auto res = process_inputs(method, input_list_path, output_folder_path); - elapsed_time = res.total_time; - status = res.status; - num_iterations = res.num_iter; + const auto input_paths = get_inputs_paths(input_list_path); + num_iterations = input_paths.size(); + ET_LOG(Info, "Number of iters is set to the len of the inputs: %u.", + num_iterations); + + set_input_tensor = build_set_input_tensor(method, inputs, input_paths); } else { + set_input_tensor = [](size_t idx) -> void {}; + } + + ET_LOG(Info, "%zu Number of output layers: ", method->outputs_size()); + + std::vector outputs(method->outputs_size()); + status = method->get_outputs(outputs.data(), outputs.size()); + ET_CHECK(status == Error::Ok); - // Measure execution time for inference - std::tie(elapsed_time, status) = benchmark_method(method, num_iterations); + std::function dump_outputs; + if (!FLAGS_output_folder_path.empty()) { // Retrieve and print the method outputs - ET_LOG(Info, "%zu Number of outputs: ", method->outputs_size()); // If output folder path is provided, save output tensors // into raw tensor files. 
-    if (!(FLAGS_output_folder_path.empty())) {
-      const char *output_folder_path = FLAGS_output_folder_path.c_str();
-      ET_LOG(Info, "Saving output tensors into the output folder: %s.",
-             output_folder_path);
-      dump_outputs(method, output_folder_path);
+    const char *output_folder_path = FLAGS_output_folder_path.c_str();
+    ET_LOG(Info, "Saving output tensors into the output folder: %s.",
+           output_folder_path);
+    dump_outputs = build_dump_outputs(outputs, outputs.size(),
+                                      std::string(output_folder_path));
+
+  } else {
+    dump_outputs = [](size_t idx) {};
+  }
+
+  // Measure execution time for inference
+
+  double total_time_elapsed = 0.;
+  for (int i = 0; (i < num_iterations and status == Error::Ok); ++i) {
+    set_input_tensor(i);
+    auto before_exec = std::chrono::high_resolution_clock::now();
+    status = method->execute();
+    auto after_exec = std::chrono::high_resolution_clock::now();
+    if (status == Error::Ok) {
+      dump_outputs(i);
     }
+    double elapsed_time = std::chrono::duration_cast(
+                              after_exec - before_exec)
+                              .count() /
+                          1000.0;
+    total_time_elapsed += elapsed_time;
   }
+  // Log execution time and average time per iteration
   ET_LOG(Info, "%d inference took %f ms, avg %f ms", num_iterations,
-         elapsed_time, elapsed_time / static_cast(num_iterations));
+         total_time_elapsed,
+         total_time_elapsed / static_cast(num_iterations));
   ET_CHECK_MSG(status == Error::Ok,
                "Execution of method %s failed with status 0x%" PRIx32,
                method_name, static_cast(status));

From 0892b9d47760d330d06e1f2e816872f01ef2fbed Mon Sep 17 00:00:00 2001
From: Daniil Lyakhov
Date: Fri, 14 Feb 2025 15:52:53 +0100
Subject: [PATCH 16/18] Apply suggestions from code review

Co-authored-by: Yamini Nimmagadda
---
 examples/openvino/aot/aot_openvino_compiler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py
index 4f45fc1d426..25537910fe2 100644
--- a/examples/openvino/aot/aot_openvino_compiler.py
+++ b/examples/openvino/aot/aot_openvino_compiler.py
@@ -231,7 +231,7 @@ def main(
         raise ValueError(msg)

     if not dataset_path:
-        msg = "Validateion requires a calibration dataset."
+        msg = "Validation requires a calibration dataset."
         raise ValueError(msg)

     print("Start validation of the model:")
@@ -266,7 +266,7 @@ def main(
     parser.add_argument(
         "--validate",
         action="store_true",
-        help="Enable model validation. --dataset argument is requred for the validation.",
+        help="Enable model validation. --dataset argument is required for the validation.",
     )
     parser.add_argument("--dataset", type=str, help="Path to the validation dataset.")
     parser.add_argument(

From d1aa42556665eb837368e2f74faf286fc52ba562 Mon Sep 17 00:00:00 2001
From: dlyakhov
Date: Fri, 14 Feb 2025 16:09:24 +0100
Subject: [PATCH 17/18] aot_openvino_compiler.py: comments

---
 examples/openvino/aot/aot_openvino_compiler.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/examples/openvino/aot/aot_openvino_compiler.py b/examples/openvino/aot/aot_openvino_compiler.py
index 25537910fe2..f0844289580 100644
--- a/examples/openvino/aot/aot_openvino_compiler.py
+++ b/examples/openvino/aot/aot_openvino_compiler.py
@@ -29,7 +29,6 @@
 from torch.ao.quantization.quantize_pt2e import prepare_pt2e
 from torch.export import export
 from torch.export.exported_program import ExportedProgram
-from torch.fx.passes.graph_drawer import FxGraphDrawer
 from transformers import AutoModel

 import nncf
@@ -72,11 +71,6 @@ def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, mod
     return calibration_dataset


-def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str):
-    g = FxGraphDrawer(model, output_svg_path)
-    g.get_dot_graph().write_svg(output_svg_path)
-
-
 def dump_inputs(calibration_dataset, dest_path):
     input_files, targets = [], []
     for idx, data in enumerate(calibration_dataset):
@@ -204,7 +198,6 @@ def main(
         quantized_model = quantize_model(
             aten_dialect.module(), calibration_dataset, use_nncf=quantization_flow == "nncf"
         )
-        visualize_fx_model(quantized_model, f"{model_name}_int8.svg")

         aten_dialect: ExportedProgram = export(quantized_model, example_args)

From b9b604d8ed231355ed437fff05a0d213010f793e Mon Sep 17 00:00:00 2001
From: dlyakhov
Date: Fri, 14 Feb 2025 17:24:28 +0100
Subject: [PATCH 18/18] README

---
 examples/openvino/aot/README.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/examples/openvino/aot/README.md b/examples/openvino/aot/README.md
index 900a5b6cbe0..884ed55849f 100644
--- a/examples/openvino/aot/README.md
+++ b/examples/openvino/aot/README.md
@@ -25,7 +25,7 @@ python aot_openvino_compiler.py --suite --model --inp
   - For `torchvision`: `resnet18`, `mobilenet_v2`
   - For `huggingface`: `bert-base-uncased`, `distilbert-base-uncased`

-- **`--input_shape`**:
+- **`--input_shape`**(optional):
   Input shape for the model. Provide this as a **list** or **tuple**. Examples:
   - `[1, 3, 224, 224]` (Zsh users: wrap in quotes)
   - `(1, 3, 224, 224)`
@@ -38,11 +38,15 @@ python aot_openvino_compiler.py --suite --model --inp
 - **`--quantize`** (optional):
   Enable model quantization. --dataset argument is requred for the quantization. `huggingface` suite does not supported yet.
+- **`--quantization_flow`** (optional):
+  Specifies the way to quantize torch.fx.GraphModule.
+  Supported values:
+  - `nncf`: `nncf quantize_pt2e` API (default)
+  - `pt2e`: torch ao quantization pipeline.
 - **`--validate`** (optional):
   Enable model validation. --dataset argument is requred for the validation. `huggingface` suite does not supported yet.
-
 - **`--dataset`** (optional):
   Path to the imagenet-like calibration dataset.
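
As an illustration of the flags documented in the README change above, a hypothetical end-to-end invocation that combines quantization, the new `--quantization_flow` option, and validation could look like the sketch below. The model name, input shape, and dataset path are placeholder values taken from the README's own examples, not values mandated by these patches.

```bash
# Sketch: quantize resnet18 with the default NNCF quantize_pt2e flow,
# then validate the quantized model on an ImageNet-like dataset.
# /path/to/imagenet is a placeholder for a local ImageNet-style folder.
python aot_openvino_compiler.py \
    --suite torchvision \
    --model resnet18 \
    --input_shape "[1, 3, 224, 224]" \
    --quantize \
    --quantization_flow nncf \
    --validate \
    --dataset /path/to/imagenet
```

Passing `--quantization_flow pt2e` instead would route quantization through the Torch AO pipeline; omitting the flag keeps the documented `nncf` default.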