Skip to content

Commit f39a51b

Browse files
WIP
1 parent 8dea0e3 commit f39a51b

File tree

3 files changed

+142
-78
lines changed

3 files changed

+142
-78
lines changed

backends/openvino/quantizer/quantizer.py

Lines changed: 38 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,11 @@
1010
# limitations under the License.
1111

1212
from collections import defaultdict
13-
from typing import Dict, List, Optional, Tuple, Union
13+
from typing import Dict, List, Optional, Tuple
1414

1515
import torch.fx
1616
from torch.ao.quantization.observer import HistogramObserver
1717
from torch.ao.quantization.observer import PerChannelMinMaxObserver
18-
from torch.ao.quantization.observer import MinMaxObserver
1918
from torch.ao.quantization.quantizer.quantizer import EdgeOrNode
2019
from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation
2120
from torch.ao.quantization.quantizer.quantizer import QuantizationSpec
@@ -24,25 +23,11 @@
2423
from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec
2524

2625
import nncf
26+
import nncf.common.quantization as q
27+
import nncf.experimental.torch.fx as nncf_fx
28+
import nncf.parameters as p
29+
import nncf.quantization.advanced_parameters as advanced_p
2730
from nncf.common.graph.graph import NNCFGraph
28-
from nncf.common.logging import nncf_logger
29-
from nncf.common.quantization.quantizer_propagation.solver import QuantizerPropagationRule
30-
from nncf.common.quantization.quantizer_setup import QuantizationPointBase
31-
from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup
32-
from nncf.common.quantization.structs import QuantizationPreset
33-
from nncf.common.quantization.structs import QuantizationScheme
34-
from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter
35-
from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name
36-
from nncf.experimental.torch.fx.transformations import fold_constant_except_qdq
37-
from nncf.parameters import ModelType
38-
from nncf.parameters import QuantizationMode
39-
from nncf.parameters import TargetDevice
40-
from nncf.quantization.advanced_parameters import FP8QuantizationParameters
41-
from nncf.quantization.advanced_parameters import OverflowFix
42-
from nncf.quantization.advanced_parameters import QuantizationParameters
43-
from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization
44-
from nncf.scopes import IgnoredScope
45-
from nncf.torch.model_graph_manager import get_weight_tensor_port_ids
4631

4732
QUANT_ANNOTATION_KEY = "quantization_annotation"
4833

@@ -56,16 +41,15 @@ class OpenVINOQuantizer(Quantizer):
5641
def __init__(
5742
self,
5843
*,
59-
mode: Optional[QuantizationMode] = None,
60-
preset: Optional[QuantizationPreset] = None,
61-
target_device: TargetDevice = TargetDevice.ANY,
62-
model_type: Optional[ModelType] = None,
63-
ignored_scope: Optional[IgnoredScope] = None,
64-
overflow_fix: Optional[OverflowFix] = None,
44+
mode: Optional[p.QuantizationMode] = None,
45+
preset: Optional[q.structs.QuantizationPreset] = None,
46+
target_device: p.TargetDevice = p.TargetDevice.ANY,
47+
transformer_model: bool = False,
48+
ignored_scope: Optional[nncf.IgnoredScope] = None,
49+
overflow_fix: Optional[advanced_p.OverflowFix] = None,
6550
quantize_outputs: bool = False,
66-
activations_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None,
67-
weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None,
68-
quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE,
51+
activations_quantization_params: Optional[advanced_p.QuantizationParameters] = None,
52+
weights_quantization_params: Optional[advanced_p.QuantizationParameters] = None,
6953
):
7054
"""
7155
:param mode: Defines optimization mode for the algorithm. None by default.
@@ -89,29 +73,28 @@ def __init__(
8973
:param activations_quantization_params: Quantization parameters for model
9074
activations.
9175
:param weights_quantization_params: Quantization parameters for model weights.
92-
:param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers.
93-
MERGE_ALL_IN_ONE by default.
9476
"""
95-
self._min_max_algo = MinMaxQuantization(
77+
self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization(
9678
mode=mode,
9779
preset=preset,
9880
target_device=target_device,
99-
model_type=model_type,
81+
model_type=p.ModelType.TRANSFORMER if transformer_model else None,
10082
ignored_scope=ignored_scope,
10183
overflow_fix=overflow_fix,
10284
quantize_outputs=quantize_outputs,
10385
activations_quantization_params=activations_quantization_params,
10486
weights_quantization_params=weights_quantization_params,
105-
quantizer_propagation_rule=quantizer_propagation_rule,
10687
)
10788

108-
def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
89+
def get_nncf_quantization_setup(
    self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph
) -> q.quantizer_setup.SingleConfigQuantizerSetup:
    """
    Returns the quantization setup found by the wrapped MinMax algorithm.

    :param model: FX graph module handed to the algorithm.
    :param nncf_graph: NNCFGraph handed to the algorithm together with the model.
    :return: The result of `find_quantization_setup` for the given model and graph.
    """
    min_max_algo = self._min_max_algo
    # The backend entity is set from the model before the setup is searched.
    min_max_algo._set_backend_entity(model)
    quantization_setup = min_max_algo.find_quantization_setup(model, nncf_graph)
    return quantization_setup
11194

11295
def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
113-
nncf_graph = GraphConverter.create_nncf_graph(model)
114-
quantization_setup = self.get_quantization_setup(model, nncf_graph)
96+
nncf_graph = nncf_fx.nncf_graph_builder.GraphConverter.create_nncf_graph(model)
97+
quantization_setup = self.get_nncf_quantization_setup(model, nncf_graph)
11598

11699
graph = model.graph
117100
node_vs_torch_annotation = defaultdict(QuantizationAnnotation)
@@ -138,7 +121,9 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
138121
)
139122
raise nncf.InternalError(msg)
140123

141-
root_target_node = get_graph_node_by_name(graph, root_qp.insertion_point.target_node_name)
124+
root_target_node = nncf_fx.node_utils.get_graph_node_by_name(
125+
graph, root_qp.insertion_point.target_node_name
126+
)
142127
root_edge_or_node = self._get_edge_or_node(root_target_node, root_qp, nncf_graph)
143128

144129
for quantizer_id in quantizer_ids:
@@ -155,10 +140,11 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
155140
for node, annotation in node_vs_torch_annotation.items():
156141
assert QUANT_ANNOTATION_KEY not in node.meta
157142
node.meta[QUANT_ANNOTATION_KEY] = annotation
143+
return model
158144

159145
@staticmethod
160146
def _get_unified_scales_root_quantizer_id(
161-
nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: SingleConfigQuantizerSetup
147+
nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: q.quantizer_setup.SingleConfigQuantizerSetup
162148
) -> int:
163149
"""
164150
Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id`
@@ -184,7 +170,7 @@ def _get_unified_scales_root_quantizer_id(
184170
def _get_edge_or_node_and_annotation(
185171
graph: torch.fx.Graph,
186172
nncf_graph: NNCFGraph,
187-
qp: QuantizationPointBase,
173+
qp: q.quantizer_setup.QuantizationPointBase,
188174
node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation],
189175
) -> Tuple[EdgeOrNode, QuantizationAnnotation]:
190176
"""
@@ -198,13 +184,15 @@ def _get_edge_or_node_and_annotation(
198184
QuantizationAnnotations.
199185
:return: A tuple containing the EdgeOrNode and its associated QuantizationAnnotation.
200186
"""
201-
target_node = get_graph_node_by_name(graph, qp.insertion_point.target_node_name)
187+
target_node = nncf_fx.node_utils.get_graph_node_by_name(graph, qp.insertion_point.target_node_name)
202188
annotation = node_vs_torch_annotation[target_node]
203189
edge_or_node = OpenVINOQuantizer._get_edge_or_node(target_node, qp, nncf_graph)
204190
return edge_or_node, annotation
205191

206192
@staticmethod
207-
def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nncf_graph: NNCFGraph) -> EdgeOrNode:
193+
def _get_edge_or_node(
194+
target_node: torch.fx.Node, qp: q.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph
195+
) -> EdgeOrNode:
208196
"""
209197
Returns the edge or node based on the given target node and quantization point.
210198
@@ -216,10 +204,10 @@ def _get_edge_or_node(target_node: torch.fx.Node, qp: QuantizationPointBase, nnc
216204
ip = qp.insertion_point
217205
if qp.is_weight_quantization_point():
218206
nncf_node = nncf_graph.get_node_by_name(target_node.name)
219-
weights_ports_ids = get_weight_tensor_port_ids(nncf_node, nncf_graph)
207+
weights_ports_ids = nncf.torch.model_graph_manager.get_weight_tensor_port_ids(nncf_node, nncf_graph)
220208
if len(weights_ports_ids) > 1:
221209
# TODO(dlyakhov): support quantization for nodes with several weights
222-
nncf_logger.warning(
210+
nncf.common.logging.nncf_logger.warning(
223211
f"Quantization of the weighted node {target_node.name}"
224212
" is not yet supported by the OpenVINOQuantizer."
225213
f" Only the weight on port ID {weights_ports_ids[0]} will be quantized."
@@ -253,7 +241,7 @@ def _fill_torch_ao_annotation(
253241
annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec
254242

255243
@staticmethod
256-
def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec:
244+
def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) -> QuantizationSpec:
257245
"""
258246
Retrieves the quantization configuration from the given quantization point and
259247
converts it into a QuantizationSpec.
@@ -269,15 +257,16 @@ def _get_torch_ao_qspec_from_qp(qp: QuantizationPointBase) -> QuantizationSpec:
269257
if qconfig.per_channel:
270258
torch_qscheme = (
271259
torch.per_channel_symmetric
272-
if qconfig.mode is QuantizationScheme.SYMMETRIC
260+
if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC
273261
else torch.per_channel_affine
274262
)
275263
else:
276264
torch_qscheme = (
277-
torch.per_tensor_symmetric if qconfig.mode is QuantizationScheme.SYMMETRIC else torch.per_tensor_affine
265+
torch.per_tensor_symmetric
266+
if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC
267+
else torch.per_tensor_affine
278268
)
279269
if is_weight:
280-
observer = PerChannelMinMaxObserver if qconfig.per_channel else MinMaxObserver
281270
observer = PerChannelMinMaxObserver
282271
quant_min = -128
283272
quant_max = 127
@@ -307,5 +296,5 @@ def validate(self, model: torch.fx.GraphModule) -> None:
307296
pass
308297

309298
def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
    """
    Applies `fold_constant_except_qdq` to the model ahead of annotation.

    :param model: FX graph module passed to the folding transformation.
    :return: The same `model` object that was passed in.
    """
    fold_constants = nncf_fx.transformations.fold_constant_except_qdq
    # The return value of the transformation is ignored; the input model is returned.
    fold_constants(model)
    return model

examples/openvino/aot/aot_openvino_compiler.py

Lines changed: 103 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,15 @@
2222
from torch.export.exported_program import ExportedProgram
2323
import argparse
2424
from executorch.backends.openvino import OpenVINOQuantizer
25+
#from nncf.experimental.torch.fx.quantization.quantizer.openvino_quantizer import OpenVINOQuantizer
26+
from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e
2527
from torch.ao.quantization.quantize_pt2e import (
2628
convert_pt2e,
2729
prepare_pt2e,
2830
)
29-
31+
from sklearn.metrics import accuracy_score
32+
from timm.data import resolve_data_config
33+
from timm.data.transforms_factory import create_transform
3034

3135
# Function to load a model based on the selected suite
3236
def load_model(suite: str, model_name: str):
@@ -42,20 +46,17 @@ def load_model(suite: str, model_name: str):
4246
raise ValueError(f"Unsupported model suite: {suite}")
4347

4448

45-
def load_calibration_dataset(dataset_path: str):
49+
def load_calibration_dataset(dataset_path: str, suite: str, model: torch.nn.Module):
4650
val_dir = f"{dataset_path}/val"
4751

48-
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
52+
if suite == "torchvision":
53+
transform = torchvision_models.get_model_weights(model.name).transforms()
54+
else:
55+
transform = create_transform(**resolve_data_config(model.pretrained_cfg, model=model))
4956

5057
val_dataset = datasets.ImageFolder(
5158
val_dir,
52-
transforms.Compose(
53-
[
54-
transforms.Resize(64), # for tiny imagenet
55-
transforms.ToTensor(),
56-
normalize,
57-
]
58-
),
59+
transform=transform
5960
)
6061

6162
calibration_dataset = torch.utils.data.DataLoader(
@@ -65,21 +66,6 @@ def load_calibration_dataset(dataset_path: str):
6566
return calibration_dataset
6667

6768

68-
def quantize_model(model: torch.fx.GraphModule, example_args, subset_size=300):
69-
#quantizer = OpenVINOQuantizer(ignored_scope=nncf.IgnoredScope(types=["__getitem__", "layer_norm"]))
70-
quantizer = OpenVINOQuantizer()
71-
72-
print("PTQ: Annotate the model...")
73-
annotated_model = prepare_pt2e(model, quantizer)
74-
75-
print("PTQ: Calibrate the model...")
76-
annotated_model(*example_args)
77-
78-
print("PTQ: Convert the quantized model...")
79-
quantized_model = convert_pt2e(annotated_model, fold_quantize=False)
80-
return quantized_model
81-
82-
8369
def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path: str, device: str):
8470
# Ensure input_shape is a tuple
8571
if isinstance(input_shape, list):
@@ -98,15 +84,24 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path:
9884
aten_dialect: ExportedProgram = export(model, example_args)
9985

10086
if quantize:
87+
if suite == "huggingface":
88+
raise ValueError("Quantization of {suite} models did not support yet.")
89+
10190
# Quantize model
10291
if not dataset_path:
10392
raise ValueError("Quantization requires a calibration dataset.")
104-
#calibration_dataset = load_calibration_dataset(dataset_path)
93+
calibration_dataset = load_calibration_dataset(dataset_path, suite, model)
10594

10695
captured_model = aten_dialect.module()
10796
#visualize_fx_model(captured_model, f"{model_name}_fp32.svg")
108-
quantized_model = quantize_model(captured_model, example_args)
109-
#visualize_fx_model(quantized_model, f"{model_name}_int8.svg")
97+
quantizer = OpenVINOQuantizer()
98+
99+
print("PTQ: Quantize the model")
100+
def transform(x):
101+
return x[0]
102+
103+
quantized_model = quantize_pt2e(captured_model, quantizer, calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform), fold_quantize=False)
104+
110105
aten_dialect: ExportedProgram = export(quantized_model, example_args)
111106

112107
# Convert to edge dialect
@@ -121,16 +116,95 @@ def main(suite: str, model_name: str, input_shape, quantize: bool, dataset_path:
121116
exec_prog = lowered_module.to_executorch(config=executorch.exir.ExecutorchBackendConfig())
122117

123118
# Serialize and save it to a file
124-
model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte"
119+
model_name = f"{model_name}_{'int8' if quantize else 'fp32'}.pte"
125120
with open(model_name, "wb") as file:
126121
exec_prog.write_to_file(file)
127122
print(f"Model exported and saved as {model_name} on {device}.")
128123

124+
if quantize:
125+
print("Start validation of the quantized model:")
126+
127+
# 1: Dump inputs
128+
import os
129+
import shutil
130+
131+
dest_path = "tmp_inputs"
132+
out_path = "tmp_outputs"
133+
targets, input_files = [], []
134+
for d in [dest_path, out_path]:
135+
if os.path.exists(d):
136+
shutil.rmtree(d)
137+
os.makedirs(d)
138+
input_list = ""
139+
for idx, data in enumerate(calibration_dataset):
140+
feature, target = data
141+
targets.append(target)
142+
file_name = f"{dest_path}/input_{idx}_0.raw"
143+
input_list += file_name + " "
144+
if not isinstance(feature, torch.Tensor):
145+
feature = torch.tensor(feature)
146+
feature.detach().numpy().tofile(file_name)
147+
input_files.append(file_name)
148+
149+
inp_list_file = os.path.join(dest_path, "in_list.txt")
150+
with open(inp_list_file, "w") as f:
151+
input_list = input_list.strip() + "\n"
152+
f.write(input_list)
153+
154+
# 2: Run the executor
155+
print("Run openvino_executor_runner...")
156+
import subprocess
157+
breakpoint()
158+
subprocess.run(["../../../cmake-openvino-out/examples/openvino/openvino_executor_runner",
159+
f"--model_path={model_name}",
160+
f"--input_list_path={inp_list_file}",
161+
f"--output_folder_path={out_path}",
162+
#f"--num_iter={len(input_files)}"
163+
])
164+
165+
# 3: load the outputs and compare with the targets
166+
import numpy as np
167+
predictions = []
168+
for i in range(len(input_files)):
169+
predictions.append(
170+
np.fromfile(
171+
os.path.join(out_path, f"output_{i}.raw"), dtype=np.float32
172+
)
173+
)
174+
175+
k_val = [1, 5]
176+
acc_top1 = accuracy_score(predictions, targets)
177+
print(f"acc@1: {acc_top1}")
178+
179+
129180
from torch.fx.passes.graph_drawer import FxGraphDrawer
130181
def visualize_fx_model(model: torch.fx.GraphModule, output_svg_path: str):
    """Draw the FX graph of ``model`` and write it as an SVG to ``output_svg_path``.

    The output path is also passed as the second ``FxGraphDrawer`` argument.
    """
    drawer = FxGraphDrawer(model, output_svg_path)
    dot_graph = drawer.get_dot_graph()
    dot_graph.write_svg(output_svg_path)
133184

185+
def generate_inputs(dest_path: str, file_name: str, inputs=None, input_list=None):
    """Write an optional input-list file and optional raw input tensors to ``dest_path``.

    ``dest_path`` must already exist; this function does not create it.

    :param dest_path: Directory that receives every file written here.
    :param file_name: Name of the input-list file inside ``dest_path``. Only used
        when ``input_list`` is not None.
    :param inputs: Optional iterable of samples, each an iterable of tensors (or
        values accepted by ``torch.tensor``). Element ``i`` of sample ``idx`` is
        dumped to ``{dest_path}/input_{idx}_{i}.raw``.
    :param input_list: Optional text written verbatim to the input-list file.
    :return: Tuple ``(input_list_file, input_files)``: the path of the list file
        (None if ``input_list`` is None) and the raw file paths in write order.
    """
    input_list_file = None
    input_files = []

    # Prepare input list
    if input_list is not None:
        input_list_file = f"{dest_path}/{file_name}"
        # Leaving the ``with`` block flushes and closes the file.
        with open(input_list_file, "w") as f:
            f.write(input_list)

    # Prepare input data
    if inputs is not None:
        for idx, data in enumerate(inputs):
            for i, d in enumerate(data):
                # Separate local name so the ``file_name`` parameter is not rebound.
                raw_file = f"{dest_path}/input_{idx}_{i}.raw"
                if not isinstance(d, torch.Tensor):
                    d = torch.tensor(d)
                # ``.cpu()`` is a no-op for CPU tensors and makes ``.numpy()``
                # valid for tensors that live on another device.
                d.detach().cpu().numpy().tofile(raw_file)
                input_files.append(raw_file)

    return input_list_file, input_files
207+
134208
if __name__ == "__main__":
135209
# Argument parser for dynamic inputs
136210
parser = argparse.ArgumentParser(description="Export models with executorch.")

0 commit comments

Comments
 (0)