Skip to content

Commit c1ad164

Browse files
committed
Merge remote-tracking branch 'ynimmaga/openvino_backend' into additional_updates
2 parents f30ee76 + 3e17d09 commit c1ad164

File tree

5 files changed

+52
-29
lines changed

5 files changed

+52
-29
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from .quantizer import OpenVINOQuantizer
1+
from .quantizer import OpenVINOQuantizer, quantize_model
22

3-
__all__ = [OpenVINOQuantizer]
3+
__all__ = [OpenVINOQuantizer, quantize_model]

backends/openvino/quantizer/quantizer.py

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from collections import defaultdict
88
from enum import Enum
9-
from typing import Dict, List, Optional, Tuple
9+
from typing import Any, Callable, Dict, List, Optional, Tuple
1010

1111
import nncf
1212
import nncf.common.quantization as quantization
@@ -341,42 +341,61 @@ def _get_torch_ao_qspec_from_qp(
341341
def validate(self, model: torch.fx.GraphModule) -> None:
342342
pass
343343

344-
def transform_for_annotation(
345-
self, model: torch.fx.GraphModule
346-
) -> torch.fx.GraphModule:
347-
# Fold constant branches to avoid their quantization
348-
nncf_fx.transformations.fold_constant_except_qdq(model)
349-
return model
350-
351344

352345
def quantize_model(
353346
captured_model: torch.fx.GraphModule,
354347
calibration_dataset: torch.utils.data.DataLoader,
348+
*,
349+
mode: QuantizationMode = QuantizationMode.INT8_SYM,
350+
subset_size: int = 300,
351+
fast_bias_correction: Optional[bool] = True,
352+
smooth_quant: bool = False,
353+
transform_fn: Optional[Callable[[Any], Any]] = None,
354+
extra_quantizer_options: Optional[Dict[str, Any]] = None,
355+
**kwargs,
355356
) -> torch.fx.GraphModule:
356357
"""
357-
Quantizes a model using either NNCF-based or PTQ-based quantization.
358+
Quantizes a model using NNCF quantize_pt2e API.
358359
359360
:param captured_model: The model to be quantized, represented as a torch.fx.GraphModule.
360361
:param calibration_dataset: A DataLoader containing calibration data for quantization.
362+
:param mode: Defines special quantization modes.
363+
- INT8_SYM: INT8 symmetric quantization for both activations and weights.
364+
- INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights.
365+
- INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models
366+
Default value is INT8_SYM.
367+
:param subset_size: Size of a subset to calculate activations
368+
statistics used for quantization.
369+
:param fast_bias_correction: Setting this option to `False` enables a different
370+
bias correction method which is more accurate, in general, and takes
371+
more time but requires less memory. None disables the bias correction algorithm.
372+
:param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm.
373+
:param extra_quantizer_options: A dictionary containing additional configuration options
374+
for the OpenVINOQuantizer.
375+
:param kwargs: The keyword arguments for the nncf quantize_pt2e function.
361376
:return: The quantized model as a torch.fx.GraphModule.
362377
"""
363-
quantizer = OpenVINOQuantizer()
378+
extra_quantizer_options = extra_quantizer_options or {}
379+
if "mode" in extra_quantizer_options:
380+
print(
381+
f'Ignoring "mode" from the quantizer_config. Using parameter mode = {mode}'
382+
)
383+
del extra_quantizer_options["mode"]
384+
385+
quantizer = OpenVINOQuantizer(mode=mode, **extra_quantizer_options)
364386

365387
print("PTQ: Quantize the model")
366-
default_subset_size = 300
367-
batch_size = calibration_dataset.batch_size
368-
subset_size = (default_subset_size // batch_size) + int(
369-
default_subset_size % batch_size > 0
370-
)
371388

372-
def transform(x):
373-
return x[0]
389+
if "fold_quantize" not in kwargs:
390+
kwargs["fold_quantize"] = False
374391

375392
quantized_model = nncf_fx.quantize_pt2e(
376393
captured_model,
377394
quantizer,
378395
subset_size=subset_size,
379-
calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform),
380-
fold_quantize=False,
396+
calibration_dataset=nncf.Dataset(calibration_dataset, transform_fn),
397+
fast_bias_correction=fast_bias_correction,
398+
smooth_quant=smooth_quant,
399+
**kwargs,
381400
)
382401
return quantized_model

backends/openvino/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
transformers
2-
git+https://github.com/openvinotoolkit/nncf@191b53d#egg=nncf
2+
git+https://github.com/openvinotoolkit/nncf@6b0fc1c#egg=nncf

examples/openvino/README.md

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,6 @@ The python script called `aot_optimize_and_infer.py` allows users to export deep
5858
- **`--quantize`** (optional):
5959
Enable model quantization. The --dataset argument is required for quantization. The `huggingface` suite is not supported yet.
6060

61-
- **`--quantization_flow`** (optional):
62-
Specifies the way to quantize torch.fx.GraphModule.
63-
Supported values:
64-
- `nncf`: `nncf quantize_pt2e` API (default)
65-
- `pt2e`: torch ao quantization pipeline.
66-
6761
- **`--validate`** (optional):
6862
Enable model validation. The --dataset argument is required for validation. The `huggingface` suite is not supported yet.
6963

examples/openvino/aot_optimize_and_infer.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import torch
1515
import torchvision.models as torchvision_models
1616
from executorch.backends.openvino.partitioner import OpenvinoPartitioner
17-
from executorch.backends.openvino.quantizer.quantizer import quantize_model
17+
from executorch.backends.openvino.quantizer import quantize_model
1818
from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower
1919
from executorch.exir.backend.backend_details import CompileSpec
2020
from executorch.extension.pybindings.portable_lib import ( # @manual
@@ -246,9 +246,19 @@ def main(
246246
if not dataset_path:
247247
msg = "Quantization requires a calibration dataset."
248248
raise ValueError(msg)
249+
250+
subset_size = 300
251+
batch_size = calibration_dataset.batch_size
252+
subset_size = (subset_size // batch_size) + int(subset_size % batch_size > 0)
253+
254+
def transform_fn(x):
255+
return x[0]
256+
249257
quantized_model = quantize_model(
250258
aten_dialect.module(),
251259
calibration_dataset,
260+
subset_size=subset_size,
261+
transform_fn=transform_fn,
252262
)
253263

254264
aten_dialect: ExportedProgram = export(quantized_model, example_args)

0 commit comments

Comments
 (0)