Skip to content

Commit 3e17d09

Browse files
authored
Merge pull request #29 from daniil-lyakhov/dl/quantize_model_ex
Improve quantize_model UX
2 parents 8b9f76e + e76c440 commit 3e17d09

File tree

4 files changed

+51
-21
lines changed

4 files changed

+51
-21
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from .quantizer import OpenVINOQuantizer
1+
from .quantizer import OpenVINOQuantizer, quantize_model
22

3-
__all__ = [OpenVINOQuantizer]
3+
__all__ = [OpenVINOQuantizer, quantize_model]

backends/openvino/quantizer/quantizer.py

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from collections import defaultdict
88
from enum import Enum
9-
from typing import Dict, List, Optional, Tuple
9+
from typing import Any, Callable, Dict, List, Optional, Tuple
1010

1111
import nncf
1212
import nncf.common.quantization as quantization
@@ -345,31 +345,57 @@ def validate(self, model: torch.fx.GraphModule) -> None:
345345
def quantize_model(
346346
captured_model: torch.fx.GraphModule,
347347
calibration_dataset: torch.utils.data.DataLoader,
348+
*,
349+
mode: QuantizationMode = QuantizationMode.INT8_SYM,
350+
subset_size: int = 300,
351+
fast_bias_correction: Optional[bool] = True,
352+
smooth_quant: bool = False,
353+
transform_fn: Optional[Callable[[Any], Any]] = None,
354+
extra_quantizer_options: Optional[Dict[str, Any]] = None,
355+
**kwargs,
348356
) -> torch.fx.GraphModule:
349357
"""
350-
Quantizes a model using either NNCF-based or PTQ-based quantization.
358+
Quantizes a model using NNCF quantize_pt2e API.
351359
352360
:param captured_model: The model to be quantized, represented as a torch.fx.GraphModule.
353361
:param calibration_dataset: A DataLoader containing calibration data for quantization.
362+
:param mode: Defines special quantization modes.
363+
- INT8_SYM: INT8 symmetric quantization for both activations and weights.
364+
- INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights.
365+
- INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models
366+
Default value is INT8_SYM.
367+
:param subset_size: Size of a subset to calculate activations
368+
statistics used for quantization.
369+
:param fast_bias_correction: Setting this option to `False` enables a different
370+
bias correction method which is more accurate, in general, and takes
371+
more time but requires less memory. None disables the bias correction algorithm.
372+
:param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm.
373+
:param extra_quantizer_options: A dictionary containing additional configuration options
374+
for the OpenVINOQuantizer.
375+
:param kwargs: The keyword arguments for the nncf quantize_pt2e function.
354376
:return: The quantized model as a torch.fx.GraphModule.
355377
"""
356-
quantizer = OpenVINOQuantizer()
378+
extra_quantizer_options = extra_quantizer_options or {}
379+
if "mode" in extra_quantizer_options:
380+
print(
381+
f'Ignoring "mode" from the quantizer_config. Using parameter mode = {mode}'
382+
)
383+
del extra_quantizer_options["mode"]
384+
385+
quantizer = OpenVINOQuantizer(mode=mode, **extra_quantizer_options)
357386

358387
print("PTQ: Quantize the model")
359-
default_subset_size = 300
360-
batch_size = calibration_dataset.batch_size
361-
subset_size = (default_subset_size // batch_size) + int(
362-
default_subset_size % batch_size > 0
363-
)
364388

365-
def transform(x):
366-
return x[0]
389+
if "fold_quantize" not in kwargs:
390+
kwargs["fold_quantize"] = False
367391

368392
quantized_model = nncf_fx.quantize_pt2e(
369393
captured_model,
370394
quantizer,
371395
subset_size=subset_size,
372-
calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform),
373-
fold_quantize=False,
396+
calibration_dataset=nncf.Dataset(calibration_dataset, transform_fn),
397+
fast_bias_correction=fast_bias_correction,
398+
smooth_quant=smooth_quant,
399+
**kwargs,
374400
)
375401
return quantized_model

examples/openvino/README.md

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,6 @@ python aot_openvino_compiler.py --suite <MODEL_SUITE> --model <MODEL_NAME> --inp
5757
- **`--quantize`** (optional):
5858
Enable model quantization. The `--dataset` argument is required for quantization. The `huggingface` suite is not supported yet.
5959

60-
- **`--quantization_flow`** (optional):
61-
Specifies the way to quantize torch.fx.GraphModule.
62-
Supported values:
63-
- `nncf`: `nncf quantize_pt2e` API (default)
64-
- `pt2e`: torch ao quantization pipeline.
65-
6660
- **`--validate`** (optional):
6761
Enable model validation. The `--dataset` argument is required for validation. The `huggingface` suite is not supported yet.
6862

examples/openvino/aot_openvino_compiler.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import torch
1414
import torchvision.models as torchvision_models
1515
from executorch.backends.openvino.partitioner import OpenvinoPartitioner
16-
from executorch.backends.openvino.quantizer.quantizer import quantize_model
16+
from executorch.backends.openvino.quantizer import quantize_model
1717
from executorch.exir import EdgeProgramManager, to_edge_transform_and_lower
1818
from executorch.exir.backend.backend_details import CompileSpec
1919
from executorch.extension.pybindings.portable_lib import ( # @manual
@@ -182,9 +182,19 @@ def main(
182182
if not dataset_path:
183183
msg = "Quantization requires a calibration dataset."
184184
raise ValueError(msg)
185+
186+
subset_size = 300
187+
batch_size = calibration_dataset.batch_size
188+
subset_size = (subset_size // batch_size) + int(subset_size % batch_size > 0)
189+
190+
def transform_fn(x):
191+
return x[0]
192+
185193
quantized_model = quantize_model(
186194
aten_dialect.module(),
187195
calibration_dataset,
196+
subset_size=subset_size,
197+
transform_fn=transform_fn,
188198
)
189199

190200
aten_dialect: ExportedProgram = export(quantized_model, example_args)

0 commit comments

Comments
 (0)