Skip to content

Commit ddcbb11

Browse files
OpenVINOQuantizer: constructor arguments have been refined
1 parent cf6c20e commit ddcbb11

File tree

1 file changed

+33
-38
lines changed

1 file changed

+33
-38
lines changed

backends/openvino/quantizer/quantizer.py

Lines changed: 33 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# directory of this source tree for more details.
66

77
from collections import defaultdict
8+
from enum import Enum
89
from typing import Dict, List, Optional, Tuple
910

1011
import torch.fx
@@ -20,13 +21,25 @@
2021
import nncf
2122
import nncf.common.quantization as q
2223
import nncf.experimental.torch.fx as nncf_fx
23-
import nncf.parameters as p
24-
import nncf.quantization.advanced_parameters as advanced_p
2524
from nncf.common.graph.graph import NNCFGraph
2625

2726
QUANT_ANNOTATION_KEY = "quantization_annotation"
2827

2928

29+
class QuantizationMode(Enum):
30+
"""
31+
Defines special quantization modes.
32+
33+
- INT8_SYM: INT8 symmetric quantization for both activations and weights.
34+
- INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights.
35+
- INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models.
36+
"""
37+
38+
INT8_SYM = "int8_sym"
39+
INT8_MIXED = "int8_mixed"
40+
INT8_TRANSFORMER = "int8_transformer"
41+
42+
3043
class OpenVINOQuantizer(Quantizer):
3144
"""
3245
Implementation of the Torch AO quantizer which annotates models with quantization annotations
@@ -36,49 +49,31 @@ class OpenVINOQuantizer(Quantizer):
3649
def __init__(
3750
self,
3851
*,
39-
mode: Optional[p.QuantizationMode] = None,
40-
preset: Optional[q.structs.QuantizationPreset] = None,
41-
target_device: p.TargetDevice = p.TargetDevice.ANY,
42-
transformer_model: bool = False,
52+
mode: Optional[QuantizationMode] = QuantizationMode.INT8_SYM,
4353
ignored_scope: Optional[nncf.IgnoredScope] = None,
44-
overflow_fix: Optional[advanced_p.OverflowFix] = None,
45-
quantize_outputs: bool = False,
46-
activations_quantization_params: Optional[advanced_p.QuantizationParameters] = None,
47-
weights_quantization_params: Optional[advanced_p.QuantizationParameters] = None,
54+
**kwargs,
4855
):
4956
"""
50-
:param mode: Defines optimization mode for the algorithm. None by default.
51-
:param preset: A preset controls the quantization mode (symmetric and asymmetric).
52-
It can take the following values:
53-
- `performance`: Symmetric quantization of weights and activations.
54-
- `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
55-
Default value is None. In this case, `mixed` preset is used for `transformer`
56-
model type otherwise `performance`.
57-
:param target_device: A target device the specificity of which will be taken
58-
into account while compressing in order to obtain the best performance
59-
for this type of device, defaults to TargetDevice.ANY.
60-
:param model_type: Model type is needed to specify additional patterns
61-
in the model. Supported only `transformer` now.
57+
:param mode: Defines special quantization modes.
58+
- INT8_SYM: INT8 symmetric quantization for both activations and weights.
59+
- INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights.
60+
- INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models.
61+
Default value is INT8_SYM.
6262
:param ignored_scope: An ignored scope that defines the list of model control
6363
flow graph nodes to be ignored during quantization.
64-
:param overflow_fix: This option controls whether to apply the overflow issue
65-
fix for the 8-bit quantization.
66-
:param quantize_outputs: Whether to insert additional quantizers right before
67-
each of the model outputs.
68-
:param activations_quantization_params: Quantization parameters for model
69-
activations.
70-
:param weights_quantization_params: Quantization parameters for model weights.
64+
:param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm.
7165
"""
66+
if mode == QuantizationMode.INT8_MIXED:
67+
preset = q.structs.QuantizationPreset.MIXED
68+
model_type = None
69+
elif mode == QuantizationMode.INT8_SYM:
70+
preset = q.structs.QuantizationPreset.PERFORMANCE
71+
model_type = None
72+
else:
73+
preset = None
74+
model_type = nncf.parameters.ModelType.TRANSFORMER
7275
self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization(
73-
mode=mode,
74-
preset=preset,
75-
target_device=target_device,
76-
model_type=p.ModelType.TRANSFORMER if transformer_model else None,
77-
ignored_scope=ignored_scope,
78-
overflow_fix=overflow_fix,
79-
quantize_outputs=quantize_outputs,
80-
activations_quantization_params=activations_quantization_params,
81-
weights_quantization_params=weights_quantization_params,
76+
preset=preset, model_type=model_type, ignored_scope=ignored_scope, **kwargs
8277
)
8378

8479
def get_nncf_quantization_setup(

0 commit comments

Comments
 (0)