55# directory of this source tree for more details.
66
77from collections import defaultdict
8+ from enum import Enum
89from typing import Dict , List , Optional , Tuple
910
1011import torch .fx
2021import nncf
2122import nncf .common .quantization as q
2223import nncf .experimental .torch .fx as nncf_fx
23- import nncf .parameters as p
24- import nncf .quantization .advanced_parameters as advanced_p
2524from nncf .common .graph .graph import NNCFGraph
2625
# Key string for quantization annotations.
# NOTE(review): presumably the key under which annotations are stored on graph
# nodes; the use sites are outside this view, so confirm before relying on it.
QUANT_ANNOTATION_KEY = "quantization_annotation"
2827
2928
class QuantizationMode(Enum):
    """
    Defines special quantization modes.

    - INT8_SYM: INT8 symmetric quantization for both activations and weights.
    - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights.
    - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models.
    """

    INT8_SYM = "int8_sym"
    INT8_MIXED = "int8_mixed"
    INT8_TRANSFORMER = "int8_transformer"
42+
3043class OpenVINOQuantizer (Quantizer ):
3144 """
3245 Implementation of the Torch AO quantizer which annotates models with quantization annotations
@@ -36,49 +49,31 @@ class OpenVINOQuantizer(Quantizer):
def __init__(
    self,
    *,
    mode: Optional[QuantizationMode] = QuantizationMode.INT8_SYM,
    ignored_scope: Optional[nncf.IgnoredScope] = None,
    **kwargs,
):
    """
    Build the underlying NNCF MinMaxQuantization algorithm for the chosen mode.

    :param mode: Defines special quantization modes.
        - INT8_SYM: INT8 symmetric quantization for both activations and weights.
        - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights.
        - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models.
        Default value is INT8_SYM. A raw mode value (e.g. ``"int8_mixed"``) is
        accepted and converted to the matching ``QuantizationMode`` member.
        ``None`` selects the same configuration as INT8_TRANSFORMER.
    :param ignored_scope: An ignored scope that defined the list of model control
        flow graph nodes to be ignored during quantization.
    :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm.
        Must not contain ``preset``, ``model_type`` or ``ignored_scope``: those
        are supplied here, and a duplicate keyword raises ``TypeError``.
    :raises ValueError: If ``mode`` is neither ``None`` nor a valid
        ``QuantizationMode`` member or value.
    """
    # Normalise and validate up front, so an unrecognised value (for example a
    # plain string) fails loudly instead of silently selecting the transformer
    # configuration through the fall-through branch below.
    if mode is not None:
        mode = QuantizationMode(mode)

    if mode == QuantizationMode.INT8_SYM:
        preset = q.structs.QuantizationPreset.PERFORMANCE
        model_type = None
    elif mode == QuantizationMode.INT8_MIXED:
        preset = q.structs.QuantizationPreset.MIXED
        model_type = None
    else:
        # INT8_TRANSFORMER, or None (kept for backward compatibility): no
        # explicit preset is passed and the transformer model type is requested.
        preset = None
        model_type = nncf.parameters.ModelType.TRANSFORMER

    self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization(
        preset=preset, model_type=model_type, ignored_scope=ignored_scope, **kwargs
    )
8378
8479 def get_nncf_quantization_setup (
0 commit comments