5
5
# directory of this source tree for more details.
6
6
7
7
from collections import defaultdict
8
+ from enum import Enum
8
9
from typing import Dict , List , Optional , Tuple
9
10
10
11
import torch .fx
20
21
import nncf
21
22
import nncf .common .quantization as q
22
23
import nncf .experimental .torch .fx as nncf_fx
23
- import nncf .parameters as p
24
- import nncf .quantization .advanced_parameters as advanced_p
25
24
from nncf .common .graph .graph import NNCFGraph
26
25
27
26
QUANT_ANNOTATION_KEY = "quantization_annotation"
28
27
29
28
29
class QuantizationMode(Enum):
    """
    Defines special quantization modes.

    - INT8_SYM: INT8 symmetric quantization for both activations and weights.
    - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights.
    - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models.
    """

    # The string values are the stable identifiers of each mode, so a member
    # can be recovered from its value, e.g. QuantizationMode("int8_sym").
    INT8_SYM = "int8_sym"
    INT8_MIXED = "int8_mixed"
    INT8_TRANSFORMER = "int8_transformer"
41
+
42
+
30
43
class OpenVINOQuantizer (Quantizer ):
31
44
"""
32
45
Implementation of the Torch AO quantizer which annotates models with quantization annotations
@@ -36,49 +49,31 @@ class OpenVINOQuantizer(Quantizer):
36
49
def __init__(
    self,
    *,
    mode: Optional[QuantizationMode] = QuantizationMode.INT8_SYM,
    ignored_scope: Optional[nncf.IgnoredScope] = None,
    **kwargs,
):
    """
    :param mode: Defines special quantization modes.
        - INT8_SYM: INT8 symmetric quantization for both activations and weights.
        - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights.
        - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models.
        Default value is INT8_SYM. A raw value accepted by the QuantizationMode
        constructor (e.g. "int8_sym") is converted to the matching member.
        Passing None selects the same configuration as INT8_TRANSFORMER.
    :param ignored_scope: An ignored scope that defines the list of model control
        flow graph nodes to be ignored during quantization.
    :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm.
        `preset` and `model_type` must not be passed here because they are
        derived from `mode`.
    :raises ValueError: If `mode` is neither None nor a valid QuantizationMode value.
    :raises TypeError: If `kwargs` contains `preset` or `model_type`.
    """
    # These two keywords are always passed explicitly below; catching the
    # collision here gives a clear message instead of the interpreter's
    # generic "got multiple values for keyword argument" TypeError.
    conflicting = sorted({"preset", "model_type"} & kwargs.keys())
    if conflicting:
        msg = f"{', '.join(conflicting)} cannot be passed via kwargs: derived from `mode`."
        raise TypeError(msg)

    # Normalize raw values so that an unknown or misspelled mode fails loudly
    # (QuantizationMode(...) raises ValueError) instead of silently falling
    # through to the transformer configuration.
    if mode is not None and not isinstance(mode, QuantizationMode):
        mode = QuantizationMode(mode)

    if mode == QuantizationMode.INT8_MIXED:
        preset = q.structs.QuantizationPreset.MIXED
        model_type = None
    elif mode == QuantizationMode.INT8_SYM:
        preset = q.structs.QuantizationPreset.PERFORMANCE
        model_type = None
    else:
        # INT8_TRANSFORMER, or None (kept for backward compatibility): the
        # preset is left unset and the transformer model type is requested.
        preset = None
        model_type = nncf.parameters.ModelType.TRANSFORMER

    self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization(
        preset=preset, model_type=model_type, ignored_scope=ignored_scope, **kwargs
    )
83
78
84
79
def get_nncf_quantization_setup (
0 commit comments