 QUANT_ANNOTATION_KEY = "quantization_annotation"


-@api()
+@api(canonical_alias="nncf.experimental.torch.fx.OpenVINOQuantizer")
 class OpenVINOQuantizer(TorchAOQuantizer):
     """
     Implementation of the Torch AO quantizer which annotates models with quantization annotations
     optimally for the inference via OpenVINO.
+
+    :param mode: Defines the optimization mode for the algorithm. None by default.
+    :param preset: A preset that controls the quantization mode (symmetric and asymmetric).
+        It can take the following values:
+        - `performance`: Symmetric quantization of weights and activations.
+        - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
+        Default value is None; in this case, the `mixed` preset is used for the `transformer`
+        model type, otherwise `performance`.
+    :param target_device: A target device whose specifics are taken into account
+        during compression in order to obtain the best performance for that type
+        of device. Defaults to TargetDevice.ANY.
+    :param model_type: Model type is used to specify additional patterns
+        in the model. Only `transformer` is supported for now.
+    :param ignored_scope: An ignored scope that defines the list of model control
+        flow graph nodes to be ignored during quantization.
+    :param overflow_fix: Controls whether to apply the overflow issue fix
+        for 8-bit quantization.
+    :param quantize_outputs: Whether to insert additional quantizers right before
+        each of the model outputs.
+    :param activations_quantization_params: Quantization parameters for model
+        activations.
+    :param weights_quantization_params: Quantization parameters for model weights.
+    :param quantizer_propagation_rule: The strategy used while propagating and merging
+        quantizers. MERGE_ALL_IN_ONE by default.
     """

     def __init__(
@@ -68,31 +92,6 @@ def __init__(
         weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None,
         quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE,
     ):
-        """
-        :param mode: Defines optimization mode for the algorithm. None by default.
-        :param preset: A preset controls the quantization mode (symmetric and asymmetric).
-            It can take the following values:
-            - `performance`: Symmetric quantization of weights and activations.
-            - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
-            Default value is None. In this case, `mixed` preset is used for `transformer`
-            model type otherwise `performance`.
-        :param target_device: A target device the specificity of which will be taken
-            into account while compressing in order to obtain the best performance
-            for this type of device, defaults to TargetDevice.ANY.
-        :param model_type: Model type is needed to specify additional patterns
-            in the model. Supported only `transformer` now.
-        :param ignored_scope: An ignored scope that defined the list of model control
-            flow graph nodes to be ignored during quantization.
-        :param overflow_fix: This option controls whether to apply the overflow issue
-            fix for the 8-bit quantization.
-        :param quantize_outputs: Whether to insert additional quantizers right before
-            each of the model outputs.
-        :param activations_quantization_params: Quantization parameters for model
-            activations.
-        :param weights_quantization_params: Quantization parameters for model weights.
-        :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers.
-            MERGE_ALL_IN_ONE by default.
-        """
         self._min_max_algo = MinMaxQuantization(
             mode=mode,
             preset=preset,
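
For context, here is a minimal usage sketch for the quantizer documented in this diff. It assumes the standard PT2E quantization flow (`prepare_pt2e`/`convert_pt2e` from `torch.ao.quantization.quantize_pt2e`), since `OpenVINOQuantizer` subclasses the Torch AO `Quantizer`; `ExampleModel` and `calibration_loader` are placeholders, and exact export paths may vary between PyTorch and NNCF releases.

```python
# Hypothetical sketch, not part of the diff above.
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

import nncf
from nncf.experimental.torch.fx import OpenVINOQuantizer  # canonical alias set by @api above

model = ExampleModel().eval()  # placeholder model
example_input = torch.randn(1, 3, 224, 224)

# Export to the FX graph that the PT2E quantization flow operates on.
exported_model = torch.export.export(model, (example_input,)).module()

# Explicit preset/target_device shown for illustration; by default the
# quantizer picks `performance`, or `mixed` for model_type=TRANSFORMER.
quantizer = OpenVINOQuantizer(
    preset=nncf.QuantizationPreset.PERFORMANCE,
    target_device=nncf.TargetDevice.ANY,
)

# Annotate, calibrate, and convert.
prepared_model = prepare_pt2e(exported_model, quantizer)
for batch in calibration_loader:  # placeholder calibration data
    prepared_model(batch)
quantized_model = convert_pt2e(prepared_model)
```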