Commit 2f941e1

Fix conf.py
1 parent 678f4ed commit 2f941e1

File tree

3 files changed: +35 -27 lines changed


docs/api/source/conf.py

Lines changed: 5 additions & 0 deletions

@@ -146,9 +146,14 @@ def collect_api_entities() -> APIInfo:
     "nncf.tensor.functions.torch_io",
     "nncf.tensor.functions.numpy_io",
     "nncf.tensor.functions.openvino_numeric",
+    "nncf.torch.dynamic_graph.patch_pytorch",
 ]
 
 with mock(mock_modules):
+    import torch
+
+    # Set torch version to allow nncf.torch import
+    torch.__version__ = "0.0.0"
     api_info = collect_api_entities()
 
 module_fqns = set()
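In conf.py, the `mock(...)` context manager replaces the listed modules with importable stand-ins while API entities are collected. The extra `torch.__version__ = "0.0.0"` assignment matters because a mocked module's `__version__` attribute is a mock object rather than a string, which breaks nncf.torch's import-time version check. A minimal sketch of the same idea using only the standard library (the module names come from the diff; everything else is illustrative):

import sys
from unittest.mock import MagicMock

# Replace heavy optional dependencies with mocks so the package can be
# imported for API collection without having them installed.
mock_modules = [
    "torch",
    "nncf.torch.dynamic_graph.patch_pytorch",
]
sys.modules.update({name: MagicMock() for name in mock_modules})

import torch  # resolves to the MagicMock registered above

# A mock's __version__ is itself a mock, not a string, so version checks
# performed at import time would fail; pin a real version string instead.
torch.__version__ = "0.0.0"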

nncf/experimental/torch/fx/quantization/quantize_pt2e.py

Lines changed: 5 additions & 1 deletion

@@ -39,7 +39,7 @@
 from nncf.quantization.advanced_parameters import RangeEstimatorParameters
 
 
-@api()
+@api(canonical_alias="nncf.experimental.torch.fx.quantize_pt2e")
 def quantize_pt2e(
     model: torch.fx.GraphModule,
     quantizer: Quantizer,
@@ -59,8 +59,11 @@ def quantize_pt2e(
     Applies post-training quantization to the torch.fx.GraphModule provided model
     using provided torch.ao quantizer.
 
+    :param model: A torch.fx.GraphModule instance to be quantized.
     :param quantizer: Torch ao quantizer to annotate nodes in the graph with quantization setups
         to convey the desired way of quantization.
+    :param calibration_dataset: A representative dataset for the
+        calibration process.
     :param subset_size: Size of a subset to calculate activations
         statistics used for quantization.
     :param fast_bias_correction: Setting this option to `False` enables a different
@@ -79,6 +82,7 @@ def quantize_pt2e(
     :param fold_quantize: Boolean flag for whether fold the quantize op or not. The value is True by default.
     :param do_copy: The copy of the given model is being quantized if do_copy == True,
         otherwise the model is quantized inplace. Default value is False.
+    :return: The quantized torch.fx.GraphModule instance.
     """
     nncf_logger.warning("This is an experimental feature and may change in the future without notice.")
 
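The new `canonical_alias` publishes the function in the generated API reference under `nncf.experimental.torch.fx.quantize_pt2e`. A usage sketch under stated assumptions: the toy model, example input, and dataset are illustrative, and the `torch.export` capture step shown is one common way to obtain a `torch.fx.GraphModule` (the exact capture API varies across torch versions):

import torch
import nncf
from nncf.experimental.torch.fx import OpenVINOQuantizer, quantize_pt2e

# Illustrative toy model and example input.
model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())
example_input = torch.randn(1, 8)

# Capture the model as a torch.fx.GraphModule.
fx_model = torch.export.export_for_training(model, (example_input,)).module()

# A small representative dataset drives activation statistics collection.
calibration_dataset = nncf.Dataset([torch.randn(1, 8) for _ in range(10)])

quantized_model = quantize_pt2e(
    fx_model,
    quantizer=OpenVINOQuantizer(),
    calibration_dataset=calibration_dataset,
)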

nncf/experimental/torch/fx/quantization/quantizer/openvino_quantizer.py

Lines changed: 25 additions & 26 deletions

@@ -47,11 +47,35 @@
 QUANT_ANNOTATION_KEY = "quantization_annotation"
 
 
-@api()
+@api(canonical_alias="nncf.experimental.torch.fx.OpenVINOQuantizer")
 class OpenVINOQuantizer(TorchAOQuantizer):
     """
     Implementation of the Torch AO quantizer which annotates models with quantization annotations
     optimally for the inference via OpenVINO.
+
+    :param mode: Defines optimization mode for the algorithm. None by default.
+    :param preset: A preset controls the quantization mode (symmetric and asymmetric).
+        It can take the following values:
+        - `performance`: Symmetric quantization of weights and activations.
+        - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
+        Default value is None. In this case, `mixed` preset is used for `transformer`
+        model type otherwise `performance`.
+    :param target_device: A target device the specificity of which will be taken
+        into account while compressing in order to obtain the best performance
+        for this type of device, defaults to TargetDevice.ANY.
+    :param model_type: Model type is needed to specify additional patterns
+        in the model. Supported only `transformer` now.
+    :param ignored_scope: An ignored scope that defined the list of model control
+        flow graph nodes to be ignored during quantization.
+    :param overflow_fix: This option controls whether to apply the overflow issue
+        fix for the 8-bit quantization.
+    :param quantize_outputs: Whether to insert additional quantizers right before
+        each of the model outputs.
+    :param activations_quantization_params: Quantization parameters for model
+        activations.
+    :param weights_quantization_params: Quantization parameters for model weights.
+    :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers.
+        MERGE_ALL_IN_ONE by default.
     """
 
     def __init__(
@@ -68,31 +92,6 @@ def __init__(
         weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None,
         quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE,
     ):
-        """
-        :param mode: Defines optimization mode for the algorithm. None by default.
-        :param preset: A preset controls the quantization mode (symmetric and asymmetric).
-            It can take the following values:
-            - `performance`: Symmetric quantization of weights and activations.
-            - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
-            Default value is None. In this case, `mixed` preset is used for `transformer`
-            model type otherwise `performance`.
-        :param target_device: A target device the specificity of which will be taken
-            into account while compressing in order to obtain the best performance
-            for this type of device, defaults to TargetDevice.ANY.
-        :param model_type: Model type is needed to specify additional patterns
-            in the model. Supported only `transformer` now.
-        :param ignored_scope: An ignored scope that defined the list of model control
-            flow graph nodes to be ignored during quantization.
-        :param overflow_fix: This option controls whether to apply the overflow issue
-            fix for the 8-bit quantization.
-        :param quantize_outputs: Whether to insert additional quantizers right before
-            each of the model outputs.
-        :param activations_quantization_params: Quantization parameters for model
-            activations.
-        :param weights_quantization_params: Quantization parameters for model weights.
-        :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers.
-            MERGE_ALL_IN_ONE by default.
-        """
         self._min_max_algo = MinMaxQuantization(
             mode=mode,
             preset=preset,
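Hoisting the parameter descriptions from `__init__` into the class docstring lets the `@api`-generated reference page show them directly on the class. An instantiation sketch using parameters named in that docstring (all values are illustrative, and the ignored node name is hypothetical):

import nncf
from nncf.experimental.torch.fx import OpenVINOQuantizer

# Mixed preset: symmetric weights with asymmetric activations; enable
# transformer-specific patterns; skip one (hypothetical) node by name.
quantizer = OpenVINOQuantizer(
    preset=nncf.QuantizationPreset.MIXED,
    model_type=nncf.ModelType.TRANSFORMER,
    ignored_scope=nncf.IgnoredScope(names=["some_node_name"]),
)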
