 QUANT_ANNOTATION_KEY = "quantization_annotation"


-@api()
+@api(canonical_alias="nncf.experimental.torch.fx.OpenVINOQuantizer")
 class OpenVINOQuantizer(TorchAOQuantizer):
     """
     Implementation of the Torch AO quantizer which annotates models with quantization annotations
     optimally for the inference via OpenVINO.
+
+    :param mode: Defines the optimization mode for the algorithm. None by default.
+    :param preset: A preset that controls the quantization mode (symmetric and asymmetric).
+        It can take the following values:
+        - `performance`: Symmetric quantization of weights and activations.
+        - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
+        Default value is None; in this case, the `mixed` preset is used for the `transformer`
+        model type, otherwise `performance`.
+    :param target_device: A target device whose specifics are taken into account
+        during compression in order to obtain the best performance for that type
+        of device. Defaults to TargetDevice.ANY.
+    :param model_type: Model type is used to specify additional patterns
+        in the model. Only `transformer` is supported for now.
+    :param ignored_scope: An ignored scope that defines the list of model control
+        flow graph nodes to be ignored during quantization.
+    :param overflow_fix: Controls whether to apply the overflow issue fix
+        for 8-bit quantization.
+    :param quantize_outputs: Whether to insert additional quantizers right before
+        each of the model outputs.
+    :param activations_quantization_params: Quantization parameters for model
+        activations.
+    :param weights_quantization_params: Quantization parameters for model weights.
+    :param quantizer_propagation_rule: The strategy used while propagating and merging
+        quantizers. MERGE_ALL_IN_ONE by default.
     """

     def __init__(
@@ -68,31 +92,6 @@ def __init__(
         weights_quantization_params: Optional[Union[QuantizationParameters, FP8QuantizationParameters]] = None,
         quantizer_propagation_rule: QuantizerPropagationRule = QuantizerPropagationRule.MERGE_ALL_IN_ONE,
     ):
-        """
-        :param mode: Defines optimization mode for the algorithm. None by default.
-        :param preset: A preset controls the quantization mode (symmetric and asymmetric).
-            It can take the following values:
-            - `performance`: Symmetric quantization of weights and activations.
-            - `mixed`: Symmetric quantization of weights and asymmetric quantization of activations.
-            Default value is None. In this case, `mixed` preset is used for `transformer`
-            model type otherwise `performance`.
-        :param target_device: A target device the specificity of which will be taken
-            into account while compressing in order to obtain the best performance
-            for this type of device, defaults to TargetDevice.ANY.
-        :param model_type: Model type is needed to specify additional patterns
-            in the model. Supported only `transformer` now.
-        :param ignored_scope: An ignored scope that defined the list of model control
-            flow graph nodes to be ignored during quantization.
-        :param overflow_fix: This option controls whether to apply the overflow issue
-            fix for the 8-bit quantization.
-        :param quantize_outputs: Whether to insert additional quantizers right before
-            each of the model outputs.
-        :param activations_quantization_params: Quantization parameters for model
-            activations.
-        :param weights_quantization_params: Quantization parameters for model weights.
-        :param quantizer_propagation_rule: The strategy to be used while propagating and merging quantizers.
-            MERGE_ALL_IN_ONE by default.
-        """
         self._min_max_algo = MinMaxQuantization(
             mode=mode,
             preset=preset,
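
For context, here is a minimal usage sketch for the quantizer documented in this diff. It assumes the standard PT2E quantization flow (`prepare_pt2e`/`convert_pt2e` from `torch.ao.quantization.quantize_pt2e`), since `OpenVINOQuantizer` subclasses the Torch AO `Quantizer`; `ExampleModel` and `calibration_loader` are placeholders, and exact export paths may vary between PyTorch and NNCF releases.

```python
# Hypothetical sketch, not part of the diff above.
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

import nncf
from nncf.experimental.torch.fx import OpenVINOQuantizer  # canonical alias set by @api above

model = ExampleModel().eval()  # placeholder model
example_input = torch.randn(1, 3, 224, 224)

# Export to the FX graph that the PT2E quantization flow operates on.
exported_model = torch.export.export(model, (example_input,)).module()

# Explicit preset/target_device shown for illustration; by default the
# quantizer picks `performance`, or `mixed` for model_type=TRANSFORMER.
quantizer = OpenVINOQuantizer(
    preset=nncf.QuantizationPreset.PERFORMANCE,
    target_device=nncf.TargetDevice.ANY,
)

# Annotate, calibrate, and convert.
prepared_model = prepare_pt2e(exported_model, quantizer)
for batch in calibration_loader:  # placeholder calibration data
    prepared_model(batch)
quantized_model = convert_pt2e(prepared_model)
```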