Comments

daniil-lyakhov · daniil-lyakhov · commit 714d77dd9854 · 2025-07-10T19:07:24.000+02:00
diff --git a/src/nncf/experimental/quantization/structs.py b/src/nncf/experimental/quantization/structs.py
@@ -11,7 +11,6 @@
 
 from typing import Any, Literal, Optional
 
-import nncf
 from nncf.common.quantization.structs import QuantizationScheme
 from nncf.common.quantization.structs import QuantizerConfig
 from nncf.config.schemata.defaults import QUANTIZATION_BITS
@@ -48,9 +47,6 @@ def __init__(
         :param dest_dtype: Target integer data type for quantized values.
         """
         super().__init__(num_bits, mode, signedness_to_force, per_channel, narrow_range)
-        if dest_dtype not in [TensorDataType.int8, TensorDataType.uint8]:
-            msg = f"Quantization configurations with dest_dtype=={dest_dtype} are not supported."
-            raise nncf.ParameterNotSupportedError(msg)
         self.dest_dtype = dest_dtype
 
     def __str__(self) -> str:
diff --git a/src/nncf/quantization/algorithms/min_max/torch_fx_backend.py b/src/nncf/quantization/algorithms/min_max/torch_fx_backend.py
@@ -192,12 +192,16 @@ def _create_quantizer(
         quantizer_config: QuantizerConfig,
         channel_axis: int,
         parameters: FakeQuantizeParameters,
-        target_type: TargetType,
     ) -> FakeQuantize:
         per_channel = quantizer_config.per_channel
         dtype = None
         if isinstance(quantizer_config, ExtendedQuantizerConfig):
             dtype = quantizer_config.dest_dtype
+
+            if dtype not in [TensorDataType.int8, TensorDataType.uint8]:
+                msg = f"Quantization configurations with dest_dtype=={dtype} are not supported."
+                raise nncf.ParameterNotSupportedError(msg)
+
         elif quantizer_config.mode != QuantizationScheme.SYMMETRIC:
             dtype = TensorDataType.uint8
         else:
@@ -281,7 +285,6 @@ def create_quantizer_insertion_command(
             quantizer_config,
             channel_axis,
             parameters,
-            target_point.target_type,
         )
         transformation = qdq_insertion_transformation_builder(quantizer, [target_point])
         return FXApplyTransformationCommand(transformation)
@@ -299,7 +302,6 @@ def create_unified_scales_quantizers_insertion_commands(
             quantizer_config,
             channel_axis,
             parameters,
-            target_points[0].target_type,
         )
 
         transformations = []
diff --git a/tests/common/experimental/test_structs.py b/tests/common/experimental/test_structs.py
diff --git a/tests/post_training/conftest.py b/tests/post_training/conftest.py
@@ -22,9 +22,6 @@ def pytest_addoption(parser):
     parser.addoption("--subset-size", type=int, default=None, help="Set subset size")
     parser.addoption("--fp32", action="store_true", help="Test original model")
     parser.addoption("--cuda", action="store_true", help="Enable CUDA_TORCH backend")
-    parser.addoption(
-        "--x86quantizer", action="store_true", help="Enable X86_QUANTIZER_NNCF and X86_QUANTIZER_AO backends"
-    )
     parser.addoption("--benchmark", action="store_true", help="Run benchmark_app")
     parser.addoption(
         "--torch-compile-validation",
@@ -82,11 +79,6 @@ def fixture_run_torch_cuda_backend(pytestconfig):
     return pytestconfig.getoption("cuda")
 
 
-@pytest.fixture(scope="session", name="run_with_x86_quantizer")
-def fixture_run_with_x86quantizer(pytestconfig):
-    return pytestconfig.getoption("x86quantizer")
-
-
 @pytest.fixture(scope="session", name="run_benchmark_app")
 def fixture_run_benchmark_app(pytestconfig):
     return pytestconfig.getoption("benchmark")
diff --git a/tests/post_training/data/ptq_reference_data.yaml b/tests/post_training/data/ptq_reference_data.yaml
@@ -52,10 +52,6 @@ torchvision/resnet18_backend_OV_QUANTIZER_NNCF:
   metric_value: 0.6946
 torchvision/resnet18_backend_OV_QUANTIZER_AO:
   metric_value: 0.6959
-torchvision/resnet18_backend_X86_QUANTIZER_NNCF:
-  metric_value: 0.6946
-torchvision/resnet18_backend_X86_QUANTIZER_AO:
-  metric_value: 0.6959
 torchvision/mobilenet_v3_small_BC_backend_FP32:
   metric_value: 0.6766
 torchvision/mobilenet_v3_small_BC_backend_OV:
@@ -78,10 +74,6 @@ torchvision/mobilenet_v3_small_BC_backend_OV_QUANTIZER_NNCF:
   metric_value: 0.6679
 torchvision/mobilenet_v3_small_BC_backend_OV_QUANTIZER_AO:
   metric_value: 0.6561
-torchvision/mobilenet_v3_small_BC_backend_X86_QUANTIZER_NNCF:
-  metric_value: 0.0622
-torchvision/mobilenet_v3_small_BC_backend_X86_QUANTIZER_AO:
-  metric_value: 0.0538
 torchvision/vit_b_16_backend_FP32:
   metric_value: 0.8107
 torchvision/vit_b_16_backend_OV:
@@ -102,10 +94,6 @@ torchvision/vit_b_16_backend_OV_QUANTIZER_NNCF:
   metric_value: 0.80922
 torchvision/vit_b_16_backend_OV_QUANTIZER_AO:
   metric_value: 0.2429
-torchvision/vit_b_16_backend_X86_QUANTIZER_NNCF:
-  metric_value: 0.80922
-torchvision/vit_b_16_backend_X86_QUANTIZER_AO:
-  metric_value: 0.1498
 torchvision/swin_v2_s_backend_FP32:
   metric_value: 0.83712
 torchvision/swin_v2_s_backend_OV:
@@ -126,10 +114,6 @@ torchvision/swin_v2_s_backend_OV_QUANTIZER_NNCF:
   metric_value: 0.8360
 torchvision/swin_v2_s_backend_OV_QUANTIZER_AO:
   metric_value: 0.8360
-torchvision/swin_v2_s_backend_X86_QUANTIZER_NNCF:
-  metric_value: 0.8360
-torchvision/swin_v2_s_backend_X86_QUANTIZER_AO:
-  metric_value: 0.8360
 timm/crossvit_9_240_backend_CUDA_TORCH:
   metric_value: 0.7275
 timm/crossvit_9_240_backend_FP32:
diff --git a/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py b/tests/post_training/experimental/sparsify_activations/test_sparsify_activations_conformance.py
@@ -58,7 +58,6 @@ def test_sparsify_activations(
         batch_size,
         run_fp32_backend,
         run_torch_cuda_backend,
-        False,  # Do not run with X86Quantizers
         subset_size,
         run_benchmark_app,
         capsys,
diff --git a/tests/post_training/pipelines/base.py b/tests/post_training/pipelines/base.py
@@ -59,8 +59,6 @@ class BackendType(Enum):
     CUDA_FX_TORCH = "CUDA_FX_TORCH"
     OV_QUANTIZER_NNCF = "OV_QUANTIZER_NNCF"
     OV_QUANTIZER_AO = "OV_QUANTIZER_AO"
-    X86_QUANTIZER_NNCF = "X86_QUANTIZER_NNCF"
-    X86_QUANTIZER_AO = "X86_QUANTIZER_AO"
     ONNX = "ONNX"
     OV = "OV"
     OPTIMUM = "OPTIMUM"
@@ -74,8 +72,6 @@ class BackendType(Enum):
     BackendType.CUDA_FX_TORCH,
     BackendType.OV_QUANTIZER_NNCF,
     BackendType.OV_QUANTIZER_AO,
-    BackendType.X86_QUANTIZER_NNCF,
-    BackendType.X86_QUANTIZER_AO,
 ]
 OV_BACKENDS = [BackendType.OV, BackendType.OPTIMUM]
 
diff --git a/tests/post_training/pipelines/image_classification_base.py b/tests/post_training/pipelines/image_classification_base.py
@@ -27,8 +27,6 @@
 from torch.ao.quantization.quantize_pt2e import convert_pt2e
 from torch.ao.quantization.quantize_pt2e import prepare_pt2e
 from torch.ao.quantization.quantizer.quantizer import Quantizer as TorchAOQuantizer
-from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
-from torch.ao.quantization.quantizer.x86_inductor_quantizer import get_default_x86_inductor_quantization_config
 from torchvision import datasets
 
 import nncf
@@ -195,16 +193,12 @@ def _compress(self):
 
         quantizer = self._build_quantizer()
 
-        if self.backend in [BackendType.OV_QUANTIZER_NNCF, BackendType.X86_QUANTIZER_NNCF]:
+        if self.backend in [BackendType.OV_QUANTIZER_NNCF]:
             self._compress_nncf_pt2e(quantizer)
         else:
             self._compress_torch_ao(quantizer)
 
     def _build_quantizer(self) -> TorchAOQuantizer:
-        if self.backend in [BackendType.X86_QUANTIZER_AO, BackendType.X86_QUANTIZER_NNCF]:
-            quantizer = X86InductorQuantizer()
-            quantizer.set_global(get_default_x86_inductor_quantization_config())
-            return quantizer
         quantizer_kwargs = {}
         for key in (
             "mode",
diff --git a/tests/post_training/test_quantize_conformance.py b/tests/post_training/test_quantize_conformance.py
@@ -125,9 +125,7 @@ def fixture_report_data(output_dir, run_benchmark_app, forked):
             df.to_csv(output_file, index=False)
 
 
-def maybe_skip_test_case(
-    test_model_param, run_fp32_backend, run_torch_cuda_backend, run_with_x86_quantizer, batch_size
-):
+def maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size):
     if test_model_param["backend"] == BackendType.FP32 and not run_fp32_backend:
         pytest.skip("To run test for not quantized model use --fp32 argument")
     if (
@@ -137,11 +135,6 @@ def maybe_skip_test_case(
         pytest.skip(f"To run test for {test_model_param['backend'].value} backend use --cuda argument")
     if batch_size and batch_size > 1 and test_model_param.get("batch_size", 1) == 1:
         pytest.skip("The model does not support batch_size > 1. Please use --batch-size 1.")
-    if (
-        test_model_param["backend"] in [BackendType.X86_QUANTIZER_AO, BackendType.X86_QUANTIZER_NNCF]
-        and not run_with_x86_quantizer
-    ):
-        pytest.skip("To validate quantization with the X86Quantizer use the --x86quantizer argument")
     return test_model_param
 
 
@@ -209,7 +202,6 @@ def run_pipeline(
     batch_size: Optional[int],
     run_fp32_backend: bool,
     run_torch_cuda_backend: bool,
-    run_with_x86_quantizer: bool,
     subset_size: Optional[int],
     run_benchmark_app: bool,
     capsys: pytest.CaptureFixture,
@@ -222,7 +214,7 @@ def run_pipeline(
         msg = f"{test_case_name} does not exist in 'reference_data.yaml'"
         raise nncf.ValidationError(msg)
     test_model_param = test_cases[test_case_name]
-    maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, run_with_x86_quantizer, batch_size)
+    maybe_skip_test_case(test_model_param, run_fp32_backend, run_torch_cuda_backend, batch_size)
     pipeline_cls = test_model_param["pipeline_cls"]
     pipeline_kwargs = create_pipeline_kwargs(test_model_param, subset_size, test_case_name, reference_data)
     pipeline_kwargs.update(
@@ -280,7 +272,6 @@ def test_ptq_quantization(
     batch_size: Optional[int],
     run_fp32_backend: bool,
     run_torch_cuda_backend: bool,
-    run_with_x86_quantizer: bool,
     subset_size: Optional[int],
     run_benchmark_app: bool,
     capsys: pytest.CaptureFixture,
@@ -298,7 +289,6 @@ def test_ptq_quantization(
         batch_size,
         run_fp32_backend,
         run_torch_cuda_backend,
-        run_with_x86_quantizer,
         subset_size,
         run_benchmark_app,
         capsys,
@@ -335,7 +325,6 @@ def test_weight_compression(
         batch_size,
         run_fp32_backend,
         run_torch_cuda_backend,
-        False,  # Do not run with the X86Quantizer
         subset_size,
         run_benchmark_app,
         capsys,
diff --git a/tests/torch2/fx/test_calculation_quantizer_params.py b/tests/torch2/fx/test_calculation_quantizer_params.py
@@ -16,14 +16,15 @@
 import pytest
 import torch
 
-from nncf.common.graph.transformations.commands import TargetType
+import nncf
 from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode
 from nncf.common.quantization.structs import QuantizerConfig
 from nncf.common.quantization.structs import QuantizerGroup
 from nncf.experimental.common.tensor_statistics.statistics import MinMaxTensorStatistic
 from nncf.experimental.quantization.structs import ExtendedQuantizerConfig
 from nncf.experimental.quantization.structs import IntDtype
 from nncf.quantization.algorithms.min_max.torch_fx_backend import FXMinMaxAlgoBackend
+from nncf.quantization.fake_quantize import FakeQuantizeParameters
 from nncf.quantization.fake_quantize import calculate_quantizer_parameters
 from nncf.tensor import Tensor
 from nncf.tensor.definitions import TensorDataType
@@ -414,12 +415,7 @@ def _get_quantizer(case_to_test: CaseQuantParams, qconfig: QuantizerConfig):
     fq_params = calculate_quantizer_parameters(case_to_test.stat, qconfig, case_to_test.quant_group, half_range=False)
 
     ch_axis = 1 if case_to_test.per_channel and case_to_test.quant_group == QuantizerGroup.WEIGHTS else 0
-    target_type = (
-        TargetType.OPERATION_WITH_WEIGHTS
-        if case_to_test.quant_group == QuantizerGroup.WEIGHTS
-        else TargetType.PRE_LAYER_OPERATION
-    )
-    quantizer = FXMinMaxAlgoBackend._create_quantizer(qconfig, ch_axis, fq_params, target_type)
+    quantizer = FXMinMaxAlgoBackend._create_quantizer(qconfig, ch_axis, fq_params)
 
     assert quantizer.ch_axis == ch_axis
 
@@ -436,3 +432,27 @@ def _check_q_min_q_max(quantizer, signed, narrow_range):
 
     assert quantizer.quant_min == ref_quant_min
     assert quantizer.quant_max == ref_quant_max
+
+
+@pytest.mark.parametrize(
+    "dest_dtype",
+    [  # none of these is int8/uint8, the only dest_dtype values _create_quantizer accepts
+        TensorDataType.float16,
+        TensorDataType.bfloat16,
+        TensorDataType.float32,
+        TensorDataType.float64,
+        TensorDataType.f8e4m3,
+        TensorDataType.f8e5m2,
+        TensorDataType.nf4,
+        TensorDataType.int32,
+        TensorDataType.int64,
+        TensorDataType.uint4,
+        TensorDataType.int4,
+        None,  # None is not int8/uint8 either, so it must be rejected as well
+    ],
+)
+def test_extended_q_config_non_supported_dest_dtype(dest_dtype):  # unsupported dest_dtype must raise
+    qconfig = ExtendedQuantizerConfig(dest_dtype=dest_dtype)  # must not raise: it sits outside pytest.raises
+    params = FakeQuantizeParameters(-1.0, 1.0, -1.0, 1.0, 255)  # NOTE(review): presumably placeholders - confirm
+    with pytest.raises(nncf.ParameterNotSupportedError):  # the error type raised by the FX backend dtype check
+        FXMinMaxAlgoBackend._create_quantizer(quantizer_config=qconfig, channel_axis=1, parameters=params)