9 changes: 8 additions & 1 deletion src/nncf/openvino/optimized_functions/functions.py
@@ -11,6 +11,7 @@

from typing import Optional, Union

import nncf
from nncf import CompressWeightsMode
from nncf.common.utils.caching import disable_results_caching
from nncf.openvino.optimized_functions.models import OV_MODEL_CACHE
@@ -274,6 +275,7 @@ def get_integer_quantization_error(
weight: Tensor,
reduction_axes: ReductionAxes,
config: WeightCompressionConfig,
reduction: str,
) -> float:
"""
Calculates a quantity characterizing the difference between floating point weights and fake quantized
@@ -285,8 +287,13 @@
:param weight: Weight array to compress.
:param reduction_axes: Axes, along which to reduce (collect) different statistics (e.g. min, max).
:param config: Information on how to compress (quantize) a specific weight.
:param reduction: Reduction mode to aggregate error values. Supported modes: "max_mean", "frobenius".
:return: The quantity characterizing the error of integer quantization.
"""
if reduction not in ["max_mean", "frobenius"]:
exception_str = f"Unsupported aggregation mode: {reduction}."
raise nncf.InternalError(exception_str)

original_weight_shape = weight.shape
original_reduction_axes = reduction_axes

@@ -298,7 +305,7 @@
ov_model_params = OVModelParameters()
ov_model_params.input_dtypes["weight"] = weight.dtype
model = get_integer_quantization_error_model(
ov_model_params, config, original_weight_shape, weight.shape, original_reduction_axes, reduction_axes
ov_model_params, config, reduction, weight.shape, reduction_axes, original_weight_shape, original_reduction_axes
)

quantization_error = model([weight])[0].item()
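For orientation, here is a minimal NumPy sketch of what the two reduction modes aggregate. It mirrors the pure-NumPy fallback further down in this diff and is illustrative only, not part of the PR:

import numpy as np

def aggregate_quantization_error(weight, dequantized, reduction_axes, reduction):
    # Element-wise squared error between the original and the quantize-dequantize weight.
    if reduction == "max_mean":
        # Mean squared error per reduction group, then the worst (maximum) group value.
        layer_err = np.mean((dequantized - weight) ** 2, axis=reduction_axes)
        return float(np.max(layer_err))
    if reduction == "frobenius":
        # Frobenius norm of the full difference tensor.
        return float(np.linalg.norm((dequantized - weight).reshape(-1)))
    raise ValueError(f"Unsupported aggregation mode: {reduction}.")

With reduction="max_mean" this reproduces the previous behaviour; "frobenius" corresponds to the norm that the second _calc_weight_sensitivity used to compute inline (see the mixed-precision changes below).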
44 changes: 30 additions & 14 deletions src/nncf/openvino/optimized_functions/models.py
@@ -390,10 +390,11 @@ def get_integer_quantize_dequantize_weight_model(
def get_integer_quantization_error_model(
ov_model_params: OVModelParameters,
config: WeightCompressionConfig,
original_weight_shape: tuple,
reduction: str,
weight_shape: tuple,
original_reduction_axes: ReductionAxes,
reduction_axes: ReductionAxes,
original_weight_shape: tuple,
original_reduction_axes: ReductionAxes,
) -> ModelCallable:
"""
Get a model that calculates the quantization error for a given weight.
@@ -403,16 +404,23 @@

:param ov_model_params: OV model parameters.
:param config: Compression configuration.
:param original_weight_shape: Shape of the original weight tensor.
:param reduction: Reduction mode to aggregate error values. Supported modes: "max_mean", "frobenius".
:param weight_shape: Shape of the weight tensor to be compressed.
:param original_reduction_axes: Reduction axes of the original weight tensor before reshaping.
:param reduction_axes: Axes to reduce the weight tensor.
:param original_weight_shape: Shape of the original weight tensor.
:param original_reduction_axes: Reduction axes of the original weight tensor before reshaping.
:return: A model callable that returns the quantization error.
"""
weight_shape, _, _ = _prepare_quantization_model_inputs(ov_model_params, weight_shape, None, None, reduction_axes)

return _build_integer_quantization_error_model(
config, ov_model_params, original_weight_shape, weight_shape, original_reduction_axes, reduction_axes
config,
ov_model_params,
reduction,
weight_shape,
reduction_axes,
original_weight_shape,
original_reduction_axes,
)


@@ -769,10 +777,11 @@ def _build_float_quantize_dequantize_weight_model(
def _build_integer_quantization_error_model(
config: WeightCompressionConfig,
ov_model_params: OVModelParameters,
original_weight_shape: tuple,
reduction: str,
weight_shape: tuple,
original_reduction_axes: ReductionAxes,
reduction_axes: ReductionAxes,
original_weight_shape: tuple,
original_reduction_axes: ReductionAxes,
) -> ModelCallable:
ov_parameters, ov_results, ov_model_params = _build_integer_quantize_dequantize_weight_model(
config,
@@ -786,13 +795,20 @@ def _build_integer_quantization_error_model(
weight = ov_parameters[0]
decompressed_weight = ov_results[0]

weight = convert_op(opset.reshape(weight, original_weight_shape, special_zero=False), ov.Type.f32)
decompressed_weight = convert_op(
opset.reshape(decompressed_weight, original_weight_shape, special_zero=False), ov.Type.f32
)
diff = opset.squared_difference(decompressed_weight, weight)
layer_err = opset.reduce_mean(diff, reduction_axes=original_reduction_axes)
quantization_error = opset.reduce_max(layer_err, reduction_axes=tuple(range(len(layer_err.shape))))
weight = convert_op(weight, ov.Type.f32)
if reduction == "max_mean":
weight = opset.reshape(weight, original_weight_shape, special_zero=False)
decompressed_weight = opset.reshape(decompressed_weight, original_weight_shape, special_zero=False)
diff = opset.squared_difference(decompressed_weight, weight)
layer_err = opset.reduce_mean(diff, reduction_axes=original_reduction_axes)
quantization_error = opset.reduce_max(layer_err, reduction_axes=tuple(range(len(layer_err.shape))))
elif reduction == "frobenius":
diff = opset.reshape(decompressed_weight - weight, (-1,), special_zero=False)
quantization_error = opset.matmul(diff, diff, transpose_a=False, transpose_b=False)
quantization_error = opset.sqrt(quantization_error)
else:
msg = f"Unsupported aggregation method: {reduction}."
raise ValueError(msg)

model = ov.Model([quantization_error], ov_parameters)
compiled_model = _compile_ov_model(model, device_name="CPU", config={inference_precision(): ov.Type.f32})
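The "frobenius" branch avoids a dedicated norm op by flattening the difference and taking sqrt(diff @ diff). A quick NumPy check (illustrative, not part of the PR) of why that equals the Frobenius norm:

import numpy as np

rng = np.random.default_rng(0)
weight = rng.standard_normal((8, 16)).astype(np.float32)
decompressed = (weight + rng.normal(scale=1e-2, size=weight.shape)).astype(np.float32)

# Flattened self dot product followed by sqrt, as in the OV graph above ...
diff = (decompressed - weight).reshape(-1)
via_matmul = np.sqrt(diff @ diff)

# ... equals the Frobenius norm of the 2-D difference.
reference = np.linalg.norm(decompressed - weight, ord="fro")
assert np.isclose(via_matmul, reference, rtol=1e-5)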
(next file)
@@ -29,10 +29,8 @@
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionParameters
from nncf.quantization.algorithms.weight_compression.weight_lowering import get_integer_quantization_error
from nncf.quantization.algorithms.weight_compression.weight_lowering import integer_quantize_dequantize_weight
from nncf.tensor import Tensor
from nncf.tensor import functions as fns
from nncf.tensor.definitions import TensorDataType

TModel = TypeVar("TModel")
MIXED_PRECISION_CRITERIA = Registry("mixed_precision_criteria")
@@ -174,7 +172,7 @@ def _calc_weight_sensitivity(
)
backup_config = WeightCompressionConfig()
reduction_axes = weight_param.reduction_axes
int_error = get_integer_quantization_error(weight, reduction_axes, backup_config)
int_error = get_integer_quantization_error(weight, reduction_axes, backup_config, reduction="max_mean")
eps = fns.finfo(weight).eps
return 1 / (int_error + eps)

@@ -360,15 +358,7 @@ def _calc_weight_sensitivity(
)
backup_config = WeightCompressionConfig()
reduction_axes = weight_param.reduction_axes

orig_shape = weight.shape

if weight.dtype != TensorDataType.float32:
weight = weight.astype(TensorDataType.float32)

decompressed_weight = integer_quantize_dequantize_weight(weight, backup_config, reduction_axes)
decompressed_weight = decompressed_weight.reshape(orig_shape)
return fns.linalg.norm(decompressed_weight - weight, ord="fro").item()
return get_integer_quantization_error(weight, reduction_axes, backup_config, reduction="frobenius")

def _get_statistic_collector(self):
return self._backend_entity.hawq_statistic_collector(self._subset_size)
(next file)
@@ -299,6 +299,7 @@ def get_integer_quantization_error(
weight: Tensor,
reduction_axes: ReductionAxes,
config: WeightCompressionConfig,
reduction: str,
) -> float:
"""
Calculates a quantity characterizing the difference between floating point weights and fake quantized
@@ -310,29 +311,35 @@
:param weight: Weight array to compress.
:param reduction_axes: Axes, along which to reduce (collect) different statistics (e.g. min, max).
:param config: Information on how to compress (quantize) a specific weight.
:param reduction: Reduction mode to aggregate error values. Supported modes: "max_mean", "frobenius".
:return: The quantity characterizing the error of integer quantization.
"""
if reduction not in ["max_mean", "frobenius"]:
exception_str = f"Unsupported aggregation mode: {reduction}."
raise nncf.InternalError(exception_str)

# Optimized implementation
if _can_run_optimized(weight, config.mode):
from nncf.openvino.optimized_functions import (
get_integer_quantization_error as get_integer_quantization_error_ov,
)

return get_integer_quantization_error_ov(weight, reduction_axes, config)
return get_integer_quantization_error_ov(weight, reduction_axes, config, reduction)

if weight.backend == TensorBackend.ov:
weight = weight.as_numpy_tensor()
orig_shape = weight.shape

if weight.dtype != TensorDataType.float32:
weight = weight.astype(TensorDataType.float32)

decompressed_weight = integer_quantize_dequantize_weight(weight, config, reduction_axes)

decompressed_weight = decompressed_weight.reshape(orig_shape)
diff = (decompressed_weight - weight) ** 2
layer_err = fns.mean(diff, axis=reduction_axes)
val = fns.max(layer_err)
decompressed_weight = decompressed_weight.reshape(weight.shape)
if reduction == "max_mean":
diff = (decompressed_weight - weight) ** 2
layer_err = fns.mean(diff, axis=reduction_axes)
val = fns.max(layer_err)
else:
val = fns.linalg.norm(decompressed_weight - weight, ord="fro")
return val.item()


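A hedged usage sketch of the extended API, using the imports that appear elsewhere in this diff (the weight shape and values are made up for illustration):

import numpy as np

from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
from nncf.quantization.algorithms.weight_compression.weight_lowering import get_integer_quantization_error
from nncf.tensor import Tensor

weight = Tensor(np.random.rand(128, 256).astype(np.float32))
config = WeightCompressionConfig()  # default backup config, as in the mixed-precision criteria

# Worst per-group mean squared error, the pre-existing behaviour.
err_max_mean = get_integer_quantization_error(weight, 1, config, reduction="max_mean")

# Frobenius norm of the whole dequantization error, newly supported by this PR.
err_frobenius = get_integer_quantization_error(weight, 1, config, reduction="frobenius")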
(next file)
@@ -730,7 +730,7 @@ def __str__(self):
def test_quantization_error_calculation(desc: QuantErrorDesc):
weight = Tensor(desc.weight)
axis = 1
actual_error = get_integer_quantization_error(weight, axis, desc.config)
actual_error = get_integer_quantization_error(weight, axis, desc.config, reduction="max_mean")
ref_error = desc.ref_error
atol = desc.atol if desc.atol is not None else 1e-8
assert np.allclose(actual_error, ref_error, atol=atol)
(next file)
@@ -274,7 +274,8 @@ def test_quantization_alignment(weight_shape, config, quantization_task, tensor_
@pytest.mark.parametrize("config", INT4_COMPRESSION_CONFIGS, ids=[str(c) for c in INT4_COMPRESSION_CONFIGS])
@pytest.mark.parametrize("tensor_backend", [TensorBackend.numpy, "auto"])
@pytest.mark.parametrize("dtype", SUPPORTED_WEIGHT_DTYPES)
def test_integer_quantization_error_alignment(weight_shape, config, tensor_backend, dtype):
@pytest.mark.parametrize("reduction", ["max_mean", "frobenius"])
def test_integer_quantization_error_alignment(weight_shape, config, tensor_backend, dtype, reduction):
results = defaultdict(dict)
# Iterate over two implementations
for cb in [ComputationBackend.NumPy, ComputationBackend.OV]:
@@ -289,16 +290,20 @@ def test_integer_quantization_error_alignment(weight_shape, config, tensor_backe
fn_to_patch = opt_fns.get_integer_quantization_error
patch_path = f"nncf.openvino.optimized_functions.{fn_to_patch.__name__}"
with patch(patch_path, side_effect=fn_to_patch) as mock:
results[cb]["quantization_error"] = get_integer_quantization_error(weight, REDUCTION_AXES, config)
results[cb]["quantization_error"] = get_integer_quantization_error(
weight, REDUCTION_AXES, config, reduction=reduction
)

if cb == ComputationBackend.NumPy:
mock.assert_not_called()
else:
mock.assert_called_once()

# It seems like numpy and openvino summate elements in different order during reduce_sum / reduce_mean computation.
# This results in small numerical differences.
_check_values(results, atol=1e-6)
# For "max_mean", it seems like numpy and openvino summate elements in different order during
# reduce_sum / reduce_mean computation. This results in small numerical differences.
# For "frobenius", there is a bit larger error, possibly because np.linalg.norm relies on BLAS/LAPACK
# implementations.
_check_values(results, atol=1e-6 if reduction == "max_mean" else 0, rtol=1e-4 if reduction == "frobenius" else 0)


@pytest.mark.parametrize("weight_shape", [WEIGHT_SHAPE], ids=[""])
@@ -498,7 +503,7 @@ def _check_backends_and_dtypes(
assert decompressed_weight.dtype == TensorDataType.float32


def _check_values(results, atol=0.0):
def _check_values(results, atol=0.0, rtol=0.0):
def format_list_of_floats(lst, n_first=32):
return ", ".join(f"{x:.10f}" for x in lst[:n_first])

@@ -516,7 +521,7 @@ def format_list_of_floats(lst, n_first=32):
# misalignments equal to 1 quant between OV and NumPy. For more details see ticket 156511.

try:
np.testing.assert_allclose(ov_result.data, numpy_result.data, atol=atol, rtol=0)
np.testing.assert_allclose(ov_result.data, numpy_result.data, atol=atol, rtol=rtol)
except AssertionError:
not_equal_mask = np.not_equal(ov_result.data, numpy_result.data)
msg = (
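Regarding the tolerance comment in the test above: a tiny illustration (not from the repository) of how accumulation order alone can change a float32 reduction:

import numpy as np

rng = np.random.default_rng(42)
x = rng.standard_normal(1_000_000).astype(np.float32)

# Same elements, different accumulation order: the results usually differ in the last bits.
forward = np.add.reduce(x)
reversed_order = np.add.reduce(x[::-1])
print(forward, reversed_order, abs(forward - reversed_order))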
(next file)
@@ -145,6 +145,7 @@ def get(self, ov_model_params_kwargs=None, get_model_kwargs=None):
weight_shape=(10, 2, 2),
original_reduction_axes=(1,),
reduction_axes=(2,),
reduction="max_mean",
),
),
ModelGetter(