Fix optimized OV compression on ARM (#3743)

nikita-savelyevv · web-flow · commit 3f8ca3c532c9 · 2025-11-18T21:45:44.000+02:00
### Changes Reorder the operands of the Multiply node used to compute the decompressed weight. ### Reason for changes Due to unexpected behavior, the order of operands of Multiply nodes affects the results on ARM machines. See ticket 176803. ### Related tickets 176803 ### Tests Removed the ARM xfail markers that had been added because of another issue (ticket 164135). Test results with the fix: https://github.com/openvinotoolkit/nncf/actions/runs/19458847436/job/55678153100?pr=3743 Test results without the fix: https://github.com/openvinotoolkit/nncf/actions/runs/19461075228/job/55685167697?pr=3743
diff --git a/src/nncf/openvino/optimized_functions/models.py b/src/nncf/openvino/optimized_functions/models.py
@@ -698,7 +698,7 @@ def _build_integer_quantize_dequantize_weight_model(
             compressed_weight = ov_results[0]
             scale = ov_parameters[1]
 
-    decompressed_weight = opset.multiply(scale, convert_op(compressed_weight, ov.Type.f32))
+    decompressed_weight = opset.multiply(convert_op(compressed_weight, ov.Type.f32), scale)
 
     ov_results = [decompressed_weight] + ov_results if return_compressed_weight else [decompressed_weight]
 
diff --git a/tests/openvino/native/quantization/test_weights_compression.py b/tests/openvino/native/quantization/test_weights_compression.py
@@ -31,7 +31,6 @@
 from nncf.common.utils.helpers import set_env_variable
 from nncf.data.dataset import Dataset
 from nncf.experimental.common.tensor_statistics.collectors import AggregatorBase
-from nncf.openvino.cpu_info import is_arm_cpu
 from nncf.openvino.graph.model_transformer import OVModelTransformer
 from nncf.openvino.graph.node_utils import get_const_value_as_numpy_tensor
 from nncf.openvino.optimized_functions import astype
@@ -1940,10 +1939,6 @@ def test_compression_with_transposed_activations(kwargs):
         )
 
 
-@pytest.mark.xfail(
-    is_arm_cpu(),
-    reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
-)
 @pytest.mark.parametrize("disabled", [False, True])
 def test_disabled_optimized_compression(disabled):
     hidden_dim = (MIN_INPUT_SIZE_FOR_OPTIMIZED_COMPRESSION // LMLinearModel.OUTPUT_DIM) + 1
diff --git a/tests/openvino/optimized_functions/test_compression_functions.py b/tests/openvino/optimized_functions/test_compression_functions.py
@@ -26,7 +26,6 @@
 from nncf.common.factory import NNCFGraphFactory
 from nncf.common.utils.caching import ResultsCache
 from nncf.common.utils.caching import cache_results
-from nncf.openvino.cpu_info import is_arm_cpu
 from nncf.openvino.graph.node_utils import get_const_value_as_ov_tensor
 from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
 from nncf.quantization.algorithms.weight_compression.weight_lowering import MIN_INPUT_SIZE_FOR_OPTIMIZED_COMPRESSION
@@ -123,10 +122,6 @@ def openvino_available(available: bool):
         yield
 
 
-@pytest.mark.xfail(
-    is_arm_cpu(),
-    reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
-)
 @pytest.mark.parametrize(
     "weight_shape,is_disabled",
     [
@@ -154,10 +149,6 @@ def test_optimized_compression_is_disabled(weight_shape, is_disabled, quantizati
             mock.assert_called_once()
 
 
-@pytest.mark.xfail(
-    is_arm_cpu(),
-    reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
-)
 @pytest.mark.parametrize("weight_shape", [WEIGHT_SHAPE], ids=[""])
 @pytest.mark.parametrize("config", COMPRESSION_CONFIGS, ids=[str(c) for c in COMPRESSION_CONFIGS])
 @pytest.mark.parametrize(
@@ -277,10 +268,6 @@ def test_quantization_alignment(weight_shape, config, quantization_task, tensor_
     _check_values(results)
 
 
-@pytest.mark.xfail(
-    is_arm_cpu(),
-    reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
-)
 @pytest.mark.parametrize("weight_shape", [WEIGHT_SHAPE], ids=[""])
 @pytest.mark.parametrize("config", INT4_COMPRESSION_CONFIGS, ids=[str(c) for c in INT4_COMPRESSION_CONFIGS])
 @pytest.mark.parametrize("tensor_backend", [TensorBackend.numpy, "auto"])
@@ -312,10 +299,6 @@ def test_integer_quantization_error_alignment(weight_shape, config, tensor_backe
     _check_values(results, atol=1e-6)
 
 
-@pytest.mark.xfail(
-    is_arm_cpu(),
-    reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
-)
 @pytest.mark.parametrize("weight_shape", [WEIGHT_SHAPE], ids=[""])
 @pytest.mark.parametrize("weight_dtype", SUPPORTED_WEIGHT_DTYPES)
 @pytest.mark.parametrize("config", COMPRESSION_CONFIGS, ids=[str(c) for c in COMPRESSION_CONFIGS])
@@ -512,8 +495,8 @@ def _check_backends_and_dtypes(
 
 
 def _check_values(results, atol=0.0):
-    def format_list_of_floats(lst):
-        return ", ".join(f"{x:.10f}" for x in lst)
+    def format_list_of_floats(lst, n_first=32):
+        return ", ".join(f"{x:.10f}" for x in lst[:n_first])
 
     # Check that the computed tensors are equal between implementations
     keys = set(results[ComputationBackend.OV]).union(set(results[ComputationBackend.NumPy]))
@@ -535,16 +518,18 @@ def format_list_of_floats(lst):
             msg = (
                 f"Results do not align for {key} with "
                 f"{not_equal_mask.sum() / ov_result.data.size * 100:.2f} % misalignment ratio.\n"
-                f"OV result:    {format_list_of_floats(ov_result.data[not_equal_mask])}\n"
-                f"NumPy result: {format_list_of_floats(numpy_result.data[not_equal_mask])}\n"
+                f"OV result (first 32 values):    {format_list_of_floats(ov_result.data[not_equal_mask])}\n"
+                f"NumPy result (first 32 values): {format_list_of_floats(numpy_result.data[not_equal_mask])}\n"
             )
             if "input" in results[ComputationBackend.OV] and "input" in results[ComputationBackend.NumPy]:
                 numpy_input = results[ComputationBackend.NumPy]["input"].data
                 ov_input = results[ComputationBackend.OV]["input"].data
                 np.testing.assert_allclose(numpy_input, ov_input, atol=0, rtol=0)
-                msg += f"Input values   : {format_list_of_floats(numpy_input[not_equal_mask])}\n"
+                if "weight" in key:
+                    msg += f"Input values (first 32 values)    : {format_list_of_floats(numpy_input[not_equal_mask])}\n"
                 misaligned_groups_mask = np.any(not_equal_mask, axis=-1)
                 misaligned_groups = numpy_input[misaligned_groups_mask, ...]
                 misaligned_groups = np.reshape(misaligned_groups, (-1, misaligned_groups.shape[-1]))
-                msg += f"First 10 misaligned groups: {[it for it in misaligned_groups][:10]}\n"
+                msg += "First 10 misaligned groups:\n"
+                msg += "\n".join(format_list_of_floats(it, misaligned_groups.shape[1]) for it in misaligned_groups[:10])
             raise AssertionError(msg)
diff --git a/tests/openvino/optimized_functions/test_ov_model_parameters.py b/tests/openvino/optimized_functions/test_ov_model_parameters.py
@@ -216,10 +216,6 @@ def get(self, ov_model_params_kwargs=None, get_model_kwargs=None):
 ]
 
 
-@pytest.mark.xfail(
-    is_arm_cpu(),
-    reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
-)
 @pytest.mark.parametrize(
     "model_getter,input_shapes,ref_cache_size",
     [
@@ -333,10 +329,6 @@ def test_dynamic_shapes(model_getter, input_shapes, ref_cache_size, dynamic_shap
     assert len(OV_MODEL_CACHE._cache) == ref_cache_size[dynamic_shapes]
 
 
-@pytest.mark.xfail(
-    is_arm_cpu(),
-    reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
-)
 @pytest.mark.parametrize("model_getter", MODEL_GETTERS)
 @pytest.mark.parametrize("recompile", [True, False])
 def test_recompile(model_getter, recompile):
@@ -446,10 +438,6 @@ def test_share_inputs_outputs(mocker, share_inputs, share_outputs, return_ov_ten
     )
 
 
-@pytest.mark.xfail(
-    is_arm_cpu(),
-    reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
-)
 @pytest.mark.parametrize(
     "weight,convertable_division,ref_compressed_weight",
     [