Skip to content

Commit 3f8ca3c

Browse files
Fix optimized OV compression on ARM (#3743)
### Changes

Reorder the operands of the Multiply node used in the decompressed weight computation.

### Reason for changes

Due to unexpected behavior, the order of operands for Multiply nodes affects the results on ARM machines. See ticket 176803.

### Related tickets

176803

### Tests

Removed the xfail markers that had been added because of another issue.

Test results with the fix: https://github.com/openvinotoolkit/nncf/actions/runs/19458847436/job/55678153100?pr=3743

Test results without the fix: https://github.com/openvinotoolkit/nncf/actions/runs/19461075228/job/55685167697?pr=3743
1 parent 65c06a1 commit 3f8ca3c

File tree

4 files changed

+9
-41
lines changed

4 files changed

+9
-41
lines changed

src/nncf/openvino/optimized_functions/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ def _build_integer_quantize_dequantize_weight_model(
698698
compressed_weight = ov_results[0]
699699
scale = ov_parameters[1]
700700

701-
decompressed_weight = opset.multiply(scale, convert_op(compressed_weight, ov.Type.f32))
701+
decompressed_weight = opset.multiply(convert_op(compressed_weight, ov.Type.f32), scale)
702702

703703
ov_results = [decompressed_weight] + ov_results if return_compressed_weight else [decompressed_weight]
704704

tests/openvino/native/quantization/test_weights_compression.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
from nncf.common.utils.helpers import set_env_variable
3232
from nncf.data.dataset import Dataset
3333
from nncf.experimental.common.tensor_statistics.collectors import AggregatorBase
34-
from nncf.openvino.cpu_info import is_arm_cpu
3534
from nncf.openvino.graph.model_transformer import OVModelTransformer
3635
from nncf.openvino.graph.node_utils import get_const_value_as_numpy_tensor
3736
from nncf.openvino.optimized_functions import astype
@@ -1940,10 +1939,6 @@ def test_compression_with_transposed_activations(kwargs):
19401939
)
19411940

19421941

1943-
@pytest.mark.xfail(
1944-
is_arm_cpu(),
1945-
reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
1946-
)
19471942
@pytest.mark.parametrize("disabled", [False, True])
19481943
def test_disabled_optimized_compression(disabled):
19491944
hidden_dim = (MIN_INPUT_SIZE_FOR_OPTIMIZED_COMPRESSION // LMLinearModel.OUTPUT_DIM) + 1

tests/openvino/optimized_functions/test_compression_functions.py

Lines changed: 8 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
from nncf.common.factory import NNCFGraphFactory
2727
from nncf.common.utils.caching import ResultsCache
2828
from nncf.common.utils.caching import cache_results
29-
from nncf.openvino.cpu_info import is_arm_cpu
3029
from nncf.openvino.graph.node_utils import get_const_value_as_ov_tensor
3130
from nncf.quantization.algorithms.weight_compression.config import WeightCompressionConfig
3231
from nncf.quantization.algorithms.weight_compression.weight_lowering import MIN_INPUT_SIZE_FOR_OPTIMIZED_COMPRESSION
@@ -123,10 +122,6 @@ def openvino_available(available: bool):
123122
yield
124123

125124

126-
@pytest.mark.xfail(
127-
is_arm_cpu(),
128-
reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
129-
)
130125
@pytest.mark.parametrize(
131126
"weight_shape,is_disabled",
132127
[
@@ -154,10 +149,6 @@ def test_optimized_compression_is_disabled(weight_shape, is_disabled, quantizati
154149
mock.assert_called_once()
155150

156151

157-
@pytest.mark.xfail(
158-
is_arm_cpu(),
159-
reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
160-
)
161152
@pytest.mark.parametrize("weight_shape", [WEIGHT_SHAPE], ids=[""])
162153
@pytest.mark.parametrize("config", COMPRESSION_CONFIGS, ids=[str(c) for c in COMPRESSION_CONFIGS])
163154
@pytest.mark.parametrize(
@@ -277,10 +268,6 @@ def test_quantization_alignment(weight_shape, config, quantization_task, tensor_
277268
_check_values(results)
278269

279270

280-
@pytest.mark.xfail(
281-
is_arm_cpu(),
282-
reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
283-
)
284271
@pytest.mark.parametrize("weight_shape", [WEIGHT_SHAPE], ids=[""])
285272
@pytest.mark.parametrize("config", INT4_COMPRESSION_CONFIGS, ids=[str(c) for c in INT4_COMPRESSION_CONFIGS])
286273
@pytest.mark.parametrize("tensor_backend", [TensorBackend.numpy, "auto"])
@@ -312,10 +299,6 @@ def test_integer_quantization_error_alignment(weight_shape, config, tensor_backe
312299
_check_values(results, atol=1e-6)
313300

314301

315-
@pytest.mark.xfail(
316-
is_arm_cpu(),
317-
reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
318-
)
319302
@pytest.mark.parametrize("weight_shape", [WEIGHT_SHAPE], ids=[""])
320303
@pytest.mark.parametrize("weight_dtype", SUPPORTED_WEIGHT_DTYPES)
321304
@pytest.mark.parametrize("config", COMPRESSION_CONFIGS, ids=[str(c) for c in COMPRESSION_CONFIGS])
@@ -512,8 +495,8 @@ def _check_backends_and_dtypes(
512495

513496

514497
def _check_values(results, atol=0.0):
515-
def format_list_of_floats(lst):
516-
return ", ".join(f"{x:.10f}" for x in lst)
498+
def format_list_of_floats(lst, n_first=32):
499+
return ", ".join(f"{x:.10f}" for x in lst[:n_first])
517500

518501
# Check that the computed tensors are equal between implementations
519502
keys = set(results[ComputationBackend.OV]).union(set(results[ComputationBackend.NumPy]))
@@ -535,16 +518,18 @@ def format_list_of_floats(lst):
535518
msg = (
536519
f"Results do not align for {key} with "
537520
f"{not_equal_mask.sum() / ov_result.data.size * 100:.2f} % misalignment ratio.\n"
538-
f"OV result: {format_list_of_floats(ov_result.data[not_equal_mask])}\n"
539-
f"NumPy result: {format_list_of_floats(numpy_result.data[not_equal_mask])}\n"
521+
f"OV result (first 32 values): {format_list_of_floats(ov_result.data[not_equal_mask])}\n"
522+
f"NumPy result (first 32 values): {format_list_of_floats(numpy_result.data[not_equal_mask])}\n"
540523
)
541524
if "input" in results[ComputationBackend.OV] and "input" in results[ComputationBackend.NumPy]:
542525
numpy_input = results[ComputationBackend.NumPy]["input"].data
543526
ov_input = results[ComputationBackend.OV]["input"].data
544527
np.testing.assert_allclose(numpy_input, ov_input, atol=0, rtol=0)
545-
msg += f"Input values : {format_list_of_floats(numpy_input[not_equal_mask])}\n"
528+
if "weight" in key:
529+
msg += f"Input values (first 32 values) : {format_list_of_floats(numpy_input[not_equal_mask])}\n"
546530
misaligned_groups_mask = np.any(not_equal_mask, axis=-1)
547531
misaligned_groups = numpy_input[misaligned_groups_mask, ...]
548532
misaligned_groups = np.reshape(misaligned_groups, (-1, misaligned_groups.shape[-1]))
549-
msg += f"First 10 misaligned groups: {[it for it in misaligned_groups][:10]}\n"
533+
msg += "First 10 misaligned groups:\n"
534+
msg += "\n".join(format_list_of_floats(it, misaligned_groups.shape[1]) for it in misaligned_groups[:10])
550535
raise AssertionError(msg)

tests/openvino/optimized_functions/test_ov_model_parameters.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -216,10 +216,6 @@ def get(self, ov_model_params_kwargs=None, get_model_kwargs=None):
216216
]
217217

218218

219-
@pytest.mark.xfail(
220-
is_arm_cpu(),
221-
reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
222-
)
223219
@pytest.mark.parametrize(
224220
"model_getter,input_shapes,ref_cache_size",
225221
[
@@ -333,10 +329,6 @@ def test_dynamic_shapes(model_getter, input_shapes, ref_cache_size, dynamic_shap
333329
assert len(OV_MODEL_CACHE._cache) == ref_cache_size[dynamic_shapes]
334330

335331

336-
@pytest.mark.xfail(
337-
is_arm_cpu(),
338-
reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
339-
)
340332
@pytest.mark.parametrize("model_getter", MODEL_GETTERS)
341333
@pytest.mark.parametrize("recompile", [True, False])
342334
def test_recompile(model_getter, recompile):
@@ -446,10 +438,6 @@ def test_share_inputs_outputs(mocker, share_inputs, share_outputs, return_ov_ten
446438
)
447439

448440

449-
@pytest.mark.xfail(
450-
is_arm_cpu(),
451-
reason="Due to a bug in CPU plugin compression models can fail at compilation on ARM CPUs. Ticket: 164135.",
452-
)
453441
@pytest.mark.parametrize(
454442
"weight,convertable_division,ref_compressed_weight",
455443
[

0 commit comments

Comments
 (0)