Skip to content

Commit 69d276e

Browse files
[OpenVINO] Adopt new mxfp4 quantization logic (#1465)
* Remove cb4 pre-release logic; adopt new mxfp4 logic
* Fix tests
* Fix tests 2
* Update setup.py
1 parent 0f1cfc4 commit 69d276e

File tree

4 files changed

+11
-33
lines changed

4 files changed

+11
-33
lines changed

optimum/intel/openvino/configuration.py

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -715,7 +715,7 @@ def __init__(
715715
)
716716
self.bits = bits
717717
self.sym = sym
718-
self.group_size = group_size or (-1 if bits == 8 else 128)
718+
self.group_size = group_size
719719
self.ratio = ratio
720720
self.all_layers = all_layers
721721
self.sensitivity_metric = sensitivity_metric
@@ -794,7 +794,7 @@ def post_init(self):
794794
raise ValueError(
795795
f"For 8-bit quantization, `ratio` is expected to be set to 1.0, but was set to {self.ratio}"
796796
)
797-
if self.group_size != -1:
797+
if self.group_size is not None and self.group_size != -1:
798798
raise ValueError(
799799
f"For 8-bit quantization, `group_size` is expected to be set to -1, but was set to {self.group_size}"
800800
)
@@ -843,11 +843,6 @@ def post_init(self):
843843
f"['int4', 'int8', 'mxfp4', 'nf4', 'cb4'], but found: {self.dtype}."
844844
)
845845
if self.dtype in ["mxfp4", "nf4", "cb4"]:
846-
if self.dtype == "cb4" and is_nncf_version("<=", "2.17"):
847-
raise ImportError(
848-
"Codebook quantization is currently supported only with NNCF develop. "
849-
"Please run `pip install git+https://github.com/openvinotoolkit/nncf.git`."
850-
)
851846
if self.bits != 4:
852847
raise ValueError(
853848
f"When applying weight compression with '{self.dtype}' data type, the `bits` parameter must be set to 4, but found {self.bits}"
@@ -874,7 +869,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
874869
if mode in signed_bitness.values():
875870
mode += "_sym" if self.sym else "_asym"
876871
if mode == "mxfp4":
877-
mode = "e2m1"
872+
mode = "e2m1" if is_nncf_version("<=", "2.18") else "mxfp4"
878873
if mode == "cb4":
879874
mode = "cb4_f8e4m3"
880875
mode = nncf.CompressWeightsMode(mode)

setup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,8 +66,8 @@
6666
QUALITY_REQUIRE = ["black~=23.1", "ruff==0.4.4"]
6767

6868
EXTRAS_REQUIRE = {
69-
"nncf": ["nncf>=2.16.0"],
70-
"openvino": ["nncf>=2.16.0", "openvino>=2025.1.0", "openvino-tokenizers>=2025.1.0"],
69+
"nncf": ["nncf>=2.18.0"],
70+
"openvino": ["nncf>=2.18.0", "openvino>=2025.1.0", "openvino-tokenizers>=2025.1.0"],
7171
"neural-compressor": ["neural-compressor[pt]>=3.4.1", "accelerate", "transformers<4.46"],
7272
"ipex": ["intel-extension-for-pytorch>=2.8", "transformers>4.54,<4.56", "accelerate"],
7373
"diffusers": ["diffusers"],

tests/openvino/test_exporters_cli.py

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@
1818
from typing import Dict
1919
from unittest.mock import Mock
2020

21-
import pytest
2221
from parameterized import parameterized
2322
from transformers import AutoModelForCausalLM, AutoModelForZeroShotImageClassification, AutoProcessor, AutoTokenizer
2423
from utils_tests import (
@@ -63,7 +62,6 @@
6362
from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS, TemporaryDirectory
6463
from optimum.intel.utils.import_utils import (
6564
compare_versions,
66-
is_nncf_version,
6765
is_openvino_tokenizers_available,
6866
is_openvino_version,
6967
is_tokenizers_version,
@@ -443,7 +441,7 @@ class OVCLIExportTestCase(unittest.TestCase):
443441
if is_transformers_version("<=", "4.45")
444442
else {
445443
"encoder": 30,
446-
"decoder": 62 if is_nncf_version("<=", "2.17") and is_openvino_version("<", "2025.3") else 52,
444+
"decoder": 52,
447445
},
448446
(
449447
{"encoder": {"int8": 32}, "decoder": {"int8": 52}, "decoder_with_past": {"int8": 42}}
@@ -1026,8 +1024,6 @@ def test_exporters_cli_hybrid_quantization(
10261024
def test_exporters_cli_4bit(
10271025
self, task: str, model_type: str, option: str, expected_num_weight_nodes_per_model: Dict[str, Dict[str, int]]
10281026
):
1029-
if option.startswith("cb4") and is_nncf_version("<=", "2.17"):
1030-
pytest.skip("Codebook quantization is supported starting from NNCF 2.18")
10311027
with TemporaryDirectory() as tmpdir:
10321028
result = subprocess.run(
10331029
f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}",
@@ -1084,8 +1080,6 @@ def test_exporters_cli_full_quantization(
10841080
expected_fake_nodes_per_model: Dict[str, int],
10851081
expected_num_weight_nodes_per_model: Dict[str, Dict[str, int]],
10861082
):
1087-
if quant_mode == "cb4_f8e4m3" and is_nncf_version("<=", "2.17"):
1088-
pytest.skip("Codebook quantization is supported starting from NNCF 2.18")
10891083
with TemporaryDirectory() as tmpdir:
10901084
subprocess.run(
10911085
f"optimum-cli export openvino --task {task} --model {MODEL_NAMES[model_type]} "

tests/openvino/test_quantization.py

Lines changed: 5 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@
8080
from copy import deepcopy
8181

8282
from optimum.intel.openvino.quantization import InferRequestWrapper, OVCalibrationDatasetBuilder
83-
from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version, is_nncf_version
83+
from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
8484
from utils_tests import (
8585
MODEL_NAMES,
8686
get_num_quantized_nodes,
@@ -394,7 +394,7 @@ class OVQuantizerTest(unittest.TestCase):
394394
if is_transformers_version("<=", "4.45")
395395
else {
396396
"encoder": 30,
397-
"decoder": 62 if is_nncf_version("<=", "2.17") and is_openvino_version("<", "2025.3") else 52,
397+
"decoder": 52,
398398
},
399399
(
400400
{"encoder": {"int8": 32}, "decoder": {"int8": 52}, "decoder_with_past": {"int8": 42}}
@@ -568,12 +568,6 @@ def test_ov_model_static_quantization_with_auto_dataset(
568568
expected_fake_nodes_per_model,
569569
expected_num_weight_nodes_per_model,
570570
):
571-
if (
572-
isinstance(quantization_config, dict)
573-
and quantization_config.get("weight_quantization_config", {}).get("dtype") == "cb4"
574-
and is_nncf_version("<=", "2.17")
575-
):
576-
pytest.skip("Codebook quantization is supported starting from NNCF 2.18")
577571
model_id = MODEL_NAMES[model_name]
578572

579573
with TemporaryDirectory() as tmp_dir:
@@ -1303,13 +1297,6 @@ def test_ovmodel_4bit_auto_compression(self, model_cls, model_type, expected_ov_
13031297
def test_ovmodel_4bit_auto_compression_with_config(
13041298
self, model_cls, model_name, trust_remote_code, quantization_config, expected_num_weight_nodes_per_model
13051299
):
1306-
if (
1307-
isinstance(quantization_config, dict)
1308-
and quantization_config.get("dtype") == "cb4"
1309-
and is_nncf_version("<=", "2.17")
1310-
):
1311-
pytest.skip("Codebook quantization is supported starting from NNCF 2.18")
1312-
13131300
model_id = MODEL_NAMES[model_name]
13141301
with TemporaryDirectory() as tmp_dir:
13151302
quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
@@ -1390,7 +1377,7 @@ def main_export_in_stacktrace(*args, **kwargs):
13901377
compression_params = {
13911378
"mode": nncf.CompressWeightsMode.INT8_ASYM,
13921379
"ratio": 1.0,
1393-
"group_size": -1,
1380+
"group_size": None,
13941381
"all_layers": None,
13951382
"sensitivity_metric": None,
13961383
"dataset": None,
@@ -1805,6 +1792,8 @@ def eval_expression_if_possible(expression):
18051792
config_value = (
18061793
"max_activation_variance" if sub_config.bits == 4 else "weight_quantization_error"
18071794
)
1795+
if param_name == "group_size" and config_value is None:
1796+
config_value = -1 if sub_config.bits == 8 else 128
18081797

18091798
if config_value is None and rt_info_value is False:
18101799
continue

0 commit comments

Comments (0)