Skip to content

Commit 31f49a2

Browse files
Update default NNCF configurations (#824)
* Add configs from 143530
* Fix wrong AWQ option
* Apply comment
* Add test
* Add missed configuration
* Apply comment
1 parent cb2f2ec commit 31f49a2

File tree

2 files changed

+55
-17
lines changed

2 files changed

+55
-17
lines changed

optimum/intel/openvino/configuration.py

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,25 @@
3232

3333
logger = logging.getLogger(__name__)
3434

35+
36+
class OVQuantizationMethod(str, Enum):
37+
DEFAULT = "default"
38+
HYBRID = "hybrid"
39+
AWQ = "awq"
40+
41+
3542
_DEFAULT_4BIT_CONFIGS = {
36-
"databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8},
43+
"databricks/dolly-v2-3b": {"bits": 4, "sym": False, "group_size": 128, "scale_estimation": True},
3744
"EleutherAI/gpt-j-6b": {"bits": 4, "sym": False, "group_size": 64},
3845
"facebook/opt-6.7b": {"bits": 4, "sym": False, "group_size": 64, "ratio": 0.8},
39-
"bigscience/bloomz-7b1": {"bits": 4, "sym": False, "group_size": 32, "ratio": 0.6},
4046
"togethercomputer/RedPajama-INCITE-7B-Instruct": {"bits": 4, "sym": False, "group_size": 128},
4147
"HuggingFaceH4/zephyr-7b-beta": {
4248
"bits": 4,
4349
"sym": True,
4450
"group_size": 128,
4551
"ratio": 0.8,
4652
"dataset": "wikitext2",
47-
"awq": True,
53+
"quant_method": OVQuantizationMethod.AWQ,
4854
},
4955
"meta-llama/Llama-2-7b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6},
5056
"meta-llama/Llama-2-7b-chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8},
@@ -55,21 +61,21 @@
5561
"group_size": 64,
5662
"ratio": 0.8,
5763
"dataset": "wikitext2",
58-
"awq": True,
64+
"quant_method": OVQuantizationMethod.AWQ,
5965
},
6066
"stabilityai/stablelm-zephyr-3b": {
6167
"bits": 4,
6268
"sym": False,
6369
"group_size": 128,
6470
"ratio": 1.0,
6571
"dataset": "wikitext2",
66-
"awq": True,
72+
"quant_method": OVQuantizationMethod.AWQ,
6773
},
6874
"stabilityai/stable-code-3b": {"bits": 4, "sym": True, "group_size": 64, "ratio": 0.8},
6975
"pansophic/rocket-3B": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8},
7076
"THUDM/chatglm2-6b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.72},
7177
"Qwen/Qwen-7B-Chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6},
72-
"openlm-research/open_llama_3b": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True},
78+
"openlm-research/open_llama_3b": {"bits": 4, "sym": False, "group_size": 64, "all_layers": True},
7379
"openlm-research/open_llama_3b_v2": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True},
7480
"tiiuae/falcon-7b-instruct": {"bits": 4, "sym": True, "group_size": 64, "all_layers": True},
7581
"psmathur/orca_mini_3b": {
@@ -78,19 +84,24 @@
7884
"group_size": 64,
7985
"all_layers": True,
8086
"dataset": "wikitext2",
81-
"awq": True,
87+
"quant_method": OVQuantizationMethod.AWQ,
8288
},
8389
"bigscience/bloomz-560m": {
8490
"bits": 4,
8591
"sym": True,
8692
"group_size": 64,
8793
"ratio": 0.8,
8894
"dataset": "wikitext2",
89-
"awq": True,
95+
"quant_method": OVQuantizationMethod.AWQ,
9096
},
9197
"mistralai/Mixtral-8x7B-v0.1": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8},
9298
"facebook/opt-2.7b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.7},
93-
"togethercomputer/RedPajama-INCITE-Chat-3B-v1": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8},
99+
"togethercomputer/RedPajama-INCITE-Chat-3B-v1": {
100+
"bits": 4,
101+
"sym": False,
102+
"group_size": 128,
103+
"scale_estimation": True,
104+
},
94105
"lmsys/vicuna-7b-v1.5": {"bits": 4, "sym": False, "group_size": 128, "ratio": 1.0},
95106
"stabilityai/stablelm-tuned-alpha-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8},
96107
"mistralai/Mistral-7B-v0.1": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.9},
@@ -100,8 +111,20 @@
100111
"group_size": 128,
101112
"ratio": 0.8,
102113
"dataset": "wikitext2",
103-
"awq": True,
114+
"quant_method": OVQuantizationMethod.AWQ,
104115
},
116+
"openai-community/gpt2": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.5, "scale_estimation": True},
117+
"lmsys/longchat-7b-16k": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9},
118+
"bigcode/starcoder2-3b": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9},
119+
"TinyLlama/TinyLlama-1.1B-Chat-v1.0": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.8},
120+
"stabilityai/stablelm-tuned-alpha-7b": {
121+
"bits": 4,
122+
"sym": False,
123+
"group_size": 128,
124+
"ratio": 0.6,
125+
"scale_estimation": True,
126+
},
127+
"microsoft/phi-2": {"bits": 4, "sym": False, "group_size": 128, "ratio": 0.9},
105128
}
106129

107130
_DEFAULT_4BIT_CONFIG = {
@@ -113,12 +136,6 @@
113136
}
114137

115138

116-
class OVQuantizationMethod(str, Enum):
117-
DEFAULT = "default"
118-
HYBRID = "hybrid"
119-
AWQ = "awq"
120-
121-
122139
@dataclass
123140
class OVQuantizationConfigBase(QuantizationConfigMixin):
124141
"""

tests/openvino/test_quantization.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,13 @@
6161
OVWeightQuantizationConfig,
6262
OVDynamicQuantizationConfig,
6363
)
64-
from optimum.intel.openvino.configuration import OVQuantizationMethod, OVQuantizationConfigBase
64+
from optimum.intel.openvino.configuration import (
65+
OVQuantizationMethod,
66+
OVQuantizationConfigBase,
67+
_DEFAULT_4BIT_CONFIGS,
68+
_DEFAULT_4BIT_CONFIG,
69+
)
70+
from copy import deepcopy
6571

6672
from optimum.intel.openvino.quantization import InferRequestWrapper
6773
from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version
@@ -820,6 +826,13 @@ class OVQuantizationConfigTest(unittest.TestCase):
820826
(dict(bits=8, fast_bias_correction=True, weight_only=False), OVQuantizationConfig, None),
821827
)
822828

829+
def get_default_configurations() -> dict:
830+
default_configurations = deepcopy(_DEFAULT_4BIT_CONFIGS)
831+
default_configurations.update({"default": _DEFAULT_4BIT_CONFIG})
832+
return default_configurations
833+
834+
DEFAULT_CONFIGURATIONS = get_default_configurations()
835+
823836
@parameterized.expand(QUANTIZATION_CONFIGS)
824837
def test_config_serialization(self, quantization_config: OVQuantizationConfigBase):
825838
ov_config = OVConfig(quantization_config=quantization_config)
@@ -849,6 +862,14 @@ def test_config_from_dict(self, quantization_config: dict, config_type: type, wa
849862
if hasattr(ov_config.quantization_config, k):
850863
self.assertEqual(getattr(ov_config.quantization_config, k), v)
851864

865+
@parameterized.expand(DEFAULT_CONFIGURATIONS)
866+
def test_named_default_configurations(self, config_id: str):
867+
custom_configuration = self.DEFAULT_CONFIGURATIONS[config_id]
868+
prepared_config = OVModelForCausalLM._prepare_weight_quantization_config(custom_configuration)
869+
for field_name, reference_value in custom_configuration.items():
870+
value = prepared_config.__getattribute__(field_name)
871+
self.assertEqual(value, reference_value)
872+
852873

853874
class InferRequestWrapperTest(unittest.TestCase):
854875
MODEL_ID = ("openai/whisper-tiny.en",)

0 commit comments

Comments (0)