Skip to content

Commit eac1f6c

Browse files
Increase default 4-bit compression ratio from 0.8 to 1.0 (#805)
* Increase default 4-bit ratio from 0.8 to 1.0
* Style
* Fix test
1 parent 328259a commit eac1f6c

File tree

4 files changed

+22
-14
lines changed

4 files changed

+22
-14
lines changed

optimum/commands/export/openvino.py

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,7 @@ def parse_args(parser: "ArgumentParser"):
221221

222222
def run(self):
223223
from ...exporters.openvino.__main__ import infer_task, main_export, maybe_convert_tokenizers
224-
from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIGS, OVConfig
224+
from ...intel.openvino.configuration import _DEFAULT_4BIT_CONFIG, _DEFAULT_4BIT_CONFIGS, OVConfig
225225

226226
def _get_default_int4_config(model_id_or_path, library_name):
227227
if model_id_or_path in _DEFAULT_4BIT_CONFIGS:
@@ -233,13 +233,7 @@ def _get_default_int4_config(model_id_or_path, library_name):
233233
if original_model_name in _DEFAULT_4BIT_CONFIGS:
234234
return _DEFAULT_4BIT_CONFIGS[original_model_name]
235235

236-
return {
237-
"bits": 4,
238-
"ratio": 0.8,
239-
"sym": False,
240-
"group_size": None,
241-
"all_layers": None,
242-
}
236+
return _DEFAULT_4BIT_CONFIG
243237

244238
library_name = TasksManager.infer_library_from_model(self.args.model, library_name=self.args.library)
245239
if library_name == "sentence_transformers" and self.args.library is None:

optimum/intel/openvino/configuration.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,14 @@
104104
},
105105
}
106106

107+
_DEFAULT_4BIT_CONFIG = {
108+
"bits": 4,
109+
"ratio": 1.0,
110+
"sym": False,
111+
"group_size": 128,
112+
"all_layers": None,
113+
}
114+
107115

108116
class OVQuantizationMethod(str, Enum):
109117
DEFAULT = "default"

optimum/intel/openvino/modeling_decoder.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,13 @@
4040
from ...exporters.openvino.stateful import model_has_state
4141
from ..utils.import_utils import is_nncf_available, is_transformers_version
4242
from ..utils.modeling_utils import MULTI_QUERY_ATTN_MODELS
43-
from .configuration import _DEFAULT_4BIT_CONFIGS, OVConfig, OVWeightQuantizationConfig, _check_default_4bit_configs
43+
from .configuration import (
44+
_DEFAULT_4BIT_CONFIG,
45+
_DEFAULT_4BIT_CONFIGS,
46+
OVConfig,
47+
OVWeightQuantizationConfig,
48+
_check_default_4bit_configs,
49+
)
4450
from .modeling import _TOKENIZER_FOR_DOC, INPUTS_DOCSTRING, MODEL_START_DOCSTRING, OVModel
4551
from .utils import ONNX_WEIGHTS_NAME, OV_TO_NP_TYPE, OV_XML_FILE_NAME, STR_TO_OV_TYPE
4652

@@ -775,7 +781,7 @@ def _from_pretrained(
775781
init_cls = cls
776782

777783
if isinstance(quantization_config, dict) and quantization_config == {"bits": 4}:
778-
quantization_config = _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, quantization_config)
784+
quantization_config = _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, _DEFAULT_4BIT_CONFIG)
779785
quantization_config = cls._prepare_weight_quantization_config(quantization_config, load_in_8bit)
780786

781787
enable_compilation = kwargs.pop("compile", True) and not quantization_config

tests/openvino/test_exporters_cli.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -86,10 +86,10 @@ class OVCLIExportTestCase(unittest.TestCase):
8686
)
8787

8888
TEST_4BIT_CONFIGURATONS = [
89-
("text-generation-with-past", "opt125m", "int4_sym_g128", 62, 86),
90-
("text-generation-with-past", "opt125m", "int4_asym_g128", 62, 86),
91-
("text-generation-with-past", "opt125m", "int4_sym_g64", 62, 86),
92-
("text-generation-with-past", "opt125m", "int4_asym_g64", 62, 86),
89+
("text-generation-with-past", "opt125m", "int4_sym_g128", 4, 144),
90+
("text-generation-with-past", "opt125m", "int4_asym_g128", 4, 144),
91+
("text-generation-with-past", "opt125m", "int4_sym_g64", 4, 144),
92+
("text-generation-with-past", "opt125m", "int4_asym_g64", 4, 144),
9393
("text-generation-with-past", "llama_awq", "int4 --ratio 1.0 --sym --group-size 16 --all-layers", 0, 32),
9494
(
9595
"text-generation-with-past",

0 commit comments

Comments
 (0)