Skip to content

Commit 1ac67bd

Browse files
nikita-savelyevv authored and mvafin committed
Add quantization statistics path argument (#1392)
* Add quantization statistics path argument * Add note * Handle additional cases
1 parent a26eb13 commit 1ac67bd

File tree

6 files changed

+96
-5
lines changed

6 files changed

+96
-5
lines changed

docs/source/openvino/export.mdx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,
3838
[--group-size GROUP_SIZE] [--backup-precision {none,int8_sym,int8_asym}]
3939
[--dataset DATASET] [--all-layers] [--awq] [--scale-estimation] [--gptq]
4040
[--lora-correction] [--sensitivity-metric SENSITIVITY_METRIC]
41+
[--quantization-statistics-path QUANTIZATION_STATISTICS_PATH]
4142
[--num-samples NUM_SAMPLES] [--disable-stateful] [--disable-convert-tokenizer]
4243
[--smooth-quant-alpha SMOOTH_QUANT_ALPHA]
4344
output
@@ -136,6 +137,12 @@ Optional arguments:
136137
The sensitivity metric for assigning quantization precision to layers. It can be one of the
137138
following: ['weight_quantization_error', 'hessian_input_activation',
138139
'mean_activation_variance', 'max_activation_variance', 'mean_activation_magnitude'].
140+
--quantization-statistics-path QUANTIZATION_STATISTICS_PATH
141+
Directory path to dump/load data-aware weight-only quantization statistics. This is useful when
142+
running data-aware quantization multiple times on the same model and dataset to avoid
143+
recomputing statistics. This option is applicable exclusively for weight-only quantization.
144+
Please note that the statistics depend on the dataset, so if you change the dataset, you should
145+
also change the statistics path to avoid confusion.
139146
--num-samples NUM_SAMPLES
140147
The maximum number of samples to take from the dataset for quantization.
141148
--disable-stateful Disable stateful converted models, stateless models will be generated instead. Stateful models

optimum/commands/export/openvino.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,17 @@ def parse_args_openvino(parser: "ArgumentParser"):
239239
"'max_activation_variance', 'mean_activation_magnitude']."
240240
),
241241
)
242+
optional_group.add_argument(
243+
"--quantization-statistics-path",
244+
type=str,
245+
default=None,
246+
help=(
247+
"Directory path to dump/load data-aware weight-only quantization statistics. This is useful when running "
248+
"data-aware quantization multiple times on the same model and dataset to avoid recomputing statistics. "
249+
"This option is applicable exclusively for weight-only quantization. Please note that the statistics depend "
250+
"on the dataset, so if you change the dataset, you should also change the statistics path to avoid confusion."
251+
),
252+
)
242253
optional_group.add_argument(
243254
"--num-samples",
244255
type=int,
@@ -278,6 +289,7 @@ def parse_args_openvino(parser: "ArgumentParser"):
278289

279290

280291
def no_compression_parameter_provided(args):
292+
# Except statistics path
281293
return all(
282294
(
283295
it is None
@@ -354,7 +366,7 @@ def run(self):
354366

355367
if self.args.weight_format is None and self.args.quant_mode is None:
356368
ov_config = None
357-
if not no_compression_parameter_provided(self.args):
369+
if not no_compression_parameter_provided(self.args) or self.args.quantization_statistics_path is not None:
358370
raise ValueError(
359371
"Some compression parameters are provided, but the weight format is not specified. "
360372
"Please provide it with --weight-format argument."
@@ -384,6 +396,8 @@ def run(self):
384396
else:
385397
quantization_config = _DEFAULT_4BIT_WQ_CONFIG
386398
log_message = f"Applying a default quantization config: {quantization_config}."
399+
if self.args.quantization_statistics_path is not None:
400+
quantization_config["statistics_path"] = self.args.quantization_statistics_path
387401
logger.info(log_message)
388402
else:
389403
quantization_config = prepare_wc_config(self.args, _DEFAULT_4BIT_WQ_CONFIG)
@@ -422,6 +436,11 @@ def run(self):
422436
"dataset": self.args.dataset,
423437
}
424438
else:
439+
if self.args.quantization_statistics_path is not None:
440+
logger.warning(
441+
"The --quantization-statistics-path argument is only applicable for weight-only "
442+
"quantization. It will be ignored."
443+
)
425444
quantization_config = prepare_q_config(self.args)
426445
quantization_config["trust_remote_code"] = self.args.trust_remote_code
427446
ov_config = OVConfig(quantization_config=quantization_config)
@@ -590,6 +609,7 @@ def prepare_wc_config(args, default_configs):
590609
"lora_correction": args.lora_correction,
591610
"dtype": args.weight_format,
592611
"backup_precision": args.backup_precision,
612+
"statistics_path": args.quantization_statistics_path,
593613
}
594614

595615

optimum/intel/openvino/configuration.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,11 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
682682
retained in their original precision without any quantization.
683683
- "int8_sym" stands for 8-bit integer symmetric quantization without zero point.
684684
- "int8_asym" stands for 8-bit integer asymmetric quantization with zero points per each quantization group.
685+
statistics_path (`str`, *optional*):
686+
Directory path to dump/load data-aware statistics. This is useful when running data-aware quantization
687+
multiple times on the same model and dataset to avoid recomputing statistics.
688+
Please note that the statistics depend on the dataset, so if you change the dataset, you should also change
689+
the statistics path to avoid confusion.
685690
kwargs: Additional parameters for nncf.compress_weights() call.
686691
"""
687692

@@ -705,6 +710,7 @@ def __init__(
705710
processor: Optional[str] = None,
706711
lora_correction: bool = None,
707712
backup_precision: Optional[str] = None,
713+
statistics_path: Optional[str] = None,
708714
**kwargs,
709715
):
710716
weight_format = kwargs.pop("weight_format", None)
@@ -735,6 +741,7 @@ def __init__(
735741
self.lora_correction = lora_correction
736742
self.backup_precision = backup_precision
737743
self.dtype = dtype
744+
self.statistics_path = statistics_path
738745
self.post_init()
739746

740747
def post_init(self):
@@ -891,6 +898,11 @@ def to_nncf_dict(self) -> Dict[str, Any]:
891898
awq = True if self.quant_method == OVQuantizationMethod.AWQ else None
892899
sensitivity_metric = nncf.SensitivityMetric(self.sensitivity_metric) if self.sensitivity_metric else None
893900
backup_mode = nncf.BackupMode(self.backup_precision) if self.backup_precision else None
901+
kwargs = self.kwargs.copy()
902+
if self.statistics_path:
903+
advanced_parameters = kwargs.get("advanced_parameters", nncf.AdvancedCompressionParameters())
904+
advanced_parameters = dataclasses.replace(advanced_parameters, statistics_path=self.statistics_path)
905+
kwargs["advanced_parameters"] = advanced_parameters
894906
result = {
895907
"mode": mode,
896908
"ratio": self.ratio,
@@ -904,7 +916,7 @@ def to_nncf_dict(self) -> Dict[str, Any]:
904916
"gptq": self.gptq,
905917
"lora_correction": self.lora_correction,
906918
"backup_mode": backup_mode,
907-
**self.kwargs,
919+
**kwargs,
908920
}
909921
return result
910922

optimum/intel/openvino/quantization.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import copy
16+
import dataclasses
1617
import inspect
1718
import logging
1819
import os
@@ -1674,6 +1675,11 @@ def _weight_only_quantization(
16741675
wc_kwargs.update(kwargs)
16751676
wc_kwargs.pop("weight_only", None)
16761677

1678+
advanced_parameters = wc_kwargs.get("advanced_parameters")
1679+
if advanced_parameters is not None and advanced_parameters.statistics_path is not None and dataset is None:
1680+
# Graceful handling of unnecessary statistics_path
1681+
wc_kwargs["advanced_parameters"] = dataclasses.replace(advanced_parameters, statistics_path=None)
1682+
16771683
compressed_model = nncf.compress_weights(
16781684
model,
16791685
dataset=dataset,

tests/openvino/test_exporters_cli.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,25 @@ def test_exporters_cli_4bit(
10261026
"--lora-correction" not in option or b"with correction of low-rank adapters" in result.stdout
10271027
)
10281028

1029+
def test_exporters_cli_4bit_with_statistics_path(self):
1030+
with TemporaryDirectory() as tmpdir:
1031+
statistics_path = f"{tmpdir}/statistics"
1032+
result = subprocess.run(
1033+
f"optimum-cli export openvino --model {MODEL_NAMES['llama']} --weight-format int4 --awq "
1034+
f"--dataset wikitext2 --group-size 4 --quantization-statistics-path {statistics_path} {tmpdir}",
1035+
shell=True,
1036+
check=True,
1037+
capture_output=True,
1038+
)
1039+
self.assertTrue(
1040+
b"Statistics were successfully saved to a directory " + bytes(statistics_path, "utf-8")
1041+
in result.stdout
1042+
)
1043+
self.assertTrue(
1044+
b"Statistics were successfully loaded from a directory " + bytes(statistics_path, "utf-8")
1045+
in result.stdout
1046+
)
1047+
10291048
@parameterized.expand(SUPPORTED_QUANTIZATION_ARCHITECTURES)
10301049
def test_exporters_cli_full_quantization(
10311050
self,
@@ -1069,7 +1088,7 @@ def test_exporters_cli_full_quantization(
10691088
[
10701089
(
10711090
"falcon-40b",
1072-
"tiiuae/falcon-7b-instruct",
1091+
"bigscience/bloomz-560m",
10731092
AutoModelForCausalLM,
10741093
OVModelForCausalLM,
10751094
"--task text-generation-with-past --weight-format int4",
@@ -1112,16 +1131,20 @@ def test_exporters_cli_with_default_config(
11121131
with open(Path(tmpdir) / "config.json", "w") as wf:
11131132
json.dump(config, wf)
11141133

1134+
is_weight_compression = "--weight-format" in options
1135+
run_command = f"optimum-cli export openvino --model {tmpdir} {options} {tmpdir}"
1136+
if is_weight_compression:
1137+
# Providing quantization statistics path should not interfere with the default configuration matching
1138+
run_command += f" --quantization-statistics-path {tmpdir}/statistics"
11151139
subprocess.run(
1116-
f"optimum-cli export openvino --model {tmpdir} {options} {tmpdir}",
1140+
run_command,
11171141
shell=True,
11181142
check=True,
11191143
)
11201144

11211145
model = ov_model_cls.from_pretrained(tmpdir)
11221146
rt_info = model.model.get_rt_info()
11231147
nncf_info = rt_info["nncf"]
1124-
is_weight_compression = "weight_compression" in nncf_info
11251148
model_quantization_config = nncf_info["weight_compression" if is_weight_compression else "quantization"]
11261149

11271150
default_config = {**default_configs_collection[model_id]}
@@ -1134,6 +1157,10 @@ def test_exporters_cli_with_default_config(
11341157
quant_method = default_config.pop("quant_method", None)
11351158
default_config["awq"] = quant_method == "awq"
11361159
default_config["gptq"] = quant_method == "gptq"
1160+
advanced_parameters = eval(model_quantization_config["advanced_parameters"].value)
1161+
model_quantization_config["statistics_path"] = Mock()
1162+
model_quantization_config["statistics_path"].value = advanced_parameters["statistics_path"]
1163+
default_config["statistics_path"] = f"{tmpdir}/statistics"
11371164
else:
11381165
dtype = default_config.pop("dtype", None)
11391166
self.assertEqual(dtype, "int8")

tests/openvino/test_quantization.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2114,6 +2114,25 @@ class OVQuantizationConfigTest(unittest.TestCase):
21142114
"some_arg": "some_value",
21152115
},
21162116
),
2117+
(
2118+
OVWeightQuantizationConfig,
2119+
{
2120+
"advanced_parameters": nncf.AdvancedCompressionParameters(statistics_path="statistics_path"),
2121+
"statistics_path": "statistics_path2",
2122+
},
2123+
{
2124+
"advanced_parameters": nncf.AdvancedCompressionParameters(statistics_path="statistics_path2"),
2125+
},
2126+
),
2127+
(
2128+
OVWeightQuantizationConfig,
2129+
{
2130+
"statistics_path": "statistics_path",
2131+
},
2132+
{
2133+
"advanced_parameters": nncf.AdvancedCompressionParameters(statistics_path="statistics_path"),
2134+
},
2135+
),
21172136
(
21182137
OVQuantizationConfig,
21192138
{

0 commit comments

Comments (0)