|
18 | 18 |
|
19 | 19 | from parameterized import parameterized |
20 | 20 | from utils_tests import ( |
21 | | - _ARCHITECTURES_TO_EXPECTED_INT4_INT8, |
22 | 21 | _ARCHITECTURES_TO_EXPECTED_INT8, |
23 | 22 | MODEL_NAMES, |
24 | 23 | get_num_quantized_nodes, |
@@ -84,14 +83,13 @@ class OVCLIExportTestCase(unittest.TestCase): |
84 | 83 | ("latent-consistency", 50, 135), |
85 | 84 | ) |
86 | 85 |
|
87 | | - SUPPORTED_4BIT_ARCHITECTURES = (("text-generation-with-past", "opt125m"),) |
88 | | - |
89 | | - SUPPORTED_4BIT_OPTIONS = ["int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"] |
90 | | - |
91 | | - TEST_4BIT_CONFIGURATONS = [] |
92 | | - for arch in SUPPORTED_4BIT_ARCHITECTURES: |
93 | | - for option in SUPPORTED_4BIT_OPTIONS: |
94 | | - TEST_4BIT_CONFIGURATONS.append([arch[0], arch[1], option]) |
| 86 | + TEST_4BIT_CONFIGURATONS = [ |
| 87 | + ("text-generation-with-past", "opt125m", "int4_sym_g128", 62, 86), |
| 88 | + ("text-generation-with-past", "opt125m", "int4_asym_g128", 62, 86), |
| 89 | + ("text-generation-with-past", "opt125m", "int4_sym_g64", 62, 86), |
| 90 | + ("text-generation-with-past", "opt125m", "int4_asym_g64", 62, 86), |
| 91 | + ("text-generation-with-past", "llama_awq", "int4 --ratio 1.0 --sym --group-size 16 --all-layers", 0, 32), |
| 92 | + ] |
95 | 93 |
|
96 | 94 | def _openvino_export( |
97 | 95 | self, model_name: str, task: str, compression_option: str = None, compression_ratio: float = None |
@@ -197,17 +195,16 @@ def test_exporters_cli_hybrid_quantization(self, model_type: str, exp_num_fq: in |
197 | 195 | self.assertEqual(exp_num_fq, num_fq) |
198 | 196 |
|
199 | 197 | @parameterized.expand(TEST_4BIT_CONFIGURATONS) |
200 | | - def test_exporters_cli_int4(self, task: str, model_type: str, option: str): |
| 198 | + def test_exporters_cli_int4(self, task: str, model_type: str, option: str, expected_int8: int, expected_int4: int): |
201 | 199 | with TemporaryDirectory() as tmpdir: |
202 | 200 | subprocess.run( |
203 | | - f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}", |
| 201 | + f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} --weight-format {option} {tmpdir}", |
204 | 202 | shell=True, |
205 | 203 | check=True, |
206 | 204 | ) |
207 | 205 | model_kwargs = {"use_cache": task.endswith("with-past")} if "generation" in task else {} |
208 | 206 | model = eval(_HEAD_TO_AUTOMODELS[task.replace("-with-past", "")]).from_pretrained(tmpdir, **model_kwargs) |
209 | 207 |
|
210 | | - expected_int8, expected_int4 = _ARCHITECTURES_TO_EXPECTED_INT4_INT8[model_type] |
211 | 208 | _, num_int8, num_int4 = get_num_quantized_nodes(model) |
212 | 209 | self.assertEqual(expected_int8, num_int8) |
213 | 210 | self.assertEqual(expected_int4, num_int4) |
|
0 commit comments