Skip to content

Commit f870a3e

Browse files
nikita-savelyevv authored and mvafin committed
[OV] Update default int4 configs (#1393)
* Replace data-aware compression methods with data-free AWQ in default int4 configs for some models
1 parent df36a67 commit f870a3e

File tree

1 file changed

+2
-27
lines changed

1 file changed

+2
-27
lines changed

optimum/intel/openvino/configuration.py

Lines changed: 2 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,6 @@ class OVQuantizationMethod(str, Enum):
6868
"sym": True,
6969
"group_size": 128,
7070
"ratio": 0.8,
71-
"dataset": "wikitext2",
7271
"quant_method": OVQuantizationMethod.AWQ,
7372
},
7473
"meta-llama/Llama-2-7b-hf": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6},
@@ -77,17 +76,14 @@ class OVQuantizationMethod(str, Enum):
7776
"sym": True,
7877
"group_size": 128,
7978
"ratio": 1.0,
80-
"dataset": "wikitext2",
8179
"quant_method": OVQuantizationMethod.AWQ,
82-
"scale_estimation": True,
8380
},
8481
"meta-llama/Llama-2-13b-chat-hf": {"bits": 4, "sym": True, "group_size": 64, "ratio": 0.8},
8582
"stabilityai/stablelm-3b-4e1t": {
8683
"bits": 4,
8784
"sym": True,
8885
"group_size": 64,
8986
"ratio": 0.8,
90-
"dataset": "wikitext2",
9187
"quant_method": OVQuantizationMethod.AWQ,
9288
},
9389
"stabilityai/stablelm-zephyr-3b": {
@@ -116,9 +112,7 @@ class OVQuantizationMethod(str, Enum):
116112
"sym": False,
117113
"group_size": 128,
118114
"ratio": 1.0,
119-
"dataset": "wikitext2",
120115
"quant_method": OVQuantizationMethod.AWQ,
121-
"scale_estimation": True,
122116
},
123117
"Qwen/Qwen2.5-Coder-3B-Instruct": {
124118
"bits": 4,
@@ -142,9 +136,7 @@ class OVQuantizationMethod(str, Enum):
142136
"sym": True,
143137
"group_size": 128,
144138
"ratio": 1.0,
145-
"dataset": "wikitext2",
146139
"quant_method": OVQuantizationMethod.AWQ,
147-
"scale_estimation": True,
148140
},
149141
"Qwen/Qwen3-8B": {
150142
"bits": 4,
@@ -202,9 +194,7 @@ class OVQuantizationMethod(str, Enum):
202194
"sym": False,
203195
"group_size": 128,
204196
"ratio": 1.0,
205-
"dataset": "wikitext2",
206197
"quant_method": OVQuantizationMethod.AWQ,
207-
"scale_estimation": True,
208198
},
209199
"lmsys/longchat-7b-16k": {
210200
"bits": 4,
@@ -222,9 +212,7 @@ class OVQuantizationMethod(str, Enum):
222212
"sym": False,
223213
"group_size": 64,
224214
"ratio": 1.0,
225-
"dataset": "wikitext2",
226215
"quant_method": OVQuantizationMethod.AWQ,
227-
"scale_estimation": True,
228216
},
229217
"microsoft/phi-2": {
230218
"bits": 4,
@@ -248,25 +236,21 @@ class OVQuantizationMethod(str, Enum):
248236
"sym": False,
249237
"group_size": 64,
250238
"ratio": 0.8,
251-
"dataset": "wikitext2",
252-
"scale_estimation": True,
239+
"quant_method": OVQuantizationMethod.AWQ,
253240
},
254241
"meta-llama/Llama-3.2-1B-Instruct": {
255242
"bits": 4,
256243
"sym": False,
257244
"group_size": 128,
258245
"ratio": 1.0,
259-
"dataset": "wikitext2",
260246
"quant_method": OVQuantizationMethod.AWQ,
261-
"scale_estimation": True,
262247
},
263248
"meta-llama/Meta-Llama-3.1-8B": {
264249
"bits": 4,
265250
"sym": False,
266251
"group_size": 64,
267252
"ratio": 0.8,
268-
"dataset": "wikitext2",
269-
"scale_estimation": True,
253+
"quant_method": OVQuantizationMethod.AWQ,
270254
},
271255
"microsoft/Phi-3-mini-4k-instruct": {
272256
"bits": 4,
@@ -281,43 +265,34 @@ class OVQuantizationMethod(str, Enum):
281265
"sym": False,
282266
"group_size": 64,
283267
"ratio": 1.0,
284-
"dataset": "wikitext2",
285268
"quant_method": OVQuantizationMethod.AWQ,
286-
"scale_estimation": True,
287269
},
288270
"microsoft/Phi-4-mini-instruct": {
289271
"bits": 4,
290272
"sym": False,
291273
"group_size": 64,
292274
"ratio": 1.0,
293-
"dataset": "wikitext2",
294275
"quant_method": OVQuantizationMethod.AWQ,
295-
"scale_estimation": True,
296276
},
297277
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": {
298278
"bits": 4,
299279
"sym": False,
300280
"group_size": 32,
301281
"ratio": 0.7,
302-
"dataset": "wikitext2",
303282
"quant_method": OVQuantizationMethod.AWQ,
304-
"scale_estimation": True,
305283
},
306284
"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": {
307285
"bits": 4,
308286
"sym": False,
309287
"group_size": 128,
310288
"ratio": 1.0,
311-
"dataset": "wikitext2",
312289
"quant_method": OVQuantizationMethod.AWQ,
313-
"scale_estimation": True,
314290
},
315291
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B": {
316292
"bits": 4,
317293
"sym": False,
318294
"group_size": 64,
319295
"ratio": 0.8,
320-
"dataset": "wikitext2",
321296
"quant_method": OVQuantizationMethod.AWQ,
322297
},
323298
"microsoft/Phi-4-multimodal-instruct": {

0 commit comments

Comments
 (0)