tests/test_quantization/lifecycle (1 file changed, +5 -5 lines)

@@ -279,13 +279,12 @@ def test_multi_apply_quantization_config():
     """
     model = get_tinyllama_model()
 
-    # FP8 applied to mlp and self_attn.o_proj to validate overwriting
+    # FP8 applied to self_attn
     qconfig1 = QuantizationConfig(
         config_groups={
             "group_0": QuantizationScheme(
                 targets=[
-                    r"re:.*model\.layers\.\d+\.mlp\.(down|gate|up)_proj$",
-                    r"re:.*model\.layers\.\d+\.self_attn\.o_proj$",
+                    r"re:.*self_attn\.(k|q|o|v)_proj$",
                 ],
                 weights=QuantizationArgs(
                     num_bits=8,
@@ -305,12 +304,13 @@ def test_multi_apply_quantization_config():
         },
         ignore=["lm_head"],
     )
-    # W4A16_ASYM applied to self_attn
+    # W4A16_ASYM applied to mlp and self_attn.o_proj to validate overwriting
    qconfig2 = QuantizationConfig(
         config_groups={
             "group_0": QuantizationScheme(
                 targets=[
-                    r"re:.*model\.layers\.\d+\.self_attn\.(k|q|o|v)_proj$",
+                    r"re:.*mlp\.(down|gate|up)_proj$",
+                    r"re:.*self_attn\.o_proj$",
                 ],
                 weights=QuantizationArgs(
                     num_bits=4,
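Taken together, the two hunks swap which config owns which projections: the FP8 config now targets all attention projections, and the W4A16_ASYM config now targets the mlp projections plus self_attn.o_proj, so o_proj is matched by both and exercises the overwrite path. Below is a minimal sketch of that lifecycle, assuming the compressed-tensors API (apply_quantization_config and the per-module quantization_scheme attribute) and using a TinyLlama checkpoint as a stand-in for the test's get_tinyllama_model() helper; the exact QuantizationArgs fields for the FP8 and W4A16_ASYM presets are assumptions, not copied from this revision.

# Sketch only: checkpoint name, preset argument values, and attribute paths
# are assumptions, not verified against this revision of the test suite.
from transformers import AutoModelForCausalLM
from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationConfig,
    QuantizationScheme,
    apply_quantization_config,
)

# Stand-in for the test's get_tinyllama_model() helper.
model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")

# Pass 1: FP8 weights on all attention projections (k/q/v/o), as in qconfig1.
qconfig1 = QuantizationConfig(
    config_groups={
        "group_0": QuantizationScheme(
            targets=[r"re:.*self_attn\.(k|q|o|v)_proj$"],
            weights=QuantizationArgs(num_bits=8, type="float"),  # assumed FP8 args
        )
    },
    ignore=["lm_head"],
)
apply_quantization_config(model, qconfig1)

# Pass 2: 4-bit asymmetric weights on the mlp projections and on
# self_attn.o_proj, as in qconfig2. o_proj matches both configs, so the FP8
# scheme attached in pass 1 is overwritten -- the behavior the test validates.
qconfig2 = QuantizationConfig(
    config_groups={
        "group_0": QuantizationScheme(
            targets=[
                r"re:.*mlp\.(down|gate|up)_proj$",
                r"re:.*self_attn\.o_proj$",
            ],
            weights=QuantizationArgs(num_bits=4, symmetric=False),  # assumed W4A16_ASYM args
        )
    },
    ignore=["lm_head"],
)
apply_quantization_config(model, qconfig2)

# After both passes, o_proj should carry the 4-bit scheme from qconfig2.
o_proj = model.model.layers[0].self_attn.o_proj
assert o_proj.quantization_scheme.weights.num_bits == 4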