Commit 199e3d1

Merge pull request #113 from BrandonGroth/config_save_cleanup
fix: Config save cleanup
2 parents 09c7761 + 02535d1

File tree: 7 files changed (+150, -26 lines)

fms_mo/prep.py

Lines changed: 1 addition & 1 deletion
@@ -869,7 +869,7 @@ def qmodel_prep(
             model, device_ids=DPorDDPdevices
         )

-    qconfig_save(qcfg, "qcfg.json")
+    qconfig_save(qcfg, fname="qcfg.json")
     qcfg["tb_writer"] = tb_writer

     logger.info(f"--- Quantized model --- \n{model}\n")
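
Why this matters: passing fname by keyword keeps the call site stable if a parameter is ever added before it in qconfig_save's signature. A minimal sketch of the pattern, not the real signature (the actual one lives in fms_mo/utils/qconfig_utils.py; "minimal" here stands in for any parameter that sits before fname):

    import json

    # Sketch only: illustrates why keyword arguments survive signature changes.
    def qconfig_save_sketch(qcfg, minimal=True, fname="qcfg.json"):
        # Persist only JSON-serializable entries, as a stand-in for the real filter.
        safe = {k: v for k, v in qcfg.items()
                if isinstance(v, (bool, int, float, str, list, dict, type(None)))}
        with open(fname, "w", encoding="utf-8") as f:
            json.dump(safe, f)

    # A positional call would bind "qcfg.json" to minimal; the keyword form cannot.
    qconfig_save_sketch({"nbits_a": 8, "nbits_w": 8}, fname="qcfg.json")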

fms_mo/quant/ptq.py

Lines changed: 1 addition & 1 deletion
@@ -2537,7 +2537,7 @@ def dq_llm(model, scale, qcfg):

     for name, module in model.named_modules():
         if isinstance(module, (QLinear,)):
-            if any(x in name for x in qcfg["scale_layers"]):
+            if any(x in name for x in qcfg["smoothq_scale_layers"]):
                 module.set_act_scale(scale[name])
                 logger.info(
                     f"Apply layer {name} with activation scales (10)"

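Context for the rename above: smoothq_scale_layers holds substring identifiers, not full module names, so a module is selected when any identifier occurs in its dotted name. A standalone sketch of that test with made-up module names:

    # Substring matching as used in dq_llm: any identifier hit selects the module.
    smoothq_scale_layers = ["k_proj", "v_proj", "gate_proj", "up_proj"]

    module_names = [
        "model.layers.0.self_attn.k_proj",  # matches "k_proj" -> scaled
        "model.layers.0.self_attn.o_proj",  # no identifier matches -> skipped
    ]
    for name in module_names:
        if any(x in name for x in smoothq_scale_layers):
            print(f"apply activation scale to {name}")
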
fms_mo/recipes/dq.json

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
   "decoder_arch": true,
   "align_zero": true,
   "qgroup": null,
-  "act_scale_path": null,
+  "smoothq_act_scale_path": null,
   "qmodel_calibration_new": 10,
   "qskip_large_mag_layers": true,
   "ptq_nbatch": 128,

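Recipes such as dq.json are plain JSON merged into qcfg at init time. A small sketch of the rename as seen by a loader (path assumed relative to the repo root):

    import json

    # Assumes the recipe shown above is on disk at this relative path.
    with open("fms_mo/recipes/dq.json", "r", encoding="utf-8") as f:
        recipe = json.load(f)

    # The old key is gone; downstream code now reads the renamed one.
    assert "act_scale_path" not in recipe
    assert recipe["smoothq_act_scale_path"] is None
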
fms_mo/utils/aiu_utils.py

Lines changed: 1 addition & 1 deletion
@@ -470,7 +470,7 @@ def save_for_aiu(
         "qa_mode",
         "qw_mode",
         "smoothq",
-        "scale_layers",
+        "smoothq_scale_layers",
         "qskip_layer_name",
         "qskip_large_mag_layers",
         "recompute_narrow_weights",

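The strings above are keys into qcfg that save_for_aiu exports. A purely illustrative sketch of projecting a config down to such a whitelist; the values and the projection are made up, not the real export logic:

    # Illustrative only: keep a whitelisted, serializable subset of qcfg.
    keys_for_aiu = ["qa_mode", "qw_mode", "smoothq", "smoothq_scale_layers"]
    qcfg = {
        "qa_mode": "pact",
        "qw_mode": "sawb",
        "smoothq": True,
        "smoothq_scale_layers": ["k_proj", "v_proj"],
        "tb_writer": object(),  # non-exportable entries are simply not listed
    }

    subset = {k: qcfg[k] for k in keys_for_aiu if k in qcfg}
    print(subset)
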
fms_mo/utils/dq_utils.py

Lines changed: 13 additions & 13 deletions
@@ -18,9 +18,9 @@ def config_quantize_smooth_layers(qcfg: dict):
     """Update qcfg with model-dependent config parameters:
     - qlayer_name_pattern: identifier of transformer layers containing linear layers
         to quantize (if any, tracing is bypassed)
-    - scale_layers: identifier of linear layers to apply smoothquant on
     - qskip_layer_name: full name of linear layers that will not be quantized
-    - act_scale_path: path to save/load smoothquant activation scales
+    - smoothq_scale_layers: identifier of linear layers to apply smoothquant on
+    - smoothq_act_scale_path: path to save/load smoothquant activation scales

     Selected model is determined by comparing all architecture identifiers against
     `model` and `model_type` fields in qcfg.
@@ -56,7 +56,7 @@ def config_quantize_smooth_layers(qcfg: dict):
         model in qcfg["model_type"] for model in llama_architecture
     ):
         qcfg["qlayer_name_pattern"] = ["model.layers."]
-        qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
+        qcfg["smoothq_scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
         if qcfg["qskip_large_mag_layers"]:
             large_mag_layers = {
                 "2-7b": [1, 30],
@@ -75,13 +75,13 @@ def config_quantize_smooth_layers(qcfg: dict):
         model in qcfg["model_type"] for model in granite_architecture
     ):
         qcfg["qlayer_name_pattern"] = ["model.layers."]
-        qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
+        qcfg["smoothq_scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
         # NOTE: supported granite-v3 models do not need layer skip for large magnitude
     elif "mixtral" in qcfg["model"]:
         qcfg["qlayer_name_pattern"] = (
             ["model.layers"] if qcfg["nbits_bmm1"] == 32 else []
         )
-        qcfg["scale_layers"] = ["q_proj", "k_proj", "v_proj", "w1", "w3"]
+        qcfg["smoothq_scale_layers"] = ["q_proj", "k_proj", "v_proj", "w1", "w3"]
         qcfg["qskip_layer_name"] += [
             f"model.layers.{i}.block_sparse_moe.gate" for i in range(32)
         ]
@@ -98,22 +98,22 @@ def config_quantize_smooth_layers(qcfg: dict):
                 [31, 7],
             ]
         ]
-        qcfg["act_scale_path"] = "./act_scales/Mixtral-8x7B-v0.1.pt"
+        qcfg["smoothq_act_scale_path"] = "./act_scales/Mixtral-8x7B-v0.1.pt"
     elif any(model in qcfg["model"] for model in bigcode_architecture):
         qcfg["qlayer_name_pattern"] = ["transformer.h"]
-        qcfg["scale_layers"] = ["c_attn", "c_fc"]
+        qcfg["smoothq_scale_layers"] = ["c_attn", "c_fc"]
         # NOTE: supported bigcode models do not need layer skip for large magnitude
         if "granite-3b-base-v2" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/granite_3b_base_v2_500_nw.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/granite_3b_base_v2_500_nw.pt"
         if "granite-13b-base-v2" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/granite_13b_base_v2.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/granite_13b_base_v2.pt"
         if "granite-20b-code-base" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
        if "granite-20b-code-instruct" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
        if "granite-34b-code-base" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
        if "granite-34b-code-instruct" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
     else:
         raise ValueError("The model architecture is not supported for DQ.")
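
config_quantize_smooth_layers dispatches on substrings of qcfg["model"] and qcfg["model_type"]. A condensed sketch of that control flow under the renamed key; the architecture lists are shortened and most branches omitted:

    # Condensed sketch of the dispatch above, not the full function.
    def pick_smoothq_layers(qcfg: dict) -> None:
        llama_architecture = ["llama"]                  # illustrative subset
        bigcode_architecture = ["granite", "starcoder"]
        if any(m in qcfg["model"] for m in llama_architecture):
            qcfg["smoothq_scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
        elif any(m in qcfg["model"] for m in bigcode_architecture):
            qcfg["smoothq_scale_layers"] = ["c_attn", "c_fc"]
        else:
            raise ValueError("The model architecture is not supported for DQ.")

    qcfg = {"model": "llama-2-7b"}
    pick_smoothq_layers(qcfg)
    print(qcfg["smoothq_scale_layers"])  # ['k_proj', 'v_proj', 'gate_proj', 'up_proj']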

fms_mo/utils/qconfig_utils.py

Lines changed: 37 additions & 9 deletions
@@ -18,7 +18,7 @@
 from datetime import date
 from importlib.metadata import version
 from pathlib import Path
-from typing import Any
+from typing import Any, Union
 import json
 import logging
 import os
@@ -113,10 +113,10 @@ def config_defaults() -> dict:
         "qkvsync": False,
         "extend_act_range": False,
         "plotsvg": False,
+        "qskip_large_mag_layers": False,
         # Iterable vars
         "qlayer_name_pattern": [],
         "qskip_layer_name": [],
-        "qskip_large_mag_layers": False,
         "qspecial_layers": {},
         "qsinglesided_name": [],
         "clip_val_asst_percentile": (0.1, 99.9),
@@ -142,21 +142,24 @@ def config_defaults() -> dict:
         "temp_disable_calib": False,
         "org_batch_size": {},
         "ptqmod_to_be_optimized": [],
+        # SmoothQuant vars
+        "smoothq": False,
+        "smoothq_scale_layers": [],
+        "smoothq_act_scale_path": None,
         # Other vars
         "which2patch_contextmanager": None,
         "force_stop_if_qbmm_auto_check_failed": False,
         "world_size": max(1, torch.cuda.device_count()),
         "global_rank": 0,
         "batch_size": 2,
+        "keys_to_save": [],
         # items could be obsoleted
         "output_attentions": False,
         "bias_corr": False,
         "qwav2vec": False,
         "qvit": False,
         "numparamsfromloadertomodel": 1,
         "gradclip": 0.0,
-        "smoothq": False,
-        "keys_to_save": [],
     }

     return cfg_defaults
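
Grouping the SmoothQuant defaults under one comment also documents the override order: defaults first, then recipe values on top (the real code applies qcfg.update(temp_cfg)). Shortened to the keys introduced above:

    # Defaults first, recipe second: later sources win on key collisions.
    cfg_defaults = {
        "smoothq": False,
        "smoothq_scale_layers": [],
        "smoothq_act_scale_path": None,
        "keys_to_save": [],
    }
    recipe = {"smoothq": True, "smoothq_act_scale_path": "./act_scales/model.pt"}

    qcfg = {**cfg_defaults, **recipe}
    print(qcfg["smoothq"], qcfg["smoothq_act_scale_path"])  # True ./act_scales/model.pt
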
@@ -201,7 +204,7 @@ def find_recipe_json(recipe: str, subdir: str = None) -> Path:
     return json_file


-def get_recipe(recipe: str, subdir: str = None) -> Any:
+def get_recipe(recipe: str, subdir: str = None) -> Union[list, dict]:
     """
     Get a json recipe.

@@ -219,6 +222,10 @@ def get_recipe(recipe: str, subdir: str = None) -> Any:
         temp_data = json.load(openfile)
     logger.info(f"Loaded settings from {json_file}.")

+    # Any recipe should be a dict (qcfg) or list (keys_to_save)
+    if not isinstance(temp_data, (dict, list)):
+        raise ValueError(f"Loaded recipe {json_file} was not a dict or list")
+
     return temp_data

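With this guard, a recipe file holding, say, a bare string fails fast instead of leaking into qcfg. A self-contained sketch of the new shape check:

    import json

    # Mirrors the isinstance guard added to get_recipe.
    def validate_recipe(raw: str, source: str = "<recipe>"):
        temp_data = json.loads(raw)
        if not isinstance(temp_data, (dict, list)):
            raise ValueError(f"Loaded recipe {source} was not a dict or list")
        return temp_data

    validate_recipe('{"smoothq": true}')         # ok: dict-shaped qcfg
    validate_recipe('["qa_mode", "qw_mode"]')    # ok: list-shaped keys_to_save
    try:
        validate_recipe('"just a string"')
    except ValueError as err:
        print(err)
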
@@ -378,8 +385,14 @@ def qconfig_init(recipe: str = None, args: Any = None) -> dict:
     # this can be used to load a previously saved ckpt as well
     if recipe:
         # qcfg recipes should reside in fms_mo/recipes
-        temp_cfg = get_recipe(recipe)
+        temp_cfg = qconfig_load(recipe)
+
         if temp_cfg:
+            if not isinstance(temp_cfg, dict):
+                raise ValueError(
+                    f"Quantized config recipe={recipe} is not a dictionary"
+                )
+
             qcfg.update(temp_cfg)
             logger.info("Updated config with recipe values")
         else:
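
Usage sketch: routing recipes through qconfig_load means a list-shaped file is rejected before qcfg.update runs. This assumes the name "dq" resolves to fms_mo/recipes/dq.json via find_recipe_json:

    # Assumption: "dq" resolves to the dict-shaped recipe shown earlier.
    from fms_mo.utils.qconfig_utils import qconfig_init

    qcfg = qconfig_init(recipe="dq")
    print(qcfg["smoothq_act_scale_path"])
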
@@ -562,7 +575,12 @@ def qconfig_save(

     # Next, check in fms_mo/recipes and merge them into a unique set (in case they differ)
     keys_to_save_json = get_recipe(recipe)
+
     if keys_to_save_json:
+        if not isinstance(keys_to_save_json, list):
+            raise ValueError(f"Save recipe={recipe} is not a list!")
+
+        # Merge keys_to_save lists
         keys_to_save = list(set(keys_to_save + keys_to_save_json))

     # If we found keys to save, fetch them from qcfg
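
list(set(a + b)) deduplicates the two key lists at the cost of ordering, which is harmless here since the keys only index into qcfg. In isolation:

    # The merge in qconfig_save: union of caller-supplied and recipe keys.
    keys_from_arg = ["qa_mode", "qw_mode", "smoothq"]
    keys_from_recipe = ["qw_mode", "smoothq_scale_layers"]

    keys_to_save = list(set(keys_from_arg + keys_from_recipe))
    print(sorted(keys_to_save))  # set() drops duplicates but also ordering
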
@@ -604,9 +622,12 @@

 def qconfig_load(fname: str = "qcfg.json") -> dict:
     """Read config in json format, work together with qconfig_save"""
-    if os.path.isfile(fname):
-        with open(fname, "r", encoding="utf-8") as openfile:
-            config = json.load(openfile)
+    config = get_recipe(fname)
+
+    if config:
+        # Check that loaded file is a dict
+        if not isinstance(config, dict):
+            raise ValueError(f"Quantized config={fname} is not a dictionary")

         # Add back wanted defaults for any missing vars
         add_wanted_defaults_to_config(config, minimal=False)
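
Save and load are now symmetric through get_recipe, and both reject the wrong JSON shape. A round-trip sketch in plain json; the real qconfig_load additionally restores defaults via add_wanted_defaults_to_config:

    import json

    # What qconfig_save writes, qconfig_load must accept: a JSON dict.
    qcfg = {"smoothq": True, "smoothq_scale_layers": ["k_proj", "v_proj"]}
    with open("qcfg.json", "w", encoding="utf-8") as f:
        json.dump(qcfg, f)

    with open("qcfg.json", "r", encoding="utf-8") as f:
        loaded = json.load(f)
    if not isinstance(loaded, dict):
        raise ValueError("Quantized config=qcfg.json is not a dictionary")
    assert loaded == qcfg
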
@@ -856,6 +877,8 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
         "plotsvg",
         "ptq_freezecvs",
         "ptq_qdrop",
+        "qskip_large_mag_layers",
+        "smoothq",
     ]
     for boolean_var_str in boolean_vars_str:
         boolean_var = config.get(
@@ -912,6 +935,7 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
         "firstptqmodule",
         "params2optim",
         "clip_val_asst_percentile",
+        "smoothq_scale_layers",
     ]
     for iterable_var_str in iterable_vars_str:
         iterable_var_default = default_config.get(iterable_var_str)
@@ -990,3 +1014,7 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
             f"which2patch_contextmanager = {which2patch_contextmanager} is not one of "
             f"the following: {which2patch_contextmanager_settings}"
         )
+
+    smoothq_act_scale_path = config.get("smoothq_act_scale_path", None)
+    if smoothq_act_scale_path and not smoothq_act_scale_path.endswith(".pt"):
+        raise ValueError(f"{smoothq_act_scale_path=} is not a .pt checkpoint")
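
The new check validates only the file suffix, not that the checkpoint exists. Mirrored in isolation:

    # Same suffix check as added to check_config, in standalone form.
    def check_scale_path(config: dict) -> None:
        smoothq_act_scale_path = config.get("smoothq_act_scale_path", None)
        if smoothq_act_scale_path and not smoothq_act_scale_path.endswith(".pt"):
            raise ValueError(f"{smoothq_act_scale_path=} is not a .pt checkpoint")

    check_scale_path({"smoothq_act_scale_path": "./act_scales/model.pt"})   # passes
    try:
        check_scale_path({"smoothq_act_scale_path": "./act_scales/model.bin"})
    except ValueError as err:
        print(err)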

tests/models/test_saveconfig.py

Lines changed: 96 additions & 0 deletions
@@ -179,6 +179,102 @@ def test_save_config_minimal(

     delete_config()

+
+def test_double_qconfig_save(
+    config_fp32: dict,
+):
+    """
+    Ensure that using qconfig_save multiple times doesn't fail.
+
+    Args:
+        config_fp32 (dict): Config for fp32 quantization
+    """
+    delete_config()
+
+    # Creating a qcfg, then saving again will cause a warning -> ignore it
+    with pytest.warns(UserWarning, match="qcfg.json already exist, will overwrite."):
+        qconfig_save(config_fp32, minimal=False)
+        qconfig_save(config_fp32, minimal=False)
+
+    delete_config()
+
+
+def test_qconfig_save_list_as_dict(
+    config_fp32: dict,
+):
+    """
+    Test that a keys_to_save entry given as a dict is rejected
+
+    Args:
+        config_fp32 (dict): Config for fp32 quantization
+    """
+    delete_config()
+
+    # Fill in keys_to_save as dict with nonsense vals
+    config_fp32["keys_to_save"] = {
+        "qa_mode": None,
+        "qw_mode": None,
+        "smoothq": None,
+        "scale_layers": None,
+        "qskip_layer_name": None,
+        "qskip_large_mag_layers": None,
+    }
+
+    with pytest.raises(ValueError):
+        qconfig_save(config_fp32, minimal=True)
+
+    delete_config()
+
+
+def test_qconfig_save_recipe_as_dict(
+    config_fp32: dict,
+):
+    """
+    Test that a save recipe file holding a dict is rejected
+
+    Args:
+        config_fp32 (dict): Config for fp32 quantization
+    """
+    delete_config()
+
+    # Write a keys_to_save recipe as a dict with nonsense vals
+    save_dict = {
+        "qa_mode": None,
+        "qw_mode": None,
+        "smoothq": None,
+        "scale_layers": None,
+        "qskip_layer_name": None,
+        "qskip_large_mag_layers": None,
+    }
+    save_json(save_dict, file_path="keys_to_save.json")
+
+    with pytest.raises(ValueError):
+        qconfig_save(config_fp32, recipe="keys_to_save.json", minimal=True)
+
+    delete_config()
+
+
+def test_qconfig_load_with_recipe_as_list(
+    config_fp32: dict,
+):
+    """
+    Test that qconfig_load errors when loading a json list
+
+    Args:
+        config_fp32 (dict): Config for fp32 quantization
+    """
+    delete_config()
+
+    config_list = list(config_fp32.keys())
+
+    save_json(config_list, file_path="qcfg.json")
+
+    with pytest.raises(ValueError):
+        _ = qconfig_load(fname="qcfg.json")
+
+    delete_config()
+
+
 def test_load_config_restored_pair(
     config_fp32: dict,
     wanted_pair: tuple,
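
All four new tests carry "qconfig" in their names, so something like the following should select just this group (the -k pattern is an assumption, not part of the commit):

    # Equivalent to: pytest tests/models/test_saveconfig.py -k qconfig -v
    import pytest

    raise SystemExit(pytest.main(["tests/models/test_saveconfig.py", "-k", "qconfig", "-v"]))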
