
Commit 34e5ad1

fix: Added guards to qconfig save/load, added smoothq prefix for vars, and added smoothq vars to default config
Signed-off-by: Brandon Groth <[email protected]>
1 parent 1a0161a commit 34e5ad1

File tree

4 files changed: +52 -23 lines


fms_mo/quant/ptq.py

Lines changed: 1 addition & 1 deletion
@@ -2537,7 +2537,7 @@ def dq_llm(model, scale, qcfg):

     for name, module in model.named_modules():
         if isinstance(module, (QLinear,)):
-            if any(x in name for x in qcfg["scale_layers"]):
+            if any(x in name for x in qcfg["smoothq_scale_layers"]):
                 module.set_act_scale(scale[name])
                 logger.info(
                     f"Apply layer {name} with activation scales (10)"

fms_mo/recipes/dq.json

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
     "decoder_arch": true,
     "align_zero": true,
     "qgroup": null,
-    "act_scale_path": null,
+    "smoothq_act_scale_path": null,
     "qmodel_calibration_new": 10,
     "qskip_large_mag_layers": true,
     "ptq_nbatch": 128,

fms_mo/utils/dq_utils.py

Lines changed: 13 additions & 13 deletions
@@ -18,9 +18,9 @@ def config_quantize_smooth_layers(qcfg: dict):
     """Update qcfg with model-dependent config parameters:
     - qlayer_name_pattern: identifier of transformer layers containing linear layers
       to quantize (if any, tracing is bypassed)
-    - scale_layers: identifier of linear layers to apply smoothquant on
     - qskip_layer_name: full name of linear layers that will not be quantized
-    - act_scale_path: path to save/load smoothquant activation scales
+    - smoothq_scale_layers: identifier of linear layers to apply smoothquant on
+    - smoothq_act_scale_path: path to save/load smoothquant activation scales

     Selected model is determined by comparing all architecture identifiers against
     `model` and `model_type` fields in qcfg.
@@ -56,7 +56,7 @@ def config_quantize_smooth_layers(qcfg: dict):
         model in qcfg["model_type"] for model in llama_architecture
     ):
         qcfg["qlayer_name_pattern"] = ["model.layers."]
-        qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
+        qcfg["smoothq_scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
         if qcfg["qskip_large_mag_layers"]:
             large_mag_layers = {
                 "2-7b": [1, 30],
@@ -75,13 +75,13 @@ def config_quantize_smooth_layers(qcfg: dict):
         model in qcfg["model_type"] for model in granite_architecture
     ):
         qcfg["qlayer_name_pattern"] = ["model.layers."]
-        qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
+        qcfg["smoothq_scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
         # NOTE: supported granite-v3 models do not need layer skip for large magnitude
     elif "mixtral" in qcfg["model"]:
         qcfg["qlayer_name_pattern"] = (
             ["model.layers"] if qcfg["nbits_bmm1"] == 32 else []
         )
-        qcfg["scale_layers"] = ["q_proj", "k_proj", "v_proj", "w1", "w3"]
+        qcfg["smoothq_scale_layers"] = ["q_proj", "k_proj", "v_proj", "w1", "w3"]
         qcfg["qskip_layer_name"] += [
             f"model.layers.{i}.block_sparse_moe.gate" for i in range(32)
         ]
@@ -98,22 +98,22 @@ def config_quantize_smooth_layers(qcfg: dict):
                 [31, 7],
             ]
         ]
-        qcfg["act_scale_path"] = "./act_scales/Mixtral-8x7B-v0.1.pt"
+        qcfg["smoothq_act_scale_path"] = "./act_scales/Mixtral-8x7B-v0.1.pt"
     elif any(model in qcfg["model"] for model in bigcode_architecture):
         qcfg["qlayer_name_pattern"] = ["transformer.h"]
-        qcfg["scale_layers"] = ["c_attn", "c_fc"]
+        qcfg["smoothq_scale_layers"] = ["c_attn", "c_fc"]
         # NOTE: supported bigcode models do not need layer skip for large magnitude
         if "granite-3b-base-v2" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/granite_3b_base_v2_500_nw.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/granite_3b_base_v2_500_nw.pt"
         if "granite-13b-base-v2" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/granite_13b_base_v2.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/granite_13b_base_v2.pt"
         if "granite-20b-code-base" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
         if "granite-20b-code-instruct" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
         if "granite-34b-code-base" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
         if "granite-34b-code-instruct" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
     else:
         raise ValueError("The model architecture is not supported for DQ.")

fms_mo/utils/qconfig_utils.py

Lines changed: 37 additions & 8 deletions
@@ -18,7 +18,7 @@
 from datetime import date
 from importlib.metadata import version
 from pathlib import Path
-from typing import Any
+from typing import Any, Union
 import json
 import logging
 import os
@@ -113,6 +113,7 @@ def config_defaults() -> dict:
         "qkvsync": False,
         "extend_act_range": False,
         "plotsvg": False,
+        "qskip_large_mag_layers": False,
         # Iterable vars
         "qlayer_name_pattern": [],
         "qskip_layer_name": [],
@@ -142,21 +143,24 @@ def config_defaults() -> dict:
         "temp_disable_calib": False,
         "org_batch_size": {},
         "ptqmod_to_be_optimized": [],
+        # SmoothQuant vars
+        "smoothq": False,
+        "smoothq_scale_layers": [],
+        "smoothq_act_scale_path": None,
         # Other vars
         "which2patch_contextmanager": None,
         "force_stop_if_qbmm_auto_check_failed": False,
         "world_size": max(1, torch.cuda.device_count()),
         "global_rank": 0,
         "batch_size": 2,
+        "keys_to_save": [],
         # items could be obsoleted
         "output_attentions": False,
         "bias_corr": False,
         "qwav2vec": False,
         "qvit": False,
         "numparamsfromloadertomodel": 1,
         "gradclip": 0.0,
-        "smoothq": False,
-        "keys_to_save": [],
     }

     return cfg_defaults
@@ -201,7 +205,7 @@ def find_recipe_json(recipe: str, subdir: str = None) -> Path:
     return json_file


-def get_recipe(recipe: str, subdir: str = None) -> Any:
+def get_recipe(recipe: str, subdir: str = None) -> Union[list, dict]:
     """
     Get a json recipe.

@@ -219,6 +223,10 @@ def get_recipe(recipe: str, subdir: str = None) -> Any:
         temp_data = json.load(openfile)
     logger.info(f"Loaded settings from {json_file}.")

+    # Any recipe should be a dict (qcfg) or list (keys_to_save)
+    if not isinstance(temp_data, (dict, list)):
+        raise ValueError(f"Loaded recipe {json_file} was not a dict or list")
+
     return temp_data


@@ -378,8 +386,14 @@ def qconfig_init(recipe: str = None, args: Any = None) -> dict:
     # this can be used to load a previously saved ckpt as well
     if recipe:
         # qcfg recipes should reside in fms_mo/recipes
-        temp_cfg = get_recipe(recipe)
+        temp_cfg = qconfig_load(recipe)
+
         if temp_cfg:
+            if not isinstance(temp_cfg, dict):
+                raise ValueError(
+                    f"Quantized config recipe={recipe} is not a dictionary"
+                )
+
             qcfg.update(temp_cfg)
             logger.info("Updated config with recipe values")
         else:
@@ -562,7 +576,12 @@ def qconfig_save(

     # Next, check in fms_mo/recipes and merge them into a unique set (in case they differ)
     keys_to_save_json = get_recipe(recipe)
+
     if keys_to_save_json:
+        if not isinstance(keys_to_save_json, list):
+            raise ValueError(f"Save recipe={recipe} is not a list!")
+
+        # Merge keys_to_save lists
         keys_to_save = list(set(keys_to_save + keys_to_save_json))

     # If we found keys to save, fetch them from qcfg
@@ -604,9 +623,12 @@

 def qconfig_load(fname: str = "qcfg.json") -> dict:
     """Read config in json format, work together with qconfig_save"""
-    if os.path.isfile(fname):
-        with open(fname, "r", encoding="utf-8") as openfile:
-            config = json.load(openfile)
+    config = get_recipe(fname)
+
+    if config:
+        # Check that loaded file is a dict
+        if not isinstance(config, dict):
+            raise ValueError(f"Quantized config={fname} is not a dictionary")

         # Add back wanted defaults for any missing vars
         add_wanted_defaults_to_config(config, minimal=False)
@@ -856,6 +878,8 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
         "plotsvg",
         "ptq_freezecvs",
         "ptq_qdrop",
+        "qskip_large_mag_layers",
+        "smoothq",
     ]
     for boolean_var_str in boolean_vars_str:
         boolean_var = config.get(
@@ -912,6 +936,7 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
         "firstptqmodule",
         "params2optim",
         "clip_val_asst_percentile",
+        "smoothq_scale_layers",
     ]
     for iterable_var_str in iterable_vars_str:
         iterable_var_default = default_config.get(iterable_var_str)
@@ -990,3 +1015,7 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
             f"which2patch_contextmanager = {which2patch_contextmanager} is not one of "
             f"the following: {which2patch_contextmanager_settings}"
         )
+
+    smoothq_act_scale_path = config.get("smoothq_act_scale_path", None)
+    if smoothq_act_scale_path and not smoothq_act_scale_path.endswith(".pt"):
+        raise ValueError(f"{smoothq_act_scale_path=} is not a .pt checkpoint")
