1717"""
1818
1919# Standard
20+ from typing import Any , Dict , List , Tuple , Union
2021import glob
2122import json
2223import logging
3637logger = logging .getLogger (__name__ )
3738
3839
39- def check_quantization_setting (model : nn .Module = None ) :
40+ def check_quantization_setting (model : nn .Module ) -> bool :
4041 """
4142 function checks if the checkpoint is from fp8 quantization
4243 """
@@ -47,36 +48,49 @@ def check_quantization_setting(model: nn.Module = None):
        return False

    logger.info("Validating config settings")
-    if quant_config["quant_method"] == "compressed-tensors":
-        if quant_config["format"] != "float-quantized":
-            raise ValueError(
-                "The input activation and weight quantization dtypes are not supported"
-            )
-
-        if (
-            quant_config["config_groups"]["group_0"]["input_activations"]["num_bits"]
-            != 8
-        ):
-            raise ValueError("Only 8 bit FP input activation quantization is supported")
-
-        if quant_config["config_groups"]["group_0"]["weights"]["num_bits"] != 8:
-            raise ValueError("Only 8-bit FP weight quantization is supported")
-
-        if quant_config["kv_cache_scheme"] is None:
-            pass
-        else:
-            if quant_config["kv_cache_scheme"]["type"] is not float:
-                raise ValueError("The KV-Cache quantization dtype is not supported")
-
-            if quant_config["kv_cache_scheme"]["num_bits"] != 8:
-                raise ValueError("Only 8-bit KV-Cache quantization dtype is supported")
-
-        return True
51+ if "quant_method" in quant_config .keys ():
52+ if quant_config ["quant_method" ] == "compressed-tensors" :
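+            # The checks below assume the compressed-tensors layout of the
+            # "quantization_config" section in config.json: a "format" field plus
+            # config_groups -> group_0 -> input_activations / weights, and an
+            # optional kv_cache_scheme.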
+            if quant_config["format"] != "float-quantized":
+                raise ValueError(
+                    "The input activation and weight quantization dtypes are not supported"
+                )
+
+            if (
+                quant_config["config_groups"]["group_0"]["input_activations"][
+                    "num_bits"
+                ]
+                != 8
+            ):
+                raise ValueError(
+                    "Only 8-bit FP input activation quantization is supported"
+                )
+
+            if quant_config["config_groups"]["group_0"]["weights"]["num_bits"] != 8:
+                raise ValueError("Only 8-bit FP weight quantization is supported")
+
+            if quant_config["kv_cache_scheme"] is not None:
+                if quant_config["kv_cache_scheme"]["type"] != "float":
+                    raise ValueError("The KV-Cache quantization dtype is not supported")
+
+                if quant_config["kv_cache_scheme"]["num_bits"] != 8:
+                    raise ValueError(
+                        "Only 8-bit KV-Cache quantization dtype is supported"
+                    )
+
+            return True
+        raise ValueError(
+            "The quantization method is not supported for inferencing. "
+            "Only FP8 quantization is supported"
+        )

-    raise ValueError("This quantization method is not supported for inferencing")
+    raise ValueError(
+        "The quantization method was not found. Please check the config file"
+    )


-def load_inference_qconfig_file(model_args, fms_mo_args):
+def load_inference_qconfig_file(
+    model_args: Any = None, fms_mo_args: Any = None
+) -> Dict[str, Union[int, float, str]]:
    """
    Function to load the inference quantization config for fms_mo
    """
@@ -87,12 +101,13 @@ def load_inference_qconfig_file(model_args, fms_mo_args):
                recipe=model_args.model_name_or_path + "/qcfg", args=fms_mo_args
            )
        else:
-            logger.info("qcfg file found, loading the qcfg file ")
+            logger.info(
+                "loading quantization configuration from "
+                f"{model_args.model_name_or_path + '/qcfg.json'}"
+            )
            qcfg = qconfig_init(recipe=model_args.model_name_or_path + "/qcfg")
    else:
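        # No qcfg file shipped with the checkpoint: build the config from the
        # "dq" recipe and fms_mo_args, then pull settings from the model config.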
        logger.info(
-            f"qcfg file not found in {model_args.model_name_or_path},\
-            loading fms_mo_args and recipe"
+            f"qcfg file not found in {model_args.model_name_or_path},"
+            " loading fms_mo_args and recipe"
        )
        qcfg = qconfig_init(recipe="dq", args=fms_mo_args)
        qcfg = update_qcfg_from_model_config(model_args, qcfg)
@@ -101,7 +116,9 @@ def load_inference_qconfig_file(model_args, fms_mo_args):
    return qcfg


-def update_qcfg_from_model_config(model_args, qcfg):
+def update_qcfg_from_model_config(
+    model_args: Any = None, qcfg: dict = None
+) -> Dict[str, Union[int, float, str]]:
    """
    Function to update the default qcfg settings with the settings in the model config file.
    Important for the case where a qcfg file does not exist.
@@ -144,15 +161,16 @@ def update_qcfg_from_model_config(model_args, qcfg):
    return qcfg


-# def rename_fms_dict_to_vllm_dict(model_dict: dict = None, qcfg: dict = None):
-def rename_fms_dict_to_vllm_dict(model_dict: dict = None):
+def rename_fms_dict_to_vllm_dict(
+    model_dict: dict = None,
+) -> Tuple[Dict[str, Union[int, float]], Dict[str, Union[int, float]]]:
    """
    Function to rename a dict from fms_mo format to vLLM format.
    """
    st_dict = {}
    fms_dict = {}
    keys = model_dict.keys()
-
+    logger.warning("Only static per-channel weight quantization is supported at this time")
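    # Split the fms_mo state dict into vLLM-format entries (st_dict) and the
    # remaining fms_mo entries (fms_dict).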
    for k, v in model_dict.items():
        if ".weight" in k:
            key = k.split("weight")[0]
@@ -167,7 +185,9 @@ def rename_fms_dict_to_vllm_dict(model_dict: dict = None):
    return st_dict, fms_dict


-def update_config(model_config_file: dict = None, qcfg: dict = None):
+def update_config(
+    model_config_file: dict = None, qcfg: dict = None
+) -> Dict[str, Union[int, str]]:
    """
    Function to update the model config file with quantization configuration
    """
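    # This quantization section is what vLLM reads from config.json to recognize
    # an FP8 (compressed-tensors) checkpoint.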
@@ -181,7 +201,9 @@ def update_config(model_config_file: dict = None, qcfg: dict = None):
    return model_config_file


-def save_vllm_fp8(model: nn.Module, qcfg: dict, tokenizer=None, folder: str = None):
+def save_vllm_fp8(
+    model: nn.Module, qcfg: dict, tokenizer=None, folder: str = None
+) -> None:
    """
    Function to save fp8 DQ model in vllm fp8 format
    """
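    # Expected to write the converted tensors and a config.json updated with the
    # quantization settings (see update_config) into `folder`.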
@@ -200,7 +222,9 @@ def save_vllm_fp8(model: nn.Module, qcfg: dict, tokenizer=None, folder: str = None):
        json.dump(config, f, indent=4)


-def convert_fms_mo_to_vllm_fp8_format(checkpoint: str = None, folder: str = None):
+def convert_fms_mo_to_vllm_fp8_format(
+    checkpoint: str = None, folder: str = None
+) -> None:
    """
    Function to convert fp8 fms_mo DQ model checkpoint to vllm fp8 format
    """
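    # Illustrative usage (hypothetical paths):
    #   convert_fms_mo_to_vllm_fp8_format("ckpts/fms_mo_fp8_dq", "ckpts/vllm_fp8")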
@@ -231,7 +255,7 @@ def convert_fms_mo_to_vllm_fp8_format(checkpoint: str = None, folder: str = None):
        json.dump(config, f, indent=4)


-def find_file_glob(pattern: str, search_path: str):
+def find_file_glob(pattern: str, search_path: str) -> List[str]:
    """
    Finds files matching a pattern within a directory and its subdirectories.
    """
@@ -243,7 +267,7 @@ def find_file_glob(pattern: str, search_path: str):

def convert_fp8_vllm_dict_to_fms_mo_dict(
    checkpoint: str = None, output_dir: str = None
-):
+) -> None:
    """
    Function to help convert vllm fp8 checkpoint into fms_mo fp8 format
    """
@@ -257,7 +281,7 @@ def convert_fp8_vllm_dict_to_fms_mo_dict(
    save_torch_state_dict(fms_mo_dict, output_dir)


-def rename_vllm_dict_to_fms_mo(vllm_dict: dict = None):
+def rename_vllm_dict_to_fms_mo(vllm_dict: dict) -> dict:
    """
    Function to help rename vllm dict format to fms_mo dict format
    """
@@ -271,14 +295,12 @@ def rename_vllm_dict_to_fms_mo(vllm_dict: dict = None):
            fms_mo_dict[k] = v
        else:
            key = k.split("weight")[0]
-            if key + "weight_scale" in vllm_dict.keys():
-                pass
-            else:
+            if key + "weight_scale" not in vllm_dict.keys():
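                # Weights without a matching weight_scale are not FP8-quantized;
                # carry them over unchanged.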
                fms_mo_dict[k] = v
    return fms_mo_dict


-def convert_fp8_vllm_to_fms_mo(model: nn.Module = None):
+def convert_fp8_vllm_to_fms_mo(model: nn.Module = None) -> nn.Module:
    """
    Function to help convert fp8 vllm model dict format to fms_mo fp8 format
    """