
Commit 4878ba1

fix: corrected the inference file
Signed-off-by: Omobayode Fagbohungbe <[email protected]>
1 parent: 31dd8c7 · commit: 4878ba1

3 files changed: +10 / -10 lines changed

fms_mo/prep.py

Lines changed: 1 addition & 1 deletion
@@ -404,7 +404,7 @@ def make_quant_module(module, curr_full_name, qcfg, verbose=False):
                 f"{curr_full_name} {type(module)} seems to be a wrapper of Linear."
                 "Please make sure it doesn't wrap BN and activ func. Otherwise"
                 "please create an equivalent Linear wrapper and change qcfg['mapping']."
-            )
+            )
         QLin = mapping.get(nn.Linear, None)
         if QLin is None:
             if verbose:

(The removed and added lines both contain only the closing parenthesis, so this hunk appears to be a whitespace-only adjustment.)
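For context, the warning text in this hunk points at qcfg['mapping'], which associates float module types with their quantized replacements. Below is a minimal, hypothetical sketch of that lookup; QLinearStub and the mapping contents are illustrative stand-ins, not fms_mo classes.

```python
# Hypothetical sketch of the mapping.get(nn.Linear, None) lookup shown above.
# QLinearStub is a made-up placeholder, not an fms_mo class.
import torch.nn as nn

class QLinearStub(nn.Linear):
    """Stand-in for a quantized Linear replacement."""

mapping = {nn.Linear: QLinearStub}  # illustrative qcfg['mapping'] entry

# A custom wrapper of Linear with no entry of its own falls back to the
# nn.Linear entry, which the warning message asks users to keep valid.
QLin = mapping.get(nn.Linear, None)
if QLin is None:
    print("No quantized Linear registered in qcfg['mapping']")
```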

fms_mo/utils/dq_inf.py

Lines changed: 8 additions & 8 deletions
@@ -49,31 +49,31 @@ def check_quantization_setting(model: nn.Module = None):
     logger.info("Validating config settings")
     if quant_config["quant_method"] == "compressed-tensors":
         if quant_config["format"] != "float-quantized":
-            raise Exception(
+            raise ValueError(
                 "The input activation and weight quantization dtypes are not supported"
             )

         if (
             quant_config["config_groups"]["group_0"]["input_activations"]["num_bits"]
             != 8
         ):
-            raise Exception("Only 8 bit FP input activation quantization is supported")
+            raise ValueError("Only 8 bit FP input activation quantization is supported")

         if quant_config["config_groups"]["group_0"]["weights"]["num_bits"] != 8:
-            raise Exception("Only 8-bit FP weight quantization is supported")
+            raise ValueError("Only 8-bit FP weight quantization is supported")

         if quant_config["kv_cache_scheme"] is None:
             pass
         else:
             if quant_config["kv_cache_scheme"]["type"] is not float:
-                raise Exception("The KV-Cache quantization dtype is not supported")
+                raise ValueError("The KV-Cache quantization dtype is not supported")

             if quant_config["kv_cache_scheme"]["num_bits"] != 8:
-                raise Exception("Only 8-bit KV-Cache quantization dtype is supported")
+                raise ValueError("Only 8-bit KV-Cache quantization dtype is supported")

         return True

-    raise Exception("This quantization method is not supported for inferencing")
+    raise ValueError("This quantization method is not supported for inferencing")


 def load_inference_qconfig_file(model_args, fms_mo_args):

@@ -115,7 +115,7 @@ def update_qcfg_from_model_config(model_args, qcfg):
     ):
         qcfg["qa_mode"] = "fp8_e4m3_scale_perToken"
     else:
-        raise Exception("Only perToken Fp8 activation quantizer is supported")
+        raise ValueError("Only perToken Fp8 activation quantizer is supported")

     if (
         config["quantization_config"]["config_groups"]["group_0"]["weights"]["strategy"]

@@ -128,7 +128,7 @@ def update_qcfg_from_model_config(model_args, qcfg):
     ):
         qcfg["qw_mode"] = "fp8_e4m3_scale"
     else:
-        raise Exception(
+        raise ValueError(
             "Only perChannel or pertensor FP8 quantizers are currently supported"
         )

fms_mo/utils/import_utils.py

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@
     "torchvision",
     "huggingface_hub",
     "torchao",
-    "compressed_tensors",
+    #"compressed_tensors",
 ]

 available_packages = {}
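The list above feeds an availability probe for optional dependencies, and commenting out "compressed_tensors" removes it from that probe. A rough sketch of how such a probe commonly works is shown below; the exact mechanism in fms_mo/utils/import_utils.py may differ.

```python
# Hedged sketch of a package-availability probe; names mirror the diff above,
# but the actual implementation in fms_mo/utils/import_utils.py may differ.
import importlib.util

packages_to_check = [
    "torchvision",
    "huggingface_hub",
    "torchao",
    # "compressed_tensors",  # no longer probed after this commit
]

available_packages = {
    name: importlib.util.find_spec(name) is not None for name in packages_to_check
}

print(available_packages)  # e.g. {"torchvision": True, ...} depending on the environment
```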
