
Commit ab98dff

Fix lint and fmt
Signed-off-by: Thara Palanivel <[email protected]>
1 parent: 50aea62

3 files changed: 15 additions & 8 deletions

fms_mo/custom_ext_kernels/utils.py

Lines changed: 12 additions & 5 deletions
```diff
@@ -500,7 +500,7 @@ def exllama_ops_load_and_reg(qcfg=None, run_unit_test=False):
     1. need to install gptqmodel python package
     2. Op registration signature changed drastically from torch 2.1 - 2.4. TODO: add 2.4 support
 
-    see https://github.com/ModelCloud/GPTQModel/tree/main?tab=readme-ov-file for installation instructions
+    see https://github.com/ModelCloud/GPTQModel for installation instructions
     """
     if qcfg is None:
         qcfg = {}
@@ -509,7 +509,9 @@ def exllama_ops_load_and_reg(qcfg=None, run_unit_test=False):
 
     namespace = "gptqmodel_gemm"
     # check before compile
-    if hasattr(torch.ops, namespace) and hasattr(torch.ops.gptqmodel_gemm, "exv1_i4f16"):
+    if hasattr(torch.ops, namespace) and hasattr(
+        torch.ops.gptqmodel_gemm, "exv1_i4f16"
+    ):
         logger.info("Custom GPTQModel functions have been loaded already!")
         qcfg["GPTQMODEL_AVAILABLE"] = True
         need_registration = False
```
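The guard in the second hunk is what prevents double registration: it probes `torch.ops` for the namespace and for one known op before compiling anything. A minimal, self-contained sketch of the same check (the helper name is illustrative; the namespace and op name come from the hunk):

```python
import torch


def ops_already_registered(namespace: str, op_name: str) -> bool:
    """Return True if a custom op is already visible under torch.ops."""
    # Probe with hasattr rather than plain attribute access: looking up a
    # missing op on a torch.ops namespace raises AttributeError, which
    # hasattr turns into a clean False.
    return hasattr(torch.ops, namespace) and hasattr(
        getattr(torch.ops, namespace), op_name
    )


# Skip compilation and re-registration when the kernels are already loaded.
if ops_already_registered("gptqmodel_gemm", "exv1_i4f16"):
    print("Custom GPTQModel functions have been loaded already!")
```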
```diff
@@ -623,7 +625,8 @@ def exv2_i4f16_fxinputs_abstract(
     )
 
     logger.info(
-        f"New GPTQModel gemm functions have been loaded and registered to torch.ops.{namespace}."
+        f"New GPTQModel gemm functions have been loaded and registered to \
+            torch.ops.{namespace}."
     )
     if qcfg:
         qcfg["GPTQMODEL_AVAILABLE"] = True
```
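One caveat with this particular wrap: a backslash continuation inside a string literal keeps the wrapped line's leading indentation in the message itself. A small sketch of the difference (the first form mirrors the hunk; the second is an alternative, not from the file):

```python
namespace = "gptqmodel_gemm"

# Backslash continuation: the wrapped line's indentation becomes part of
# the string, so the logged message carries a run of embedded spaces.
wrapped = f"New GPTQModel gemm functions have been loaded and registered to \
    torch.ops.{namespace}."

# Adjacent literals concatenate at compile time with no stray whitespace.
joined = (
    "New GPTQModel gemm functions have been loaded and registered to "
    f"torch.ops.{namespace}."
)

assert "  torch.ops" in wrapped
assert "  torch.ops" not in joined
```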
```diff
@@ -1110,10 +1113,14 @@ def swap_nnlinear_to_quantlinear(model, qconfig, prefix=None, qlinear2use=None):
         QuantLinear = qlinear2use
     elif exVer == 1:
         # Third Party
-        from gptqmodel.nn_modules.qlinear.exllama import ExllamaQuantLinear as QuantLinear
+        from gptqmodel.nn_modules.qlinear.exllama import (
+            ExllamaQuantLinear as QuantLinear,
+        )
     else:
         # Third Party
-        from gptqmodel.nn_modules.qlinear.exllamav2 import ExllamaV2QuantLinear as QuantLinear
+        from gptqmodel.nn_modules.qlinear.exllamav2 import (
+            ExllamaV2QuantLinear as QuantLinear,
+        )
 
     num_swapped = 0
     for n, m in model.named_modules():
```
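Both import rewrites in this hunk are mechanical formatter output: wrapping an over-long `from ... import ... as ...` in parentheses with a trailing comma is the usual Black/PEP 8 style for fitting imports under the line-length limit, and is preferred over a backslash continuation.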

fms_mo/modules/linear.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -1402,14 +1402,14 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
 try:
     # Third Party
+    from exllama_kernels import prepare_buffers, set_tuning_params
     from gptqmodel.nn_modules.qlinear.exllama import (
         ExllamaQuantLinear as QLinearExllamaV1,
     )
     from gptqmodel.nn_modules.qlinear.exllamav2 import (
         ExllamaV2QuantLinear as QLinearExllamaV2,
     )
     from gptqmodel.nn_modules.qlinear.exllamav2 import ext_gemm_half_q_half
-    from exllama_kernels import prepare_buffers, set_tuning_params
     from transformers.pytorch_utils import Conv1D
 
     class QLinearExv1WI4AF16(QLinearExllamaV1):
```
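The one-line move is an import-order fix, consistent with isort's alphabetical sorting: within the third-party group, `exllama_kernels` sorts before `gptqmodel`. Since the whole block sits inside a `try`, the imports stay optional; a stripped-down sketch of that guarded-import pattern (the flag name is illustrative):

```python
try:
    # Third Party: isort keeps this group alphabetized, which is why
    # exllama_kernels has to come before the gptqmodel imports.
    from exllama_kernels import prepare_buffers, set_tuning_params
    from gptqmodel.nn_modules.qlinear.exllama import (
        ExllamaQuantLinear as QLinearExllamaV1,
    )

    EXLLAMA_AVAILABLE = True
except ImportError:
    # Kernels are not installed; callers fall back to non-exllama paths.
    EXLLAMA_AVAILABLE = False
```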

fms_mo/run_quant.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -89,7 +89,7 @@ def quantize(
         raise ImportError(
             "Quantization method has been selected as gptq but unable to use external library, "
             "gptqmodel module not found. For more instructions on installing the appropriate "
-            "package, see https://github.com/ModelCloud/GPTQModel/tree/main?tab=readme-ov-file#install"
+            "package, see https://github.com/ModelCloud/GPTQModel"
         )
         run_gptq(model_args, data_args, opt_args, gptq_args)
     elif opt_args.quant_method == "fp8":
@@ -98,7 +98,7 @@ def quantize(
             "Quantization method has been selected as fp8 but unable to use external library, "
             "llmcompressor module not found. \n"
             "For more instructions on installing the appropriate package, see "
-            "https://github.com/vllm-project/llm-compressor/tree/"
+            "https://github.com/vllm-project/llm-compressor"
             "main?tab=readme-ov-file#installation"
         )
         run_fp8(model_args, data_args, opt_args, fp8_args)
```
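Adjacent Python string literals concatenate with nothing between them, so the context line `"main?tab=readme-ov-file#installation"` retained below the change still joins onto the shortened link. A minimal sketch of the message this hunk now builds:

```python
# Adjacent literals fuse at compile time; note how the last two pieces join.
message = (
    "Quantization method has been selected as fp8 but unable to use external library, "
    "llmcompressor module not found. \n"
    "For more instructions on installing the appropriate package, see "
    "https://github.com/vllm-project/llm-compressor"
    "main?tab=readme-ov-file#installation"
)

# The link comes out as ".../llm-compressormain?tab=...", so the trailing
# literal would also need dropping (or the "/tree/" kept) for the URL to
# resolve as intended.
print(message)
```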
