
Commit 0f69223

Initial commit for GPTQModel migration
Signed-off-by: Thara Palanivel <[email protected]>
1 parent: 2d1d91d

2 files changed: +8 −8 lines


fms_mo/run_quant.py

Lines changed: 7 additions & 7 deletions
@@ -124,28 +124,28 @@ def run_gptq(model_args, data_args, opt_args, gptq_args):
     """
 
     # Third Party
-    from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
-    from auto_gptq.modeling._const import SUPPORTED_MODELS
-    from auto_gptq.modeling.auto import GPTQ_CAUSAL_LM_MODEL_MAP
+    from gptqmodel import GPTQModel, QuantizeConfig
+    from gptqmodel.models._const import SUPPORTED_MODELS
+    from gptqmodel.models.auto import MODEL_MAP
 
     # Local
     from fms_mo.utils.custom_gptq_models import custom_gptq_classes
 
     logger = set_log_level(opt_args.log_level, "fms_mo.run_gptq")
 
-    quantize_config = BaseQuantizeConfig(
+    quantize_config = QuantizeConfig(
         bits=gptq_args.bits,
         group_size=gptq_args.group_size,
         desc_act=gptq_args.desc_act,
         damp_percent=gptq_args.damp_percent,
     )
 
-    # Add custom model_type mapping to auto_gptq LUT so AutoGPTQForCausalLM can recognize them.
+    # Add custom model_type mapping to auto_gptq LUT so GPTQModel can recognize them.
     for mtype, cls in custom_gptq_classes.items():
         SUPPORTED_MODELS.append(mtype)
-        GPTQ_CAUSAL_LM_MODEL_MAP[mtype] = cls
+        MODEL_MAP[mtype] = cls
 
-    model = AutoGPTQForCausalLM.from_pretrained(
+    model = GPTQModel.from_pretrained(
         model_args.model_name_or_path,
         quantize_config=quantize_config,
         torch_dtype=model_args.torch_dtype,
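
For readers following the migration, a minimal standalone sketch of the new call sequence is shown below. The QuantizeConfig construction and GPTQModel.from_pretrained call mirror the hunk above; the quantize() and save_quantized() calls, the model id, the calibration text, and the output directory are assumptions about the gptqmodel API (carried over from auto_gptq's interface), not part of this commit.

    # Minimal sketch of the migrated flow; assumptions are noted in comments.
    from gptqmodel import GPTQModel, QuantizeConfig

    # Mirrors the QuantizeConfig construction in the diff above,
    # with illustrative values standing in for gptq_args.
    quantize_config = QuantizeConfig(
        bits=4,             # weight precision
        group_size=128,     # quantization group size
        desc_act=False,     # activation-order reordering off
        damp_percent=0.01,  # Hessian dampening factor
    )

    # Mirrors the diff: GPTQModel.from_pretrained replaces
    # AutoGPTQForCausalLM.from_pretrained.
    model = GPTQModel.from_pretrained(
        "facebook/opt-125m",  # hypothetical model id
        quantize_config=quantize_config,
    )

    # Assumption: gptqmodel keeps auto_gptq's quantize/save_quantized
    # interface; a real run would pass many tokenized calibration samples.
    calibration = ["The quick brown fox jumps over the lazy dog."]
    model.quantize(calibration)
    model.save_quantized("opt-125m-gptq-4bit")  # hypothetical output dir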

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ dependencies = [
 [project.optional-dependencies]
 dev = ["pre-commit>=3.0.4,<5.0"]
 fp8 = ["llmcompressor"]
-gptq = ["auto_gptq>0.4.2", "optimum>=1.15.0"]
+gptq = ["gptqmodel"]
 visualize = ["matplotlib", "graphviz", "pygraphviz"]
 flash-attn = ["flash-attn>=2.5.3,<3.0"]
 opt = ["fms-model-optimizer[fp8, gptq]"]
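
With the gptq extra now pointing at gptqmodel, installing via pip install "fms-model-optimizer[gptq]" pulls in gptqmodel instead of auto_gptq and optimum. A quick sanity check under that assumption (a sketch, not part of this commit):

    # Sketch: confirm the swapped optional dependency resolves
    # after installing the gptq extra.
    import importlib.util

    assert importlib.util.find_spec("gptqmodel") is not None, "gptqmodel not installed"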
