@@ -26,7 +26,7 @@
     ModelPatcherTrigger,
 )
 from peft import LoraConfig
-from peft.tuners.lora.gptq import QuantLinear as LoraLinearGPTQ
+from peft.tuners.lora.gptq import GPTQLoraLinear
 import torch

 # these parameters are to be patched for triton v2
@@ -162,7 +162,7 @@ def create_new_module_peft(
     # to be installed
     new_module = None
     if isinstance(target, target_cls):
-        new_module = LoraLinearGPTQ(
+        new_module = GPTQLoraLinear(
             target, adapter_name, lora_config=lora_config, **kwargs
         )

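Context for the rename: peft 0.15.0 renamed the GPTQ LoRA layer in `peft.tuners.lora.gptq` from `QuantLinear` to `GPTQLoraLinear` (the old pin's comment below notes that `QuantLinear` disappears after 0.14.0), which is why both imports and both call sites change. A codebase that had to straddle the two peft versions instead of bumping the floor could use a try/except import shim; a minimal sketch, assuming only the two import paths shown in this diff:

```python
# Minimal sketch: tolerate both peft versions by normalizing on one alias.
try:
    # peft >= 0.15.0 exposes the GPTQ LoRA layer under its new name
    from peft.tuners.lora.gptq import GPTQLoraLinear
except ImportError:
    # peft <= 0.14.0 exposed the same layer as QuantLinear
    from peft.tuners.lora.gptq import QuantLinear as GPTQLoraLinear
```

This PR takes the simpler route and raises the dependency floor to `peft>=0.15.0` (see the pyproject.toml change below), keeping the imports unconditional.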
@@ -30,7 +30,7 @@
 from peft.mapping import PEFT_TYPE_TO_CONFIG_MAPPING
 from peft.peft_model import PEFT_TYPE_TO_MODEL_MAPPING
 from peft.tuners.lora import LoraConfig, LoraModel
-from peft.tuners.lora.gptq import QuantLinear as LoraLinearGPTQ
+from peft.tuners.lora.gptq import GPTQLoraLinear
 import torch

 # Local
@@ -68,7 +68,7 @@ def _create_new_module(
     # to be installed
     new_module = None
     if isinstance(target, target_cls):
-        new_module = LoraLinearGPTQ(
+        new_module = GPTQLoraLinear(
             target, adapter_name, lora_config=lora_config, **kwargs
         )

plugins/framework/pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -24,7 +24,7 @@ classifiers=[
 dependencies = [
     "numpy<2.0", # numpy needs to be bounded due to incompatibility with current torch<2.3
     "torch>2.2",
-    "peft<=0.14.0", # QuantLinear is not available for peft version > 0.14.0
+    "peft>=0.15.0",
     "accelerate",
     "pandas",
 ]
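Because the module-level import fails on older peft anyway, the new floor is largely self-enforcing; if a friendlier error were wanted, a runtime guard is a common pattern. A sketch, assuming 0.15.0 is the exact version where the rename landed:

```python
# Optional guard sketch: give an actionable error when the installed peft
# predates the GPTQLoraLinear rename (assumed to have landed in 0.15.0).
from importlib.metadata import version

from packaging.version import Version

if Version(version("peft")) < Version("0.15.0"):
    raise ImportError(
        "peft>=0.15.0 is required: peft.tuners.lora.gptq.GPTQLoraLinear "
        "replaced QuantLinear in peft 0.15.0"
    )
```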
plugins/fused-ops-and-kernels/tests/test_fused_ops.py (2 changes: 1 addition & 1 deletion)
@@ -28,7 +28,7 @@
 if _is_package_available("auto_gptq"):
     # pylint: disable=ungrouped-imports
     # Third Party
-    from peft.tuners.lora.gptq import QuantLinear as LoraGPTQLinear4bit
+    from peft.tuners.lora.gptq import GPTQLoraLinear as LoraGPTQLinear4bit

     LORA_QUANTIZED_CLASSES[GPTQ] = LoraGPTQLinear4bit

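For reference, `LORA_QUANTIZED_CLASSES` keys the expected LoRA layer class by quantization backend, so the tests can check wrapped modules generically. A sketch of that pattern; the `model` argument and the helper itself are illustrative, not taken from the test file:

```python
# Illustrative helper (not from the test file): count modules that peft
# wrapped with the registered GPTQ LoRA layer class.
def count_gptq_lora_layers(model) -> int:
    target_cls = LORA_QUANTIZED_CLASSES[GPTQ]
    return sum(
        1
        for _, module in model.named_modules()
        if isinstance(module, target_cls)
    )
```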