diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py b/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py
index d0b01428..c073b82e 100644
--- a/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/autogptq_utils.py
@@ -26,7 +26,7 @@
     ModelPatcherTrigger,
 )
 from peft import LoraConfig
-from peft.tuners.lora.gptq import QuantLinear as LoraLinearGPTQ
+from peft.tuners.lora.gptq import GPTQLoraLinear
 import torch
 
 # these parameters are to be patched for triton v2
@@ -162,7 +162,7 @@ def create_new_module_peft(
     # to be installed
     new_module = None
     if isinstance(target, target_cls):
-        new_module = LoraLinearGPTQ(
+        new_module = GPTQLoraLinear(
             target, adapter_name, lora_config=lora_config, **kwargs
         )
diff --git a/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/peft.py b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/peft.py
index a6fd4b15..c73a1d8d 100644
--- a/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/peft.py
+++ b/plugins/accelerated-peft/src/fms_acceleration_peft/gptqmodel/utils/peft.py
@@ -30,7 +30,7 @@
 from peft.mapping import PEFT_TYPE_TO_CONFIG_MAPPING
 from peft.peft_model import PEFT_TYPE_TO_MODEL_MAPPING
 from peft.tuners.lora import LoraConfig, LoraModel
-from peft.tuners.lora.gptq import QuantLinear as LoraLinearGPTQ
+from peft.tuners.lora.gptq import GPTQLoraLinear
 import torch
 
 # Local
@@ -68,7 +68,7 @@ def _create_new_module(
     # to be installed
     new_module = None
     if isinstance(target, target_cls):
-        new_module = LoraLinearGPTQ(
+        new_module = GPTQLoraLinear(
             target, adapter_name, lora_config=lora_config, **kwargs
         )
diff --git a/plugins/framework/pyproject.toml b/plugins/framework/pyproject.toml
index 157c4ada..e46513b9 100644
--- a/plugins/framework/pyproject.toml
+++ b/plugins/framework/pyproject.toml
@@ -24,7 +24,7 @@ classifiers=[
 dependencies = [
     "numpy<2.0", # numpy needs to be bounded due to incompatiblity with current torch<2.3
     "torch>2.2",
-    "peft<=0.14.0", # QuantLinear is not available for peft version > 0.14.0
+    "peft>=0.15.0",
     "accelerate",
     "pandas",
 ]
diff --git a/plugins/fused-ops-and-kernels/tests/test_fused_ops.py b/plugins/fused-ops-and-kernels/tests/test_fused_ops.py
index b7ee56d1..4b607bf1 100644
--- a/plugins/fused-ops-and-kernels/tests/test_fused_ops.py
+++ b/plugins/fused-ops-and-kernels/tests/test_fused_ops.py
@@ -28,7 +28,7 @@ if _is_package_available("auto_gptq"):
     # pylint: disable=ungrouped-imports
     # Third Party
-    from peft.tuners.lora.gptq import QuantLinear as LoraGPTQLinear4bit
+    from peft.tuners.lora.gptq import GPTQLoraLinear as LoraGPTQLinear4bit
 
     LORA_QUANTIZED_CLASSES[GPTQ] = LoraGPTQLinear4bit