Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from setuptools import find_packages, setup


VERSION = "0.18.0"
VERSION = "0.18.1"

extras = {}
extras["quality"] = [
Expand Down
2 changes: 1 addition & 1 deletion src/peft/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.18.0"
__version__ = "0.18.1"

from .auto import (
MODEL_TYPE_TO_PEFT_MODEL_MAPPING,
Expand Down
13 changes: 12 additions & 1 deletion src/peft/tuners/lora/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
from functools import partial, reduce
from typing import Literal, Optional

import packaging.version
import torch
import transformers
from torch import nn
from transformers.modeling_layers import GradientCheckpointingLayer

from peft.import_utils import is_bnb_4bit_available, is_bnb_available
from peft.tuners.tuners_utils import (
Expand Down Expand Up @@ -360,6 +361,16 @@ def _enable_peft_forward_hooks(self, *args, **kwargs):
hook_handles = []

if alora_offsets is not None:
# TODO: remove once transformers 4.52 is no longer supported. Note that 4.52.0 is yanked, so 4.52.1
# is the first 4.52 release.
transformers_lt_4_52 = packaging.version.parse(transformers.__version__) < packaging.version.parse(
"4.52.1"
)
if transformers_lt_4_52:
raise ValueError("Using aLoRA requires transformers >= 4.52.1.")

from transformers.modeling_layers import GradientCheckpointingLayer

for n, layer in self.named_modules():
# gradient checkpointing layers are executed concurrently with the 'normal' forward call
# (in the backward step the gradient checkpointing layer's forward will be executed again).
Expand Down
4 changes: 2 additions & 2 deletions src/peft/tuners/osf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def project_gradient_to_orthogonal_space(svd_dict: dict[str, Any]) -> None:
# Use addmm_ for efficient in-place operation
# Compute local contribution to (U_high^T @ dU); all-reduce to get global projection
proj_coeff = torch.mm(local_U_high.transpose(0, 1), local_dU)
if dist.is_initialized() and dist.get_world_size() > 1:
if dist.is_available() and dist.is_initialized() and dist.get_world_size() > 1:
dist.all_reduce(proj_coeff, op=dist.ReduceOp.SUM)
# Apply projection using only local rows of U_high
local_dU.addmm_(local_U_high, proj_coeff, alpha=-1.0)
Expand All @@ -120,7 +120,7 @@ def project_gradient_to_orthogonal_space(svd_dict: dict[str, Any]) -> None:
# Compute Gram matrix G = V_high^T @ V_high for global projection across row-sharded V_high
# Assumes column dimension is consistent across ranks (row sharding over singular vectors)
G_local = torch.mm(local_V_high.transpose(0, 1), local_V_high)
if dist.is_initialized() and dist.get_world_size() > 1:
if dist.is_available() and dist.is_initialized() and dist.get_world_size() > 1:
dist.all_reduce(G_local, op=dist.ReduceOp.SUM)

# Apply projection: dV = dV - dV @ G (use local shard of dV)
Expand Down
7 changes: 5 additions & 2 deletions src/peft/tuners/trainable_tokens/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def _collect_token_weights(self, weight: torch.Tensor, rows: torch.Tensor, embed
device = torch.device("cuda", torch.cuda.current_device())

with gather_params_ctx([weight], modifier_rank=None):
if dist.get_rank() == src_rank:
if dist.is_available() and dist.is_initialized() and dist.get_rank() == src_rank:
token_weights = weight[rows].clone()
else:
# build an empty tensor with correct shape/type/device
Expand Down Expand Up @@ -199,14 +199,17 @@ def unmerge(self) -> None:
originals = self.trainable_tokens_original[adapter_name].to(self.base_layer.weight)
self.base_layer.weight.data.index_copy_(dim=0, index=index, source=originals)

def get_merged_weights(self, active_adapters):
def get_merged_weights(self, active_adapters) -> torch.Tensor:
    """Return a copy of the base embedding weight with adapter token rows merged in.

    For each adapter in `active_adapters`, the rows at that adapter's
    `token_indices` are replaced by the values stored in
    `trainable_tokens_delta`. The base layer's weight itself is not modified.

    Args:
        active_adapters: Iterable of adapter names to merge, applied in order
            (later adapters overwrite overlapping rows of earlier ones).

    Returns:
        torch.Tensor: The merged weight tensor.
    """
    W = self.base_layer.weight

    for adapter_name in active_adapters:
        # Row indices for this adapter, moved to the weight's device.
        index = torch.tensor(self.token_indices[adapter_name]).to(W.device)
        # `.to(W)` casts the stored values to the weight's dtype and device.
        deltas = self.trainable_tokens_delta[adapter_name].to(W)
        # Out-of-place index_copy: produces a new tensor, leaving the original
        # base weight untouched.
        W = W.index_copy(dim=0, index=index, source=deltas)

    # Note: the return type is a Tensor, not an nn.Parameter. This can lead to some errors, e.g. torch's
    # model.get_parameter fails as it does a type check. But we cannot return an nn.Parameter here, as it can lead
    # to other failures, as this is not a true nn.Parameter of the model.
    return W

def forward_adapters(self, x: torch.Tensor, active_adapters, *args, **kwargs) -> torch.Tensor:
Expand Down
7 changes: 5 additions & 2 deletions src/peft/tuners/tuners_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@
from ._buffer_dict import BufferDict


_torch_supports_dtensor = version.parse(torch.__version__) >= version.parse("2.5.0")
_torch_supports_distributed = _torch_supports_dtensor and torch.distributed.is_available()


@contextmanager
def onload_layer(layer):
r"""
Expand Down Expand Up @@ -157,8 +161,7 @@ def _get_in_out_features(module: nn.Module) -> tuple[int, int] | tuple[None, Non
this function returns a valid result does not imply that the layer type is supported.
"""
if isinstance(module, nn.Linear):
torch_supports_dtensor = version.parse(torch.__version__) >= version.parse("2.5.0")
if torch_supports_dtensor and isinstance(module.weight, torch.distributed.tensor.DTensor):
if _torch_supports_distributed and isinstance(module.weight, torch.distributed.tensor.DTensor):
# If Tensor Parallel is used, the weight is sharded, so we need to get the local shape
out_features, in_features = module.weight.to_local().shape
else:
Expand Down
3 changes: 2 additions & 1 deletion src/peft/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .integrations import map_cache_to_layer_device_map
from .integrations import is_transformers_ge_v5, map_cache_to_layer_device_map
from .loftq_utils import replace_lora_weights_loftq
from .other import (
CONFIG_NAME,
Expand Down Expand Up @@ -120,6 +120,7 @@
"get_quantization_config",
"id_tensor_storage",
"infer_device",
"is_transformers_ge_v5",
"load_peft_weights",
"map_cache_to_layer_device_map",
"prepare_model_for_kbit_training",
Expand Down
3 changes: 3 additions & 0 deletions src/peft/utils/integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from torch import nn


is_transformers_ge_v5 = packaging.version.parse(transformers.__version__) >= packaging.version.parse("5.0.0.dev0")


def check_deepspeed_zero3_enabled() -> bool:
if packaging.version.parse(transformers.__version__) >= packaging.version.parse("4.33.0"):
from transformers.integrations import is_deepspeed_zero3_enabled
Expand Down
16 changes: 14 additions & 2 deletions src/peft/utils/other.py
Original file line number Diff line number Diff line change
Expand Up @@ -1573,7 +1573,7 @@ def _get_module_names_tied_with_embedding(model) -> list[str]:
that the weight tying definition is present but the tying is disabled via `model_config.tie_word_embeddings=False`.
You have to check that yourself.
"""
tied_weights = []
tied_weights: list[str] = []

if hasattr(model, "get_base_model"):
# unpack PeftModel
Expand All @@ -1595,6 +1595,17 @@ def _get_module_names_tied_with_embedding(model) -> list[str]:
"'get_input_embeddings' so we can't determine which weights are tied to embeddings."
)

# collect all _tied_weights_keys, as sub-modules may have additional entries
tied_weights_keys: dict[str, str] = {}
for module_name, module in model.named_modules():
module_tied_weights_keys = getattr(module, "_tied_weights_keys", None)
if module_tied_weights_keys and not module_name:
tied_weights_keys.update(module_tied_weights_keys)
elif module_tied_weights_keys:
tied_weights_keys.update(
{f"{module_name}.{k}": f"{module_name}.{v}" for k, v in module_tied_weights_keys.items()}
)

# technically it would be sufficient to just return candidates since that contains all the keys of
# all models that are tied (not just equal!) to the input embeddings. the only reason why we aren't
# doing that is because we need to filter out the original embedding name since we promise to just
Expand All @@ -1613,12 +1624,13 @@ def _get_module_names_tied_with_embedding(model) -> list[str]:

tied_weights.extend(
peft_reverse_mapping.get(k, k)
for k, v in model._tied_weights_keys.items()
for k, v in tied_weights_keys.items()
if peft_reverse_mapping.get(v, v) in candidates
)

elif model._tied_weights_keys is not None:
# TODO remove this when transformers <v5 is no longer supported
tied_weights.extend(model._tied_weights_keys)

# get module names from parameter names
return sorted({name.rpartition(".")[0] for name in tied_weights})
18 changes: 12 additions & 6 deletions tests/testing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
is_optimum_available,
is_torchao_available,
)
from peft.utils import is_transformers_ge_v5


# Globally shared model cache used by `hub_online_once`.
Expand Down Expand Up @@ -279,18 +280,23 @@ def test_something(model_id, config_kwargs):
if model_id in _HUB_MODEL_ACCESSES:
override = {"HF_HUB_OFFLINE": "1"}
_HUB_MODEL_ACCESSES[model_id] += 1
else:
if model_id not in _HUB_MODEL_ACCESSES:
_HUB_MODEL_ACCESSES[model_id] = 0
elif model_id not in _HUB_MODEL_ACCESSES:
_HUB_MODEL_ACCESSES[model_id] = 0
is_offline = override.get("HF_HUB_OFFLINE", False) == "1"

with (
# strictly speaking it is not necessary to set the environment variable since most code that's out there
# is evaluating it at import time and we'd have to reload the modules for it to take effect. It's
# probably still a good idea to have it if there's some dynamic code that checks it.
mock.patch.dict(os.environ, override),
mock.patch("huggingface_hub.constants.HF_HUB_OFFLINE", override.get("HF_HUB_OFFLINE", False) == "1"),
mock.patch("transformers.utils.hub._is_offline_mode", override.get("HF_HUB_OFFLINE", False) == "1"),
mock.patch("huggingface_hub.constants.HF_HUB_OFFLINE", is_offline),
):
yield
if is_transformers_ge_v5:
with mock.patch("transformers.utils.hub.is_offline_mode", lambda: is_offline):
yield
else: # TODO remove if transformers <= 4 no longer supported
with mock.patch("transformers.utils.hub._is_offline_mode", is_offline):
yield
except Exception:
# in case of an error we have to assume that we didn't access the model properly from the hub
# for the first time, so the next call cannot be considered cached.
Expand Down
Loading