ENH: Extend the regex for rank/alpha pattern (#2419)
Supersedes #2382
Right now, the regex used to match the keys passed for rank_pattern and
alpha_pattern requires that either:
1. The module name is identical to the key, or
2. The module name has a prefix and ends with the key.
This is restrictive, since it doesn't allow disambiguating between all
cases. E.g. if we have a model with these attributes:
- model.foo
- model.bar.foo
We cannot currently target just model.foo. (We can already target only
model.bar.foo by passing "bar.foo" as a key to the rank_pattern /
alpha_pattern dict).
This PR makes it possible to pass "^foo" as a key. This way,
model.bar.foo is not targeted, as its name does not start with "foo".
As a general rule, users who intend to have a full match should pass
the full name of the module preceded by a ^. This is the least
ambiguous way.
When running the tests with the old code, all test cases using ^ fail,
which is fine, since ^ was not working anyway. At the same time, all
test cases not using ^ pass, which means the change is backwards
compatible.
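
As a rough sketch of the matching rule described here (the helper `key_matches` and the exact regex form are illustrative assumptions, not PEFT's actual implementation), a key is considered to match a module name when the name itself, or a dot-separated suffix of it, matches the key with `$` appended:

```python
import re

def key_matches(key: str, module_name: str) -> bool:
    # Sketch of the documented behavior: the key matches the module name itself
    # or a dot-separated suffix of it; "$" is appended automatically, and a
    # leading "^" in the key forces the match to start at the full name.
    return re.match(rf"(.*\.)?{key}$", module_name) is not None

print(key_matches("foo", "foo"))       # True
print(key_matches("foo", "bar.foo"))   # True  -- suffix match
print(key_matches("^foo", "foo"))      # True
print(key_matches("^foo", "bar.foo"))  # False -- "^" anchors to the start of the name
```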
docs/source/developer_guides/lora.md: 30 additions & 0 deletions
@@ -239,6 +239,36 @@ Assuming the original model had 5 layers `[0, 1, 2 ,3, 4]`, this would create a

[Fewshot-Metamath-OrcaVicuna-Mistral-10B](https://huggingface.co/abacusai/Fewshot-Metamath-OrcaVicuna-Mistral-10B) is an example of a model trained using this method on Mistral-7B expanded to 10B. The [adapter_config.json](https://huggingface.co/abacusai/Fewshot-Metamath-OrcaVicuna-Mistral-10B/blob/main/adapter_config.json) shows a sample LoRA adapter config applying this method for fine-tuning.
### Fine-grained control over ranks and alpha (scaling)

By default, all layers targeted with LoRA will have the same rank `r` and the same `lora_alpha` (which determines the LoRA scaling), depending on what was specified in the [`LoraConfig`]. In some cases, however, you may want to use different values for different layers. This is possible by passing the `rank_pattern` and `alpha_pattern` arguments to [`LoraConfig`]. These arguments should be dictionaries with the key being the layer name and the value being the rank/alpha value. The keys can be [regular expressions](https://docs.python.org/3/library/re.html) (regex). All LoRA layers that are not explicitly mentioned in `rank_pattern` and `alpha_pattern` will take the default `r` and `lora_alpha` values.

To give an example, let's assume that we have a model with the following structure:

```python
>>> print(model)
Outer(
  (foo): Linear(...)
  (module): Middle(
    (foo): Linear(...)
    (foobar): Linear(...)
    (module): Inner(
      (foo): Linear(...)
      (barfoo): Linear(...)
    )
  )
)
```

- `rank_pattern={"foo": 42}` will match all 3 `foo` layers. Neither `foobar` nor `barfoo` is matched.
- `rank_pattern={"^foo": 42}` will only match the model's `foo` layer, but neither `module.foo` nor `module.module.foo`. This is because `^` means "start of string" in regular expressions, and only `foo` starts with `"foo"`; the other layer names have prefixes.
- `rank_pattern={"^module.foo": 42}` matches only `module.foo`, but not `module.module.foo`, for the same reason.
- `rank_pattern={"module.foo": 42}` matches both `module.foo` and `module.module.foo`, but not `foo`.
- `rank_pattern={"^foo": 42, "^module.module.foo": 55}` matches `foo` and `module.module.foo`, respectively, but not `module.foo`.
- There is no need to append `$` to mark the end of the match, as this is added automatically by PEFT.

The same logic applies to `alpha_pattern`. If you're in doubt, don't try to get fancy with regular expressions -- just pass the full name for each module with a different rank/alpha, preceded by the `^` prefix, and you should be good.
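For illustration, here is a minimal sketch of how this could be wired up for the `Outer` model above; the `target_modules` list and the specific rank/alpha values are arbitrary choices for this example, not prescribed settings:

```python
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    target_modules=["foo", "foobar", "barfoo"],           # layers that get LoRA at all
    r=8,                                                  # default rank
    lora_alpha=16,                                        # default scaling
    rank_pattern={"^foo": 42, "^module.module.foo": 55},  # rank overrides for `foo` and `module.module.foo`
    alpha_pattern={"^module.foo": 32},                    # alpha override for `module.foo` only
)
peft_model = get_peft_model(model, config)  # `model` is the `Outer` instance shown above
```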
## Optimizers

LoRA training can optionally include special purpose optimizers. Currently the only such optimizer is LoRA+.