
Commit b2b34fd

FIX Minimal target module optimization bug w/ IA³ (huggingface#2432)
Fixes huggingface#2429

During PEFT model initialization, we have an optimization/compression step where we check the target_modules attribute and, if it's very long, try to find a minimal subset that targets the same modules. If we find it, we reduce target_modules to that minimal set. This is done mostly to prevent some cases (e.g. in diffusers) that result in hundreds of target_modules being checked against thousands of module names, slowing down initialization.

There is an issue with this when using IA³. There, we additionally have the feedforward_modules attribute, which must be a subset of target_modules. When target_modules is shrunk, the subset check will fail.

This PR fixes this by simply skipping the compression step for IA³. It would be possible to adjust the logic to also shrink feedforward_modules, but it's not quite as straightforward, since the latter may not be identical to target_modules, so there would have to be extra logic to account for that. At the end of the day, this is too much effort for what's pretty much an edge case, so the simple solution is implemented.
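As a side note, here is a minimal sketch (not part of this commit; the module names and the three-layer toy setup are invented for illustration) of the coupling that makes the shrinking unsafe for IA³: IA3Config checks that feedforward_modules is a subset of target_modules, so compressing only target_modules would turn a previously valid config into an invalid one the next time it is instantiated, e.g. when loading a saved adapter with PeftModel.from_pretrained.

from peft import IA3Config

# Fully qualified module names, as a user (or a library like diffusers) might pass them.
full_names = [f"blocks.{i}.query" for i in range(3)]

# Identical lists are fine: the subset check inside IA3Config passes.
config = IA3Config(target_modules=full_names, feedforward_modules=full_names)

# If the optimization compressed target_modules to the minimal suffix set {"query"}
# while feedforward_modules kept the fully qualified names, feedforward_modules would
# no longer be a subset of target_modules, and re-creating the config from the saved
# adapter would fail that check. Skipping the optimization for IA³ avoids this.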
1 parent 7320bb9 commit b2b34fd

2 files changed, +36 -1 lines changed


src/peft/tuners/tuners_utils.py

Lines changed: 5 additions & 1 deletion
@@ -452,9 +452,13 @@ def inject_adapter(
         # quite a lot. See: https://github.com/huggingface/diffusers/issues/9297
         # As there is a small chance for undiscovered bugs, we apply this optimization only if the list of
         # target_modules is sufficiently big.
+        # We also exclude IA³ from this optimization. This is because IA³ has both target_modules and
+        # feedforward_modules, which are coupled (the latter must be a subset). It would be possible to change the logic
+        # to keep both in sync, but it's not quite trivial and probably not worth the effort. See #2429.
         if (
             isinstance(peft_config.target_modules, (list, set))
-            and len(peft_config.target_modules) >= MIN_TARGET_MODULES_FOR_OPTIMIZATION
+            and (len(peft_config.target_modules) >= MIN_TARGET_MODULES_FOR_OPTIMIZATION)
+            and (peft_config.peft_type != PeftType.IA3)
         ):
             names_no_target = [
                 name
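To make the guarded optimization more concrete, the following is a rough standalone illustration (plain Python, not PEFT's actual implementation; the module layout is invented): a long list of fully qualified target names is replaced by a minimal set of suffixes that matches exactly the same modules, and the last two assertions show why doing this for target_modules alone would break IA³'s subset invariant.

def matches(suffix: str, name: str) -> bool:
    # PEFT-style suffix matching: "query" matches "blocks.0.query", "blocks.1.query", ...
    return name == suffix or name.endswith("." + suffix)

# All module names in the model, and the (long) list of targeted ones.
module_names = [f"blocks.{i}.query" for i in range(3)] + [f"blocks.{i}.mlp" for i in range(3)]
target_modules = [f"blocks.{i}.query" for i in range(3)]

# The single suffix "query" matches exactly the targeted modules, so the long list
# can be compressed to {"query"} without changing which modules get adapted.
compressed = {"query"}
assert {n for n in module_names if any(matches(s, n) for s in compressed)} == set(target_modules)

# For IA³, however, feedforward_modules must stay a subset of target_modules:
feedforward_modules = set(target_modules)
assert feedforward_modules <= set(target_modules)   # holds with the original list
assert not (feedforward_modules <= compressed)      # violated after compressing only target_modules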

tests/test_tuners_utils.py

Lines changed: 31 additions & 0 deletions
@@ -37,6 +37,7 @@
     IA3Config,
     LoHaConfig,
     LoraConfig,
+    PeftModel,
     PromptTuningConfig,
     VeraConfig,
     get_layer_status,
@@ -1502,6 +1503,36 @@ def __init__(self):
         # target modules should *not* be simplified to "query" as that would match "single_transformers_blocks" too
         assert model.peft_config["default"].target_modules != {"query"}
 
+    def test_find_minimal_target_modules_does_not_error_with_ia3(self, tmp_path):
+        # See #2429
+        # There is an issue with the compression of the target_modules attribute when using IA³. There, we
+        # additionally have the feedforward_modules attribute, which must be a subset of target_modules. When
+        # target_modules is shrunk, the subset check will fail. This test ensures that this doesn't happen.
+        n_layers = MIN_TARGET_MODULES_FOR_OPTIMIZATION + 1
+
+        class InnerModule(nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.query = nn.Linear(10, 10)
+
+        class OuterModule(nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.blocks = nn.ModuleList([InnerModule() for _ in range(n_layers)])
+
+        target_modules = [f"blocks.{i}.query" for i in range(n_layers)]
+        feedforward_modules = [f"blocks.{i}.query" for i in range(n_layers)]
+        # the subset check happens here
+        config = IA3Config(target_modules=target_modules, feedforward_modules=feedforward_modules)
+        # the optimization step happens here, after the subset check, so at first we're fine, but we will run into an
+        # issue after a save/load roundtrip
+        model = get_peft_model(OuterModule(), config)
+        model.save_pretrained(tmp_path)
+        del model
+
+        # does not raise
+        PeftModel.from_pretrained(OuterModule(), tmp_path)
+
 
 class TestRankAndAlphaPattern:
     @pytest.fixture

0 commit comments
