Skip to content

Commit e591965

Browse files
authored
[BUG] Fixes Bottleneck Configs to work with ln_before = True and init_weights = "mam_adapter" (#761)
Fixes #745. When "mam_adapter" is specified, the code will now look for the `nn.Linear` or `PHMLayer` inside the `self.down_adapter` layer sequence and apply the initialization on the correct layer. Edit: this also removes an extra block of code in the `AdapterPlus` notebook.
1 parent ec4a59e commit e591965

File tree

2 files changed

+4
-21
lines changed

2 files changed

+4
-21
lines changed

notebooks/ViT_AdapterPlus_FineTuning.ipynb

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -302,25 +302,6 @@
302302
")"
303303
]
304304
},
305-
{
306-
"cell_type": "code",
307-
"execution_count": null,
308-
"metadata": {},
309-
"outputs": [],
310-
"source": [
311-
"trainer = AdapterTrainer(\n",
312-
" model=model,\n",
313-
" args=training_args,\n",
314-
" data_collator=data_collator,\n",
315-
" train_dataset=train_dataset,\n",
316-
" eval_dataset=eval_dataset,\n",
317-
" tokenizer=processor,\n",
318-
" compute_metrics = compute_metrics\n",
319-
")\n",
320-
"\n",
321-
"trainer.train()"
322-
]
323-
},
324305
{
325306
"cell_type": "code",
326307
"execution_count": null,

src/adapters/methods/modeling.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,11 @@ def __init__(
123123
self.gate.apply(self.init_bert_weights)
124124
elif config["init_weights"] == "mam_adapter":
125125
with torch.no_grad():
126-
nn.init.kaiming_uniform_(self.adapter_down[0].weight, a=math.sqrt(5))
126+
for layer in self.adapter_down:
127+
if isinstance(layer, nn.Linear) or isinstance(layer, PHMLayer):
128+
nn.init.kaiming_uniform_(layer.weight, a=math.sqrt(5))
129+
nn.init.zeros_(layer.bias)
127130
nn.init.zeros_(self.adapter_up.weight)
128-
nn.init.zeros_(self.adapter_down[0].bias)
129131
nn.init.zeros_(self.adapter_up.bias)
130132
if self.use_gating:
131133
self.gate.apply(self.init_bert_weights)

0 commit comments

Comments (0)