diff --git a/src/peft/tuners/boft/layer.py b/src/peft/tuners/boft/layer.py index 7232f39d17..470ce12312 100644 --- a/src/peft/tuners/boft/layer.py +++ b/src/peft/tuners/boft/layer.py @@ -457,10 +457,10 @@ def cayley_batch(self, data): skew_mat = 0.5 * (data - data.transpose(1, 2)) id_mat = torch.eye(r, device=data.device).unsqueeze(0).expand(b, r, c) - # Perform the Cayley parametrization + # Perform the Cayley parametrization, must be in float32 Q = torch.linalg.solve(id_mat + skew_mat, id_mat - skew_mat, left=False) - return Q + return Q.to(data.dtype) class Linear(nn.Module, BOFTLayer): @@ -586,7 +586,7 @@ def get_delta_weight(self, adapter) -> tuple[torch.Tensor, torch.Tensor]: block_diagonal_butterfly = torch.block_diag(*torch.unbind(orth_rotate_butterfly)) block_diagonal_butterfly = block_diagonal_butterfly.unsqueeze(0) - boft_P = self.boft_P.to(block_diagonal_butterfly.device) + boft_P = self.boft_P.to(block_diagonal_butterfly.device, block_diagonal_butterfly.dtype) butterfly_oft_mat_batch = torch.bmm(block_diagonal_butterfly, boft_P.permute(0, 2, 1)) butterfly_oft_mat_batch = torch.bmm(boft_P, butterfly_oft_mat_batch) butterfly_oft_mat = butterfly_oft_mat_batch[0] @@ -919,7 +919,7 @@ def get_delta_weight(self, adapter) -> tuple[torch.Tensor, torch.Tensor]: block_diagonal_butterfly = torch.block_diag(*torch.unbind(orth_rotate_butterfly)) block_diagonal_butterfly = block_diagonal_butterfly.unsqueeze(0) - boft_P = self.boft_P.to(block_diagonal_butterfly.device) + boft_P = self.boft_P.to(block_diagonal_butterfly.device, block_diagonal_butterfly.dtype) butterfly_oft_mat_batch = torch.bmm(block_diagonal_butterfly, boft_P.permute(0, 2, 1)) butterfly_oft_mat_batch = torch.bmm(boft_P, butterfly_oft_mat_batch) butterfly_oft_mat = butterfly_oft_mat_batch[0] diff --git a/src/peft/tuners/c3a/model.py b/src/peft/tuners/c3a/model.py index 6e71973691..19c9ef763e 100644 --- a/src/peft/tuners/c3a/model.py +++ b/src/peft/tuners/c3a/model.py @@ -93,5 +93,9 @@ def _create_new_module(c3a_config, adapter_name, target, **kwargs): if isinstance(target_base_layer, torch.nn.Linear): new_module = C3ALinear(target, adapter_name, **kwargs) + else: + raise ValueError( + f"Target module {target} is not supported. Currently, only `torch.nn.Linear` is supported." + ) return new_module diff --git a/src/peft/tuners/fourierft/layer.py b/src/peft/tuners/fourierft/layer.py index a03a57f118..f95a414db9 100644 --- a/src/peft/tuners/fourierft/layer.py +++ b/src/peft/tuners/fourierft/layer.py @@ -21,6 +21,7 @@ from transformers.pytorch_utils import Conv1D from peft.tuners.tuners_utils import BaseTunerLayer, check_adapters_to_merge +from peft.utils.other import transpose class FourierFTLayer(BaseTunerLayer): @@ -139,7 +140,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N # Note that safe_merge will be slower than the normal merge # because of the copy operation. 
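The BOFT hunks above keep the batched Cayley solve in float32 and cast the result back to the adapter dtype at the end. A minimal self-contained sketch of that pattern (the upcast of `data` inside the helper is added here for illustration; in the layer itself the input is already float32):

import torch

def cayley_batch_fp32(data: torch.Tensor) -> torch.Tensor:
    # data: a batch of (b, r, r) square blocks. The linear solve is numerically
    # fragile in float16/bfloat16, so it is done in float32 and the result is
    # cast back to the caller's dtype, mirroring the hunk above.
    b, r, c = data.shape
    data32 = data.to(torch.float32)
    skew_mat = 0.5 * (data32 - data32.transpose(1, 2))
    id_mat = torch.eye(r, device=data.device, dtype=torch.float32).unsqueeze(0).expand(b, r, c)
    # Cayley transform of the skew-symmetric part, computed with a batched solve
    # instead of an explicit matrix inverse
    Q = torch.linalg.solve(id_mat + skew_mat, id_mat - skew_mat, left=False)
    return Q.to(data.dtype)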
orig_weights = base_layer.weight.data.clone() - orig_weights += self.get_delta_weight(active_adapter) + orig_weights += transpose(self.get_delta_weight(active_adapter), self.fan_in_fan_out) if not torch.isfinite(orig_weights).all(): raise ValueError( @@ -148,7 +149,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N base_layer.weight.data = orig_weights else: - base_layer.weight.data += self.get_delta_weight(active_adapter) + base_layer.weight.data += transpose(self.get_delta_weight(active_adapter), self.fan_in_fan_out) self.merged_adapters.append(active_adapter) def unmerge(self) -> None: @@ -161,10 +162,9 @@ def unmerge(self) -> None: while len(self.merged_adapters) > 0: active_adapter = self.merged_adapters.pop() if active_adapter in self.fourierft_spectrum.keys(): - self.get_base_layer().weight.data -= self.get_delta_weight(active_adapter) - - def get_delta_weight(self, adapter) -> torch.Tensor: - return super().get_delta_weight(adapter) + self.get_base_layer().weight.data -= transpose( + self.get_delta_weight(active_adapter), self.fan_in_fan_out + ) def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: previous_dtype = x.dtype diff --git a/src/peft/tuners/ln_tuning/layer.py b/src/peft/tuners/ln_tuning/layer.py index e29149f2cb..4000e992a7 100644 --- a/src/peft/tuners/ln_tuning/layer.py +++ b/src/peft/tuners/ln_tuning/layer.py @@ -105,7 +105,7 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor: if self.merged: self.unmerge() result = self.base_layer(x, *args, **kwargs) - elif self.merged: + elif self.merged or (len(self.active_adapters) == 0): result = self.base_layer(x, *args, **kwargs) else: if len(self.active_adapters) != 1: diff --git a/src/peft/tuners/waveft/layer.py b/src/peft/tuners/waveft/layer.py index a17f3ffba3..c5030e4a16 100644 --- a/src/peft/tuners/waveft/layer.py +++ b/src/peft/tuners/waveft/layer.py @@ -21,6 +21,7 @@ from transformers.pytorch_utils import Conv1D from peft.tuners.tuners_utils import BaseTunerLayer, check_adapters_to_merge +from peft.utils.other import transpose from .constants import WAVELET_REDUCTIONS from .waverec2d import waverec2d @@ -237,7 +238,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N # Note that safe_merge will be slower than the normal merge # because of the copy operation. 
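The FourierFT merge/unmerge changes above (and the matching WaveFT hunks below) route the delta through `transpose(..., self.fan_in_fan_out)` so that Conv1D-backed layers (GPT-2 style) merge with the right weight orientation. A hedged sketch of the idea; `merge_delta` is illustrative only, not part of the PR:

import torch

def transpose(weight: torch.Tensor, fan_in_fan_out: bool) -> torch.Tensor:
    # Rough stand-in for peft.utils.other.transpose: Conv1D layers store their
    # weight as (in_features, out_features), so the delta has to be flipped
    # before it is added to or subtracted from base_layer.weight.
    return weight.T if fan_in_fan_out else weight

def merge_delta(base_weight: torch.Tensor, delta: torch.Tensor, fan_in_fan_out: bool) -> torch.Tensor:
    # merge adds the (possibly transposed) delta; unmerge subtracts the same quantity
    return base_weight + transpose(delta, fan_in_fan_out)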
orig_weights = base_layer.weight.data.clone() - orig_weights += self.get_delta_weight(active_adapter) + orig_weights += transpose(self.get_delta_weight(active_adapter), self.fan_in_fan_out) if not torch.isfinite(orig_weights).all(): raise ValueError( @@ -246,7 +247,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = N base_layer.weight.data = orig_weights else: - base_layer.weight.data += self.get_delta_weight(active_adapter) + base_layer.weight.data += transpose(self.get_delta_weight(active_adapter), self.fan_in_fan_out) self.merged_adapters.append(active_adapter) def unmerge(self) -> None: @@ -259,10 +260,9 @@ def unmerge(self) -> None: while len(self.merged_adapters) > 0: active_adapter = self.merged_adapters.pop() if active_adapter in self.waveft_spectrum.keys(): - self.get_base_layer().weight.data -= self.get_delta_weight(active_adapter) - - def get_delta_weight(self, adapter) -> torch.Tensor: - return super().get_delta_weight(adapter) + self.get_base_layer().weight.data -= transpose( + self.get_delta_weight(active_adapter), self.fan_in_fan_out + ) def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: previous_dtype = x.dtype diff --git a/tests/test_decoder_models.py b/tests/test_decoder_models.py index 06402d637b..1b1d3631bb 100644 --- a/tests/test_decoder_models.py +++ b/tests/test_decoder_models.py @@ -327,10 +327,6 @@ def _skip_alora_no_activation(config_cls, config_kwargs): class TestDecoderModels(PeftCommonTester): transformers_class = AutoModelForCausalLM - def skipTest(self, reason=""): - # for backwards compatibility with unittest style test classes - pytest.skip(reason) - def prepare_inputs_for_testing(self): input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device) attention_mask = torch.tensor([[1, 1, 1], [1, 0, 1]]).to(self.torch_device) diff --git a/tests/test_encoder_decoder_models.py b/tests/test_encoder_decoder_models.py index 1ec0aa0668..038ca22698 100644 --- a/tests/test_encoder_decoder_models.py +++ b/tests/test_encoder_decoder_models.py @@ -228,10 +228,6 @@ class TestEncoderDecoderModels(PeftCommonTester): transformers_class = AutoModelForSeq2SeqLM - def skipTest(self, reason=""): - # for backwards compatibility with unittest style test classes - pytest.skip(reason) - def prepare_inputs_for_testing(self): input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device) decoder_input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device) diff --git a/tests/test_feature_extraction_models.py b/tests/test_feature_extraction_models.py index a5377827f4..c6054782ba 100644 --- a/tests/test_feature_extraction_models.py +++ b/tests/test_feature_extraction_models.py @@ -258,10 +258,6 @@ class TestPeftFeatureExtractionModel(PeftCommonTester): transformers_class = AutoModel - def skipTest(self, reason=""): - # for backwards compatibility with unittest style test classes - pytest.skip(reason) - def prepare_inputs_for_testing(self): input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device) attention_mask = torch.tensor([[1, 1, 1], [1, 0, 1]]).to(self.torch_device) diff --git a/tests/test_gpu_examples.py b/tests/test_gpu_examples.py index 909f456aa7..00b9d05ceb 100644 --- a/tests/test_gpu_examples.py +++ b/tests/test_gpu_examples.py @@ -4558,7 +4558,7 @@ def _test_model(self, model, precision): input_ids = torch.randint(0, 1000, (2, 10)).to(self.device) if precision == torch.bfloat16: if not is_bf16_available(): - self.skipTest("Bfloat16 not supported on this device") + 
pytest.skip("Bfloat16 not supported on this device") # Forward pass with test precision with torch.autocast(enabled=True, dtype=precision, device_type=self.device): diff --git a/tests/test_mixed.py b/tests/test_mixed.py index 7ec18387c8..d7b663c182 100644 --- a/tests/test_mixed.py +++ b/tests/test_mixed.py @@ -526,7 +526,7 @@ def test_target_first_layer_same_type(self, config0, config1): def test_deeply_nested(self): # a somewhat absurdly nested model using different adapter types if platform.system() == "Linux": - self.skipTest("This test fails but only on GitHub CI with Linux systems.") + pytest.skip("This test fails but only on GitHub CI with Linux systems.") atol = 1e-5 rtol = 1e-5 diff --git a/tests/test_seq_classifier.py b/tests/test_seq_classifier.py index 03869c3a7a..b6a0ff28e6 100644 --- a/tests/test_seq_classifier.py +++ b/tests/test_seq_classifier.py @@ -234,10 +234,6 @@ class TestSequenceClassificationModels(PeftCommonTester): transformers_class = AutoModelForSequenceClassification - def skipTest(self, reason=""): - # for backwards compatibility with unittest style test classes - pytest.skip(reason) - def prepare_inputs_for_testing(self): input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device) attention_mask = torch.tensor([[1, 1, 1], [1, 0, 1]]).to(self.torch_device) diff --git a/tests/test_target_parameters.py b/tests/test_target_parameters.py index adffbce0d5..64297daf3c 100644 --- a/tests/test_target_parameters.py +++ b/tests/test_target_parameters.py @@ -169,10 +169,6 @@ class TestDecoderModelsTargetParameters(PeftCommonTester): # generally, nothing is broken. transformers_class = MyAutoModelForCausalLM - def skipTest(self, reason=""): - # for backwards compatibility with unittest style test classes - pytest.skip(reason) - def prepare_inputs_for_testing(self): input_ids = torch.tensor([[1, 1, 1], [1, 2, 1]]).to(self.torch_device) attention_mask = torch.tensor([[1, 1, 1], [1, 0, 1]]).to(self.torch_device) diff --git a/tests/testing_common.py b/tests/testing_common.py index dab9ee6e45..75594f15f6 100644 --- a/tests/testing_common.py +++ b/tests/testing_common.py @@ -33,27 +33,19 @@ from peft import ( AdaLoraConfig, BOFTConfig, - BoneConfig, CPTConfig, - DeloraConfig, - FourierFTConfig, - HRAConfig, IA3Config, LNTuningConfig, LoHaConfig, LoKrConfig, LoraConfig, - MissConfig, - OFTConfig, PeftModel, - PeftType, PrefixTuningConfig, PromptEncoderConfig, PromptLearningConfig, PromptTuningConfig, - RandLoraConfig, + RoadConfig, VBLoRAConfig, - VeraConfig, get_peft_model, get_peft_model_state_dict, inject_adapter_in_model, @@ -73,131 +65,29 @@ from .testing_utils import get_state_dict, hub_online_once -CONFIG_TESTING_KWARGS = ( - # IA³ - { - "target_modules": None, - "feedforward_modules": None, - }, - # LoRA - { - "r": 8, - "lora_alpha": 32, - "target_modules": None, - "lora_dropout": 0.05, - "bias": "none", - }, - # prefix tuning - { - "num_virtual_tokens": 10, - }, - # prompt encoder - { - "num_virtual_tokens": 10, - "encoder_hidden_size": 32, - }, - # prompt tuning - { - "num_virtual_tokens": 10, - }, - # AdaLoRA - { - "target_modules": None, - "total_step": 1, - }, - # BOFT - { - "target_modules": None, - }, - # VeRA - { - "r": 8, - "target_modules": None, - "vera_dropout": 0.05, - "projection_prng_key": 0xFF, - "d_initial": 0.1, - "save_projection": True, - "bias": "none", - }, - # FourierFT - { - "n_frequency": 10, - "target_modules": None, - }, - # HRA - { - "target_modules": None, - }, - # VBLoRA - {"target_modules": None, "vblora_dropout": 0.05, 
"vector_length": 1, "num_vectors": 2}, - # OFT - { - "target_modules": None, - }, - # Bone - { - "target_modules": None, - "r": 2, - }, - # MiSS - { - "target_modules": None, - "r": 2, - }, - # LoRA + trainable_tokens - { - "r": 8, - "lora_alpha": 32, - "target_modules": None, - "lora_dropout": 0.05, - "bias": "none", - "trainable_token_indices": [0, 1, 3], - }, - # RandLoRA - { - "r": 32, - "randlora_alpha": 64, - "target_modules": None, - "randlora_dropout": 0.05, - "projection_prng_key": 0xFF, - "save_projection": True, - "bias": "none", - }, - # CPT tuninig - { - "cpt_token_ids": [0, 1, 2, 3, 4, 5, 6, 7], # Example token IDs for testing - "cpt_mask": [1, 1, 1, 1, 1, 1, 1, 1], - "cpt_tokens_type_mask": [1, 2, 2, 2, 3, 3, 4, 4], - }, - # DeLoRA - { - "r": 8, - "target_modules": None, - "bias": "none", - }, -) +def _skip_if_merging_not_supported(config_cls, config_kwargs): + if issubclass(config_cls, PromptLearningConfig): + pytest.skip("Prompt learning does not support merging, skipping this test.") + if config_kwargs.get("alora_invocation_tokens") is not None: + pytest.skip("Test not applicable for Activated LoRA") + + +def _skip_if_adding_weighted_adapters_not_supported(config): + if not isinstance(config, (IA3Config, LoraConfig)): + pytest.skip("This PEFT method does not support adding weighted adapters, skipping this test.") + + +def _skip_if_deleting_adapter_not_supported(config_cls, config_kwargs): + if issubclass(config_cls, PromptLearningConfig): + pytest.skip("Prompt learning does not support deletion of adapters, skipping this test.") + -CLASSES_MAPPING = { - "ia3": (IA3Config, CONFIG_TESTING_KWARGS[0]), - "lora": (LoraConfig, CONFIG_TESTING_KWARGS[1]), - "prefix_tuning": (PrefixTuningConfig, CONFIG_TESTING_KWARGS[2]), - "prompt_encoder": (PromptEncoderConfig, CONFIG_TESTING_KWARGS[3]), - "prompt_tuning": (PromptTuningConfig, CONFIG_TESTING_KWARGS[4]), - "adalora": (AdaLoraConfig, CONFIG_TESTING_KWARGS[5]), - "boft": (BOFTConfig, CONFIG_TESTING_KWARGS[6]), - "vera": (VeraConfig, CONFIG_TESTING_KWARGS[7]), - "fourierft": (FourierFTConfig, CONFIG_TESTING_KWARGS[8]), - "hra": (HRAConfig, CONFIG_TESTING_KWARGS[9]), - "vblora": (VBLoRAConfig, CONFIG_TESTING_KWARGS[10]), - "oft": (OFTConfig, CONFIG_TESTING_KWARGS[11]), - "bone": (BoneConfig, CONFIG_TESTING_KWARGS[12]), - "miss": (MissConfig, CONFIG_TESTING_KWARGS[12]), - "lora+trainable_tokens": (LoraConfig, CONFIG_TESTING_KWARGS[13]), - "randlora": (RandLoraConfig, CONFIG_TESTING_KWARGS[14]), - "delora": (DeloraConfig, CONFIG_TESTING_KWARGS[17]), -} - -DECODER_MODELS_EXTRA = {"cpt": (CPTConfig, CONFIG_TESTING_KWARGS[15])} +def _skip_if_conv1d_not_supported(model_id, config_cls, config_kwargs): + if "gpt2" not in model_id.lower(): + return + + if config_cls not in (IA3Config, LoHaConfig, LoKrConfig, LoraConfig): + pytest.skip("This PEFT method does not support Conv1D layers, skipping this test.") class PeftCommonTester: @@ -305,7 +195,7 @@ def _test_adapter_name(self, model_id, config_cls, config_kwargs): def _test_prepare_for_training(self, model_id, config_cls, config_kwargs): if config_kwargs.get("trainable_token_indices", None) is not None: # incompatible because trainable tokens is marking embeddings as trainable - self.skipTest("Trainable tokens is incompatible with this test.") + pytest.skip("Trainable tokens is incompatible with this test.") # some tests require specific tokenizers, make sure that they can be fetched as well with hub_online_once(model_id + config_kwargs.get("tokenizer_name_or_path", "")): @@ -460,7 +350,7 @@ 
def _test_save_pretrained(self, model_id, config_cls, config_kwargs, safe_serial def _test_save_pretrained_selected_adapters(self, model_id, config_cls, config_kwargs, safe_serialization=True): if issubclass(config_cls, AdaLoraConfig): # AdaLora does not support adding more than 1 adapter - return pytest.skip(f"Test not applicable for {config_cls}") + pytest.skip(f"Test not applicable for {config_cls}") # ensure that the weights are randomly initialized if issubclass(config_cls, LoraConfig): @@ -595,20 +485,10 @@ def _test_load_multiple_adapters(self, model_id, config_cls, config_kwargs): assert load_result2.missing_keys == [] def _test_merge_layers_fp16(self, model_id, config_cls, config_kwargs): - if ( - config_cls not in (LoraConfig, IA3Config, AdaLoraConfig, LoHaConfig, LoKrConfig, VBLoRAConfig) - or config_kwargs.get("alora_invocation_tokens") is not None - ): - # Merge layers only supported for LoRA and IA³, and not for Activated LoRA (aLoRA) - if config_kwargs.get("alora_invocation_tokens") is None: - return pytest.skip(f"Test not applicable for {config_cls}") - else: - return pytest.skip("Test not applicable for Activated LoRA") - if ("gpt2" in model_id.lower()) and (config_cls != LoraConfig): - self.skipTest("Merging GPT2 adapters not supported for IA³ (yet)") - + _skip_if_merging_not_supported(config_cls, config_kwargs) + _skip_if_conv1d_not_supported(model_id, config_cls, config_kwargs) if (self.torch_device in ["cpu"]) and (version.parse(torch.__version__) <= version.parse("2.1")): - self.skipTest("PyTorch 2.1 not supported for Half of addmm_impl_cpu_ ") + pytest.skip("PyTorch 2.1 not supported for Half of addmm_impl_cpu_ ") with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id, dtype=torch.float16) @@ -625,27 +505,8 @@ def _test_merge_layers_fp16(self, model_id, config_cls, config_kwargs): _ = model.merge_and_unload() def _test_merge_layers_nan(self, model_id, config_cls, config_kwargs): - if ( - config_cls - not in ( - LoraConfig, - IA3Config, - AdaLoraConfig, - LoHaConfig, - LoKrConfig, - VeraConfig, - FourierFTConfig, - ) - or config_kwargs.get("alora_invocation_tokens") is not None - ): - # Merge layers only supported for LoRA and IA³, and not for Activated LoRA (aLoRA) - return - if ("gpt2" in model_id.lower()) and (config_cls != LoraConfig): - self.skipTest("Merging GPT2 adapters not supported for IA³ (yet)") - - if "gemma" in model_id.lower(): - # TODO: could be related to tied weights - self.skipTest("Merging currently fails with gemma") + _skip_if_merging_not_supported(config_cls, config_kwargs) + _skip_if_conv1d_not_supported(model_id, config_cls, config_kwargs) with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) @@ -680,14 +541,7 @@ def _test_merge_layers_nan(self, model_id, config_cls, config_kwargs): model = model.to(self.torch_device) for name, module in model.named_parameters(): - if ( - "lora_A" in name - or "ia3" in name - or "lora_E" in name - or "lora_B" in name - or "vera_lambda" in name - or "fourierft_spectrum" in name - ): + if model.prefix in name: module.data[0] = torch.nan with pytest.raises( @@ -712,21 +566,8 @@ def _test_merge_layers_nan(self, model_id, config_cls, config_kwargs): model = model.merge_and_unload(safe_merge=True) def _test_merge_layers(self, model_id, config_cls, config_kwargs): - if issubclass(config_cls, PromptLearningConfig): - return pytest.skip(f"Test not applicable for {config_cls}") - - if issubclass(config_cls, (OFTConfig, BOFTConfig)): - return 
pytest.skip(f"Test not applicable for {config_cls}") - - if config_kwargs.get("alora_invocation_tokens") is not None: - return pytest.skip("Merging not applicable to aLoRA") - - if ("gpt2" in model_id.lower()) and (config_cls != LoraConfig): - self.skipTest("Merging GPT2 adapters not supported for IA³ (yet)") - - if "gemma" in model_id.lower(): - # TODO: could be related to tied weights - self.skipTest("Merging currently fails with gemma") + _skip_if_merging_not_supported(config_cls, config_kwargs) + _skip_if_conv1d_not_supported(model_id, config_cls, config_kwargs) with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) @@ -756,15 +597,18 @@ def _test_merge_layers(self, model_id, config_cls, config_kwargs): logits_merged_unloaded = model(**dummy_input)[0] conv_ids = ["Conv2d", "Conv3d", "Conv2d2"] + is_decoder = getattr(getattr(model, "config", None), "is_decoder", False) atol, rtol = 1e-4, 1e-4 if self.torch_device in ["mlu"]: atol, rtol = 1e-3, 1e-3 # MLU - if config.peft_type == "ADALORA": - # AdaLoRA is a bit flaky on CI, but this cannot be reproduced locally + if config.peft_type in ("ADALORA", "OFT"): + # these methods require a bit higher tolerance atol, rtol = 1e-2, 1e-2 - if (config.peft_type in {"IA3", "LORA"}) and (model_id in conv_ids): + if (config.peft_type in {"IA3", "LORA", "OFT"}) and (model_id in conv_ids): # for some reason, the Conv introduces a larger error atol, rtol = 0.3, 0.01 + if (config.peft_type == "OFT") and not is_decoder: + atol, rtol = 0.3, 0.01 if model_id == "trl-internal-testing/tiny-Llama4ForCausalLM": # also getting larger errors here, not exactly sure why atol, rtol = 0.3, 0.01 @@ -800,23 +644,12 @@ def _test_merge_layers(self, model_id, config_cls, config_kwargs): assert torch.allclose(logits_merged, logits_merged_from_pretrained, atol=atol, rtol=rtol) def _test_merge_layers_multi(self, model_id, config_cls, config_kwargs): - supported_peft_types = [ - PeftType.LORA, - PeftType.LOHA, - PeftType.LOKR, - PeftType.IA3, - PeftType.OFT, - PeftType.BOFT, - PeftType.HRA, - PeftType.BONE, - PeftType.MISS, - ] - - if ("gpt2" in model_id.lower()) and (config_cls == IA3Config): - self.skipTest("Merging GPT2 adapters not supported for IA³ (yet)") - + _skip_if_merging_not_supported(config_cls, config_kwargs) + if issubclass(config_cls, AdaLoraConfig): + # AdaLora does not support adding more than 1 adapter + pytest.skip("AdaLoRA does not support multiple adapters, skipping this test.") if config_kwargs.get("trainable_token_indices", None) is not None: - self.skipTest( + pytest.skip( "Merging two adapters with trainable tokens is tested elsewhere since adapters with " "the same token indices cannot be merged." 
) @@ -825,12 +658,14 @@ def _test_merge_layers_multi(self, model_id, config_cls, config_kwargs): base_model_name_or_path=model_id, **config_kwargs, ) - - if config.peft_type not in supported_peft_types or config_kwargs.get("alora_invocation_tokens") is not None: - return + if config_cls == VBLoRAConfig: + # for VBLoRA, increase this value or else the two adapters are too similar + config.init_logits_std *= 100 + config.init_vector_bank_bound *= 100 with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) + torch.manual_seed(0) model = get_peft_model(model, config) model = model.to(self.torch_device) @@ -840,6 +675,7 @@ def _test_merge_layers_multi(self, model_id, config_cls, config_kwargs): with torch.inference_mode(): logits_adapter_1 = model(**dummy_input)[0] + torch.manual_seed(1) model.add_adapter("adapter-2", config) model.set_adapter("adapter-2") model.eval() @@ -888,9 +724,9 @@ def _test_merge_layers_multi(self, model_id, config_cls, config_kwargs): assert torch.allclose(logits_merged_adapter_default, logits_adapter_1, atol=1e-3, rtol=1e-3) def _test_merge_layers_is_idempotent(self, model_id, config_cls, config_kwargs): - if config_kwargs.get("alora_invocation_tokens") is not None: - # Merging not supported for Activated LoRA (aLoRA) - return pytest.skip("Test not applicable for Activated LoRA (aLoRA)") + _skip_if_merging_not_supported(config_cls, config_kwargs) + _skip_if_conv1d_not_supported(model_id, config_cls, config_kwargs) + with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) config = config_cls( @@ -913,9 +749,8 @@ def _test_merge_layers_is_idempotent(self, model_id, config_cls, config_kwargs): assert torch.allclose(logits_0, logits_1, atol=1e-6, rtol=1e-6) def _test_safe_merge(self, model_id, config_cls, config_kwargs): - if config_kwargs.get("alora_invocation_tokens") is not None: - # Merging not supported for Activated LoRA (aLoRA) - return pytest.skip("Test not applicable for Activated LoRA (aLoRA)") + _skip_if_merging_not_supported(config_cls, config_kwargs) + torch.manual_seed(0) with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) @@ -960,8 +795,8 @@ def _test_safe_merge(self, model_id, config_cls, config_kwargs): def _test_mixed_adapter_batches(self, model_id, config_cls, config_kwargs): # Test for mixing different adapters in a single batch by passing the adapter_names argument - if config_cls not in (LoraConfig,): - return pytest.skip(f"Mixed adapter batches not supported for {config_cls}") + if config_cls not in (LoraConfig, RoadConfig): + pytest.skip(f"Mixed adapter batches not supported for {config_cls}") config = config_cls( base_model_name_or_path=model_id, @@ -1025,13 +860,14 @@ def _test_generate_with_mixed_adapter_batches_and_beam_search(self, model_id, co # Test generating with beam search and with mixing different adapters in a single batch by passing the # adapter_names argument. See #2283. 
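`_test_mixed_adapter_batches` (and the beam-search variant below) exercise the `adapter_names` forward argument, which routes each sample of a batch through a different adapter, or through the base model via `"__base__"`. A usage sketch; the model id and adapter names are placeholders:

import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2")  # placeholder model id
model = get_peft_model(base, LoraConfig(target_modules=["c_attn"]), adapter_name="adapter0")
model.add_adapter("adapter1", LoraConfig(target_modules=["c_attn"]))

input_ids = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# one adapter name per sample; "__base__" routes that row through the base model only
adapter_names = ["__base__", "adapter0", "adapter1"]
with torch.inference_mode():
    output = model(input_ids=input_ids, adapter_names=adapter_names)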
if config_cls not in (LoraConfig,): - return pytest.skip(f"Mixed adapter batches not supported for {config_cls}") + # note: RoAD supports mixed adapter batches but not beam search + pytest.skip(f"Mixed adapter batches not supported for {config_cls}") if config_kwargs.get("alora_invocation_tokens") is not None: - return pytest.skip("Beam search not yet supported for aLoRA") # beam search not yet fully supported + pytest.skip("Beam search not yet supported for aLoRA") # beam search not yet fully supported if config_kwargs.get("trainable_token_indices", None) is not None: # for some configurations this test will fail since the adapter values don't differ. # this is probably a problem with the test setup and not with the implementation. - return pytest.skip("Trainable token indices is not supported here (yet).") + pytest.skip("Trainable token indices is not supported here (yet).") config = config_cls( base_model_name_or_path=model_id, @@ -1143,11 +979,7 @@ def _test_generate_pos_args(self, model_id, config_cls, config_kwargs, raises_er _ = model.generate(inputs["input_ids"]) def _test_generate_half_prec(self, model_id, config_cls, config_kwargs): - if config_cls not in (IA3Config, LoraConfig, PrefixTuningConfig): - return pytest.skip(f"Test not applicable for {config_cls}") - - if self.torch_device == "mps": # BFloat16 is not supported on MPS - return pytest.skip("BFloat16 is not supported on MPS") + _skip_if_conv1d_not_supported(model_id, config_cls, config_kwargs) with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16) @@ -1166,7 +998,7 @@ def _test_generate_half_prec(self, model_id, config_cls, config_kwargs): def _test_prefix_tuning_half_prec_conversion(self, model_id, config_cls, config_kwargs): if config_cls not in (PrefixTuningConfig,): - return pytest.skip(f"Test not applicable for {config_cls}") + pytest.skip(f"Test not applicable for {config_cls}") config = config_cls( base_model_name_or_path=model_id, @@ -1182,10 +1014,10 @@ def _test_prefix_tuning_half_prec_conversion(self, model_id, config_cls, config_ def _test_training(self, model_id, config_cls, config_kwargs): if issubclass(config_cls, PromptLearningConfig): - return pytest.skip(f"Test not applicable for {config_cls}") + pytest.skip("Prompt learning does not support merging, skipping this test.") if (config_cls == AdaLoraConfig) and ("roberta" in model_id.lower()): # TODO: no gradients on the "dense" layer, other layers work, not sure why - self.skipTest("AdaLora with RoBERTa does not work correctly") + pytest.skip("AdaLora with RoBERTa does not work correctly") with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) @@ -1202,18 +1034,20 @@ def _test_training(self, model_id, config_cls, config_kwargs): output = model(**inputs)[0] loss = output.sum() loss.backward() - parameter_prefix = model.prefix - for n, param in model.named_parameters(): - if (parameter_prefix in n) or ("modules_to_save" in n) or ("token_adapter.trainable_tokens" in n): - assert param.grad is not None - else: - assert param.grad is None + + parameter_prefix = getattr(model, "prefix", None) + if parameter_prefix is not None: # can only check PEFT methods that allow to identify PEFT params + for n, param in model.named_parameters(): + if (parameter_prefix in n) or ("modules_to_save" in n) or ("token_adapter.trainable_tokens" in n): + assert param.grad is not None + else: + assert param.grad is None def _test_inference_safetensors(self, model_id, config_cls, 
config_kwargs): if (config_cls == PrefixTuningConfig) and ("deberta" in model_id.lower()): # TODO: raises an error: # TypeError: DebertaModel.forward() got an unexpected keyword argument 'past_key_values' - self.skipTest("DeBERTa with PrefixTuning does not work correctly") + pytest.skip("DeBERTa with PrefixTuning does not work correctly") config = config_cls( base_model_name_or_path=model_id, @@ -1251,14 +1085,16 @@ def _test_inference_safetensors(self, model_id, config_cls, config_kwargs): assert torch.allclose(logits, logits_from_pretrained, atol=1e-4, rtol=1e-4) def _test_training_layer_indexing(self, model_id, config_cls, config_kwargs): - if config_cls not in (LoraConfig,): - return pytest.skip(f"Test not applicable for {config_cls}") + try: + config = config_cls( + base_model_name_or_path=model_id, + layers_to_transform=[0], + **config_kwargs, + ) + except TypeError: + pytest.skip("This PEFT method does not support layers_to_transform, skipping it.") + _skip_if_conv1d_not_supported(model_id, config_cls, config_kwargs) - config = config_cls( - base_model_name_or_path=model_id, - layers_to_transform=[0], - **config_kwargs, - ) with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) model = get_peft_model(model, config) @@ -1292,7 +1128,11 @@ def _test_training_layer_indexing(self, model_id, config_cls, config_kwargs): ) logits_from_pretrained = model_from_pretrained(**inputs)[0][0] - assert torch.allclose(logits, logits_from_pretrained, atol=1e-4, rtol=1e-4) + if config_cls == VBLoRAConfig: + atol, rtol = 1e-3, 1e-3 + else: + atol, rtol = 1e-4, 1e-4 + assert torch.allclose(logits, logits_from_pretrained, atol=atol, rtol=rtol) # check the nb of trainable params again but without layers_to_transform model = self.transformers_class.from_pretrained(model_id) @@ -1317,21 +1157,16 @@ def _test_training_layer_indexing(self, model_id, config_cls, config_kwargs): def _test_training_gradient_checkpointing(self, model_id, config_cls, config_kwargs): if config_cls == PrefixTuningConfig: - return pytest.skip(f"Test not applicable for {config_cls}") - + pytest.skip("Prefix Tuning does not support gradient checkpointing, skipping this test.") if (config_cls == AdaLoraConfig) and ("roberta" in model_id.lower()): # TODO: no gradients on the "dense" layer, other layers work, not sure why - self.skipTest("AdaLora with RoBERTa does not work correctly") - - if (config_cls == OFTConfig) and ("deberta" in model_id.lower()): - # TODO: no gradients on the "dense" layer, other layers work, not sure why - self.skipTest("OFT with Deberta does not work correctly") + pytest.skip("AdaLora with RoBERTa does not work correctly") with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) if not getattr(model, "supports_gradient_checkpointing", False): - return pytest.skip(f"Model {model_id} does not support gradient checkpointing") + pytest.skip(f"Model {model_id} does not support gradient checkpointing") model.gradient_checkpointing_enable() @@ -1366,9 +1201,7 @@ def _test_training_gradient_checkpointing(self, model_id, config_cls, config_kwa assert param.grad is None def _test_peft_model_device_map(self, model_id, config_cls, config_kwargs): - if config_cls not in (LoraConfig, VBLoRAConfig): - return pytest.skip(f"Test not applicable for {config_cls}") - + _skip_if_conv1d_not_supported(model_id, config_cls, config_kwargs) config = config_cls( base_model_name_or_path=model_id, **config_kwargs, @@ -1390,7 +1223,7 @@ def _test_peft_model_device_map(self, 
model_id, config_cls, config_kwargs): def _test_training_prompt_learning_tasks(self, model_id, config_cls, config_kwargs): if not issubclass(config_cls, PromptLearningConfig): - return pytest.skip(f"Test not applicable for {config_cls}") + pytest.skip(f"Test not applicable for {config_cls}") with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) @@ -1421,28 +1254,14 @@ def _test_training_prompt_learning_tasks(self, model_id, config_cls, config_kwar assert param.grad is not None def _test_delete_adapter(self, model_id, config_cls, config_kwargs): - supported_peft_types = [ - PeftType.LORA, - PeftType.LOHA, - PeftType.LOKR, - PeftType.IA3, - PeftType.OFT, - PeftType.BOFT, - PeftType.VERA, - PeftType.FOURIERFT, - PeftType.HRA, - PeftType.VBLORA, - PeftType.BONE, - PeftType.MISS, - ] - # IA3 does not support deleting adapters yet, but it just needs to be added - # AdaLora does not support multiple adapters + if config_cls == AdaLoraConfig: + pytest.skip("AdaLoRA does not support multiple adapters") + _skip_if_deleting_adapter_not_supported(config_cls, config_kwargs) + config = config_cls( base_model_name_or_path=model_id, **config_kwargs, ) - if config.peft_type not in supported_peft_types: - return pytest.skip(f"Test not applicable for {config.peft_type}") with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) @@ -1495,28 +1314,14 @@ def _test_delete_adapter(self, model_id, config_cls, config_kwargs): model.base_model(**input) # should not raise an error def _test_delete_inactive_adapter(self, model_id, config_cls, config_kwargs): - # same as test_delete_adapter, but this time an inactive adapter is deleted - supported_peft_types = [ - PeftType.LORA, - PeftType.LOHA, - PeftType.LOKR, - PeftType.IA3, - PeftType.OFT, - PeftType.BOFT, - PeftType.FOURIERFT, - PeftType.HRA, - PeftType.VBLORA, - PeftType.BONE, - PeftType.MISS, - ] - # IA3 does not support deleting adapters yet, but it just needs to be added - # AdaLora does not support multiple adapters + if config_cls == AdaLoraConfig: + pytest.skip("AdaLoRA does not support multiple adapters") + _skip_if_deleting_adapter_not_supported(config_cls, config_kwargs) + config = config_cls( base_model_name_or_path=model_id, **config_kwargs, ) - if config.peft_type not in supported_peft_types: - return pytest.skip(f"Test not applicable for {config.peft_type}") with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) @@ -1617,6 +1422,7 @@ def _test_unload_adapter(self, model_id, config_cls, config_kwargs): assert num_params_base == num_params_unloaded def _test_weighted_combination_of_adapters_lora(self, model, config, adapter_list, weight_list): + _skip_if_adding_weighted_adapters_not_supported(config) model.add_adapter(adapter_list[1], config) model.add_adapter(adapter_list[2], replace(config, r=20)) model = model.to(self.torch_device) @@ -1868,7 +1674,7 @@ def _test_weighted_combination_of_adapters(self, model_id, config_cls, config_kw def _test_disable_adapter(self, model_id, config_cls, config_kwargs): task_type = config_kwargs.get("task_type") if (task_type == "SEQ_2_SEQ_LM") and (config_cls in (PromptTuningConfig, PromptEncoderConfig)): - self.skipTest("Seq2Seq + prompt tuning/prompt encoder does not work with disabling adapters") + pytest.skip("Seq2Seq + prompt tuning/prompt encoder does not work with disabling adapters") def get_output(model): # helper function that works with different model types @@ -1947,19 +1753,17 @@ def get_output(model): 
# TODO: add tests to check if disabling adapters works after calling merge_adapter def _test_adding_multiple_adapters_with_bias_raises(self, model_id, config_cls, config_kwargs): - # When trying to add multiple adapters with bias in Lora, AdaLora or BOFTConfig, an error should be - # raised. Also, the peft model should not be left in a half-initialized state. - if not issubclass(config_cls, (LoraConfig, AdaLoraConfig, BOFTConfig)): - return pytest.skip(f"Test not applicable for {config_cls}") - - with hub_online_once(model_id): - config_kwargs = config_kwargs.copy() - config_kwargs["bias"] = "all" + config_kwargs = config_kwargs.copy() + config_kwargs["bias"] = "all" + try: config = config_cls( base_model_name_or_path=model_id, **config_kwargs, ) + except TypeError: + pytest.skip(f"{config_cls} does not support the 'bias' argument, skipping this test.") + with hub_online_once(model_id): model = self.transformers_class.from_pretrained(model_id) model = get_peft_model(model, config, "adapter0")
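Several of the hunks above (for example `_test_training_layer_indexing` and `_test_adding_multiple_adapters_with_bias_raises`) replace hard-coded lists of supported config classes with a probe: build the config with the extra argument and skip on `TypeError`. A hedged generic sketch of that pattern; `build_config_or_skip` is not part of the PR:

import pytest

def build_config_or_skip(config_cls, config_kwargs, **extra_kwargs):
    # Hypothetical generic form of the probe used in the last hunks: instead of
    # maintaining an allow-list of config classes, try to construct the config
    # with the extra argument (e.g. bias="all" or layers_to_transform=[0]) and
    # skip the test when the config class rejects it.
    try:
        return config_cls(**config_kwargs, **extra_kwargs)
    except TypeError:
        pytest.skip(f"{config_cls.__name__} does not support {sorted(extra_kwargs)}, skipping this test.")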