diff --git a/optimum/neuron/accelerate/accelerator.py b/optimum/neuron/accelerate/accelerator.py
index bcb1be71b..e2fba78df 100644
--- a/optimum/neuron/accelerate/accelerator.py
+++ b/optimum/neuron/accelerate/accelerator.py
@@ -222,10 +222,11 @@ def prepare_data_loader(
             num_replicas = parallel_layers.parallel_state.get_data_parallel_size()
             rank = parallel_layers.parallel_state.get_data_parallel_rank()
             force_drop_last = parallel_layers.parallel_state.get_pipeline_model_parallel_size() > 1
-            logger.warning(
-                "Pipeline parallelsim: forcing the dataloader to drop the last incomplete batch because it can "
-                "cause failure if the last batch size is not divisible by the number of microbatches for the pipeline."
-            )
+            if force_drop_last and not data_loader.drop_last:
+                logger.warning(
+                    "Pipeline parallelism: forcing the dataloader to drop the last incomplete batch because it can "
+                    "cause failure if the last batch size is not divisible by the number of microbatches for the pipeline."
+                )
         else:
             num_replicas = xr.world_size()
             rank = xr.global_ordinal()
diff --git a/optimum/neuron/models/training/training_utils.py b/optimum/neuron/models/training/training_utils.py
index 43fda903c..0988829f2 100644
--- a/optimum/neuron/models/training/training_utils.py
+++ b/optimum/neuron/models/training/training_utils.py
@@ -185,13 +185,6 @@ def is_logging_process() -> bool:
     return dp_rank == tp_rank == 0 and pp_rank == pp_size - 1
 
 
-def is_logging_process_method(self) -> bool:
-    """
-    Method version of `is_logging_process`, useful when this is used to patch a method from the Trainer class.
-    """
-    return is_logging_process()
-
-
 def is_custom_modeling_model(model) -> bool:
     from peft import PeftModel
 
diff --git a/tests/training/test_overfit.py b/tests/training/test_overfit.py
index 7f6cb6472..4066f477b 100644
--- a/tests/training/test_overfit.py
+++ b/tests/training/test_overfit.py
@@ -100,7 +100,6 @@ def gen():
         tensor_parallel_size=tp_size,
         pipeline_parallel_size=pp_size,
         do_train=True,
-        do_eval=False,
         learning_rate=learning_rate,
         warmup_ratio=warmup_ratio,
         per_device_train_batch_size=1,
@@ -114,10 +113,7 @@ def gen():
         max_steps=6 if is_precompilation() else num_steps,
         output_dir=output_dir,
         run_name=wandb_run_name,
-        # This will load the weights on every worker at the same time.
-        # By default it is set to 8 to avoid OOM errors, but here the model are small enough to use the maximum size.
-        # This will save some time during weight loading.
-        num_local_ranks_per_step=-1,
+        num_local_ranks_per_step=16,
         **training_kwargs,
     )
 
@@ -127,7 +123,7 @@ def gen():
             model_name_or_path,
             training_args.trn_config,
             torch_dtype=torch.bfloat16,
-            attn_implementation="flash_attention_2" if use_flash_attention_2 else None,
+            attn_implementation="flash_attention_2" if use_flash_attention_2 else "eager",
         )
     else:
         model = model_class.from_pretrained(
@@ -207,8 +203,11 @@ def on_log(self, args, state, control, logs=None, **kwargs):
             50,
         ],
         [
-            "Qwen3ForCausalLM",
-            "Qwen/Qwen3-0.6B",
+            # "Qwen3ForCausalLM",
+            "LlamaForCausalLM",
+            # "Qwen/Qwen3-0.6B",
+            # "michaelbenayoun/qwen3-tiny-4kv-heads-4layers-random",
+            "michaelbenayoun/llama-2-tiny-4kv-heads-4layers-random",
             1e-4,
             0.03,
             {},
@@ -227,7 +226,7 @@ def on_log(self, args, state, control, logs=None, **kwargs):
 @pytest.mark.parametrize(
     "world_size,tp_size,pp_size",
     [[32, 2, 4], [32, 8, 1]],
-    ids=["dp=4,tp=2,pp=4", "dp=4,tp=8"],
+    ids=["32_2_4", "32_8_1"],
 )
 @pytest.mark.neuron_parallel_compile
 @is_trainium_test