Skip to content

Commit 8c70ee6

Browse files
authored
fix: checkpoint loading bug in Megatron LoRA GRPO (#2075)
Signed-off-by: Virginia Wu <vadams@nvidia.com>
1 parent 919e373 commit 8c70ee6

File tree

2 files changed

+8
-10
lines changed

2 files changed

+8
-10
lines changed

nemo_rl/models/megatron/setup.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -928,16 +928,14 @@ def composed_peft_hook(model: list[MegatronModule]) -> list[MegatronModule]:
928928
pg_collection=ProcessGroupCollection.use_mpu_process_groups(),
929929
)
930930

931+
# If use_peft, the pretrained checkpoint weights are already loaded inside of the pre_wrap_hook
932+
# so they only need to be loaded here if use_peft is False
931933
should_load_checkpoint = (
932-
ref_checkpoint_config.pretrained_checkpoint is not None
934+
not use_peft
935+
and ref_checkpoint_config.pretrained_checkpoint is not None
933936
and checkpoint_exists(ref_checkpoint_config.pretrained_checkpoint)
934937
)
935938

936-
if should_load_checkpoint and use_peft:
937-
# The finetune toggle is explicitly set to True in order to avoid loading optimizer and RNG states
938-
# This is switched off here in order to load these states from the checkpoint
939-
ref_megatron_cfg.checkpoint.finetune = False
940-
941939
print("Loading the Reference Model")
942940

943941
if should_load_checkpoint:
@@ -949,8 +947,6 @@ def composed_peft_hook(model: list[MegatronModule]) -> list[MegatronModule]:
949947
checkpointing_context=ref_ckpt_context,
950948
skip_load_to_model_and_opt=HAVE_FSDP2 and megatron_cfg.dist.use_torch_fsdp2,
951949
)
952-
else:
953-
print("Reference model not loaded")
954950

955951
reference_state_dict = {}
956952

@@ -966,6 +962,8 @@ def composed_peft_hook(model: list[MegatronModule]) -> list[MegatronModule]:
966962
cpu_item = item
967963
reference_state_dict[name] = cpu_item
968964
print("Reference model loaded")
965+
else:
966+
print("Reference model not loaded")
969967

970968
return reference_state_dict
971969

tests/functional/L1_Functional_Tests_GPU.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ run_test uv run --no-sync bash ./tests/functional/grpo_automodel_lora_async
5252
run_test uv run --no-sync bash ./tests/functional/grpo_automodel_lora_non_colocated.sh
5353
run_test uv run --no-sync bash ./tests/functional/grpo_megatron.sh
5454
run_test uv run --no-sync bash ./tests/functional/grpo_megatron_generation.sh
55-
run_test uv run --no-sync bash ./tests/functional/grpo_megatron_lora.sh
56-
run_test uv run --no-sync bash ./tests/functional/grpo_megatron_lora_async.sh
55+
run_test fast uv run --no-sync bash ./tests/functional/grpo_megatron_lora.sh
56+
run_test fast uv run --no-sync bash ./tests/functional/grpo_megatron_lora_async.sh
5757
run_test uv run --no-sync bash ./tests/functional/grpo_multiple_dataloaders.sh
5858
run_test uv run --no-sync bash ./tests/functional/grpo_multiturn.sh
5959
run_test uv run --no-sync bash ./tests/functional/grpo_non_colocated.sh

0 commit comments

Comments (0)