Skip to content

Commit d63506c

Browse files
committed
address feedback
Signed-off-by: Ananth Subramaniam <ansubramania@nvidia.com>
1 parent 4b137cc commit d63506c

File tree

2 files changed

+16
-21
lines changed

2 files changed

+16
-21
lines changed

nemo_rl/models/megatron/pipeline_parallel.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1- # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
1+ # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -57,14 +57,12 @@ def broadcast_obj_from_pp_rank(obj: Any) -> Any:
5757
# ------------------------------------------------------------------
5858
# 2. Identify the owning rank (the only rank with True flag)
5959
# ------------------------------------------------------------------
60-
src_rank = None # Rank *inside* the PP group
61-
for rank, flag in enumerate(obj_flags):
62-
if flag:
63-
src_rank = rank
64-
break
65-
66-
if src_rank is None:
60+
true_ranks = [rank for rank, flag in enumerate(obj_flags) if flag]
61+
if not true_ranks:
6762
raise ValueError("Object must exist on at least one PP rank")
63+
if len(true_ranks) > 1:
64+
raise ValueError(f"Object present on multiple PP ranks: {true_ranks}")
65+
src_rank = true_ranks[0]
6866

6967
# ------------------------------------------------------------------
7068
# 3. Broadcast the object from the source rank to all ranks
@@ -135,12 +133,11 @@ def broadcast_tensors_from_last_stage(
135133
if is_pipeline_last_stage(ignore_virtual=True):
136134
# Broadcast tensors from last stage
137135
for name, tensor in tensors.items():
138-
if tensor is not None:
139-
broadcasted_tensors[name] = broadcast_tensor(
140-
tensor, current_rank, pp_group
136+
if tensor is None:
137+
raise ValueError(
138+
f"Last PP stage must provide tensor '{name}' for broadcast."
141139
)
142-
else:
143-
broadcasted_tensors[name] = None
140+
broadcasted_tensors[name] = broadcast_tensor(tensor, current_rank, pp_group)
144141
else:
145142
# Receive tensors on other stages
146143
for name in tensors.keys():

nemo_rl/models/megatron/train.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1- # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
1+ # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -96,8 +96,6 @@ def model_forward(
9696
**multimodal_data,
9797
)
9898

99-
apply_temperature_scaling(output_tensor, cfg)
100-
10199
return output_tensor
102100

103101

@@ -174,7 +172,11 @@ def forward_with_post_processing_fn(
174172
straggler_timer=straggler_timer,
175173
)
176174

177-
## calling post_processing_fn will return a function that takes the output tensor and returns a tuple of (loss, metrics)
175+
# Apply temperature scaling only for sampling-oriented post-processors.
176+
# Loss computation should use unscaled logits.
177+
if isinstance(post_processing_fn, (LogprobsPostProcessor, TopkLogitsPostProcessor)):
178+
apply_temperature_scaling(output_tensor, cfg)
179+
178180
# Use type checking to dispatch to the correct post-processing method
179181
if isinstance(post_processing_fn, LossPostProcessor):
180182
post_processing_fn_wrapped = post_processing_fn(
@@ -425,10 +427,6 @@ def __call__(
425427
seq_lengths = data_dict["input_lengths"]
426428

427429
def processor_fn_inner(output_tensor):
428-
# Only the last PP stage produces final logits/top-k; earlier stages return empty
429-
# if not is_pipeline_last_stage(ignore_virtual=True):
430-
# return output_tensor.new_zeros(()), {}
431-
432430
tp_grp = get_tensor_model_parallel_group()
433431
tp_rank = get_tensor_model_parallel_rank()
434432
vocab_shard_size = output_tensor.shape[-1]

0 commit comments

Comments (0)