@@ -43,30 +43,29 @@ class LossFunction(Protocol):
 
     def __call__(
         self,
-        next_token_logits: torch.Tensor,
         data: BatchedDataDict,
         global_valid_seqs: torch.Tensor,
         global_valid_toks: torch.Tensor,
+        **kwargs: Any,
     ) -> tuple[torch.Tensor, dict[str, Any]]:
         """Compute loss and metrics from logprobs and other data.
 
         Args:
-            next_token_logits: Logits from the model, typically with shape [batch_size, seq_len, vocab_size].
-                For each position (b, i), contains the logit distribution over the entire vocabulary
-                for predicting the next token (at position i+1). For example, if processing "The cat sat on",
-                then next_token_logits[b, 3] would contain the logits for predicting the word
-                that follows "on".
             data: Dictionary containing all relevant data for loss computation
                 such as rewards, values, actions, advantages, masks, and other
                 algorithm-specific information needed for the particular loss calculation.
             global_valid_seqs: torch.Tensor
-                this tensor should contain the number of valid sequences in the microbatch.
+                This tensor should contain the number of valid sequences in the microbatch.
                 It's used for global normalization for losses/metrics that are computed at the sequence level
                 and needs to be aggregated across all microbatches.
             global_valid_toks: torch.Tensor
                 This tensor should contain the number of valid tokens in the microbatch.
                 It's used for global normalization for losses/metrics that are computed at the token level
                 and needs to be aggregated across all microbatches.
+            **kwargs: Loss function input, which varies by input_type:
+                - For LossInputType.LOGPROB: next_token_logprobs (torch.Tensor)
+                - For LossInputType.LOGIT: logits (torch.Tensor)
+                - For LossInputType.DISTILLATION: student_topk_logprobs, teacher_topk_logprobs, H_all (torch.Tensor)
 
         Returns:
             tuple: (loss, metrics)