Commit ad6852a

fix CI bugs
1 parent 510e291 commit ad6852a

4 files changed, 10 insertions(+), 3 deletions(-)


nemo_rl/algorithms/loss_functions.py

Lines changed: 2 additions & 2 deletions
@@ -46,9 +46,9 @@ class ClippedPGLossConfig(TypedDict):
     use_importance_sampling_correction: bool
     truncated_importance_sampling_ratio: float | None
     # Type of truncated importance sampling: "tis" (clamp max) or "icepop" (filter [min, max])
-    truncated_importance_sampling_type: NotRequired[str]
+    truncated_importance_sampling_type: NotRequired[str | None]
     # Lower bound for ICE-POP filtering (default 0.5)
-    truncated_importance_sampling_ratio_min: NotRequired[float]
+    truncated_importance_sampling_ratio_min: NotRequired[float | None]
     token_level_loss: bool
     # If True, apply the off-policy importance-sampling correction at the
     # sequence level (one weight per generated sample), as in GSPO.
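
Note: both fields are widened to also accept an explicit None, presumably so configs that set these keys to null still type-check. For context, here is a minimal sketch of the two correction modes named in the comment; the function name, signature, and defaults are illustrative assumptions, not this repo's implementation:

import torch

def truncated_is_correction(
    ratios: torch.Tensor,            # per-token importance ratios pi_new / pi_old
    ts_type: str | None = "tis",     # "tis" (clamp max) or "icepop" (filter [min, max])
    ratio_max: float = 2.0,
    ratio_min: float | None = 0.5,   # lower bound used only by "icepop"
) -> torch.Tensor:
    if ts_type == "tis":
        # Truncated importance sampling: clamp only the upper tail.
        return ratios.clamp(max=ratio_max)
    if ts_type == "icepop":
        # ICE-POP: keep ratios inside [ratio_min, ratio_max] and zero out
        # the rest, so those tokens drop from the loss.
        keep = (ratios >= ratio_min) & (ratios <= ratio_max)
        return ratios * keep
    return ratios  # no correction when type is None

print(truncated_is_correction(torch.tensor([0.2, 1.0, 3.0]), "icepop"))
# tensor([0., 1., 0.])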

nemo_rl/algorithms/reward_functions.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ class RewardShapingConfig(TypedDict):
     # Stop properly penalty: scale factor for rewards of truncated responses (0-1).
     # When set to 0, truncated responses get zero reward.
     # When set to 1, no penalty is applied (default behavior).
-    stop_properly_penalty_coef: NotRequired[float]
+    stop_properly_penalty_coef: NotRequired[float | None]
 
 
 def apply_reward_shaping(
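
The same None-widening applies here. Per the comments above, the coefficient scales rewards of truncated responses: 0 zeroes them out, 1 leaves them untouched. A minimal sketch of that behavior, with an illustrative signature rather than the actual apply_reward_shaping:

import torch

def shape_rewards(
    rewards: torch.Tensor,        # one reward per sample
    truncated: torch.Tensor,      # bool mask of truncated samples
    stop_properly_penalty_coef: float | None = None,
) -> torch.Tensor:
    if stop_properly_penalty_coef is None:
        return rewards            # penalty disabled
    # coef == 0 -> truncated samples get zero reward;
    # coef == 1 -> no penalty (default behavior).
    return torch.where(truncated, rewards * stop_properly_penalty_coef, rewards)

rewards = torch.tensor([1.0, 0.5, 0.8])
truncated = torch.tensor([False, True, False])
print(shape_rewards(rewards, truncated, 0.0))  # tensor([1.0000, 0.0000, 0.8000])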

nemo_rl/models/generation/vllm/vllm_worker.py

Lines changed: 1 addition & 0 deletions
@@ -543,6 +543,7 @@ def generate(
                 "logprobs": torch.zeros((0, 0), dtype=torch.float),
                 "generation_lengths": torch.zeros(0, dtype=torch.long),
                 "unpadded_sequence_lengths": torch.zeros(0, dtype=torch.long),
+                "truncated": torch.zeros(0, dtype=torch.bool),
             }
         )
 
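
The empty-batch early return in generate now carries the same "truncated" key as real generation output. A hedged illustration of why the placeholder schema has to match downstream expectations (concat_batches is a stand-in, not a helper from this repo):

import torch

def concat_batches(
    a: dict[str, torch.Tensor], b: dict[str, torch.Tensor]
) -> dict[str, torch.Tensor]:
    # Raises KeyError if one batch lacks a key the other has; this is the
    # failure mode an incomplete empty-batch placeholder triggers in CI.
    return {k: torch.cat([a[k], b[k]]) for k in a}

empty = {"truncated": torch.zeros(0, dtype=torch.bool)}
real = {"truncated": torch.tensor([True, False])}
print(concat_batches(empty, real)["truncated"])  # tensor([ True, False])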

nemo_rl/models/generation/vllm/vllm_worker_async.py

Lines changed: 6 additions & 0 deletions
@@ -727,12 +727,18 @@ async def process_single_sample(sample_idx):
             device=input_ids_single_row.device,
         )
 
+        # Not truncated since no generation was attempted (length constraint)
+        truncated_tensor = torch.tensor(
+            [False], dtype=torch.bool, device=input_ids_single_row.device
+        )
+
         result_batch = BatchedDataDict[GenerationOutputSpec](
             {
                 "output_ids": output_ids_single_item_batched,
                 "logprobs": logprobs_single_item,
                 "generation_lengths": generation_lengths_tensor,
                 "unpadded_sequence_lengths": unpadded_sequence_lengths_tensor,
+                "truncated": truncated_tensor,
             }
         )
 
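
This hunk covers only the early-return path where no tokens were generated, so the flag is hard-coded to False. For completed generations the flag has to come from elsewhere; one plausible source is vLLM's per-output finish reason (vLLM reports finish_reason == "length" when a completion hits its token budget). The helper below is an assumption for illustration, not this worker's actual logic:

import torch

def truncation_flag(finish_reason: str | None, device: torch.device) -> torch.Tensor:
    # "length" means generation stopped by hitting max_tokens, i.e. the
    # response was truncated rather than ending at a stop condition.
    return torch.tensor([finish_reason == "length"], dtype=torch.bool, device=device)

print(truncation_flag("length", torch.device("cpu")))  # tensor([True])
print(truncation_flag("stop", torch.device("cpu")))    # tensor([False])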
