fix: set truncated_importance_sampling_type to "tis" in grpo_math_1B.yaml

hijkzzz · hijkzzz · commit dfc36241b05a · 2026-02-02T04:56:25.000-08:00
Signed-off-by: jianh <jianh@nvidia.com>
diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml
@@ -61,7 +61,7 @@ loss_fn:
   use_importance_sampling_correction: false
   truncated_importance_sampling_ratio: null
   truncated_importance_sampling_ratio_min: null  # Lower bound for ICE-POP
-  truncated_importance_sampling_type: null  # "tis" (clamp to max) or "icepop" (filter outside [min, max])
+  truncated_importance_sampling_type: tis  # "tis" (clamp to max) or "icepop" (filter outside [min, max])
   sequence_level_importance_ratios: false
   token_level_loss: true
   force_on_policy_ratio: false  # Set to true to force ratio=1.0 (requires train_global_batch_size == num_prompts_per_step * num_generations_per_prompt)