We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 4c331ed commit dfc3624Copy full SHA for dfc3624
examples/configs/grpo_math_1B.yaml
@@ -61,7 +61,7 @@ loss_fn:
61
use_importance_sampling_correction: false
62
truncated_importance_sampling_ratio: null
63
truncated_importance_sampling_ratio_min: null # Lower bound for ICE-POP
64
- truncated_importance_sampling_type: null # "tis" (clamp to max) or "icepop" (filter outside [min, max])
+ truncated_importance_sampling_type: tis # "tis" (clamp to max) or "icepop" (filter outside [min, max])
65
sequence_level_importance_ratios: false
66
token_level_loss: true
67
force_on_policy_ratio: false # Set to true to force ratio=1.0 (requires train_global_batch_size == num_prompts_per_step * num_generations_per_prompt)
0 commit comments