We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 50a72d2 commit 5af40ce · Copy full SHA for 5af40ce
nemo_rl/algorithms/loss_functions.py
@@ -46,7 +46,6 @@ class ClippedPGLossConfig(TypedDict):
46
use_importance_sampling_correction: bool
47
truncated_importance_sampling_ratio: float | None
48
token_level_loss: bool
49
- force_on_policy_ratio: bool
50
# If True, apply the off-policy importance-sampling correction at the
51
# sequence level (one weight per generated sample), as in GSPO.
52
# If False (default), correction is applied at the token level as in the
0 commit comments