We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 7bd853a, commit 905a224. Copy full SHA for 905a224
nemo_rl/algorithms/grpo.py
@@ -467,7 +467,10 @@ def setup(
467
468
if policy_config.get("megatron_cfg", {}).get("enabled", False):
469
## NOTE: this is equal to the total number of scheduler steps
470
- total_train_iters = min(grpo_config["max_num_steps"], len(dataloader))
+ total_train_iters = min(
471
+ grpo_config["max_num_steps"],
472
+ grpo_config["max_num_epochs"] * len(dataloader),
473
+ )
474
policy_config["megatron_cfg"]["train_iters"] = total_train_iters
475
476
policy = Policy(
0 commit comments