@@ -47,6 +47,10 @@ actor_rollout_ref:
4747 dist_ckpt_optim_fully_reshardable : false
4848 distrib_optim_fully_reshardable_mem_efficient : false
4949 seed : 42
50+ use_megatron_fsdp : false
51+ megatron_fsdp_zero_stage : 3
52+ megatron_fsdp_overlap_grad_reduce : true
53+ megatron_fsdp_overlap_param_gather : true
5054 override_ddp_config : {}
5155 override_transformer_config :
5256 recompute_granularity : null
@@ -221,6 +225,10 @@ actor_rollout_ref:
221225 dist_ckpt_optim_fully_reshardable : false
222226 distrib_optim_fully_reshardable_mem_efficient : false
223227 seed : ${oc.select:actor_rollout_ref.actor.megatron.seed,42}
228+ use_megatron_fsdp : false
229+ megatron_fsdp_zero_stage : 3
230+ megatron_fsdp_overlap_grad_reduce : true
231+ megatron_fsdp_overlap_param_gather : true
224232 override_ddp_config : {}
225233 override_transformer_config : ${oc.select:actor_rollout_ref.actor.megatron.override_transformer_config,{}}
226234 override_mcore_model_config : {}
@@ -512,6 +520,10 @@ critic:
512520 dist_ckpt_optim_fully_reshardable : false
513521 distrib_optim_fully_reshardable_mem_efficient : false
514522 seed : 42
523+ use_megatron_fsdp : false
524+ megatron_fsdp_zero_stage : 3
525+ megatron_fsdp_overlap_grad_reduce : true
526+ megatron_fsdp_overlap_param_gather : true
515527 override_ddp_config : {}
516528 override_transformer_config :
517529 recompute_granularity : null
0 commit comments