File tree Expand file tree Collapse file tree 1 file changed +10
-10
lines changed
Expand file tree Collapse file tree 1 file changed +10
-10
lines changed Original file line number Diff line number Diff line change 11grpo :
22 num_prompts_per_step : 8
3- num_generations_per_prompt : 4
3+ num_generations_per_prompt : 8
44 max_rollout_turns : 1
55 max_num_epochs : 1
6- max_num_steps : 500
6+ max_num_steps : 200
77 normalize_rewards : true
88 use_leave_one_out_baseline : true
99 val_period : 10
@@ -54,10 +54,10 @@ loss_fn:
5454 force_on_policy_ratio : false
5555checkpointing :
5656 enabled : true
57- checkpoint_dir : results/audio_grpo_3B_megatron
57+ checkpoint_dir : results/audio_grpo_3B_megatron_rerun
5858 metric_name : val:accuracy
5959 higher_is_better : true
60- keep_top_k : 3
60+ keep_top_k : 10
6161 save_period : 100
6262 checkpoint_must_save_by : null
6363policy :
@@ -173,8 +173,8 @@ policy:
173173 lora_dtype : null
174174 optimizer :
175175 optimizer : adam
176- lr : 2.0e-07
177- min_lr : 2.0e-07
176+ lr : 1.0e-6
177+ min_lr : 1.0e-7
178178 weight_decay : 0.01
179179 bf16 : true
180180 fp16 : false
@@ -194,8 +194,8 @@ policy:
194194 weight_decay_incr_style : constant
195195 lr_decay_style : constant
196196 lr_decay_iters : 1000
197- lr_warmup_iters : 50
198- lr_warmup_init : 2.0e-08
197+ lr_warmup_iters : 10
198+ lr_warmup_init : 1.0e-7
199199 distributed_data_parallel_config :
200200 grad_reduce_in_fp32 : false
201201 overlap_grad_reduce : false
@@ -246,10 +246,10 @@ logger:
246246 monitor_gpus : false
247247 wandb :
248248 project : grpo-dev
249- name : audio-grpo-3b-megatron
249+ name : audio-grpo-3b-megatron-large-lr
250250 swanlab :
251251 project : grpo-dev
252- name : audio-grpo-3b-megatron
252+ name : audio-grpo-3b-megatron-large-lr
253253 tensorboard : {}
254254 gpu_monitoring :
255255 collection_interval : 10
You can’t perform that action at this time.
0 commit comments