Skip to content

Commit a4cc659

Browse files
committed
update lr
Signed-off-by: root <zhangyuekai@foxmail.com>
1 parent ad1c0b6 commit a4cc659

File tree

1 file changed: +10 additions, −10 deletions

examples/configs/audio_grpo_3B_megatron.yaml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
 grpo:
   num_prompts_per_step: 8
-  num_generations_per_prompt: 4
+  num_generations_per_prompt: 8
   max_rollout_turns: 1
   max_num_epochs: 1
-  max_num_steps: 500
+  max_num_steps: 200
   normalize_rewards: true
   use_leave_one_out_baseline: true
   val_period: 10
@@ -54,10 +54,10 @@ loss_fn:
   force_on_policy_ratio: false
 checkpointing:
   enabled: true
-  checkpoint_dir: results/audio_grpo_3B_megatron
+  checkpoint_dir: results/audio_grpo_3B_megatron_rerun
   metric_name: val:accuracy
   higher_is_better: true
-  keep_top_k: 3
+  keep_top_k: 10
   save_period: 100
   checkpoint_must_save_by: null
 policy:
@@ -173,8 +173,8 @@ policy:
   lora_dtype: null
   optimizer:
     optimizer: adam
-    lr: 2.0e-07
-    min_lr: 2.0e-07
+    lr: 1.0e-6
+    min_lr: 1.0e-7
     weight_decay: 0.01
     bf16: true
     fp16: false
@@ -194,8 +194,8 @@ policy:
     weight_decay_incr_style: constant
     lr_decay_style: constant
     lr_decay_iters: 1000
-    lr_warmup_iters: 50
-    lr_warmup_init: 2.0e-08
+    lr_warmup_iters: 10
+    lr_warmup_init: 1.0e-7
     distributed_data_parallel_config:
       grad_reduce_in_fp32: false
       overlap_grad_reduce: false
@@ -246,10 +246,10 @@ logger:
   monitor_gpus: false
   wandb:
     project: grpo-dev
-    name: audio-grpo-3b-megatron
+    name: audio-grpo-3b-megatron-large-lr
   swanlab:
     project: grpo-dev
-    name: audio-grpo-3b-megatron
+    name: audio-grpo-3b-megatron-large-lr
   tensorboard: {}
   gpu_monitoring:
     collection_interval: 10

Comments (0)