Skip to content

Commit 266e718

Browse files
authored
fix: fix scheduler decay steps with megatron backend (#939)
Signed-off-by: ashors1 <ashors@nvidia.com>
Signed-off-by: Anna Shors <ashors@nvidia.com>
1 parent 5d2fd87 commit 266e718

27 files changed, with 69 additions (+69) and 27 deletions (-27).

examples/configs/dpo.yaml

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -139,9 +139,8 @@ policy:
139139
start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
140140
end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
141141
weight_decay_incr_style: "constant"
142-
lr_decay_style: "linear"
143-
lr_decay_iters: 1000000000
144-
lr_warmup_iters: 2
142+
lr_decay_style: "constant"
143+
lr_warmup_iters: 1
145144
lr_warmup_init: 0.00000001
146145

147146
distributed_data_parallel_config:

examples/configs/grpo_math_1B.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ policy:
106106
end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
107107
weight_decay_incr_style: "constant"
108108
lr_decay_style: "constant"
109-
lr_decay_iters: null
109+
lr_decay_iters: 1000
110110
lr_warmup_iters: 13
111111
lr_warmup_init: 5.0e-7
112112

examples/configs/grpo_math_1B_megatron.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,7 @@ policy:
120120
end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
121121
weight_decay_incr_style: "constant"
122122
lr_decay_style: "constant"
123-
lr_decay_iters: null
123+
lr_decay_iters: 1000
124124
lr_warmup_iters: 13
125125
lr_warmup_init: 5.0e-7
126126

examples/configs/grpo_math_70B_megatron.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ policy:
4646
end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
4747
weight_decay_incr_style: "constant"
4848
lr_decay_style: "constant"
49-
lr_decay_iters: null
49+
lr_decay_iters: 1000
5050
lr_warmup_iters: 13
5151
lr_warmup_init: 3.0e-8
5252

examples/configs/grpo_math_8B_megatron.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ policy:
5252
end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
5353
weight_decay_incr_style: "constant"
5454
lr_decay_style: "constant"
55-
lr_decay_iters: null
55+
lr_decay_iters: 1000
5656
lr_warmup_iters: 13
5757
lr_warmup_init: 3.0e-8
5858

examples/configs/grpo_math_qwen30ba3b_megatron.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ policy:
5252
end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
5353
weight_decay_incr_style: "constant"
5454
lr_decay_style: "constant"
55-
lr_decay_iters: null
55+
lr_decay_iters: 1000
5656
lr_warmup_iters: 13
5757
lr_warmup_init: 3.0e-8
5858

examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.yaml

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -92,9 +92,8 @@ policy:
9292
start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
9393
end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
9494
weight_decay_incr_style: "constant"
95-
lr_decay_style: "linear"
96-
lr_decay_iters: 1000000000
97-
lr_warmup_iters: 2
95+
lr_decay_style: "constant"
96+
lr_warmup_iters: 1
9897
lr_warmup_init: 0.00000001
9998

10099
distributed_data_parallel_config:

examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -92,9 +92,8 @@ policy:
9292
start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
9393
end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
9494
weight_decay_incr_style: "constant"
95-
lr_decay_style: "linear"
96-
lr_decay_iters: 1000000000
97-
lr_warmup_iters: 2
95+
lr_decay_style: "constant"
96+
lr_warmup_iters: 1
9897
lr_warmup_init: 0.00000001
9998

10099
distributed_data_parallel_config:

examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ policy:
9898
end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
9999
weight_decay_incr_style: "constant"
100100
lr_decay_style: "constant"
101-
lr_decay_iters: null
101+
lr_decay_iters: 1000
102102
lr_warmup_iters: 2
103103
lr_warmup_init: 5.0e-8
104104

examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ policy:
8787
end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
8888
weight_decay_incr_style: "constant"
8989
lr_decay_style: "constant"
90-
lr_decay_iters: null
90+
lr_decay_iters: 1000
9191
lr_warmup_iters: 50
9292
lr_warmup_init: 5.0e-7
9393

0 commit comments

Comments
 (0)