We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 17ac713, commit dcac673 (Copy full SHA for dcac673)
open_diloco/train_fsdp.py
@@ -184,7 +184,11 @@ def _get_cosine_schedule_with_warmup_lr_lambda(
184
num_cycles: float,
185
min_lr_rate: float = 0.0,
186
):
187
- if warmup_outerstep is not None and current_step % num_inner_steps < warmup_outerstep:
+ if (
188
+ warmup_outerstep is not None
189
+ and current_step > num_warmup_steps
190
+ and current_step % num_inner_steps < warmup_outerstep
191
+ ):
192
return 0
193
194
if current_step < num_warmup_steps:
0 commit comments