We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent d816788 commit 0374c25 · Copy full SHA for 0374c25
examples/dpo_humanlike/train_dpo.yaml
@@ -26,7 +26,7 @@ actor_rollout_ref:
26
min_lr_ratio: 0.1 # only useful for warmup with cosine
27
warmup_style: cosine # select from constant/cosine
28
total_training_steps: 783 #
29
- betas: [0.9, 0.95] # set to smaller value for scenarios with abrupt distribution shift (e.g., large sync_interval)
+ betas: [0.9, 0.95]
30
fsdp_config:
31
wrap_policy:
32
# transformer_layer_cls_to_wrap: None
0 commit comments