File tree Expand file tree Collapse file tree 2 files changed +3
-3
lines changed
Expand file tree Collapse file tree 2 files changed +3
-3
lines changed Original file line number Diff line number Diff line change @@ -5,15 +5,15 @@ algorithm:
55 algorithm_type : dpo
66checkpoint_root_dir : /PATH/TO/CHECKPOINT
77model :
8- model_path : ' /PATH/TO/MODEL' # NOTE
8+ model_path : ' /PATH/TO/MODEL'
99 max_prompt_tokens : 1792
1010 max_response_tokens : 256
1111cluster :
1212 node_num : 1
1313 gpu_per_node : 8
1414buffer :
1515 total_epochs : 20
16- batch_size : 32 # NOTE
16+ batch_size : 32
1717 max_retry_times : 3
1818 max_retry_interval : 1
1919 trainer_input :
Original file line number Diff line number Diff line change @@ -32,7 +32,7 @@ actor_rollout_ref:
3232 grad_clip : 1.0
3333 clip_ratio : 0.2
3434 entropy_coeff : 0.001
35- use_kl_loss : True # NOTE
35+ use_kl_loss : True
3636 kl_loss_coef : 0.1 # NOTE: beta for DPO
3737 kl_loss_type : low_var_kl # for grpo
3838 ppo_epochs : 1
You can’t perform that action at this time.
0 commit comments