3 files changed: +0 -25 lines changed
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,7 @@ buffer:
2828 prompt_key : ' question'
2929 response_key : ' answer'
3030 rollout_args :
31- n : 8
3231 temperature : 1.0
33- logprobs : 0
3432 eval_tasksets :
3533 - name : gsm8k-eval
3634 storage_type : file
Original file line number | Diff line number | Diff line change
@@ -53,9 +53,7 @@ buffer:
5353 prompt_key : ' question'
5454 response_key : ' answer'
5555 rollout_args :
56- n : 8
5756 temperature : 1.0
58- logprobs : 0
5957 eval_tasksets :
6058 - name : gsm8k-eval
6159 storage_type : file
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,6 @@ actor_rollout_ref:
1212 use_dynamic_bsz : True # False
1313 ppo_max_token_len_per_gpu : 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
1414 grad_clip : 1.0
15- clip_ratio : 0.2
16- entropy_coeff : 0.001
17- use_kl_loss : True # True for GRPO
18- kl_loss_coef : 0.001 # for grpo
19- kl_loss_type : low_var_kl # for grpo
2015 ppo_epochs : 1
2116 shuffle : False
2217 ulysses_sequence_parallel_size : 1 # sp size
@@ -33,10 +28,6 @@ actor_rollout_ref:
3328 param_offload : False
3429 optimizer_offload : False
3530 fsdp_size : -1
36- # --- below: opmd ---
37- tau : 0.000 # strength of regularization w.r.t. old / ref policy
38- opmd_baseline : mean # mean / logavgexp, applicable to opmd
39- use_uid : False # True / False, applicable to pairwise_opmd
4031 ref :
4132 fsdp_config :
4233 param_offload : False
@@ -48,18 +39,6 @@ actor_rollout_ref:
4839 log_prob_max_token_len_per_gpu : ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
4940 ulysses_sequence_parallel_size : ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size
5041
51- custom_reward_function :
52- path : null
53- name : compute_score
54-
55- algorithm :
56- gamma : 1.0
57- lam : 1.0
58- kl_penalty : kl # how to estimate kl divergence
59- kl_ctrl :
60- type : fixed
61- kl_coef : 0.001
62-
6342 trainer :
6443 balance_batch : True
6544 # total_training_steps: null
You can’t perform that action at this time.
0 commit comments