Skip to content

Commit 4fb41a1

Browse files
committed
fix example config
1 parent 9e2653b commit 4fb41a1

File tree

3 files changed: +0 -25 lines changed

examples/grpo_gsm8k/gsm8k.yaml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,7 @@ buffer:
28 | 28 |   | prompt_key: 'question'
29 | 29 |   | response_key: 'answer'
30 | 30 |   | rollout_args:
31 |    | - | n: 8
32 | 31 |   | temperature: 1.0
33 |    | - | logprobs: 0
34 | 32 |   | eval_tasksets:
35 | 33 |   | - name: gsm8k-eval
36 | 34 |   | storage_type: file

examples/grpo_gsm8k_task_pipeline/gsm8k.yaml

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,9 +53,7 @@ buffer:
53 | 53 |   | prompt_key: 'question'
54 | 54 |   | response_key: 'answer'
55 | 55 |   | rollout_args:
56 |    | - | n: 8
57 | 56 |   | temperature: 1.0
58 |    | - | logprobs: 0
59 | 57 |   | eval_tasksets:
60 | 58 |   | - name: gsm8k-eval
61 | 59 |   | storage_type: file

examples/grpo_gsm8k_task_pipeline/train_gsm8k.yaml

Lines changed: 0 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,6 @@ actor_rollout_ref:
12 | 12 |   | use_dynamic_bsz: True # False
13 | 13 |   | ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
14 | 14 |   | grad_clip: 1.0
15 |    | - | clip_ratio: 0.2
16 |    | - | entropy_coeff: 0.001
17 |    | - | use_kl_loss: True # True for GRPO
18 |    | - | kl_loss_coef: 0.001 # for grpo
19 |    | - | kl_loss_type: low_var_kl # for grpo
20 | 15 |   | ppo_epochs: 1
21 | 16 |   | shuffle: False
22 | 17 |   | ulysses_sequence_parallel_size: 1 # sp size
@@ -33,10 +28,6 @@ actor_rollout_ref:
33 | 28 |   | param_offload: False
34 | 29 |   | optimizer_offload: False
35 | 30 |   | fsdp_size: -1
36 |    | - | # --- below: opmd ---
37 |    | - | tau: 0.000 # strength of regularization w.r.t. old / ref policy
38 |    | - | opmd_baseline: mean # mean / logavgexp, applicable to opmd
39 |    | - | use_uid: False # True / False, applicable to pairwise_opmd
40 | 31 |   | ref:
41 | 32 |   | fsdp_config:
42 | 33 |   | param_offload: False
@@ -48,18 +39,6 @@ actor_rollout_ref:
48 | 39 |   | log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
49 | 40 |   | ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size
50 | 41 |   |
51 |    | - | custom_reward_function:
52 |    | - | path: null
53 |    | - | name: compute_score
54 |    | - |
55 |    | - | algorithm:
56 |    | - | gamma: 1.0
57 |    | - | lam: 1.0
58 |    | - | kl_penalty: kl # how to estimate kl divergence
59 |    | - | kl_ctrl:
60 |    | - | type: fixed
61 |    | - | kl_coef: 0.001
62 |    | - |
63 | 42 |   | trainer:
64 | 43 |   | balance_batch: True
65 | 44 |   | # total_training_steps: null

0 commit comments

Comments
 (0)