# clipb.yaml
# Source: repository forked from agentscope-ai/Trinity-RFT
# Project / run labels and the root directory for checkpoints.
project: 'math_dapo'
name: 'clipb_example'
# Read from the TRINITY_CHECKPOINT_ROOT_DIR environment variable;
# ./checkpoints is the fallback given after the comma.
checkpoint_root_dir: '${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}'
# Model location and prompt/response token limits.
model:
  # Read from the TRINITY_MODEL_PATH environment variable;
  # Qwen/Qwen2.5-7B-Instruct is the fallback given after the comma.
  model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen2.5-7B-Instruct}
  max_prompt_tokens: 1024
  max_response_tokens: 7168
# Algorithm selection plus arguments for the advantage and KL-loss functions.
algorithm:
  algorithm_type: grpo_verl
  advantage_fn: clipb
  advantage_fn_args:
    mu: 2.5
  # NOTE(review): nesting was reconstructed from a flattened source;
  # `repeat_times` is assumed to be an `algorithm`-level key rather than
  # an entry of `advantage_fn_args` — confirm against the config schema.
  repeat_times: 16
  kl_loss_fn_args:
    kl_coef: 0.0
# Cluster size: node count and GPUs per node.
cluster:
  node_num: 1
  gpu_per_node: 8
# Data configuration: training taskset, evaluation tasksets, and the
# experience buffer read by the trainer.
# NOTE(review): nesting was reconstructed from a flattened source — confirm
# the placement of keys against the consuming config schema.
buffer:
  total_epochs: 20
  batch_size: 64
  explorer_input:
    taskset:
      name: dapo_235
      storage_type: file
      # No fallback is given here, so TRINITY_TASKSET_PATH must be set in
      # the environment.
      path: ${oc.env:TRINITY_TASKSET_PATH}  # processed DAPO-Math-17k
      format:
        prompt_key: 'question'
        response_key: 'ground_truth'
      rollout_args:
        temperature: 1.0
        logprobs: 20
    eval_tasksets:
      - name: dapo-validation-500
        storage_type: file
        # Placeholder path: replace with the real location before running.
        path: '/path/to/dapo-validation'  # validation samples from DAPO-Math-17k
        split: 'test'
        repeat_times: 32
        format:
          prompt_key: 'question'
          response_key: 'ground_truth'
        rollout_args:
          temperature: 0.7
      - name: amc23
        storage_type: file
        path: math-ai/amc23  # Path to the AMC23 dataset
        split: 'test'
        repeat_times: 32
        format:
          prompt_key: 'question'
          response_key: 'answer'
        rollout_args:
          temperature: 0.7
      - name: aime24
        storage_type: file
        path: HuggingFaceH4/aime_2024  # Path to the AIME2024 dataset
        split: 'train'
        repeat_times: 32
        format:
          prompt_key: 'problem'
          response_key: 'answer'
        rollout_args:
          temperature: 0.7
      - name: aime25
        storage_type: file
        path: math-ai/aime25  # Path to the AIME2025 dataset
        split: 'test'
        repeat_times: 32
        format:
          prompt_key: 'problem'
          response_key: 'answer'
        rollout_args:
          temperature: 0.7
    default_workflow_type: 'async_math_workflow'
    default_reward_fn_type: 'math_boxed_reward'
  trainer_input:
    experience_buffer:
      name: math_buffer
      storage_type: queue
      max_read_timeout: 7200
# Explorer settings: evaluation schedule, runners, and the rollout engine.
explorer:
  eval_interval: 20
  eval_on_startup: true
  runner_per_model: 8
  rollout_model:
    engine_type: vllm_async
    engine_num: 4
    tensor_parallel_size: 1
    # NOTE(review): `seed` is assumed to belong to `rollout_model`; the
    # flattened source does not show its nesting level — confirm.
    seed: 42
# Trainer backend, checkpoint save interval, and backend-specific overrides.
trainer:
  trainer_type: 'verl'
  save_interval: 200
  # NOTE(review): the chain below was reconstructed as a single nested
  # path (trainer_config.algorithm.rollout_correction.bypass_mode) —
  # confirm against the trainer's config schema.
  trainer_config:
    algorithm:
      rollout_correction:
        bypass_mode: false
# Synchronizer settings: sync method, interval, and timeout.
synchronizer:
  sync_method: 'nccl'
  sync_interval: 1
  sync_timeout: 3200