forked from agentscope-ai/Trinity-RFT
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdpo.yaml
More file actions
59 lines (59 loc) · 1.26 KB
/
dpo.yaml
File metadata and controls
59 lines (59 loc) · 1.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
---
# DPO training configuration (Trinity-RFT style).
# Values marked '/PATH/TO/...' or '' are placeholders — fill them in before running.
mode: train

data:
  total_epochs: 20
  batch_size: 32  # NOTE
  train_split: 'train'
  dataset_path: ''  # placeholder — set to the training dataset location
  default_workflow_type: 'math_workflow'
  format_config:
    prompt_key: ''  # placeholder — key of the prompt field in the dataset
    response_key: ''  # placeholder — key of the response field in the dataset

model:
  model_path: '/PATH/TO/MODEL/CHECKPOINT/'  # NOTE
  max_prompt_tokens: 1792
  max_response_tokens: 256
  checkpoint_path: 'checkpoints/trinity_dpo'

cluster:
  node_num: 1
  gpu_per_node: 8

buffer:
  max_retry_times: 3
  max_retry_interval: 1
  train_dataset:
    name: dpo_buffer
    storage_type: file
    path: '/PATH/TO/DATASET/'  # placeholder — DPO preference dataset
    kwargs:
      prompt_type: plaintext  # plaintext/messages
      prompt_key: prompt
      chosen_key: chosen
      rejected_key: rejected

explorer:
  engine_type: vllm_async
  # NOTE(review): 0 rollout engines — presumably intentional since
  # synchronizer.sync_method is 'offline' (no online exploration); confirm.
  engine_num: 0
  runner_num: 32
  tensor_parallel_size: 1
  enable_prefix_caching: false
  enforce_eager: true
  dtype: bfloat16
  temperature: 1.0
  top_p: 1.0
  top_k: -1  # -1 here conventionally means "no top-k filtering" — TODO confirm with consumer
  seed: 42
  logprobs: 0
  repeat_times: 1  # NOTE
  use_ray: false
  backend: 'nccl'
  max_pending_requests: 32
  max_waiting_steps: 4

synchronizer:
  sync_method: 'offline'
  sync_iteration_interval: 30

trainer:
  trainer_type: 'verl'
  algorithm_type: dpo
  trainer_config_path: 'examples/dpo_humanlike/train_dpo.yaml'

monitor:
  cache_root_dir: ''  # empty string — monitor cache location left unset
  project: 'dpo_example'
  name: 'trinity_dpo'