Skip to content

Commit b52809f

Browse files
committed
* prepare the initial config files for exp pipeline
1 parent b8bd0ba commit b52809f

File tree

5 files changed

+155
-2
lines changed

5 files changed

+155
-2
lines changed

docs/sphinx_doc/source/tutorial/example_data_functionalities.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,6 @@ If you are familiar with Data-Juicer, you will realize that Data-Juicer provides
103103
# This is a Data-Juicer data processing recipe
104104
project_name: 'gsm-8k-difficulty'
105105
106-
export_path: '/path/to/the/result/processed-dataset.jsonl'
107-
108106
process:
109107
- llm_difficulty_score_filter:
110108
api_or_hf_model: "qwen2.5-72b-instruct" # use "qwen2.5-72b-instruct" to calculate the difficulty scores.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# GRPO on GSM8K dataset with Experience Pipeline
2+
3+
This example shows the usage of GRPO on the GSM8K dataset, with an experience pipeline to reshape the rewards of experiences during training.
4+
5+
For more detailed information, please refer to the [documentation](../../docs/sphinx_doc/source/tutorial/example_data_functionalities.md).
6+
7+
The config files are located in [`gsm8k.yaml`](gsm8k.yaml) and [`train_gsm8k.yaml`](train_gsm8k.yaml).
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# This is a Data-Juicer data processing recipe
2+
project_name: 'gsm-8k-experience-quality'
3+
4+
process:
5+
- llm_quality_score_filter:
6+
api_or_hf_model: "qwen2.5-32b-instruct" # use "qwen2.5-32b-instruct" to calculate the quality scores.
7+
input_keys: ["prompt", "response"] # set input_keys and field_names to the existing key names in gsm-8k. Here the quality scores are calculated according to both prompts and responses.
8+
field_names: ["prompt", "response"]
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
project: "Trinity-RFT-gsm8k-experience-pipeline"
2+
name: "qwen2.5-1.5B-gsm8k-experience-pipeline"
3+
checkpoint_root_dir: /PATH/TO/CHECKPOINT/
4+
algorithm:
5+
algorithm_type: grpo
6+
repeat_times: 8
7+
data_processor:
8+
data_processor_url: 'http://127.0.0.1:5005/data_processor'
9+
# experience pipeline related
10+
experience_pipeline:
11+
# I/O buffers
12+
input_buffers:
13+
- name: gsm8k_exp_output
14+
storage_type: queue
15+
path: 'sqlite:///gsm8k_exp_output.db'
16+
output_buffer:
17+
name: reshaped_gsm8k_exp_input
18+
storage_type: queue
19+
path: 'sqlite:///reshaped_gsm8k_exp_input.db'
20+
# format mapping
21+
format:
22+
prompt_key: 'prompt'
23+
response_key: 'response'
24+
reward_key: 'reward'
25+
# data active iterator related
26+
dj_config_path: 'examples/grpo_gsm8k_experience_pipeline/dj_scoring_exp.yaml'
27+
clean_strategy: 'iterative'
28+
29+
model:
30+
model_path: /PATH/TO/MODEL/
31+
max_prompt_tokens: 256
32+
max_response_tokens: 1024
33+
cluster:
34+
node_num: 1
35+
gpu_per_node: 8
36+
buffer:
37+
total_epochs: 1
38+
batch_size: 96
39+
max_retry_times: 3
40+
max_retry_interval: 1
41+
explorer_input:
42+
taskset:
43+
name: gsm8k
44+
storage_type: file
45+
path: 'openai/gsm8k'
46+
subset_name: 'main'
47+
split: 'train'
48+
format:
49+
prompt_key: 'question'
50+
response_key: 'answer'
51+
rollout_args:
52+
temperature: 1.0
53+
eval_tasksets:
54+
- name: gsm8k-eval
55+
storage_type: file
56+
path: 'openai/gsm8k'
57+
subset_name: 'main'
58+
split: 'test'
59+
format:
60+
prompt_key: 'question'
61+
response_key: 'answer'
62+
default_workflow_type: 'math_workflow'
63+
explorer_output:
64+
name: gsm8k_exp_output
65+
storage_type: queue
66+
path: 'sqlite:///gsm8k_exp_output.db'
67+
trainer_input:
68+
experience_buffer:
69+
name: reshaped_gsm8k_exp_input
70+
storage_type: queue
71+
path: 'sqlite:///reshaped_gsm8k_exp_input.db'
72+
explorer:
73+
eval_interval: 50
74+
runner_num: 32
75+
rollout_model:
76+
engine_type: vllm_async
77+
engine_num: 2
78+
tensor_parallel_size: 1
79+
enable_prefix_caching: false
80+
enforce_eager: true
81+
dtype: bfloat16
82+
seed: 42
83+
synchronizer:
84+
sync_method: 'nccl'
85+
sync_interval: 1
86+
sync_timeout: 1200
87+
trainer:
88+
trainer_type: 'verl'
89+
trainer_config_path: 'examples/grpo_gsm8k_experience_pipeline/train_gsm8k.yaml'
90+
save_interval: 100
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
actor_rollout_ref:
2+
hybrid_engine: True
3+
model:
4+
external_lib: null
5+
override_config: { }
6+
enable_gradient_checkpointing: True
7+
use_remove_padding: True # False
8+
actor:
9+
strategy: fsdp # This is for backward-compatibility
10+
ppo_mini_batch_size: 128
11+
ppo_micro_batch_size_per_gpu: 4
12+
use_dynamic_bsz: True # False
13+
ppo_max_token_len_per_gpu: 16384 # n * ${data.max_prompt_length} + ${data.max_response_length}
14+
grad_clip: 1.0
15+
ppo_epochs: 1
16+
shuffle: False
17+
ulysses_sequence_parallel_size: 1 # sp size
18+
optim:
19+
lr: 1e-5
20+
lr_warmup_steps_ratio: 0. # the total steps will be injected during runtime
21+
# min_lr_ratio: null # only useful for warmup with cosine
22+
warmup_style: constant # select from constant/cosine
23+
total_training_steps: -1 # must be overridden by the program
24+
fsdp_config:
25+
wrap_policy:
26+
# transformer_layer_cls_to_wrap: None
27+
min_num_params: 0
28+
param_offload: False
29+
optimizer_offload: False
30+
fsdp_size: -1
31+
ref:
32+
fsdp_config:
33+
param_offload: False
34+
wrap_policy:
35+
# transformer_layer_cls_to_wrap: None
36+
min_num_params: 0
37+
log_prob_micro_batch_size_per_gpu: 16
38+
log_prob_use_dynamic_bsz: ${actor_rollout_ref.actor.use_dynamic_bsz}
39+
log_prob_max_token_len_per_gpu: ${actor_rollout_ref.actor.ppo_max_token_len_per_gpu}
40+
ulysses_sequence_parallel_size: ${actor_rollout_ref.actor.ulysses_sequence_parallel_size} # sp size
41+
42+
trainer:
43+
balance_batch: True
44+
# total_training_steps: null
45+
# auto: find the last ckpt to resume from; if none is found, start from scratch
46+
resume_mode: auto # options: auto, or resume_path to resume from a specified checkpoint path
47+
default_hdfs_dir: null
48+
remove_previous_ckpt_in_save: False
49+
del_local_ckpt_after_load: False
50+
val_before_train: False

0 commit comments

Comments
 (0)