Skip to content

Commit 45c75db

Browse files
authored
Add sft/dpo example json (#2575)
1 parent e900f1d commit 45c75db

File tree

10 files changed

+216
-30
lines changed

10 files changed

+216
-30
lines changed

examples/README.md

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,32 @@
1+
## 0. 环境变量
2+
3+
在运行前,可以通过设置环境变量 `DOWNLOAD_SOURCE` 来指定模型的下载源,默认使用 **huggingface**。
4+
5+
目前支持的下载源包括:
6+
- [huggingface](https://huggingface.co)
7+
- [modelscope](https://modelscope.cn/home)
8+
- [aistudio](https://aistudio.baidu.com/overview)
9+
10+
11+
示例:
12+
```bash
13+
# 使用 modelscope
14+
export DOWNLOAD_SOURCE=modelscope
15+
16+
# 使用 aistudio
17+
export DOWNLOAD_SOURCE=aistudio
18+
```
19+
20+
### Paddle 权重使用说明
21+
22+
使用 **Paddle** 格式权重,需要在配置文件(如 `sft_full.json`、`sft_lora.json` 等)中手动添加以下参数,以避免与 **HuggingFace** 格式冲突:
23+
24+
```json
25+
"model_name_or_path": "your_model_name",
26+
"convert_from_hf": false,
27+
"save_to_hf": false,
28+
```
29+
130
## 1. 精调
231

332
### 1.1 数据准备
@@ -25,21 +54,19 @@ tar -xvf alpaca_demo.gz
2554

2655
单卡
2756
```bash
28-
# 需要12G显存左右
29-
python -u run_finetune.py ./config/qwen/sft_argument_qwen2_0p5b.json
57+
python -u run_finetune.py ./config/sft_full.json
3058
```
3159

3260
多卡
3361
```bash
34-
python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" run_finetune.py ./config/qwen/sft_argument_qwen2_0p5b.json
62+
python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" run_finetune.py ./config/sft_full.json
3563
```
3664

3765
### 1.3 LoRA SFT
3866

3967
LoRA SFT 启动命令参考
4068
```bash
41-
# 需要9G左右显存
42-
python -u run_finetune.py ./config/qwen/lora_argument_qwen2_0p5b.json
69+
python -u run_finetune.py ./config/sft_lora.json
4370
```
4471

4572

@@ -81,17 +108,17 @@ tar -zxvf ultrafeedback_binarized.tar.gz
81108

82109
单卡
83110
```bash
84-
python -u ./alignment/dpo/run_dpo.py ./config/qwen/dpo_argument_qwen2_0p5b.json
111+
python -u ./alignment/dpo/run_dpo.py ./config/dpo_full.json
85112
```
86113

87114
多卡
88115
```bash
89-
python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" ./alignment/dpo/run_dpo.py ./config/qwen/dpo_argument_qwen2_0p5b.json
116+
python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" ./alignment/dpo/run_dpo.py ./config/dpo_full.json
90117
```
91118

92119
### 2.3 LoRA DPO
93120

94121
LoRA DPO 启动命令参考
95122
```bash
96-
python -u ./alignment/dpo/run_dpo.py ./config/qwen/dpo_lora_argument_qwen2_0p5b.json
123+
python -u ./alignment/dpo/run_dpo.py ./config/dpo_lora.json
97124
```

examples/alignment/dpo/run_dpo.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,30 @@
4545
LlamaForCausalLMPipe,
4646
Qwen2ForCausalLM,
4747
Qwen2ForCausalLMPipe,
48+
Qwen2MoeForCausalLM,
49+
Qwen2MoeForCausalLMPipe,
50+
Qwen3ForCausalLM,
51+
Qwen3ForCausalLMPipe,
52+
Qwen3MoeForCausalLM,
53+
Qwen3MoeForCausalLMPipe,
4854
)
4955
from paddleformers.transformers.configuration_utils import LlmMetaConfig
5056
from paddleformers.trl import DPOTrainer
5157
from paddleformers.trl.llm_utils import get_lora_target_modules
5258
from paddleformers.utils.log import logger
5359

54-
flash_mask_support_list = [Qwen2ForCausalLM, Qwen2ForCausalLMPipe, LlamaForCausalLM, LlamaForCausalLMPipe]
60+
flash_mask_support_list = [
61+
LlamaForCausalLM,
62+
LlamaForCausalLMPipe,
63+
Qwen2ForCausalLM,
64+
Qwen2ForCausalLMPipe,
65+
Qwen2MoeForCausalLM,
66+
Qwen2MoeForCausalLMPipe,
67+
Qwen3ForCausalLM,
68+
Qwen3ForCausalLMPipe,
69+
Qwen3MoeForCausalLM,
70+
Qwen3MoeForCausalLMPipe,
71+
]
5572

5673

5774
def main():
@@ -123,7 +140,6 @@ def main():
123140
model_config = AutoConfig.from_pretrained(
124141
model_args.model_name_or_path,
125142
dtype=dtype,
126-
download_hub=model_args.download_hub,
127143
)
128144
model_config._attn_implementation = model_args.attn_impl
129145

@@ -133,7 +149,6 @@ def main():
133149
ref_model_config = AutoConfig.from_pretrained(
134150
model_args.model_name_or_path,
135151
dtype=dtype,
136-
download_hub=model_args.download_hub,
137152
)
138153
LlmMetaConfig.set_llm_config(ref_model_config, training_args)
139154

@@ -148,7 +163,6 @@ def main():
148163
model = model_class.from_pretrained(
149164
model_args.model_name_or_path,
150165
config=model_config,
151-
download_hub=model_args.download_hub,
152166
convert_from_hf=training_args.convert_from_hf,
153167
)
154168
# for DPO save
@@ -170,11 +184,9 @@ def main():
170184
raise NotImplementedError(f"{model.__class__} not support flash mask.")
171185

172186
if model_args.tokenizer_name_or_path is not None:
173-
tokenizer = AutoTokenizer.from_pretrained(
174-
model_args.tokenizer_name_or_path, download_hub=model_args.download_hub
175-
)
187+
tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name_or_path)
176188
else:
177-
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, download_hub=model_args.download_hub)
189+
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path)
178190

179191
logger.info("Loading model & tokenizer successfully !")
180192

examples/config/dpo_full.json

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"model_name_or_path": "Qwen/Qwen3-0.6B-Base",
3+
"train_dataset_path": "./data/dpo/train.jsonl",
4+
"train_dataset_prob": "1.0",
5+
"train_dataset_type": "erniekit",
6+
"eval_dataset_path": "./data/dpo/dev.jsonl",
7+
"eval_dataset_prob": "1.0",
8+
"eval_dataset_type": "erniekit",
9+
"packing": false,
10+
"mix_strategy": "concat",
11+
"output_dir": "./checkpoints/qwen3_paddle_dpo_ckpts",
12+
"max_seq_len": 8192,
13+
"per_device_train_batch_size": 1,
14+
"gradient_accumulation_steps": 8,
15+
"per_device_eval_batch_size": 1,
16+
"num_train_epochs": 1,
17+
"learning_rate": 1e-06,
18+
"warmup_steps": 10,
19+
"logging_steps": 1,
20+
"max_steps": -1,
21+
"evaluation_strategy": "steps",
22+
"save_strategy": "steps",
23+
"eval_steps": 100,
24+
"save_steps": 100,
25+
"bf16": true,
26+
"fp16_opt_level": "O2",
27+
"do_train": true,
28+
"do_eval": true,
29+
"disable_tqdm": true,
30+
"recompute": true,
31+
"save_total_limit": 1,
32+
"tensor_parallel_degree": 1,
33+
"pipeline_parallel_degree": 1,
34+
"sharding": "stage2",
35+
"unified_checkpoint": true,
36+
"attn_impl": "flashmask"
37+
}

examples/config/dpo_lora.json

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
{
2+
"model_name_or_path": "Qwen/Qwen3-0.6B-Base",
3+
"train_dataset_path": "./data/dpo/train.jsonl",
4+
"train_dataset_prob": "1.0",
5+
"train_dataset_type": "erniekit",
6+
"eval_dataset_path": "./data/dpo/dev.jsonl",
7+
"eval_dataset_prob": "1.0",
8+
"eval_dataset_type": "erniekit",
9+
"packing": false,
10+
"mix_strategy": "concat",
11+
"output_dir": "./checkpoints/qwen3_paddle_dpo_lora_ckpts",
12+
"max_seq_len": 8192,
13+
"per_device_train_batch_size": 1,
14+
"gradient_accumulation_steps": 8,
15+
"per_device_eval_batch_size": 1,
16+
"num_train_epochs": 1,
17+
"learning_rate": 1e-05,
18+
"warmup_steps": 10,
19+
"logging_steps": 1,
20+
"max_steps": -1,
21+
"evaluation_strategy": "steps",
22+
"save_strategy": "steps",
23+
"eval_steps": 100,
24+
"save_steps": 100,
25+
"bf16": true,
26+
"fp16_opt_level": "O2",
27+
"do_train": true,
28+
"do_eval": true,
29+
"disable_tqdm": true,
30+
"recompute": true,
31+
"save_total_limit": 1,
32+
"tensor_parallel_degree": 1,
33+
"pipeline_parallel_degree": 1,
34+
"sharding": "stage2",
35+
"unified_checkpoint": true,
36+
"lora": true,
37+
"lora_rank": 64,
38+
"attn_impl": "flashmask"
39+
}

examples/config/sft_full.json

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
"model_name_or_path": "Qwen/Qwen3-0.6B-Base",
3+
"train_dataset_path": "./data/sft/train.json",
4+
"train_dataset_prob": "1.0",
5+
"train_dataset_type": "erniekit",
6+
"eval_dataset_path": "./data/sft/dev.json",
7+
"eval_dataset_prob": "1.0",
8+
"eval_dataset_type": "erniekit",
9+
"packing": false,
10+
"mix_strategy": "concat",
11+
"output_dir": "./checkpoints/qwen3_paddle_sft_ckpts",
12+
"max_seq_len": 8192,
13+
"per_device_train_batch_size": 1,
14+
"gradient_accumulation_steps": 4,
15+
"per_device_eval_batch_size": 1,
16+
"eval_accumulation_steps":16,
17+
"num_train_epochs": 1,
18+
"learning_rate": 3e-05,
19+
"warmup_steps": 10,
20+
"logging_steps": 1,
21+
"max_steps": -1,
22+
"evaluation_strategy": "steps",
23+
"save_strategy": "steps",
24+
"eval_steps": 100,
25+
"save_steps": 100,
26+
"bf16": true,
27+
"fp16_opt_level": "O2",
28+
"do_train": true,
29+
"do_eval": true,
30+
"disable_tqdm": true,
31+
"recompute": true,
32+
"save_total_limit": 1,
33+
"tensor_parallel_degree": 1,
34+
"pipeline_parallel_degree": 1,
35+
"sharding": "stage2",
36+
"unified_checkpoint": true,
37+
"attn_impl": "flashmask"
38+
}

examples/config/sft_lora.json

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
{
2+
"model_name_or_path": "Qwen/Qwen3-0.6B-Base",
3+
"train_dataset_path": "./data/sft/train.json",
4+
"train_dataset_prob": "1.0",
5+
"train_dataset_type": "erniekit",
6+
"eval_dataset_path": "./data/sft/dev.json",
7+
"eval_dataset_prob": "1.0",
8+
"eval_dataset_type": "erniekit",
9+
"packing": false,
10+
"mix_strategy": "concat",
11+
"output_dir": "./checkpoints/qwen3_paddle_lora_ckpts",
12+
"max_seq_len": 8192,
13+
"per_device_train_batch_size": 1,
14+
"gradient_accumulation_steps": 4,
15+
"per_device_eval_batch_size": 1,
16+
"eval_accumulation_steps":16,
17+
"num_train_epochs": 1,
18+
"learning_rate": 3e-04,
19+
"warmup_steps": 10,
20+
"logging_steps": 1,
21+
"max_steps": -1,
22+
"evaluation_strategy": "steps",
23+
"save_strategy": "steps",
24+
"eval_steps": 100,
25+
"save_steps": 100,
26+
"bf16": true,
27+
"fp16_opt_level": "O2",
28+
"do_train": true,
29+
"do_eval": true,
30+
"disable_tqdm": true,
31+
"recompute": true,
32+
"save_total_limit": 1,
33+
"tensor_parallel_degree": 1,
34+
"pipeline_parallel_degree": 1,
35+
"sharding": "stage2",
36+
"unified_checkpoint": true,
37+
"lora": true,
38+
"attn_impl": "flashmask"
39+
}

examples/run_finetune.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ def main():
132132
model_config = AutoConfig.from_pretrained(
133133
model_args.model_name_or_path,
134134
dtype=dtype,
135-
download_hub=model_args.download_hub,
136135
)
137136

138137
architectures_to_check = {"Qwen2Moe", "DeepseekV2", "DeepseekV3"}
@@ -186,8 +185,7 @@ def main():
186185
model = model_class.from_pretrained(
187186
model_args.model_name_or_path,
188187
config=model_config,
189-
download_hub=model_args.download_hub,
190-
convert_from_hf=training_args.convert_from_hf, # run paddle weights
188+
convert_from_hf=training_args.convert_from_hf,
191189
)
192190
else:
193191
model = model_class.from_config(model_config, dtype=dtype)
@@ -214,7 +212,7 @@ def neft_post_hook(module, input, output):
214212
raise NotImplementedError("Only support neftune for model with get_input_embeddings")
215213

216214
# Load tokenizer & dataset
217-
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, download_hub=model_args.download_hub)
215+
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path)
218216
# tokenizer.chat_template = None
219217

220218
# init chat_template for tokenizer

paddleformers/trainer/training_args.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,11 +1081,11 @@ class TrainingArguments:
10811081
metadata={"help": "是否开启单路sharding时global norm通信拆分全局通信组为pp通信和mp通信分别做"},
10821082
)
10831083
convert_from_hf: Optional[bool] = field(
1084-
default=False,
1084+
default=True,
10851085
metadata={"help": "Load model from HuggingFace safetensors."},
10861086
)
10871087
save_to_hf: Optional[bool] = field(
1088-
default=False,
1088+
default=True,
10891089
metadata={"help": "Save model to HuggingFace safetensors."},
10901090
)
10911091

paddleformers/transformers/qwen2/modeling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
120120
sin = sin[position_ids].unsqueeze(2) # [bs, seq_len, 1, dim]
121121
q_embed = (q * cos) + (rotate_half(q) * sin)
122122
k_embed = (k * cos) + (rotate_half(k) * sin)
123-
return q_embed, k_embed
123+
return q_embed.astype(q.dtype), k_embed.astype(q.dtype)
124124

125125

126126
class Qwen2Attention(nn.Layer):

paddleformers/trl/model_config.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,6 @@ class ModelConfig:
9898
# reft related parameter
9999
reft: bool = field(default=False, metadata={"help": "Whether using reft method"})
100100

101-
download_hub: str = field(
102-
default="aistudio",
103-
metadata={
104-
"help": "The source for model downloading, options include `huggingface`, `aistudio`, `modelscope`, default `aistudio`"
105-
},
106-
)
107101
save_to_aistudio: bool = field(default=False, metadata={"help": "Whether to save model to aistudio"})
108102
aistudio_repo_id: str = field(default=None, metadata={"help": "The id of aistudio repo"})
109103
aistudio_repo_private: bool = field(default=True, metadata={"help": "Whether to create a private repo"})
@@ -156,4 +150,6 @@ class ModelConfig:
156150
default=True,
157151
metadata={"help": "Whether to use attn_mask_start_row_indices in flash attention."},
158152
)
159-
pp_seg_method: Optional[str] = field(default=None, metadata={"help": "PP Segmentation Method"})
153+
pp_seg_method: Optional[str] = field(
154+
default="layer:DecoderLayer|EmptyLayer", metadata={"help": "PP Segmentation Method"}
155+
)

0 commit comments

Comments
 (0)