Skip to content

Commit e471074

Browse files
authored
Fix deepspeed (#2778)
1 parent 629dca3 commit e471074

File tree

13 files changed

+75
-80
lines changed

13 files changed

+75
-80
lines changed

docs/source/Instruction/命令行参数.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787

8888
- 🔥output_dir: 默认为`output/<model_name>`
8989
- 🔥gradient_checkpointing: 是否使用gradient_checkpointing,默认为True
90-
- 🔥deepspeed: 默认为None. 可以设置为'zero2', 'zero3', 'zero2_offload', 'zero3_offload'来使用ms-swift内置的deepspeed配置文件
90+
- 🔥deepspeed: 默认为None. 可以设置为'zero0', 'zero1', 'zero2', 'zero3', 'zero2_offload', 'zero3_offload'来使用ms-swift内置的deepspeed配置文件
9191
- 🔥per_device_train_batch_size: 默认值1
9292
- 🔥per_device_eval_batch_size: 默认值1
9393
- weight_decay: weight衰减系数,默认值0.1

docs/source_en/Instruction/Command-line-parameters.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ This parameter list inherits from transformers `Seq2SeqTrainingArguments`, with
8888

8989
- 🔥output_dir: Default is `output/<model_name>`.
9090
- 🔥gradient_checkpointing: Whether to use gradient checkpointing, default is True.
91-
- 🔥deepspeed: Default is None. Can be set to 'zero2', 'zero3', 'zero2_offload', 'zero3_offload' to use the built-in deepspeed configuration files from ms-swift.
91+
- 🔥deepspeed: Default is None. Can be set to one of 'zero0', 'zero1', 'zero2', 'zero3', 'zero2_offload', or 'zero3_offload' to use the corresponding built-in DeepSpeed configuration file from ms-swift.
9292
- 🔥per_device_train_batch_size: Default is 1.
9393
- 🔥per_device_eval_batch_size: Default is 1.
9494
- weight_decay: Weight decay coefficient, default value is 0.1.

swift/llm/argument/train_args.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,10 @@ def _init_deepspeed(self):
164164
f'local_world_size: {self.local_world_size}.')
165165

166166
ds_config_folder = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'ds_config'))
167-
deepspeed_mapping = {name: f'{name}.json' for name in ['zero2', 'zero3', 'zero2_offload', 'zero3_offload']}
167+
deepspeed_mapping = {
168+
name: f'{name}.json'
169+
for name in ['zero0', 'zero1', 'zero2', 'zero3', 'zero2_offload', 'zero3_offload']
170+
}
168171
for ds_name, ds_config in deepspeed_mapping.items():
169172
if self.deepspeed == ds_name:
170173
self.deepspeed = os.path.join(ds_config_folder, ds_config)

swift/llm/argument/tuner_args.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,4 +222,4 @@ def _init_multimodal_full(self):
222222
if self.freeze_parameters:
223223
logger.info(f'freeze_parameters: {self.freeze_parameters}')
224224
if self.trainable_parameters:
225-
logger.info(f'trainable_parameters: {self.trainable_parameters}')
225+
logger.info(f'additional trainable_parameters: {self.trainable_parameters}')

swift/llm/ds_config/zero0.json

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"fp16": {
3+
"enabled": "auto",
4+
"loss_scale": 0,
5+
"loss_scale_window": 1000,
6+
"initial_scale_power": 16,
7+
"hysteresis": 2,
8+
"min_loss_scale": 1
9+
},
10+
11+
"bf16": {
12+
"enabled": "auto"
13+
},
14+
15+
"zero_optimization": {
16+
"stage": 0,
17+
"allgather_partitions": true,
18+
"allgather_bucket_size": 2e8,
19+
"overlap_comm": true,
20+
"reduce_scatter": true,
21+
"reduce_bucket_size": 2e8,
22+
"contiguous_gradients": true
23+
},
24+
25+
"gradient_accumulation_steps": "auto",
26+
"gradient_clipping": "auto",
27+
"steps_per_print": 2000,
28+
"train_batch_size": "auto",
29+
"train_micro_batch_size_per_gpu": "auto",
30+
"wall_clock_breakdown": false
31+
}

swift/llm/ds_config/zero1.json

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"fp16": {
3+
"enabled": "auto",
4+
"loss_scale": 0,
5+
"loss_scale_window": 1000,
6+
"initial_scale_power": 16,
7+
"hysteresis": 2,
8+
"min_loss_scale": 1
9+
},
10+
11+
"bf16": {
12+
"enabled": "auto"
13+
},
14+
15+
"zero_optimization": {
16+
"stage": 1,
17+
"offload_optimizer": {
18+
"device": "none",
19+
"pin_memory": true
20+
},
21+
"allgather_partitions": true,
22+
"allgather_bucket_size": 2e8,
23+
"overlap_comm": true,
24+
"reduce_scatter": true,
25+
"reduce_bucket_size": 2e8,
26+
"contiguous_gradients": true
27+
},
28+
29+
"gradient_accumulation_steps": "auto",
30+
"gradient_clipping": "auto",
31+
"steps_per_print": 2000,
32+
"train_batch_size": "auto",
33+
"train_micro_batch_size_per_gpu": "auto",
34+
"wall_clock_breakdown": false
35+
}

swift/llm/ds_config/zero2.json

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,6 @@
1212
"enabled": "auto"
1313
},
1414

15-
"optimizer": {
16-
"type": "AdamW",
17-
"params": {
18-
"lr": "auto",
19-
"betas": "auto",
20-
"eps": "auto",
21-
"weight_decay": "auto"
22-
}
23-
},
24-
25-
"scheduler": {
26-
"type": "WarmupCosineLR",
27-
"params": {
28-
"total_num_steps": "auto",
29-
"warmup_num_steps": "auto"
30-
}
31-
},
32-
3315
"zero_optimization": {
3416
"stage": 2,
3517
"offload_optimizer": {

swift/llm/ds_config/zero2_offload.json

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,6 @@
1212
"enabled": "auto"
1313
},
1414

15-
"optimizer": {
16-
"type": "AdamW",
17-
"params": {
18-
"lr": "auto",
19-
"betas": "auto",
20-
"eps": "auto",
21-
"weight_decay": "auto"
22-
}
23-
},
24-
25-
"scheduler": {
26-
"type": "WarmupCosineLR",
27-
"params": {
28-
"total_num_steps": "auto",
29-
"warmup_num_steps": "auto"
30-
}
31-
},
32-
3315
"zero_optimization": {
3416
"stage": 2,
3517
"offload_optimizer": {

swift/llm/ds_config/zero3.json

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,6 @@
1212
"enabled": "auto"
1313
},
1414

15-
"optimizer": {
16-
"type": "AdamW",
17-
"params": {
18-
"lr": "auto",
19-
"betas": "auto",
20-
"eps": "auto",
21-
"weight_decay": "auto"
22-
}
23-
},
24-
25-
"scheduler": {
26-
"type": "WarmupCosineLR",
27-
"params": {
28-
"total_num_steps": "auto",
29-
"warmup_num_steps": "auto"
30-
}
31-
},
32-
3315
"zero_optimization": {
3416
"stage": 3,
3517
"offload_optimizer": {

swift/llm/ds_config/zero3_offload.json

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,6 @@
1212
"enabled": "auto"
1313
},
1414

15-
"optimizer": {
16-
"type": "AdamW",
17-
"params": {
18-
"lr": "auto",
19-
"betas": "auto",
20-
"eps": "auto",
21-
"weight_decay": "auto"
22-
}
23-
},
24-
25-
"scheduler": {
26-
"type": "WarmupCosineLR",
27-
"params": {
28-
"total_num_steps": "auto",
29-
"warmup_num_steps": "auto"
30-
}
31-
},
32-
3315
"zero_optimization": {
3416
"stage": 3,
3517
"offload_optimizer": {

0 commit comments

Comments
 (0)