
Commit a025967: update yi-9b sh (#510)
1 parent: 03a30bb

File tree

6 files changed: +61, -5 lines

README.md

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ Users can check the [documentation of SWIFT](docs/source/GetStarted/快速使用
 
 
 ## 🎉 News
-- 2024.03.06: Support training and inference of qwen1.5 awq series, support training and inference of [yi-9b](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_9b/lora_mp_ddp).
+- 2024.03.06: Support training and inference of qwen1.5 awq series, support training and inference of [yi-9b](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_9b/lora_zero3).
 - 🔥2024.02.29: Support [LLaMA PRO](https://arxiv.org/pdf/2401.02415.pdf), use [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/llamapro/sft.sh) to begin.
 - 🔥2024.02.29: Support [LoRA+](https://arxiv.org/pdf/2402.12354.pdf), use [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/lorap/sft.sh) to begin.
 - 2024.02.25: Support `swift export` to export models for **AWQ/GPTQ** quantization and push to ModelScope Hub. For more details, please refer to the document: [LLM Quantization Document](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E9%87%8F%E5%8C%96%E6%96%87%E6%A1%A3.md).

README_CN.md

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ SWIFT (Scalable lightWeight Infrastructure for Fine-Tuning) is a scalable
 Users can check the [official SWIFT documentation](docs/source/GetStarted/快速使用.md) for details.
 
 ## 🎉 News
-- 2024.03.06: Support training and inference of the qwen1.5 awq series, and training and inference of [yi-9b](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_9b/lora_mp_ddp).
+- 2024.03.06: Support training and inference of the qwen1.5 awq series, and training and inference of [yi-9b](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_9b/lora_zero3).
 - 🔥2024.02.29: Support [LLaMA PRO](https://arxiv.org/pdf/2401.02415.pdf); use [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/llamapro/sft.sh) to start training.
 - 🔥2024.02.29: Support [LoRA+](https://arxiv.org/pdf/2402.12354.pdf); use [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/yi_6b_chat/lorap/sft.sh) to start training.
 - 2024.02.25: Support `swift export` for **AWQ/GPTQ** quantized export of models and pushing them to the ModelScope Hub. See the documentation: [LLM Quantization Document](https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E9%87%8F%E5%8C%96%E6%96%87%E6%A1%A3.md).
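The `swift export` entry in both READMEs points at the quantization workflow rather than anything in this commit; a minimal sketch of such an invocation, assuming the `--quant_bits` and `--quant_method` options described in the linked LLM Quantization Document (the model type here is only an illustration):

# Hypothetical example; flags are assumed from the linked quantization document.
CUDA_VISIBLE_DEVICES=0 \
swift export \
    --model_type yi-9b \
    --quant_bits 4 \
    --quant_method awq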
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+# Experimental environment: A100
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --ckpt_dir "output/yi-9b/vx-xxx/checkpoint-xxx" \
+    --load_dataset_config true \
+    --max_length 2048 \
+    --use_flash_attn true \
+    --max_new_tokens 2048 \
+    --temperature 0.3 \
+    --top_p 0.7 \
+    --repetition_penalty 1. \
+    --do_sample true \
+    --merge_lora false \
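The `vx-xxx/checkpoint-xxx` segment of `--ckpt_dir` is a placeholder for the run directory that `swift sft` writes under `output/yi-9b/`; it stays a placeholder until filled in from an actual training run. With `--merge_lora false` the LoRA adapter is kept separate and loaded on top of the base model at inference time; flipping the same flag merges the adapter into the base weights first. A minimal variant, reusing only flags that already appear in the script above:

# Sketch: the same inference call, but with the LoRA weights merged first.
CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --ckpt_dir "output/yi-9b/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --merge_lora true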
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+# Experimental environment: 4 * A100
+# 4 * 30GB GPU memory
+# Train a chat model with agent capabilities and self-cognition from the base.
+nproc_per_node=4
+
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+NPROC_PER_NODE=$nproc_per_node \
+swift sft \
+    --model_type yi-9b \
+    --sft_type lora \
+    --tuner_backend swift \
+    --template_type default \
+    --dtype AUTO \
+    --output_dir output \
+    --dataset ms-agent \
+    --train_dataset_sample 20000 \
+    --train_dataset_mix_ratio 2 \
+    --num_train_epochs 3 \
+    --max_length 4096 \
+    --check_dataset_strategy warning \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --lora_dropout_p 0.05 \
+    --lora_target_modules ALL \
+    --lora_modules_to_save EMBEDDING LN \
+    --gradient_checkpointing true \
+    --batch_size 1 \
+    --weight_decay 0.1 \
+    --learning_rate 5e-5 \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --max_grad_norm 0.5 \
+    --warmup_ratio 0.03 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 10 \
+    --use_flash_attn false \
+    --self_cognition_sample 2000 \
+    --model_name 小黄 'Xiao Huang' \
+    --model_author 魔搭 ModelScope \
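The `$(expr 16 / $nproc_per_node)` expression keeps the effective global batch size fixed as the process count changes: each of the 4 processes accumulates 16 / 4 = 4 micro-batches of size 1, so the global batch is 1 × 4 × 4 = 16. A minimal sketch of that arithmetic (the variable names are illustrative, not part of the script):

# Global batch size = per-device batch * processes * accumulation steps.
nproc_per_node=4
batch_size=1
grad_accum=$(expr 16 / $nproc_per_node)                              # 4
global_batch=$(expr $batch_size \* $nproc_per_node \* $grad_accum)   # 16
echo "global batch size: $global_batch"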

examples/pytorch/llm/scripts/yi_9b/lora_mp_ddp/sft.sh renamed to examples/pytorch/llm/scripts/yi_9b/lora_zero3/sft.sh

Lines changed: 6 additions & 3 deletions
@@ -2,13 +2,15 @@
 # 4 * 22GB GPU memory
 # Train a chat model with agent capabilities and self-cognition from the base.
 
+nproc_per_node=4
+
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
-NPROC_PER_NODE=2 \
+NPROC_PER_NODE=$nproc_per_node \
 swift sft \
     --model_type yi-9b \
     --sft_type lora \
     --tuner_backend swift \
-    --template_type yi \
+    --template_type default \
     --dtype AUTO \
     --output_dir output \
     --dataset ms-agent \
@@ -26,7 +28,7 @@ swift sft \
     --batch_size 1 \
     --weight_decay 0.1 \
     --learning_rate 5e-5 \
-    --gradient_accumulation_steps 16 \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
     --max_grad_norm 0.5 \
     --warmup_ratio 0.03 \
     --eval_steps 100 \
@@ -35,5 +37,6 @@ swift sft \
     --logging_steps 10 \
     --use_flash_attn false \
     --self_cognition_sample 2000 \
+    --deepspeed default-zero3 \
     --model_name 小黄 'Xiao Huang' \
     --model_author 魔搭 ModelScope \
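The directory rename from lora_mp_ddp to lora_zero3 tracks the key change in this hunk: the added `--deepspeed default-zero3` switches the run from model parallel + DDP to DeepSpeed ZeRO stage 3, which shards parameters, gradients, and optimizer states across all 4 processes (hence `NPROC_PER_NODE` rising from 2 to 4), and `default-zero3` appears to name a ZeRO-3 configuration bundled with SWIFT.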
