
Commit e893ed8

add dataset (#65)
1 parent 9752725 commit e893ed8

36 files changed: +266 additions, -123 deletions

examples/pytorch/llm/README.md

Lines changed: 13 additions & 7 deletions
@@ -62,34 +62,40 @@ pip install .
```

## Run SFT and Inference
+ Performance: full (best) > lora > qlora
+ Training GPU memory required: full (2*A100) > lora > qlora (low, 3090)
```bash
# Clone the repository and enter the code directory.
git clone https://github.com/modelscope/swift.git
cd swift/examples/pytorch/llm

- # sft lora and infer qwen-7b, Requires 27GB VRAM.
- # If you want to push weights into modelscope hub during training, you need to set '--push_to_hub true'
+ # sft (lora) and infer qwen-7b-chat, requires 27GB GPU memory.
+ # You can save GPU memory by setting `--gradient_checkpointing true`, but this will slightly decrease the training speed.
+ # If you want to push weights to the modelscope hub during training, you need to set `--push_to_hub true`.
+ # Recommended experimental environment: A100
bash scripts/qwen_7b_chat/lora/sft.sh
bash scripts/qwen_7b_chat/lora/infer.sh

- # sft (lora+ddp) and infer qwen-7b, Requires 4*27GB VRAM.
+ # sft (lora+ddp) and infer qwen-7b-chat, requires 2*27GB GPU memory.
bash scripts/qwen_7b_chat/lora_ddp/sft.sh
bash scripts/qwen_7b_chat/lora_ddp/infer.sh

- # sft (qlora) and infer qwen-7b, Requires 20GB VRAM.
+ # sft (qlora) and infer qwen-7b-chat, requires 13GB GPU memory.
# If you want to use quantization, you need to `pip install bitsandbytes -U`.
+ # Recommended experimental environment: 3090
bash scripts/qwen_7b_chat/qlora/sft.sh
bash scripts/qwen_7b_chat/qlora/infer.sh

- # sft (qlora+ddp) and infer qwen-7b, Requires 4*20GB VRAM.
+ # sft (qlora+ddp) and infer qwen-7b-chat, requires 2*13GB GPU memory.
bash scripts/qwen_7b_chat/qlora_ddp/sft.sh
bash scripts/qwen_7b_chat/qlora_ddp/infer.sh

- # sft (full) and infer qwen-7b, Requires 95GB VRAM.
+ # sft (full) and infer qwen-7b-chat, requires 100GB GPU memory.
+ # Recommended experimental environment: A100
bash scripts/qwen_7b_chat/full/sft.sh
bash scripts/qwen_7b_chat/full/infer.sh

- # For more scripts, please see the `scripts/` folder
+ # For more scripts, please see the `scripts/` folder.
```

## Extend Datasets
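
The comments added above mention gradient checkpointing, quantization, and pushing to the hub, but only through the wrapper scripts. Below is a minimal sketch of calling src/llm_sft.py directly with qlora-style flags, assembled only from flags that appear in this commit's scripts; the concrete values (model, datasets, bit width, hub push) are illustrative assumptions, not the contents of any particular scripts/*.sh file.

```bash
# Illustrative single-GPU qlora-style run; values are assumptions, not a tested recipe.
CUDA_VISIBLE_DEVICES=0 \
python src/llm_sft.py \
    --model_type qwen-7b \
    --sft_type lora \
    --dtype bf16 \
    --output_dir runs \
    --dataset alpaca-en,alpaca-zh \
    --dataset_sample 20000 \
    --num_train_epochs 1 \
    --max_length 2048 \
    --quantization_bit 4 \
    --bnb_4bit_comp_dtype bf16 \
    --lora_rank 8 \
    --lora_alpha 32 \
    --lora_dropout_p 0. \
    --gradient_checkpointing true \
    --batch_size 1 \
    --learning_rate 1e-4 \
    --gradient_accumulation_steps 16 \
    --push_to_hub true
```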

examples/pytorch/llm/README_CN.md

Lines changed: 13 additions & 7 deletions
@@ -64,34 +64,40 @@ pip install .
```

## Fine-tuning and Inference
+ Performance: full (best) > lora > qlora
+ Training GPU memory required: full (2*A100) > lora > qlora (low, 3090)
```bash
# Clone the repository and enter the code directory.
git clone https://github.com/modelscope/swift.git
cd swift/examples/pytorch/llm

- # Fine-tune (lora) + inference for qwen-7b, requires 27GB GPU memory.
- # If you want to push weights to the modelscope hub during training, set `--push_to_hub true`
+ # Fine-tune (lora) + inference for qwen-7b-chat, requires 27GB GPU memory.
+ # You can save GPU memory by setting `--gradient_checkpointing true`, but this will slightly slow down training.
+ # If you want to push weights to the modelscope hub during training, set `--push_to_hub true`.
+ # Recommended experimental environment: A100
bash scripts/qwen_7b_chat/lora/sft.sh
bash scripts/qwen_7b_chat/lora/infer.sh

- # Fine-tune (lora+ddp) + inference for qwen-7b, requires 4 GPUs * 27GB memory.
+ # Fine-tune (lora+ddp) + inference for qwen-7b-chat, requires 2 GPUs * 27GB memory.
bash scripts/qwen_7b_chat/lora_ddp/sft.sh
bash scripts/qwen_7b_chat/lora_ddp/infer.sh

- # Fine-tune (qlora) + inference for qwen-7b, requires 20GB GPU memory.
+ # Fine-tune (qlora) + inference for qwen-7b-chat, requires 13GB GPU memory.
# If you want to use quantization, you need to `pip install bitsandbytes -U`.
+ # Recommended experimental environment: 3090
bash scripts/qwen_7b_chat/qlora/sft.sh
bash scripts/qwen_7b_chat/qlora/infer.sh

- # Fine-tune (qlora+ddp) + inference for qwen-7b, requires 4 GPUs * 20GB memory.
+ # Fine-tune (qlora+ddp) + inference for qwen-7b-chat, requires 2 GPUs * 13GB memory.
bash scripts/qwen_7b_chat/qlora_ddp/sft.sh
bash scripts/qwen_7b_chat/qlora_ddp/infer.sh

- # Fine-tune (full) + inference for qwen-7b, requires 95GB GPU memory.
+ # Fine-tune (full) + inference for qwen-7b-chat, requires 100GB GPU memory.
+ # Recommended experimental environment: A100
bash scripts/qwen_7b_chat/full/sft.sh
bash scripts/qwen_7b_chat/full/infer.sh

- # For more scripts, see the `scripts` folder
+ # For more scripts, see the `scripts` folder.
```

## Extend Datasets
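
The "2 GPUs * 27GB" figures above refer to data-parallel (ddp) training. The lora_ddp scripts changed in this commit launch src/llm_sft.py through torchrun; the sketch below is an illustrative reconstruction of such a launch using only flags visible in this commit's diffs, with assumed values (2 GPUs, alpaca datasets), not the literal contents of any script here.

```bash
# Illustrative 2-GPU ddp launch in the style of the lora_ddp scripts; values are assumptions.
nproc_per_node=2
CUDA_VISIBLE_DEVICES=0,1 \
torchrun --nproc_per_node=$nproc_per_node \
    src/llm_sft.py \
    --model_type qwen-7b \
    --sft_type lora \
    --dtype bf16 \
    --output_dir runs \
    --ddp_backend nccl \
    --dataset alpaca-en,alpaca-zh \
    --dataset_sample -1 \
    --num_train_epochs 1 \
    --max_length 2048 \
    --lora_rank 8 \
    --lora_alpha 32 \
    --lora_dropout_p 0. \
    --lora_target_modules c_attn c_proj \
    --gradient_checkpointing false \
    --batch_size 1 \
    --gradient_accumulation_steps $(expr 16 / $nproc_per_node)
```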

examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/sft.sh

Lines changed: 2 additions & 2 deletions
@@ -14,10 +14,10 @@ torchrun \
--dataset alpaca-en,alpaca-zh \
--dataset_sample 20000 \
--num_train_epochs 1 \
- --max_length 1024 \
+ --max_length 2048 \
--lora_rank 8 \
--lora_alpha 32 \
- --lora_dropout_p 0.05 \
+ --lora_dropout_p 0. \
--lora_target_modules W_pack o_proj \
--gradient_checkpointing false \
--batch_size 1 \

examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp/sft.sh

Lines changed: 2 additions & 2 deletions
@@ -13,10 +13,10 @@ torchrun \
--dataset alpaca-en,alpaca-zh \
--dataset_sample -1 \
--num_train_epochs 1 \
- --max_length 1024 \
+ --max_length 2048 \
--lora_rank 8 \
--lora_alpha 32 \
- --lora_dropout_p 0.1 \
+ --lora_dropout_p 0. \
--gradient_checkpointing false \
--batch_size 1 \
--weight_decay 0. \

examples/pytorch/llm/scripts/internlm_7b_chat/lora_ddp/sft.sh

Lines changed: 2 additions & 2 deletions
@@ -13,10 +13,10 @@ torchrun \
--dataset alpaca-en,alpaca-zh \
--dataset_sample 20000 \
--num_train_epochs 1 \
- --max_length 1024 \
+ --max_length 2048 \
--lora_rank 8 \
--lora_alpha 32 \
- --lora_dropout_p 0.05 \
+ --lora_dropout_p 0. \
--lora_target_modules q_proj k_proj v_proj \
--gradient_checkpointing false \
--batch_size 1 \

examples/pytorch/llm/scripts/llama2_70b_chat/qlora/sft.sh

Lines changed: 2 additions & 2 deletions
@@ -7,11 +7,11 @@ python src/llm_sft.py \
--dataset alpaca-en \
--dataset_sample 20000 \
--num_train_epochs 1 \
- --max_length 1024 \
+ --max_length 2048 \
--quantization_bit 4 \
--lora_rank 8 \
--lora_alpha 32 \
- --lora_dropout_p 0.1 \
+ --lora_dropout_p 0. \
--batch_size 1 \
--learning_rate 1e-4 \
--gradient_accumulation_steps 16 \

examples/pytorch/llm/scripts/openbuddy-llama2-70b/qlora/sft.sh

Lines changed: 2 additions & 2 deletions
@@ -9,12 +9,12 @@ python src/llm_sft.py \
--dataset alpaca-en,alpaca-zh \
--dataset_sample 20000 \
--num_train_epochs 1 \
- --max_length 1024 \
+ --max_length 2048 \
--quantization_bit 4 \
--bnb_4bit_comp_dtype bf16 \
--lora_rank 8 \
--lora_alpha 32 \
- --lora_dropout_p 0.1 \
+ --lora_dropout_p 0. \
--gradient_checkpointing false \
--batch_size 1 \
--weight_decay 0. \

examples/pytorch/llm/scripts/polylm_13b/qlora_ddp/sft.sh

Lines changed: 2 additions & 2 deletions
@@ -12,11 +12,11 @@ torchrun \
--dataset alpaca-en,alpaca-zh \
--dataset_sample -1 \
--num_train_epochs 1 \
- --max_length 1024 \
+ --max_length 2048 \
--quantization_bit 4 \
--lora_rank 8 \
--lora_alpha 32 \
- --lora_dropout_p 0.1 \
+ --lora_dropout_p 0. \
--batch_size 1 \
--learning_rate 1e-4 \
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \

examples/pytorch/llm/scripts/qwen_7b/lora_ddp/infer.sh

Lines changed: 5 additions & 2 deletions
@@ -2,10 +2,13 @@ CUDA_VISIBLE_DEVICES=0 \
python src/llm_infer.py \
--model_type qwen-7b \
--sft_type lora \
- --template_type default \
+ --template_type default-generation \
--dtype bf16 \
--ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \
- --eval_human true \
+ --eval_human false \
+ --dataset dureader-robust-zh \
+ --dataset_sample -1 \
+ --max_length 2048 \
--use_flash_attn true \
--max_new_tokens 1024 \
--temperature 0.9 \
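
This diff switches infer.sh from interactive evaluation (`--eval_human true`) to dataset-based evaluation on dureader-robust-zh. If you still want to chat with the tuned checkpoint by hand, a minimal sketch of the interactive variant, using only flags from the script above, looks like this; the checkpoint path is a placeholder, as in the script itself.

```bash
# Interactive inference against a trained LoRA checkpoint; ckpt_dir is a placeholder path.
CUDA_VISIBLE_DEVICES=0 \
python src/llm_infer.py \
    --model_type qwen-7b \
    --sft_type lora \
    --template_type default-generation \
    --dtype bf16 \
    --ckpt_dir "runs/qwen-7b/vx_xxx/checkpoint-xxx" \
    --eval_human true \
    --use_flash_attn true \
    --max_new_tokens 1024 \
    --temperature 0.9
```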

examples/pytorch/llm/scripts/qwen_7b/lora_ddp/sft.sh

Lines changed: 4 additions & 4 deletions
@@ -6,17 +6,17 @@ torchrun \
src/llm_sft.py \
--model_type qwen-7b \
--sft_type lora \
- --template_type default \
+ --template_type default-generation \
--dtype bf16 \
--output_dir runs \
--ddp_backend nccl \
- --dataset alpaca-en,alpaca-zh \
+ --dataset dureader-robust-zh \
--dataset_sample -1 \
--num_train_epochs 1 \
- --max_length 1024 \
+ --max_length 2048 \
--lora_rank 8 \
--lora_alpha 32 \
- --lora_dropout_p 0.05 \
+ --lora_dropout_p 0. \
--lora_target_modules c_attn c_proj \
--gradient_checkpointing false \
--batch_size 1 \
