14 files changed: +67 -13 lines changed

@@ -74,6 +74,7 @@ cd swift/examples/pytorch/llm
 # If you want to push weights into the modelscope hub during training, you need to set '--push_to_hub true'.
 # Recommended experimental environment: A100
 bash scripts/qwen_7b_chat/lora/sft.sh
+# If you want to merge the LoRA weights and save them, you need to set `--merge_lora_and_save true`.
 bash scripts/qwen_7b_chat/lora/infer.sh

 # sft(lora+ddp) and infer qwen-7b-chat, requires 2*38GB GPU memory.
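The `--merge_lora_and_save` note added above maps onto the inference entry point shown later in this diff. As a minimal, hedged sketch of a direct call (only the merge-related flags are shown; a real run would carry the remaining flags from infer.sh, and the checkpoint path follows the output/<model_type>/vx_xxx/checkpoint-xxx placeholder pattern used elsewhere in this PR):

# Sketch only: merge the LoRA weights into the base model and save the result.
# Replace vx_xxx/checkpoint-xxx with the checkpoint directory written by sft.sh.
CUDA_VISIBLE_DEVICES=0 \
python src/llm_infer.py \
    --model_type qwen-7b-chat \
    --sft_type lora \
    --ckpt_dir "output/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
    --merge_lora_and_save true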

@@ -76,6 +76,7 @@ cd swift/examples/pytorch/llm
 # If you want to push the weights to the modelscope hub during training, you need to set `--push_to_hub true`.
 # Recommended experimental environment: A100
 bash scripts/qwen_7b_chat/lora/sft.sh
+# If you want to merge the LoRA weights and save them, you need to set `--merge_lora_and_save true`.
 bash scripts/qwen_7b_chat/lora/infer.sh

 # Fine-tune (lora+ddp) and infer qwen-7b-chat, requires 2 GPUs * 38GB GPU memory.

 # Experimental environment: 3090
-# 12GB GPU memory
+# 10GB GPU memory
 CUDA_VISIBLE_DEVICES=0 \
 python src/llm_sft.py \
     --model_type baichuan2-7b \
@@ -17,7 +17,7 @@ python src/llm_sft.py \
     --lora_alpha 32 \
     --lora_dropout_p 0. \
     --lora_target_modules ALL \
-    --gradient_checkpointing false \
+    --gradient_checkpointing true \
     --batch_size 1 \
     --weight_decay 0. \
     --learning_rate 1e-4 \
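This flag flip (repeated in the internlm-20b and qwen ddp hunks further down) is what makes the lower memory figure plausible: with gradient checkpointing enabled, activations are recomputed during the backward pass instead of being cached, trading extra compute for a smaller peak footprint, which is consistent with the header dropping from 12GB to 10GB. If you want to verify the figure on your own hardware, one hedged way is to watch GPU memory while the script runs (assumes nvidia-smi is available):

# Poll used GPU memory every 5 seconds while sft.sh is running in another shell.
watch -n 5 nvidia-smi --query-gpu=memory.used --format=csv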

 # Experimental environment: A10
-# 20GB GPU memory
+# 14GB GPU memory
 CUDA_VISIBLE_DEVICES=0 \
 python src/llm_sft.py \
     --model_type internlm-20b \
@@ -17,7 +17,7 @@ python src/llm_sft.py \
     --lora_alpha 32 \
     --lora_dropout_p 0. \
     --lora_target_modules ALL \
-    --gradient_checkpointing false \
+    --gradient_checkpointing true \
     --batch_size 1 \
     --weight_decay 0. \
     --learning_rate 1e-4 \

+CUDA_VISIBLE_DEVICES=0 \
+python src/llm_infer.py \
+    --model_type internlm-20b-chat \
+    --sft_type lora \
+    --template_type internlm \
+    --dtype bf16 \
+    --ckpt_dir "output/internlm-20b-chat/vx_xxx/checkpoint-xxx" \
+    --eval_human false \
+    --dataset damo-agent-mini-zh \
+    --max_length 4096 \
+    --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
+    --max_new_tokens 2048 \
+    --temperature 0.9 \
+    --top_k 20 \
+    --top_p 0.9 \
+    --do_sample true \
+    --merge_lora_and_save false \
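The --ckpt_dir above is a placeholder (vx_xxx/checkpoint-xxx), and the 4-bit flags (--quantization_bit 4, --bnb_4bit_comp_dtype bf16) match the training script that follows, presumably so the adapter is applied to a base model loaded the same way as during fine-tuning. A small, hedged helper for pointing --ckpt_dir at the newest checkpoint once training has run, assuming the output layout matches the placeholder pattern:

# Print the most recently modified checkpoint directory (sketch; adjust the
# glob if your output directory layout differs).
ls -dt output/internlm-20b-chat/*/checkpoint-* | head -n 1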

+# Experimental environment: A10
+# 18GB GPU memory
+CUDA_VISIBLE_DEVICES=0 \
+python src/llm_sft.py \
+    --model_type internlm-20b-chat \
+    --sft_type lora \
+    --template_type internlm \
+    --dtype bf16 \
+    --output_dir output \
+    --dataset damo-agent-mini-zh \
+    --train_dataset_sample 20000 \
+    --num_train_epochs 1 \
+    --max_length 4096 \
+    --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --lora_dropout_p 0. \
+    --lora_target_modules q_proj v_proj \
+    --gradient_checkpointing true \
+    --batch_size 1 \
+    --weight_decay 0. \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps 16 \
+    --max_grad_norm 0.5 \
+    --warmup_ratio 0.03 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 10 \
+    --push_to_hub false \
+    --hub_model_id internlm-20b-chat-qlora \
+    --hub_private_repo true \
+    --hub_token 'your-sdk-token' \
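One way to read the batch-related flags above: with --batch_size 1, --gradient_accumulation_steps 16, and a single visible GPU, each optimizer step still accumulates gradients over 16 samples. An illustrative calculation (the shell variables are ours, not flags of llm_sft.py):

# Effective batch size implied by the settings above (illustrative only).
per_device_batch_size=1
grad_accum_steps=16
num_gpus=1   # CUDA_VISIBLE_DEVICES=0 -> one visible device
echo "effective batch size: $(( per_device_batch_size * grad_accum_steps * num_gpus ))"   # prints 16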

-# If you want to merge LoRA weights, please set merge_lora_and_save to true.
+# If you want to merge the LoRA weights and save them, you need to set `--merge_lora_and_save true`.
 CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b \

-# If you want to merge LoRA weights, please set merge_lora_and_save to true.
+# If you want to merge the LoRA weights and save them, you need to set `--merge_lora_and_save true`.
 CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \

-# If you want to merge LoRA weights, please set merge_lora_and_save to true.
+# If you want to merge the LoRA weights and save them, you need to set `--merge_lora_and_save true`.
 CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \

@@ -20,7 +20,7 @@ torchrun \
     --lora_alpha 32 \
     --lora_dropout_p 0. \
     --lora_target_modules c_attn \
-    --gradient_checkpointing false \
+    --gradient_checkpointing true \
     --batch_size 1 \
     --weight_decay 0. \
     --learning_rate 1e-4 \