14 files changed: +67 -13 lines changed

@@ -74,6 +74,7 @@ cd swift/examples/pytorch/llm
 # If you want to push weights into the modelscope hub during training, you need to set '--push_to_hub true'.
 # Recommended experimental environment: A100
 bash scripts/qwen_7b_chat/lora/sft.sh
+# If you want to merge the LoRA weights and save them, you need to set `--merge_lora_and_save true`.
 bash scripts/qwen_7b_chat/lora/infer.sh

 # sft(lora+ddp) and infer qwen-7b-chat, requires 2*38GB GPU memory.
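The `--merge_lora_and_save` note added above maps onto the inference entry point shown later in this diff. As a minimal, hedged sketch of a direct call (only the merge-related flags are shown; a real run would carry the remaining flags from infer.sh, and the checkpoint path follows the output/<model_type>/vx_xxx/checkpoint-xxx placeholder pattern used elsewhere in this PR):

# Sketch only: merge the LoRA weights into the base model and save the result.
# Replace vx_xxx/checkpoint-xxx with the checkpoint directory written by sft.sh.
CUDA_VISIBLE_DEVICES=0 \
python src/llm_infer.py \
    --model_type qwen-7b-chat \
    --sft_type lora \
    --ckpt_dir "output/qwen-7b-chat/vx_xxx/checkpoint-xxx" \
    --merge_lora_and_save true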

@@ -76,6 +76,7 @@ cd swift/examples/pytorch/llm
 # If you want to push the weights to the modelscope hub during training, you need to set `--push_to_hub true`.
 # Recommended experimental environment: A100
 bash scripts/qwen_7b_chat/lora/sft.sh
+# If you want to merge the LoRA weights and save them, you need to set `--merge_lora_and_save true`.
 bash scripts/qwen_7b_chat/lora/infer.sh

 # Fine-tune (lora+ddp) and infer qwen-7b-chat, requires 2 GPUs * 38GB GPU memory.

 # Experimental environment: 3090
-# 12GB GPU memory
+# 10GB GPU memory
 CUDA_VISIBLE_DEVICES=0 \
 python src/llm_sft.py \
     --model_type baichuan2-7b \
@@ -17,7 +17,7 @@ python src/llm_sft.py \
     --lora_alpha 32 \
     --lora_dropout_p 0. \
     --lora_target_modules ALL \
-    --gradient_checkpointing false \
+    --gradient_checkpointing true \
     --batch_size 1 \
     --weight_decay 0. \
     --learning_rate 1e-4 \
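This flag flip (repeated in the internlm-20b and qwen ddp hunks further down) is what makes the lower memory figure plausible: with gradient checkpointing enabled, activations are recomputed during the backward pass instead of being cached, trading extra compute for a smaller peak footprint, which is consistent with the header dropping from 12GB to 10GB. If you want to verify the figure on your own hardware, one hedged way is to watch GPU memory while the script runs (assumes nvidia-smi is available):

# Poll used GPU memory every 5 seconds while sft.sh is running in another shell.
watch -n 5 nvidia-smi --query-gpu=memory.used --format=csv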

 # Experimental environment: A10
-# 20GB GPU memory
+# 14GB GPU memory
 CUDA_VISIBLE_DEVICES=0 \
 python src/llm_sft.py \
     --model_type internlm-20b \
@@ -17,7 +17,7 @@ python src/llm_sft.py \
     --lora_alpha 32 \
     --lora_dropout_p 0. \
     --lora_target_modules ALL \
-    --gradient_checkpointing false \
+    --gradient_checkpointing true \
     --batch_size 1 \
     --weight_decay 0. \
     --learning_rate 1e-4 \

+CUDA_VISIBLE_DEVICES=0 \
+python src/llm_infer.py \
+    --model_type internlm-20b-chat \
+    --sft_type lora \
+    --template_type internlm \
+    --dtype bf16 \
+    --ckpt_dir "output/internlm-20b-chat/vx_xxx/checkpoint-xxx" \
+    --eval_human false \
+    --dataset damo-agent-mini-zh \
+    --max_length 4096 \
+    --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
+    --max_new_tokens 2048 \
+    --temperature 0.9 \
+    --top_k 20 \
+    --top_p 0.9 \
+    --do_sample true \
+    --merge_lora_and_save false \
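The --ckpt_dir above is a placeholder (vx_xxx/checkpoint-xxx), and the 4-bit flags (--quantization_bit 4, --bnb_4bit_comp_dtype bf16) match the training script that follows, presumably so the adapter is applied to a base model loaded the same way as during fine-tuning. A small, hedged helper for pointing --ckpt_dir at the newest checkpoint once training has run, assuming the output layout matches the placeholder pattern:

# Print the most recently modified checkpoint directory (sketch; adjust the
# glob if your output directory layout differs).
ls -dt output/internlm-20b-chat/*/checkpoint-* | head -n 1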

+# Experimental environment: A10
+# 18GB GPU memory
+CUDA_VISIBLE_DEVICES=0 \
+python src/llm_sft.py \
+    --model_type internlm-20b-chat \
+    --sft_type lora \
+    --template_type internlm \
+    --dtype bf16 \
+    --output_dir output \
+    --dataset damo-agent-mini-zh \
+    --train_dataset_sample 20000 \
+    --num_train_epochs 1 \
+    --max_length 4096 \
+    --quantization_bit 4 \
+    --bnb_4bit_comp_dtype bf16 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --lora_dropout_p 0. \
+    --lora_target_modules q_proj v_proj \
+    --gradient_checkpointing true \
+    --batch_size 1 \
+    --weight_decay 0. \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps 16 \
+    --max_grad_norm 0.5 \
+    --warmup_ratio 0.03 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 10 \
+    --push_to_hub false \
+    --hub_model_id internlm-20b-chat-qlora \
+    --hub_private_repo true \
+    --hub_token 'your-sdk-token' \
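One way to read the batch-related flags above: with --batch_size 1, --gradient_accumulation_steps 16, and a single visible GPU, each optimizer step still accumulates gradients over 16 samples. An illustrative calculation (the shell variables are ours, not flags of llm_sft.py):

# Effective batch size implied by the settings above (illustrative only).
per_device_batch_size=1
grad_accum_steps=16
num_gpus=1   # CUDA_VISIBLE_DEVICES=0 -> one visible device
echo "effective batch size: $(( per_device_batch_size * grad_accum_steps * num_gpus ))"   # prints 16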

-# If you want to merge LoRA weights, please set merge_lora_and_save to true.
+# If you want to merge the LoRA weights and save them, you need to set `--merge_lora_and_save true`.
 CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b \

-# If you want to merge LoRA weights, please set merge_lora_and_save to true.
+# If you want to merge the LoRA weights and save them, you need to set `--merge_lora_and_save true`.
 CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \

-# If you want to merge LoRA weights, please set merge_lora_and_save to true.
+# If you want to merge the LoRA weights and save them, you need to set `--merge_lora_and_save true`.
 CUDA_VISIBLE_DEVICES=0 \
 python src/llm_infer.py \
     --model_type qwen-7b-chat \

@@ -20,7 +20,7 @@ torchrun \
     --lora_alpha 32 \
     --lora_dropout_p 0. \
     --lora_target_modules c_attn \
-    --gradient_checkpointing false \
+    --gradient_checkpointing true \
     --batch_size 1 \
     --weight_decay 0. \
     --learning_rate 1e-4 \