Skip to content

Commit 8e51ccc

Browse files
authored
Fix bug 0917 (#74)
1 parent 05c0987 commit 8e51ccc

File tree

53 files changed

+541
-468
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+541
-468
lines changed

examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/infer.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,12 @@ python src/llm_infer.py \
44
--sft_type lora \
55
--template_type baichuan \
66
--dtype bf16 \
7-
--ckpt_dir "runs/baichuan2-7b-chat/vx_xxx/checkpoint-xxx" \
7+
--ckpt_dir "output/baichuan2-7b-chat/vx_xxx/checkpoint-xxx" \
88
--eval_human false \
99
--dataset damo-agent-mini-zh \
1010
--max_length 4096 \
1111
--max_new_tokens 2048 \
1212
--temperature 0.9 \
13-
--top_k 50 \
13+
--top_k 20 \
1414
--top_p 0.9 \
1515
--do_sample true \

examples/pytorch/llm/scripts/baichuan2_7b_chat/lora_ddp/sft.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ torchrun \
1010
--sft_type lora \
1111
--template_type baichuan \
1212
--dtype bf16 \
13-
--output_dir runs \
13+
--output_dir output \
1414
--ddp_backend nccl \
1515
--dataset damo-agent-mini-zh \
1616
--train_dataset_sample -1 \

examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora/infer.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,14 @@ python src/llm_infer.py \
44
--sft_type lora \
55
--template_type baichuan \
66
--dtype bf16 \
7-
--ckpt_dir "runs/baichuan2-7b-chat/vx_xxx/checkpoint-xxx" \
7+
--ckpt_dir "output/baichuan2-7b-chat/vx_xxx/checkpoint-xxx" \
88
--eval_human false \
99
--dataset advertise-gen \
1010
--max_length 2048 \
1111
--quantization_bit 4 \
1212
--bnb_4bit_comp_dtype bf16 \
1313
--max_new_tokens 1024 \
1414
--temperature 0.9 \
15-
--top_k 50 \
15+
--top_k 20 \
1616
--top_p 0.9 \
1717
--do_sample true \

examples/pytorch/llm/scripts/baichuan2_7b_chat/qlora/sft.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ python src/llm_sft.py \
66
--sft_type lora \
77
--template_type baichuan \
88
--dtype bf16 \
9-
--output_dir runs \
9+
--output_dir output \
1010
--dataset advertise-gen \
1111
--train_dataset_sample -1 \
1212
--num_train_epochs 1 \

examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp/infer.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,12 @@ python src/llm_infer.py \
44
--sft_type lora \
55
--template_type chatglm2 \
66
--dtype bf16 \
7-
--ckpt_dir "runs/chatglm2-6b/vx_xxx/checkpoint-xxx" \
7+
--ckpt_dir "output/chatglm2-6b/vx_xxx/checkpoint-xxx" \
88
--eval_human false \
99
--dataset code-python-zh \
1010
--max_length 8192 \
1111
--max_new_tokens 1024 \
1212
--temperature 0.9 \
13-
--top_k 50 \
13+
--top_k 20 \
1414
--top_p 0.9 \
1515
--do_sample true \

examples/pytorch/llm/scripts/chatglm2_6b/lora_ddp/sft.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ torchrun \
1010
--sft_type lora \
1111
--template_type chatglm2 \
1212
--dtype bf16 \
13-
--output_dir runs \
13+
--output_dir output \
1414
--ddp_backend nccl \
1515
--dataset code-python-zh \
1616
--train_dataset_sample -1 \
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
11
CUDA_VISIBLE_DEVICES=0 \
22
python src/llm_infer.py \
3-
--model_type internlm-7b-chat \
3+
--model_type internlm-20b \
44
--sft_type lora \
5-
--template_type internlm \
5+
--template_type default-generation \
66
--dtype bf16 \
7-
--ckpt_dir "runs/internlm-7b-chat/vx_xxx/checkpoint-xxx" \
7+
--ckpt_dir "output/internlm-20b/vx_xxx/checkpoint-xxx" \
88
--eval_human false \
99
--dataset jd-zh \
1010
--max_length 2048 \
1111
--max_new_tokens 1024 \
1212
--temperature 0.9 \
13-
--top_k 50 \
13+
--top_k 20 \
1414
--top_p 0.9 \
1515
--do_sample true \

examples/pytorch/llm/scripts/internlm_7b_chat/lora_ddp/sft.sh renamed to examples/pytorch/llm/scripts/internlm_20b/lora_ddp/sft.sh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,11 @@ torchrun \
44
--nproc_per_node=$nproc_per_node \
55
--master_port 29500 \
66
src/llm_sft.py \
7-
--model_type internlm-7b-chat \
7+
--model_type internlm-20b \
88
--sft_type lora \
9-
--template_type internlm \
9+
--template_type default-generation \
1010
--dtype bf16 \
11-
--output_dir runs \
11+
--output_dir output \
1212
--ddp_backend nccl \
1313
--dataset jd-zh \
1414
--train_dataset_sample -1 \
@@ -30,6 +30,6 @@ torchrun \
3030
--save_total_limit 2 \
3131
--logging_steps 10 \
3232
--push_to_hub false \
33-
--hub_model_id internlm-7b-lora \
33+
--hub_model_id internlm-20b-lora \
3434
--hub_private_repo true \
3535
--hub_token 'your-sdk-token' \
Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
CUDA_VISIBLE_DEVICES=0 \
2+
python src/llm_infer.py \
3+
--model_type internlm-20b \
4+
--sft_type lora \
5+
--template_type default-generation \
6+
--dtype bf16 \
7+
--ckpt_dir "output/internlm-20b/vx_xxx/checkpoint-xxx" \
8+
--eval_human false \
9+
--dataset advertise-gen \
10+
--max_length 2048 \
11+
--quantization_bit 4 \
12+
--bnb_4bit_comp_dtype bf16 \
13+
--max_new_tokens 1024 \
14+
--temperature 0.9 \
15+
--top_k 20 \
16+
--top_p 0.9 \
17+
--do_sample true \
Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
1+
# Experimental environment: A10
2+
# 20GB GPU memory
3+
CUDA_VISIBLE_DEVICES=0 \
4+
python src/llm_sft.py \
5+
--model_type internlm-20b \
6+
--sft_type lora \
7+
--template_type default-generation \
8+
--dtype bf16 \
9+
--output_dir output \
10+
--dataset advertise-gen \
11+
--train_dataset_sample -1 \
12+
--num_train_epochs 1 \
13+
--max_length 2048 \
14+
--quantization_bit 4 \
15+
--bnb_4bit_comp_dtype bf16 \
16+
--lora_rank 8 \
17+
--lora_alpha 32 \
18+
--lora_dropout_p 0. \
19+
--lora_target_modules ALL \
20+
--gradient_checkpointing false \
21+
--batch_size 1 \
22+
--weight_decay 0. \
23+
--learning_rate 1e-4 \
24+
--gradient_accumulation_steps 16 \
25+
--max_grad_norm 0.5 \
26+
--warmup_ratio 0.03 \
27+
--eval_steps 100 \
28+
--save_steps 100 \
29+
--save_total_limit 2 \
30+
--logging_steps 10 \
31+
--push_to_hub false \
32+
--hub_model_id internlm-20b-qlora \
33+
--hub_private_repo true \
34+
--hub_token 'your-sdk-token' \

0 commit comments

Comments
 (0)