
Commit f8976e9

fix template name (#389)

Switches the chat template for every Qwen1.5 (qwen1half) chat model registration from TemplateType.chatml to TemplateType.qwen, and capitalizes "qwen1.5" as "Qwen1.5" in the English and Chinese READMEs.

1 parent 408d500 commit f8976e9

3 files changed: +20 −20 lines

README.md

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ Users can check the [documentation of SWIFT](docs/source/GetStarted/快速使用.md)
 
 
 ## 🎉 News
-- 🔥2024.02.05: Support qwen1.5 series: like [qwen1.5-0.5b](https://www.modelscope.cn/models/qwen/Qwen1.5-0.5B/summary), [qwen1.5-7b](https://www.modelscope.cn/models/qwen/Qwen1.5-7B/summary), [qwen1.5-14b](https://www.modelscope.cn/models/qwen/Qwen1.5-14B/summary), etc. To view all supported qwen1.5 models, please check the [Model List](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md).
+- 🔥2024.02.05: Support Qwen1.5 series: like [qwen1.5-0.5b](https://www.modelscope.cn/models/qwen/Qwen1.5-0.5B/summary), [qwen1.5-7b](https://www.modelscope.cn/models/qwen/Qwen1.5-7B/summary), [qwen1.5-14b](https://www.modelscope.cn/models/qwen/Qwen1.5-14B/summary), etc. To view all supported Qwen1.5 models, please check the [Model List](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md).
 - 2024.02.01: Support openbmb-minicpm series: [openbmb-minicpm-2b-sft-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/openbmb_minicpm_2b_sft_chat), openbmb-minicpm-2b-chat.
 - 🔥2024.02.01: Support dataset mixture to reduce **Catastrophic Forgetting**. Use `--train_dataset_mix_ratio 2.0` to train! We also provide a common knowledge dataset [ms-bench](https://www.modelscope.cn/datasets/iic/ms_bench/summary).
 - 🔥2024.02.01: Support Agent training! Agent training algorithm comes from this [paper](https://arxiv.org/pdf/2309.00986.pdf). We also introduce the [ms-agent](https://www.modelscope.cn/datasets/iic/ms_agent/summary) dataset. Use [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/qwen_7b_chat/lora/sft.sh) to begin an agent training!
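The `--train_dataset_mix_ratio` flag mentioned in the news entries above also has a Python-side counterpart. A minimal sketch, assuming the `swift.llm` training entry point (`sft_main` / `SftArguments`) shown in the project's examples; the model-type string and argument names are illustrative and should be checked against the SWIFT docs:

```python
# Minimal sketch, assuming swift.llm exposes SftArguments / sft_main
# as in the project's examples; exact argument names may differ.
from swift.llm import SftArguments, sft_main

args = SftArguments(
    model_type='qwen1half-7b-chat',  # illustrative Qwen1.5 chat model id
    dataset=['ms-agent'],            # task data, per the Agent-training news entry
    # Mix roughly two general-knowledge samples (e.g. drawn from
    # ms-bench) per task sample to curb catastrophic forgetting.
    train_dataset_mix_ratio=2.0,
)
sft_main(args)
```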

README_CN.md

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ SWIFT (Scalable lightWeight Infrastructure for Fine-Tuning) is a scalable
 Users can check the [official SWIFT documentation](docs/source/GetStarted/快速使用.md) for details.
 
 ## 🎉 News
-- 🔥2024.02.05: Support qwen1.5 series models: [qwen1.5-0.5b](https://www.modelscope.cn/models/qwen/Qwen1.5-0.5B/summary), [qwen1.5-7b](https://www.modelscope.cn/models/qwen/Qwen1.5-7B/summary), [qwen1.5-14b](https://www.modelscope.cn/models/qwen/Qwen1.5-14B/summary), etc. For all supported qwen1.5 models, see the [Model List](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md).
+- 🔥2024.02.05: Support Qwen1.5 series models: [qwen1.5-0.5b](https://www.modelscope.cn/models/qwen/Qwen1.5-0.5B/summary), [qwen1.5-7b](https://www.modelscope.cn/models/qwen/Qwen1.5-7B/summary), [qwen1.5-14b](https://www.modelscope.cn/models/qwen/Qwen1.5-14B/summary), etc. For all supported Qwen1.5 models, see the [Model List](https://github.com/modelscope/swift/blob/main/docs/source/LLM/%E6%94%AF%E6%8C%81%E7%9A%84%E6%A8%A1%E5%9E%8B%E5%92%8C%E6%95%B0%E6%8D%AE%E9%9B%86.md).
 - 2024.02.01: Support openbmb-minicpm series: [openbmb-minicpm-2b-sft-chat](https://github.com/modelscope/swift/tree/main/examples/pytorch/llm/scripts/openbmb_minicpm_2b_sft_chat), openbmb-minicpm-2b-chat.
 - 🔥2024.02.01: Support dataset mixing to reduce **Catastrophic Forgetting**. Use `--train_dataset_mix_ratio 2.0` to enable it during training! We have also open-sourced [ms-bench](https://www.modelscope.cn/datasets/iic/ms_bench/summary), a general-knowledge dataset.
 - 🔥2024.02.01: Support Agent training! The Agent training algorithm comes from this [paper](https://arxiv.org/pdf/2309.00986.pdf). We have also added [ms-agent](https://www.modelscope.cn/datasets/iic/ms_agent/summary), a high-quality agent dataset. Use [this script](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/qwen_7b_chat/lora/sft.sh) to start Agent training!

swift/llm/utils/model.py

Lines changed: 18 additions & 18 deletions
@@ -724,7 +724,7 @@ def cross_entropy_forward(self, inputs: Tensor,
     ModelType.qwen1half_0_5b_chat,
     'qwen/Qwen1.5-0.5B-Chat',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     support_flash_attn=True,
     support_vllm=True,
     requires=['transformers>=4.37'])
@@ -740,7 +740,7 @@ def cross_entropy_forward(self, inputs: Tensor,
     ModelType.qwen1half_1_8b_chat,
     'qwen/Qwen1.5-1.8B-Chat',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     support_flash_attn=True,
     support_vllm=True,
     requires=['transformers>=4.37'])
@@ -756,7 +756,7 @@ def cross_entropy_forward(self, inputs: Tensor,
     ModelType.qwen1half_4b_chat,
     'qwen/Qwen1.5-4B-Chat',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     support_flash_attn=True,
     support_vllm=True,
     requires=['transformers>=4.37'])
@@ -772,7 +772,7 @@ def cross_entropy_forward(self, inputs: Tensor,
     ModelType.qwen1half_7b_chat,
     'qwen/Qwen1.5-7B-Chat',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     support_flash_attn=True,
     support_vllm=True,
     requires=['transformers>=4.37'])
@@ -788,7 +788,7 @@ def cross_entropy_forward(self, inputs: Tensor,
     ModelType.qwen1half_14b_chat,
     'qwen/Qwen1.5-14B-Chat',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     support_flash_attn=True,
     support_vllm=True,
     requires=['transformers>=4.37'])
@@ -804,7 +804,7 @@ def cross_entropy_forward(self, inputs: Tensor,
     ModelType.qwen1half_72b_chat,
     'qwen/Qwen1.5-72B-Chat',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     support_flash_attn=True,
     support_vllm=True,
     requires=['transformers>=4.37'])
@@ -1068,7 +1068,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_0_5b_chat_int4,
     'qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 4},
@@ -1078,7 +1078,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_0_5b_chat_int8,
     'qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 8},
@@ -1088,7 +1088,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_1_8b_chat_int4,
     'qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 4},
@@ -1098,7 +1098,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_1_8b_chat_int8,
     'qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 8},
@@ -1108,7 +1108,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_4b_chat_int4,
     'qwen/Qwen1.5-4B-Chat-GPTQ-Int4',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 4},
@@ -1118,7 +1118,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_4b_chat_int8,
     'qwen/Qwen1.5-4B-Chat-GPTQ-Int8',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 8},
@@ -1128,7 +1128,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_7b_chat_int4,
     'qwen/Qwen1.5-7B-Chat-GPTQ-Int4',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 4},
@@ -1138,7 +1138,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_7b_chat_int8,
     'qwen/Qwen1.5-7B-Chat-GPTQ-Int8',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 8},
@@ -1148,7 +1148,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_14b_chat_int4,
     'qwen/Qwen1.5-14B-Chat-GPTQ-Int4',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 4},
@@ -1158,7 +1158,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_14b_chat_int8,
     'qwen/Qwen1.5-14B-Chat-GPTQ-Int8',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 8},
@@ -1168,7 +1168,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_72b_chat_int4,
     'qwen/Qwen1.5-72B-Chat-GPTQ-Int4',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 4},
@@ -1178,7 +1178,7 @@ def get_model_tokenizer_with_flash_attn(model_dir: str,
     ModelType.qwen1half_72b_chat_int8,
     'qwen/Qwen1.5-72B-Chat-GPTQ-Int8',
     LoRATM.qwen1half,
-    TemplateType.chatml,
+    TemplateType.qwen,
     requires=['auto_gptq>=0.5', 'transformers>=4.37'],
     torch_dtype=torch.float16,
     function_kwargs={'bits': 8},
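
The eighteen hunks above all make the same substitution: every Qwen1.5 chat registration now points at `TemplateType.qwen` instead of the generic `TemplateType.chatml`. A minimal sketch of where that registered value ends up, assuming the `get_model_tokenizer` / `get_template` / `inference` helpers from SWIFT's usage examples (signatures approximate):

```python
# Minimal sketch, assuming swift.llm's documented helpers; signatures
# and defaults may vary between versions.
import torch
from swift.llm import (ModelType, TemplateType, get_model_tokenizer,
                       get_template, inference)

# Load a model registered above; after this commit its registration
# carries TemplateType.qwen instead of TemplateType.chatml.
model, tokenizer = get_model_tokenizer(
    ModelType.qwen1half_0_5b_chat, torch.bfloat16, {'device_map': 'auto'})

# Build the chat template the registration names and run one query.
template = get_template(TemplateType.qwen, tokenizer)
response, history = inference(model, template, 'Who are you?')
print(response)
```

In typical usage the template type is looked up from the registration (e.g. via `get_default_template_type`, where available) rather than hard-coded, which is why the template name recorded in model.py matters for downstream prompt construction.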
