Skip to content

Commit 1658ccb

Browse files
authored
support telechat2 (#2210)
1 parent b51c1c1 commit 1658ccb

File tree

4 files changed

+20
-8
lines changed

4 files changed

+20
-8
lines changed

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,9 @@
405405
|mamba-2.8b|[AI-ModelScope/mamba-2.8b-hf](https://modelscope.cn/models/AI-ModelScope/mamba-2.8b-hf/summary)|in_proj, x_proj, embeddings, out_proj|default-generation|✘|✘|✘|✘|transformers>=4.39.0|-|[state-spaces/mamba-2.8b-hf](https://huggingface.co/state-spaces/mamba-2.8b-hf)|
406406
|telechat-7b|[TeleAI/TeleChat-7B](https://modelscope.cn/models/TeleAI/TeleChat-7B/summary)|key_value, query|telechat|✔|✘|✘|✘||-|[Tele-AI/telechat-7B](https://huggingface.co/Tele-AI/telechat-7B)|
407407
|telechat-12b|[TeleAI/TeleChat-12B](https://modelscope.cn/models/TeleAI/TeleChat-12B/summary)|key_value, query|telechat|✔|✘|✘|✘||-|[Tele-AI/TeleChat-12B](https://huggingface.co/Tele-AI/TeleChat-12B)|
408-
|telechat-12b-v2|[TeleAI/TeleChat-12B-v2](https://modelscope.cn/models/TeleAI/TeleChat-12B-v2/summary)|key_value, query|telechat-v2|✔|✘|✘|✘||-|[Tele-AI/TeleChat-12B-v2](https://huggingface.co/Tele-AI/TeleChat-12B-v2)|
409-
|telechat-12b-v2-gptq-int4|[swift/TeleChat-12B-V2-GPTQ-Int4](https://modelscope.cn/models/swift/TeleChat-12B-V2-GPTQ-Int4/summary)|key_value, query|telechat-v2|✔|✘|✘|✘|auto_gptq>=0.5|-|-|
408+
|telechat-12b-v2|[TeleAI/TeleChat-12B-v2](https://modelscope.cn/models/TeleAI/TeleChat-12B-v2/summary)|key_value, query|telechat|✔|✘|✘|✘||-|[Tele-AI/TeleChat-12B-v2](https://huggingface.co/Tele-AI/TeleChat-12B-v2)|
409+
|telechat-12b-v2-gptq-int4|[swift/TeleChat-12B-V2-GPTQ-Int4](https://modelscope.cn/models/swift/TeleChat-12B-V2-GPTQ-Int4/summary)|key_value, query|telechat|✔|✘|✘|✘|auto_gptq>=0.5|-|-|
410+
|telechat2-115b|[TeleAI/TeleChat2-115B](https://modelscope.cn/models/TeleAI/TeleChat2-115B/summary)|key_value, query|telechat2|✔|✘|✘|✘||-|[Tele-AI/TeleChat2-115B](https://huggingface.co/Tele-AI/TeleChat2-115B)|
410411
|grok-1|[colossalai/grok-1-pytorch](https://modelscope.cn/models/colossalai/grok-1-pytorch/summary)|q_proj, k_proj, v_proj|default-generation|✘|✘|✘|✘||-|[hpcai-tech/grok-1](https://huggingface.co/hpcai-tech/grok-1)|
411412
|dbrx-instruct|[AI-ModelScope/dbrx-instruct](https://modelscope.cn/models/AI-ModelScope/dbrx-instruct/summary)|attn.Wqkv|dbrx|✔|✔|✘|✘|transformers>=4.36|moe|[databricks/dbrx-instruct](https://huggingface.co/databricks/dbrx-instruct)|
412413
|dbrx-base|[AI-ModelScope/dbrx-base](https://modelscope.cn/models/AI-ModelScope/dbrx-base/summary)|attn.Wqkv|dbrx|✔|✔|✘|✘|transformers>=4.36|moe|[databricks/dbrx-base](https://huggingface.co/databricks/dbrx-base)|

docs/source_en/Instruction/Supported-models-datasets.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,9 @@ The table below introduces all models supported by SWIFT:
405405
|mamba-2.8b|[AI-ModelScope/mamba-2.8b-hf](https://modelscope.cn/models/AI-ModelScope/mamba-2.8b-hf/summary)|in_proj, x_proj, embeddings, out_proj|default-generation|✘|✘|✘|✘|transformers>=4.39.0|-|[state-spaces/mamba-2.8b-hf](https://huggingface.co/state-spaces/mamba-2.8b-hf)|
406406
|telechat-7b|[TeleAI/TeleChat-7B](https://modelscope.cn/models/TeleAI/TeleChat-7B/summary)|key_value, query|telechat|✔|✘|✘|✘||-|[Tele-AI/telechat-7B](https://huggingface.co/Tele-AI/telechat-7B)|
407407
|telechat-12b|[TeleAI/TeleChat-12B](https://modelscope.cn/models/TeleAI/TeleChat-12B/summary)|key_value, query|telechat|✔|✘|✘|✘||-|[Tele-AI/TeleChat-12B](https://huggingface.co/Tele-AI/TeleChat-12B)|
408-
|telechat-12b-v2|[TeleAI/TeleChat-12B-v2](https://modelscope.cn/models/TeleAI/TeleChat-12B-v2/summary)|key_value, query|telechat-v2|✔|✘|✘|✘||-|[Tele-AI/TeleChat-12B-v2](https://huggingface.co/Tele-AI/TeleChat-12B-v2)|
409-
|telechat-12b-v2-gptq-int4|[swift/TeleChat-12B-V2-GPTQ-Int4](https://modelscope.cn/models/swift/TeleChat-12B-V2-GPTQ-Int4/summary)|key_value, query|telechat-v2|✔|✘|✘|✘|auto_gptq>=0.5|-|-|
408+
|telechat-12b-v2|[TeleAI/TeleChat-12B-v2](https://modelscope.cn/models/TeleAI/TeleChat-12B-v2/summary)|key_value, query|telechat|✔|✘|✘|✘||-|[Tele-AI/TeleChat-12B-v2](https://huggingface.co/Tele-AI/TeleChat-12B-v2)|
409+
|telechat-12b-v2-gptq-int4|[swift/TeleChat-12B-V2-GPTQ-Int4](https://modelscope.cn/models/swift/TeleChat-12B-V2-GPTQ-Int4/summary)|key_value, query|telechat|✔|✘|✘|✘|auto_gptq>=0.5|-|-|
410+
|telechat2-115b|[TeleAI/TeleChat2-115B](https://modelscope.cn/models/TeleAI/TeleChat2-115B/summary)|key_value, query|telechat2|✔|✘|✘|✘||-|[Tele-AI/TeleChat2-115B](https://huggingface.co/Tele-AI/TeleChat2-115B)|
410411
|grok-1|[colossalai/grok-1-pytorch](https://modelscope.cn/models/colossalai/grok-1-pytorch/summary)|q_proj, k_proj, v_proj|default-generation|✘|✘|✘|✘||-|[hpcai-tech/grok-1](https://huggingface.co/hpcai-tech/grok-1)|
411412
|dbrx-instruct|[AI-ModelScope/dbrx-instruct](https://modelscope.cn/models/AI-ModelScope/dbrx-instruct/summary)|attn.Wqkv|dbrx|✔|✔|✘|✘|transformers>=4.36|moe|[databricks/dbrx-instruct](https://huggingface.co/databricks/dbrx-instruct)|
412413
|dbrx-base|[AI-ModelScope/dbrx-base](https://modelscope.cn/models/AI-ModelScope/dbrx-base/summary)|attn.Wqkv|dbrx|✔|✔|✘|✘|transformers>=4.36|moe|[databricks/dbrx-base](https://huggingface.co/databricks/dbrx-base)|

swift/llm/utils/model.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,7 @@ class ModelType:
599599
telechat_12b = 'telechat-12b'
600600
telechat_12b_v2 = 'telechat-12b-v2'
601601
telechat_12b_v2_gptq_int4 = 'telechat-12b-v2-gptq-int4'
602+
telechat2_115b = 'telechat2-115b'
602603
# grok-1
603604
grok_1 = 'grok-1'
604605
# dbrx
@@ -930,6 +931,14 @@ def _new_forward(self, x):
930931
support_vllm=True,
931932
support_flash_attn=True,
932933
hf_model_id='CohereForAI/c4ai-command-r-plus')
934+
@register_model(
935+
ModelType.telechat2_115b,
936+
'TeleAI/TeleChat2-115B',
937+
LoRATM.telechat,
938+
TemplateType.telechat2,
939+
torch_dtype=torch.float16,
940+
support_flash_attn=True,
941+
hf_model_id='Tele-AI/TeleChat2-115B')
933942
def get_model_tokenizer_from_repo(model_dir: str,
934943
torch_dtype: Optional[torch.dtype],
935944
model_kwargs: Dict[str, Any],
@@ -5829,17 +5838,18 @@ def get_model_tokenizer_codellama(model_dir: str,
58295838
ModelType.telechat_12b_v2,
58305839
'TeleAI/TeleChat-12B-v2',
58315840
LoRATM.telechat,
5832-
TemplateType.telechat_v2,
5841+
TemplateType.telechat,
58335842
eos_token=2,
58345843
support_flash_attn=True,
58355844
hf_model_id='Tele-AI/TeleChat-12B-v2')
58365845
@register_model(
58375846
ModelType.telechat_12b_v2_gptq_int4,
58385847
'swift/TeleChat-12B-V2-GPTQ-Int4',
58395848
LoRATM.telechat,
5840-
TemplateType.telechat_v2,
5849+
TemplateType.telechat,
58415850
eos_token=2,
58425851
requires=['auto_gptq>=0.5'],
5852+
torch_dtype=torch.float16,
58435853
support_flash_attn=True,
58445854
function_kwargs={'gptq_bits': 4})
58455855
def get_model_tokenizer_phi(model_dir: str,

swift/llm/utils/template.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ class TemplateType:
138138
phi3 = 'phi3'
139139
phi3_vl = 'phi3-vl'
140140
telechat = 'telechat'
141-
telechat_v2 = 'telechat-v2'
141+
telechat2 = 'telechat2'
142142
dbrx = 'dbrx'
143143
mengzi = 'mengzi'
144144
c4ai = 'c4ai'
@@ -3448,7 +3448,7 @@ class MiniCPMV2_5Template(Llama3TemplateMixin, MiniCPMVTemplate):
34483448

34493449
register_template(TemplateType.telechat, Template([], ['<_user>{{QUERY}}<_bot>'], ['<_end>'], ['<_end>']))
34503450

3451-
register_template(TemplateType.telechat_v2, Template([], ['<_user> {{QUERY}}<_bot>'], [], ['<_end>']))
3451+
register_template(TemplateType.telechat2, Template(['<_start>'], [[4], '{{QUERY}}', [5]], ['<_end>'], ['<_end>']))
34523452

34533453
DBRX_SYSTEM = (
34543454
'You are DBRX, created by Databricks. You were last updated in December 2023. '

0 commit comments

Comments
 (0)