Commit d2b45ec

support glm-edge & glm-edge-v (#2526)

1 parent 896aaac commit d2b45ec

7 files changed: +90 -2 lines changed

README.md

Lines changed: 2 additions & 1 deletion
@@ -55,7 +55,8 @@ You can contact us and communicate with us by adding our group:
 <img src="asset/discord_qr.jpg" width="200" height="200"> | <img src="asset/wechat.png" width="200" height="200">
 
 ## 🎉 News
-- 2024.11.28: Supports the model qwq-32b-preview, macro-o1, and the dataset open-o1. Use `swift infer --model_type qwq-32b-preview` for the experience.
+- 2024.11.29: Support for glm-edge and glm-edge-v series models. Use `swift infer --model_type glm-edge-v-2b` for the experience.
+- 2024.11.28: Supports the model qwq-32b-preview, marco-o1, and the dataset open-o1. Use `swift infer --model_type qwq-32b-preview` for the experience.
 - 2024.11.12: Supports training and deployment of the qwen2.5-coder series models: 0.5b, 3b, 14b, and 32b. Use `swift infer --model_type qwen2_5-coder-3b-instruct` to experience it.
 - 2024.10.26: Support for training and deploying aya-expanse series models. Experience it using `swift infer --model_type aya-expanse-32b`.
 - 2024.10.23: Support for training and deploying emu3-chat. Experience it using `swift infer --model_type emu3-chat`.
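For readers who want to try the new entry from Python rather than the shell, below is a rough equivalent of the `swift infer` command in the news item; it is a sketch assuming the `infer_main`/`InferArguments` entry points documented for ms-swift 2.x, not part of this commit.

```python
# Rough Python equivalent of `swift infer --model_type glm-edge-v-2b`.
# Assumption: ms-swift 2.x exposes infer_main and InferArguments from swift.llm.
from swift.llm import InferArguments, infer_main

infer_main(InferArguments(model_type='glm-edge-v-2b'))
```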

README_CN.md

Lines changed: 2 additions & 1 deletion
@@ -56,7 +56,8 @@ SWIFT has rich and comprehensive documentation; please see our documentation site:
 
 
 ## 🎉 News
-- 2024.11.28: Supports the models qwq-32b-preview and macro-o1, and the dataset open-o1. Use `swift infer --model_type qwq-32b-preview` to try it out.
+- 2024.11.29: Supports the glm-edge and glm-edge-v series models. Use `swift infer --model_type glm-edge-v-2b` to try it out.
+- 2024.11.28: Supports the models qwq-32b-preview and marco-o1, and the dataset open-o1. Use `swift infer --model_type qwq-32b-preview` to try it out.
 - 2024.11.12: Supports training through deployment of the qwen2.5-coder series models (0.5b, 3b, 14b, 32b). Use `swift infer --model_type qwen2_5-coder-3b-instruct` to try it out.
 - 2024.10.26: Supports training through deployment of the aya-expanse series models. Use `swift infer --model_type aya-expanse-32b` to try it out.
 - 2024.10.23: Supports training through deployment of emu3-chat. Use `swift infer --model_type emu3-chat` to try it out.

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 4 additions & 0 deletions
@@ -193,6 +193,8 @@
 |glm4-9b-chat|[ZhipuAI/glm-4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat/summary)|query_key_value|chatglm4|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.42|-|[THUDM/glm-4-9b-chat](https://huggingface.co/THUDM/glm-4-9b-chat)|
 |glm4-9b-chat-1m|[ZhipuAI/glm-4-9b-chat-1m](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m/summary)|query_key_value|chatglm4|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.42|-|[THUDM/glm-4-9b-chat-1m](https://huggingface.co/THUDM/glm-4-9b-chat-1m)|
 |codegeex4-9b-chat|[ZhipuAI/codegeex4-all-9b](https://modelscope.cn/models/ZhipuAI/codegeex4-all-9b/summary)|query_key_value|codegeex4|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers<4.42|coding|[THUDM/codegeex4-all-9b](https://huggingface.co/THUDM/codegeex4-all-9b)|
+|glm-edge-1_5b-chat|[ZhipuAI/glm-edge-1.5b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-1.5b-chat/summary)|q_proj, k_proj, v_proj|chatglm4|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.46|-|[THUDM/glm-edge-1.5b-chat](https://huggingface.co/THUDM/glm-edge-1.5b-chat)|
+|glm-edge-4b-chat|[ZhipuAI/glm-edge-4b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-4b-chat/summary)|q_proj, k_proj, v_proj|chatglm4|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.46|-|[THUDM/glm-edge-4b-chat](https://huggingface.co/THUDM/glm-edge-4b-chat)|
 |llama2-7b|[modelscope/Llama-2-7b-ms](https://modelscope.cn/models/modelscope/Llama-2-7b-ms/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;|&#x2714;|&#x2718;||-|[meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf)|
 |llama2-7b-chat|[modelscope/Llama-2-7b-chat-ms](https://modelscope.cn/models/modelscope/Llama-2-7b-chat-ms/summary)|q_proj, k_proj, v_proj|llama|&#x2714;|&#x2714;|&#x2714;|&#x2718;||-|[meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)|
 |llama2-13b|[modelscope/Llama-2-13b-ms](https://modelscope.cn/models/modelscope/Llama-2-13b-ms/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;|&#x2714;|&#x2718;||-|[meta-llama/Llama-2-13b-hf](https://huggingface.co/meta-llama/Llama-2-13b-hf)|
@@ -475,6 +477,8 @@
 |qwen2-vl-72b-instruct-gptq-int8|[qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.45.dev.0, qwen_vl_utils, auto_gptq>=0.5|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|
 |qwen2-vl-72b-instruct-awq|[qwen/Qwen2-VL-72B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-72B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.45.dev.0, qwen_vl_utils, autoawq|vision, video|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)|
 |glm4v-9b-chat|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b/summary)|^(transformer.encoder)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm4v|&#x2718;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.42|vision|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
+|glm-edge-v-2b|[ZhipuAI/glm-edge-v-2b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-2b/summary)|^(model.layers)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm-edge-v|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.46|vision|[THUDM/glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b)|
+|glm-edge-v-5b|[ZhipuAI/glm-edge-v-5b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-5b/summary)|^(model.layers)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm-edge-v|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.46|vision|[THUDM/glm-edge-v-5b](https://huggingface.co/THUDM/glm-edge-v-5b)|
 |llama3_2-11b-vision|[LLM-Research/Llama-3.2-11B-Vision](https://modelscope.cn/models/LLM-Research/Llama-3.2-11B-Vision/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3_2-vision-generation|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.45|vision|[meta-llama/Llama-3.2-11B-Vision](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision)|
 |llama3_2-11b-vision-instruct|[LLM-Research/Llama-3.2-11B-Vision-Instruct](https://modelscope.cn/models/LLM-Research/Llama-3.2-11B-Vision-Instruct/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3_2-vision|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.45|vision|[meta-llama/Llama-3.2-11B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct)|
 |llama3_2-90b-vision|[LLM-Research/Llama-3.2-90B-Vision](https://modelscope.cn/models/LLM-Research/Llama-3.2-90B-Vision/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3_2-vision-generation|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.45|vision|[meta-llama/Llama-3.2-90B-Vision](https://huggingface.co/meta-llama/Llama-3.2-90B-Vision)|

docs/source_en/Instruction/Supported-models-datasets.md

Lines changed: 4 additions & 0 deletions
@@ -193,6 +193,8 @@ The table below introcudes all models supported by SWIFT:
 |glm4-9b-chat|[ZhipuAI/glm-4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat/summary)|query_key_value|chatglm4|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.42|-|[THUDM/glm-4-9b-chat](https://huggingface.co/THUDM/glm-4-9b-chat)|
 |glm4-9b-chat-1m|[ZhipuAI/glm-4-9b-chat-1m](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat-1m/summary)|query_key_value|chatglm4|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.42|-|[THUDM/glm-4-9b-chat-1m](https://huggingface.co/THUDM/glm-4-9b-chat-1m)|
 |codegeex4-9b-chat|[ZhipuAI/codegeex4-all-9b](https://modelscope.cn/models/ZhipuAI/codegeex4-all-9b/summary)|query_key_value|codegeex4|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers<4.42|coding|[THUDM/codegeex4-all-9b](https://huggingface.co/THUDM/codegeex4-all-9b)|
+|glm-edge-1_5b-chat|[ZhipuAI/glm-edge-1.5b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-1.5b-chat/summary)|q_proj, k_proj, v_proj|chatglm4|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.46|-|[THUDM/glm-edge-1.5b-chat](https://huggingface.co/THUDM/glm-edge-1.5b-chat)|
+|glm-edge-4b-chat|[ZhipuAI/glm-edge-4b-chat](https://modelscope.cn/models/ZhipuAI/glm-edge-4b-chat/summary)|q_proj, k_proj, v_proj|chatglm4|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.46|-|[THUDM/glm-edge-4b-chat](https://huggingface.co/THUDM/glm-edge-4b-chat)|
 |llama2-7b|[modelscope/Llama-2-7b-ms](https://modelscope.cn/models/modelscope/Llama-2-7b-ms/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;|&#x2714;|&#x2718;||-|[meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf)|
 |llama2-7b-chat|[modelscope/Llama-2-7b-chat-ms](https://modelscope.cn/models/modelscope/Llama-2-7b-chat-ms/summary)|q_proj, k_proj, v_proj|llama|&#x2714;|&#x2714;|&#x2714;|&#x2718;||-|[meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf)|
 |llama2-13b|[modelscope/Llama-2-13b-ms](https://modelscope.cn/models/modelscope/Llama-2-13b-ms/summary)|q_proj, k_proj, v_proj|default-generation|&#x2714;|&#x2714;|&#x2714;|&#x2718;||-|[meta-llama/Llama-2-13b-hf](https://huggingface.co/meta-llama/Llama-2-13b-hf)|
@@ -475,6 +477,8 @@ The table below introcudes all models supported by SWIFT:
 |qwen2-vl-72b-instruct-gptq-int8|[qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.45.dev.0, qwen_vl_utils, auto_gptq>=0.5|vision, video|[Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-GPTQ-Int8)|
 |qwen2-vl-72b-instruct-awq|[qwen/Qwen2-VL-72B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-72B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.45.dev.0, qwen_vl_utils, autoawq|vision, video|[Qwen/Qwen2-VL-72B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct-AWQ)|
 |glm4v-9b-chat|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b/summary)|^(transformer.encoder)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm4v|&#x2718;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.42|vision|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
+|glm-edge-v-2b|[ZhipuAI/glm-edge-v-2b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-2b/summary)|^(model.layers)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm-edge-v|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.46|vision|[THUDM/glm-edge-v-2b](https://huggingface.co/THUDM/glm-edge-v-2b)|
+|glm-edge-v-5b|[ZhipuAI/glm-edge-v-5b](https://modelscope.cn/models/ZhipuAI/glm-edge-v-5b/summary)|^(model.layers)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm-edge-v|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.46|vision|[THUDM/glm-edge-v-5b](https://huggingface.co/THUDM/glm-edge-v-5b)|
 |llama3_2-11b-vision|[LLM-Research/Llama-3.2-11B-Vision](https://modelscope.cn/models/LLM-Research/Llama-3.2-11B-Vision/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3_2-vision-generation|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.45|vision|[meta-llama/Llama-3.2-11B-Vision](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision)|
 |llama3_2-11b-vision-instruct|[LLM-Research/Llama-3.2-11B-Vision-Instruct](https://modelscope.cn/models/LLM-Research/Llama-3.2-11B-Vision-Instruct/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3_2-vision|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.45|vision|[meta-llama/Llama-3.2-11B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct)|
 |llama3_2-90b-vision|[LLM-Research/Llama-3.2-90B-Vision](https://modelscope.cn/models/LLM-Research/Llama-3.2-90B-Vision/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llama3_2-vision-generation|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.45|vision|[meta-llama/Llama-3.2-90B-Vision](https://huggingface.co/meta-llama/Llama-3.2-90B-Vision)|

swift/llm/utils/model.py

Lines changed: 50 additions & 0 deletions
@@ -259,6 +259,11 @@ class ModelType:
     glm4_9b_chat = 'glm4-9b-chat'
     glm4_9b_chat_1m = 'glm4-9b-chat-1m'
     codegeex4_9b_chat = 'codegeex4-9b-chat'
+
+    glm_edge_1_5b_chat = 'glm-edge-1_5b-chat'
+    glm_edge_4b_chat = 'glm-edge-4b-chat'
+    glm_edge_v_2b = 'glm-edge-v-2b'
+    glm_edge_v_5b = 'glm-edge-v-5b'
     # llama2
     llama2_7b = 'llama2-7b'
     llama2_7b_chat = 'llama2-7b-chat'
@@ -711,6 +716,7 @@ class LoRATM(NamedTuple):
     molmo = 'molmo'
     deepseek_janus = 'deepseek_janus'
     emu3_chat = 'emu3_chat'
+    glm_edge_v = 'glm_edge_v'
     # default lora target modules for nlp llms.
     minicpm3 = ['q_a_proj', 'q_b_proj', 'kv_a_proj_with_mqa', 'kv_b_proj']
     baichuan = ['W_pack']
@@ -5158,6 +5164,22 @@ def get_model_tokenizer_deepseek_vl(model_dir: str,
     return model, tokenizer
 
 
+@register_model(
+    ModelType.glm_edge_1_5b_chat,
+    'ZhipuAI/glm-edge-1.5b-chat',
+    LoRATM.llama,
+    TemplateType.chatglm4,
+    support_flash_attn=True,
+    requires=['transformers>=4.46'],
+    hf_model_id='THUDM/glm-edge-1.5b-chat')
+@register_model(
+    ModelType.glm_edge_4b_chat,
+    'ZhipuAI/glm-edge-4b-chat',
+    LoRATM.llama,
+    TemplateType.chatglm4,
+    support_flash_attn=True,
+    requires=['transformers>=4.46'],
+    hf_model_id='THUDM/glm-edge-4b-chat')
 @register_model(
     ModelType.llama3_1_nemotron_70B_instruct_hf,
     'AI-ModelScope/Llama-3.1-Nemotron-70B-Instruct-HF',
@@ -6652,6 +6674,34 @@ def get_model_tokenizer_llava_hf(model_dir: str, *args, **kwargs):
     return model, tokenizer
 
 
+@register_model(
+    ModelType.glm_edge_v_2b,
+    'ZhipuAI/glm-edge-v-2b',
+    LoRATM.glm_edge_v,
+    TemplateType.glm_edge_v,
+    support_flash_attn=True,
+    placeholder_tokens=['<|begin_of_image|>'],
+    requires=['transformers>=4.46'],
+    tags=['multi-modal', 'vision'],
+    hf_model_id='THUDM/glm-edge-v-2b')
+@register_model(
+    ModelType.glm_edge_v_5b,
+    'ZhipuAI/glm-edge-v-5b',
+    LoRATM.glm_edge_v,
+    TemplateType.glm_edge_v,
+    support_flash_attn=True,
+    requires=['transformers>=4.46'],
+    placeholder_tokens=['<|begin_of_image|>'],
+    tags=['multi-modal', 'vision'],
+    hf_model_id='THUDM/glm-edge-v-5b')
+def get_model_tokenizer_glm_edge_v(model_dir: str, *args, **kwargs):
+    from transformers import AutoImageProcessor
+    processor = AutoImageProcessor.from_pretrained(model_dir)
+    model, tokenizer = get_model_tokenizer_with_flash_attn(model_dir, *args, **kwargs)
+    tokenizer.processor = processor
+    return model, tokenizer
+
+
 @register_model(
     ModelType.llama3_2_11b_vision,
     'LLM-Research/Llama-3.2-11B-Vision',
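Once registered, these model types resolve through SWIFT's regular loading path. A minimal sketch of loading one of the new chat models from Python, assuming the `swift.llm` helpers documented for ms-swift 2.x (`get_model_tokenizer`, `get_default_template_type`, `get_template`, `inference`); this is illustration, not part of the commit:

```python
# Minimal sketch, assuming the swift.llm helpers documented for ms-swift 2.x.
from swift.llm import (ModelType, get_default_template_type,
                       get_model_tokenizer, get_template, inference)

model_type = ModelType.glm_edge_1_5b_chat              # 'glm-edge-1_5b-chat', registered above
template_type = get_default_template_type(model_type)  # should resolve to chatglm4 per the registration

# get_model_tokenizer dispatches to the registered loader; for the glm-edge-v
# models that loader is get_model_tokenizer_glm_edge_v, which also attaches
# the image processor to the tokenizer.
model, tokenizer = get_model_tokenizer(model_type, model_kwargs={'device_map': 'auto'})
template = get_template(template_type, tokenizer)

response, history = inference(model, template, 'Who are you?')
print(response)
```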

swift/llm/utils/template.py

Lines changed: 25 additions & 0 deletions
@@ -62,6 +62,7 @@ class TemplateType:
     chatglm2 = 'chatglm2'
     chatglm3 = 'chatglm3'
     chatglm4 = 'chatglm4'
+    glm_edge_v = 'glm-edge-v'
     codegeex4 = 'codegeex4'
     llama = 'llama'  # llama2
     llama3 = 'llama3'
@@ -1920,6 +1921,30 @@ def data_collator(self, batch: List[Dict[str, Any]], padding_to: Optional[int] =
 
 register_template(TemplateType.glm4v, GLM4VTemplate(), infer_media_type='dialogue', lazy_tokenize=True, use_model=True)
 
+
+class GLMEdgeVTemplate(GLMTemplate):
+
+    def __init__(self):
+        super().__init__([], ['<|user|>\n{{QUERY}}\n<|assistant|>\n'], ['\n'], ['<|endoftext|>'], None,
+                         ['<|system|>\n{{SYSTEM}}\n'])
+
+    def replace_tag(self, media_type: Literal['image', 'video', 'audio'], index, example) -> List[Context]:
+        assert media_type == 'image'
+        return ['<|begin_of_image|>' * 578]
+
+    def _encode(self, example: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        inputs, _ = super()._encode(example)
+        if len(inputs) == 0:
+            return inputs, {}
+        processor = self.tokenizer.processor
+        images = example['images']
+        if images:
+            inputs['pixel_values'] = torch.tensor(processor(images).pixel_values)
+        return inputs, {}
+
+
+register_template(TemplateType.glm_edge_v, GLMEdgeVTemplate(), lazy_tokenize=True, use_model=True)
+
 register_template(
     TemplateType.yi_vl,
     YiVLTemplate([], [[8308], 'Human: {{QUERY}}\n', [8308], 'Assistant:'], ['\n'], ['\n', [8308]], yi_vl_default_system,
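To make the constructor arguments concrete: the template has an empty prefix, a `<|user|>`/`<|assistant|>` prompt, `\n` as the turn separator, `<|endoftext|>` as the suffix, and an optional `<|system|>` prefix, while `replace_tag` expands each image into 578 `<|begin_of_image|>` placeholders. A standalone sketch (not SWIFT code) of the resulting prompt layout, under the assumption that the image tag sits at the start of the user query:

```python
# Standalone sketch of the prompt layout GLMEdgeVTemplate encodes; the token
# strings and the 578-placeholder image expansion come from the diff above.
from typing import Optional

NUM_IMAGE_PLACEHOLDERS = 578

def render_glm_edge_v_prompt(query: str, system: Optional[str] = None, num_images: int = 0) -> str:
    parts = []
    if system is not None:
        parts.append(f'<|system|>\n{system}\n')
    # Assumption for illustration: images precede the query text; in SWIFT,
    # replace_tag expands each <image> tag wherever it appears in the query.
    image_tokens = '<|begin_of_image|>' * (NUM_IMAGE_PLACEHOLDERS * num_images)
    parts.append(f'<|user|>\n{image_tokens}{query}\n<|assistant|>\n')
    return ''.join(parts)

print(render_glm_edge_v_prompt('Describe this image.', num_images=1)[:60])
```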

swift/utils/module_mapping.py

Lines changed: 3 additions & 0 deletions
@@ -319,13 +319,16 @@ def __post_init__(self):
 
 EMU3_CHAT_KEYS = MultiModelKeys(language_model='model', )
 
+GLM_EDGE_V = MultiModelKeys(language_model='model.layers', vision_tower='model.vision')
+
 MODEL_KEYS_MAPPING = OrderedDict([
     # MLLM here
     ('qwen_audio', QWEN_AUDIO_KEYS),
     ('qwen_vl', QWEN_VL_KEYS),
     ('qwen2_audio', QWEN2_AUDIO_KEYS),
     ('qwen2_vl', QWEN2_VL_KEYS),
     ('glm4v', GLM4V_KEYS),
+    ('glm_edge_v', GLM_EDGE_V),
     ('llava_next_video', LLAVA_NEXT_VIDEO_KEYS),
     ('llava_llama', LLAVA_LLAMA_KEYS),
     ('llava', LLAVA_KEYS),
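The new mapping records which submodule prefixes hold the language model (`model.layers`) and the vision tower (`model.vision`). A minimal sketch, as an assumption about how such a split is typically exploited rather than SWIFT's actual training code, of what the prefixes enable:

```python
# Minimal sketch (assumption, not SWIFT's training loop): use the GLM_EDGE_V
# prefixes to freeze the vision tower while leaving language layers trainable.
import torch.nn as nn

def freeze_by_prefix(model: nn.Module, prefix: str = 'model.vision') -> None:
    # Parameter names follow module paths, so a MultiModelKeys prefix like
    # 'model.vision' selects exactly the vision-tower weights.
    for name, param in model.named_parameters():
        if name.startswith(prefix):
            param.requires_grad = False
```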
