Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
146 commits
Select commit Hold shift + click to select a range
bab8cd1
update
Jintao-Huang Dec 29, 2025
a5b0192
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Dec 29, 2025
6c3799f
lint pass
Jintao-Huang Dec 29, 2025
7356963
update
Jintao-Huang Dec 29, 2025
4518360
update
Jintao-Huang Dec 29, 2025
cb779de
update
Jintao-Huang Dec 29, 2025
8eb5e8a
update
Jintao-Huang Dec 29, 2025
1ab87b1
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Dec 29, 2025
208536d
update
Jintao-Huang Dec 29, 2025
af8cef7
update
Jintao-Huang Dec 30, 2025
c17c9e5
Merge remote-tracking branch 'refs/remotes/origin/refactor_model_type…
Jintao-Huang Dec 30, 2025
999529c
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Dec 31, 2025
d475287
update
Jintao-Huang Dec 31, 2025
c497f81
update
Jintao-Huang Dec 31, 2025
bc901d0
update
Jintao-Huang Dec 31, 2025
86f49a2
update
Jintao-Huang Dec 31, 2025
6b2cfc4
update
Jintao-Huang Dec 31, 2025
a7d1e47
update
Jintao-Huang Dec 31, 2025
2b7ad87
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Dec 31, 2025
58e3c5f
update
Jintao-Huang Jan 4, 2026
78a0397
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Jan 4, 2026
c82e54e
fix
Jintao-Huang Jan 4, 2026
ccc11f4
update
Jintao-Huang Jan 4, 2026
7996307
update
Jintao-Huang Jan 4, 2026
0bb3103
update
Jintao-Huang Jan 4, 2026
0da5034
update
Jintao-Huang Jan 4, 2026
232a0ac
update
Jintao-Huang Jan 4, 2026
8f7b65e
update
Jintao-Huang Jan 4, 2026
0db729d
update
Jintao-Huang Jan 4, 2026
cad6256
update
Jintao-Huang Jan 4, 2026
82c0575
update
Jintao-Huang Jan 4, 2026
8508f97
update dataset
Jintao-Huang Jan 4, 2026
f4ab64e
update template/dataset
Jintao-Huang Jan 4, 2026
cecfb29
update model/agent_template/loss_scale
Jintao-Huang Jan 4, 2026
b509ba7
update hub
Jintao-Huang Jan 4, 2026
b3e7516
update infer_engine
Jintao-Huang Jan 4, 2026
6c2520e
update optimizers
Jintao-Huang Jan 4, 2026
9b967e9
update plugins/loss/metrics
Jintao-Huang Jan 4, 2026
4433794
update trainers/tuners
Jintao-Huang Jan 5, 2026
b78aefc
update loss
Jintao-Huang Jan 5, 2026
e6232a9
update metrics
Jintao-Huang Jan 5, 2026
2311fac
update rlhf_trainers
Jintao-Huang Jan 5, 2026
0319d04
update pipelines
Jintao-Huang Jan 5, 2026
33dc65b
update megatron/arguments
Jintao-Huang Jan 5, 2026
b685d3f
update megatron
Jintao-Huang Jan 5, 2026
4838510
update ui
Jintao-Huang Jan 5, 2026
b4d9098
update tests
Jintao-Huang Jan 5, 2026
dfa2107
update tests
Jintao-Huang Jan 5, 2026
8a8874d
update
Jintao-Huang Jan 5, 2026
620e414
update model
Jintao-Huang Jan 6, 2026
648a11c
update model
Jintao-Huang Jan 6, 2026
bca1468
update
Jintao-Huang Jan 6, 2026
77215dc
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Jan 6, 2026
96a6bed
update
Jintao-Huang Jan 6, 2026
ae1a3f4
update
Jintao-Huang Jan 6, 2026
de50793
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Jan 7, 2026
3ea50d3
update
Jintao-Huang Jan 7, 2026
b53e8ac
update
Jintao-Huang Jan 7, 2026
6a274dd
update
Jintao-Huang Jan 7, 2026
d719d62
update
Jintao-Huang Jan 7, 2026
e641378
update
Jintao-Huang Jan 7, 2026
404d96e
update
Jintao-Huang Jan 9, 2026
805083a
update
Jintao-Huang Jan 9, 2026
e7f72f0
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Jan 9, 2026
9e80a7d
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Jan 11, 2026
18006f3
update
Jintao-Huang Jan 11, 2026
236dcc5
update
Jintao-Huang Jan 11, 2026
acc3ec4
update
Jintao-Huang Jan 11, 2026
2b0a16c
update
Jintao-Huang Jan 11, 2026
78ed6f5
update
Jintao-Huang Jan 11, 2026
9fccb60
update
Jintao-Huang Jan 11, 2026
fe7b274
update
Jintao-Huang Jan 11, 2026
157043d
update
Jintao-Huang Jan 11, 2026
051e5ef
update
Jintao-Huang Jan 11, 2026
39165ea
update
Jintao-Huang Jan 12, 2026
cb64018
update
Jintao-Huang Jan 12, 2026
247bf28
update
Jintao-Huang Jan 12, 2026
800f77b
update
Jintao-Huang Jan 12, 2026
8f2c4ca
update
Jintao-Huang Jan 12, 2026
ac3bbf9
update readme
Jintao-Huang Jan 12, 2026
678f7f8
update
Jintao-Huang Jan 12, 2026
9130443
update
Jintao-Huang Jan 12, 2026
dbded02
update
Jintao-Huang Jan 12, 2026
a774123
fix
Jintao-Huang Jan 12, 2026
43c66e5
update
Jintao-Huang Jan 12, 2026
e1b6171
update
Jintao-Huang Jan 12, 2026
83f5cb9
update
Jintao-Huang Jan 12, 2026
d05aa15
Merge remote-tracking branch 'refs/remotes/origin/refactor_model_type…
Jintao-Huang Jan 12, 2026
bf8fc78
fix
Jintao-Huang Jan 12, 2026
1141ccd
update docs
Jintao-Huang Jan 12, 2026
f64c10b
update
Jintao-Huang Jan 12, 2026
a210b8f
update
Jintao-Huang Jan 12, 2026
5af5d26
update
Jintao-Huang Jan 12, 2026
874a593
update
Jintao-Huang Jan 12, 2026
63c57cb
update docs
Jintao-Huang Jan 12, 2026
67795f9
update docs
Jintao-Huang Jan 12, 2026
fd04c43
update
Jintao-Huang Jan 12, 2026
ec3caf0
update docs
Jintao-Huang Jan 12, 2026
a3249ff
Merge remote-tracking branch 'refs/remotes/origin/refactor_model_type…
Jintao-Huang Jan 12, 2026
aab7cf4
update docs
Jintao-Huang Jan 12, 2026
aba2c7e
update
Jintao-Huang Jan 12, 2026
54c7455
update
Jintao-Huang Jan 12, 2026
4fa688d
update
Jintao-Huang Jan 12, 2026
1d396eb
update
Jintao-Huang Jan 12, 2026
8f1265d
fix
Jintao-Huang Jan 12, 2026
2de91fa
fix ci
Jintao-Huang Jan 13, 2026
aa89a21
fix ci
Jintao-Huang Jan 13, 2026
2803cca
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Jan 13, 2026
9c5938e
fix ci
Jintao-Huang Jan 13, 2026
899055f
update
Jintao-Huang Jan 13, 2026
39a4b03
fix ci
Jintao-Huang Jan 13, 2026
642de01
update
Jintao-Huang Jan 13, 2026
24b5c97
update
Jintao-Huang Jan 13, 2026
2d244a8
fix
Jintao-Huang Jan 13, 2026
0dbd3dd
update
Jintao-Huang Jan 13, 2026
04619eb
Merge remote-tracking branch 'refs/remotes/origin/refactor_model_type…
Jintao-Huang Jan 13, 2026
1f65b1e
update pretrain
Jintao-Huang Jan 13, 2026
ce802e6
update
Jintao-Huang Jan 13, 2026
3f30d53
Merge remote-tracking branch 'refs/remotes/origin/refactor_model_type…
Jintao-Huang Jan 13, 2026
3d1f2a7
update
Jintao-Huang Jan 13, 2026
073e361
fix sp
Jintao-Huang Jan 13, 2026
5e65d49
fix reranker
Jintao-Huang Jan 13, 2026
6bba51a
fix grpo
Jintao-Huang Jan 13, 2026
16456ca
fix
Jintao-Huang Jan 13, 2026
f1a995a
fix
Jintao-Huang Jan 13, 2026
5da7607
fix gkd megatron
Jintao-Huang Jan 13, 2026
2aeec3f
fix
Jintao-Huang Jan 13, 2026
6994256
fix
Jintao-Huang Jan 13, 2026
9947f1a
Merge remote-tracking branch 'refs/remotes/origin/refactor_model_type…
Jintao-Huang Jan 13, 2026
b3978ff
fix torch_dtype
Jintao-Huang Jan 13, 2026
5aad7f2
fix
Jintao-Huang Jan 13, 2026
e4710d6
update
Jintao-Huang Jan 13, 2026
75b56bf
fix max_model_len
Jintao-Huang Jan 13, 2026
14e9eb4
fix
Jintao-Huang Jan 13, 2026
d0a6c46
fix
Jintao-Huang Jan 13, 2026
ff705ea
fix
Jintao-Huang Jan 13, 2026
adc4184
update
Jintao-Huang Jan 13, 2026
2a81ec3
update
Jintao-Huang Jan 13, 2026
448962e
fix unsloth
Jintao-Huang Jan 13, 2026
49eeef2
fix
Jintao-Huang Jan 13, 2026
bc7276c
fix grpo
Jintao-Huang Jan 13, 2026
eada51b
fix
Jintao-Huang Jan 13, 2026
8c5be30
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Jan 13, 2026
f85f0c9
fix model
Jintao-Huang Jan 13, 2026
98a2be2
fix
Jintao-Huang Jan 13, 2026
0171487
Merge branch 'main' into refactor_model_type_template
Jintao-Huang Jan 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/source/BestPractices/MLLM-Registration.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ register_model(
Model('Qwen/Qwen2.5-Omni-7B', 'Qwen/Qwen2.5-Omni-7B'),
]),
],
'my_qwen2_5_omni',
# 用来获取model和processor的函数。
get_model_tokenizer_qwen2_5_omni,
get_function=get_model_tokenizer_qwen2_5_omni,
template='my_qwen2_5_omni',
is_multimodal=True, # 是否是多模态模型
model_arch='my_qwen2_5_omni', # 通常只为多模态模型设置
# 用于model_type的自动匹配
Expand Down
2 changes: 1 addition & 1 deletion docs/source/Customization/Custom-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ ms-swift内置的模型,你可以直接通过指定model_id或者model_path来
register_model会在`MODEL_MAPPING`中注册模型,调用函数`register_model(model_meta)`即可完成模型注册,其中model_meta将存储模型的元信息。ModelMeta的参数列表如下:
- model_type: 必填项。模型类型,也是唯一ID。
- model_groups: 必填项。罗列ModelScope/HuggingFace的模型id和模型本地路径。运行[run_model_info.py](https://github.com/modelscope/ms-swift/blob/main/scripts/utils/run_model_info.py)文件将自动产生[支持的模型文档](https://swift.readthedocs.io/zh-cn/latest/Instruction/Supported-models-and-datasets.html)以及自动根据`--model`后缀匹配model_type。
- template: 必填项。命令行不额外指定`--template`时的默认template类型。
- get_function: 必填项。模型和tokenizer/processor(多模态模型)的加载函数。LLM通常设置为`get_model_tokenizer_with_flash_attn`即可。
- template: 命令行不额外指定`--template`时的默认template类型。默认为None。
- model_arch: 模型架构。默认为None。多模态模型训练需要设置该参数来确定llm/vit/aligner的前缀。
- architectures: config.json中的architectures项,用于自动匹配模型对应的model_type。默认为`[]`。
- additional_saved_files: 全参数训练和merge-lora时需要额外保存的文件。默认为`[]`。
Expand Down
4 changes: 2 additions & 2 deletions docs/source_en/BestPractices/MLLM-Registration.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ register_model(
Model('Qwen/Qwen2.5-Omni-7B', 'Qwen/Qwen2.5-Omni-7B'),
]),
],
'my_qwen2_5_omni',
# Function to get model and processor.
get_model_tokenizer_qwen2_5_omni,
get_function=get_model_tokenizer_qwen2_5_omni,
template='my_qwen2_5_omni',
is_multimodal=True, # Whether it's a multimodal model
model_arch='my_qwen2_5_omni', # Usually set only for multimodal models
# Used for automatic model_type matching
Expand Down
2 changes: 1 addition & 1 deletion docs/source_en/Customization/Custom-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ The `register_model` function registers a model in the `MODEL_MAPPING`. You can

- model_type: Required. The model type, which is also the unique ID.
- model_groups: Required. Lists the ModelScope/HuggingFace model IDs and local paths. Running the [run_model_info.py](https://github.com/modelscope/ms-swift/blob/main/scripts/utils/run_model_info.py) file will automatically generate the [supported models documentation](https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html) and automatically match the model_type based on the `--model` suffix.
- template: Required. The default template type when `--template` is not specified in the command line.
- get_function: Required. The loading function for the model and tokenizer/processor (for multi-modal models). For LLMs, this is typically set to `get_model_tokenizer_with_flash_attn`.
- template: The default template type used when `--template` is not explicitly specified on the command line. Defaults to None.
- model_arch: The model architecture. Defaults to None. Multi-modal model training requires setting this parameter to determine the prefix for llm/vit/aligner.
- architectures: The architectures item in config.json, used to automatically match the model with its model_type. Defaults to `[]`.
- additional_saved_files: Files that need to be additionally saved during full parameter training and merge-lora. Defaults to `[]`.
Expand Down
2 changes: 1 addition & 1 deletion examples/custom/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
model_groups=[
ModelGroup([Model('AI-ModelScope/Nemotron-Mini-4B-Instruct', 'nvidia/Nemotron-Mini-4B-Instruct')])
],
template='custom',
get_function=get_model_tokenizer_with_flash_attn,
template='custom',
ignore_patterns=['nemo'],
is_multimodal=False,
))
Expand Down
2 changes: 1 addition & 1 deletion examples/custom/model_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def get_function(model_dir: str,
model_groups=[
ModelGroup([Model('AI-ModelScope/Nemotron-Mini-4B-Instruct', 'nvidia/Nemotron-Mini-4B-Instruct')])
],
template='custom',
get_function=get_function,
template='custom',
ignore_patterns=['nemo'],
is_multimodal=False,
))
Expand Down
4 changes: 2 additions & 2 deletions examples/custom/my_qwen2_5_omni/my_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ def get_model_tokenizer_qwen2_5_omni(model_dir, *args, **kwargs):
Model('Qwen/Qwen2.5-Omni-7B', 'Qwen/Qwen2.5-Omni-7B'),
]),
],
'my_qwen2_5_omni',
# Function to get model and processor.
get_model_tokenizer_qwen2_5_omni,
get_function=get_model_tokenizer_qwen2_5_omni,
template='my_qwen2_5_omni',
is_multimodal=True, # Whether it's a multimodal model
model_arch='my_qwen2_5_omni', # Usually set only for multimodal models
# Used for automatic model_type matching
Expand Down
14 changes: 0 additions & 14 deletions swift/llm/model/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,10 @@ class LLMModelType:
qwen = 'qwen'
qwen2 = 'qwen2'
qwen2_5 = 'qwen2_5'
qwen2_5_math = 'qwen2_5_math'
qwen2_moe = 'qwen2_moe'
qwq_preview = 'qwq_preview'
qwq = 'qwq'
qwen3 = 'qwen3'
qwen3_guard = 'qwen3_guard'
qwen3_thinking = 'qwen3_thinking'
qwen3_nothinking = 'qwen3_nothinking'
qwen3_coder = 'qwen3_coder'
qwen3_moe = 'qwen3_moe'
qwen3_moe_thinking = 'qwen3_moe_thinking'
qwen3_next = 'qwen3_next'
qwen3_next_thinking = 'qwen3_next_thinking'
qwen3_emb = 'qwen3_emb'

qwen2_gte = 'qwen2_gte'
Expand All @@ -36,8 +27,6 @@ class LLMModelType:
reflection = 'reflection'
megrez = 'megrez'
yi = 'yi'
yi_coder = 'yi_coder'
sus = 'sus'
gpt_oss = 'gpt_oss'
seed_oss = 'seed_oss'

Expand Down Expand Up @@ -140,7 +129,6 @@ class LLMModelType:
hunyuan = 'hunyuan'
ernie = 'ernie'
gemma_emb = 'gemma_emb'
ernie_thinking = 'ernie_thinking'
longchat = 'longchat'
minimind = 'minimind'

Expand Down Expand Up @@ -220,7 +208,6 @@ class MLLMModelType:
llava_onevision_hf = 'llava_onevision_hf'
yi_vl = 'yi_vl'
ernie_vl = 'ernie_vl'
ernie_vl_thinking = 'ernie_vl_thinking'

llava_llama3_1_hf = 'llava_llama3_1_hf' # DaozeZhang
llava_llama3_hf = 'llava_llama3_hf' # xtuner
Expand Down Expand Up @@ -279,7 +266,6 @@ class MLLMModelType:
mistral_2503 = 'mistral_2503'
mistral_2506 = 'mistral_2506'
mistral_2512 = 'mistral_2512'
mistral_2512_thinking = 'mistral_2512_thinking'
paddle_ocr = 'paddle_ocr'
hunyuan_ocr = 'hunyuan_ocr'

Expand Down
6 changes: 3 additions & 3 deletions swift/llm/model/model/baai.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def get_model_tokenizer_emu3_gen(model_dir: str,
Model('BAAI/Emu3-Gen', 'BAAI/Emu3-Gen'),
]),
],
TemplateType.emu3_gen,
get_model_tokenizer_emu3_gen,
template=TemplateType.emu3_gen,
architectures=['Emu3ForCausalLM'],
model_arch=ModelArch.emu3_chat,
tags=['t2i'],
Expand Down Expand Up @@ -87,8 +87,8 @@ def get_model_tokenizer_emu3_chat(model_dir: str,
Model('BAAI/Emu3-Chat', 'BAAI/Emu3-Chat'),
]),
],
TemplateType.emu3_chat,
get_model_tokenizer_emu3_chat,
template=TemplateType.emu3_chat,
architectures=['Emu3ForCausalLM'],
model_arch=ModelArch.emu3_chat,
tags=['vision'],
Expand All @@ -111,7 +111,7 @@ def get_model_tokenizer_bge_reranker(*args, **kwargs):
Model('BAAI/bge-reranker-large', 'BAAI/bge-reranker-large'),
]),
],
TemplateType.bge_reranker,
get_model_tokenizer_bge_reranker,
template=TemplateType.bge_reranker,
architectures=['XLMRobertaForSequenceClassification'],
))
6 changes: 3 additions & 3 deletions swift/llm/model/model/baichuan.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def get_model_tokenizer_baichuan(model_dir: str,
Model('baichuan-inc/baichuan-7B', 'baichuan-inc/Baichuan-7B'),
]),
],
TemplateType.baichuan,
get_model_tokenizer_baichuan,
template=TemplateType.baichuan,
architectures=['BaichuanForCausalLM', 'BaiChuanForCausalLM'],
model_arch=ModelArch.baichuan,
requires=['transformers<4.34']))
Expand Down Expand Up @@ -75,8 +75,8 @@ def _new_forward(self, q, k, seqlen_offset=None, cu_seqlens=None, max_seqlen=Non
Model('baichuan-inc/Baichuan-M1-14B-Instruct', 'baichuan-inc/Baichuan-M1-14B-Instruct'),
]),
],
TemplateType.baichuan_m1,
get_model_tokenizer_baichuan_m1,
template=TemplateType.baichuan_m1,
architectures=['BaichuanM1ForCausalLM'],
model_arch=ModelArch.baichuan,
requires=['transformers>=4.48']))
Expand Down Expand Up @@ -140,8 +140,8 @@ def get_model_tokenizer_baichuan2(model_dir: str,
],
requires=['bitsandbytes<0.41.2', 'accelerate<0.26'])
],
TemplateType.baichuan,
get_model_tokenizer_baichuan2,
template=TemplateType.baichuan,
architectures=['BaichuanForCausalLM', 'BaiChuanForCausalLM'],
model_arch=ModelArch.baichuan,
))
36 changes: 6 additions & 30 deletions swift/llm/model/model/baidu.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,17 @@
ModelGroup([
Model('PaddlePaddle/ERNIE-4.5-0.3B-Base-PT', 'baidu/ERNIE-4.5-0.3B-PT'),
Model('PaddlePaddle/ERNIE-4.5-0.3B-PT', 'baidu/ERNIE-4.5-0.3B-PT'),
]),
], TemplateType.ernie),
ModelGroup([
Model('PaddlePaddle/ERNIE-4.5-21B-A3B-Base-PT', 'baidu/ERNIE-4.5-21B-A3B-Base-PT'),
Model('PaddlePaddle/ERNIE-4.5-21B-A3B-PT', 'baidu/ERNIE-4.5-21B-A3B-PT'),
Model('PaddlePaddle/ERNIE-4.5-300B-A47B-Base-PT', 'baidu/ERNIE-4.5-300B-A47B-Base-PT'),
Model('PaddlePaddle/ERNIE-4.5-300B-A47B-PT', 'baidu/ERNIE-4.5-300B-A47B-PT'),
]),
],
TemplateType.ernie,
get_model_tokenizer_with_flash_attn,
architectures=['Ernie4_5_ForCausalLM', 'Ernie4_5_MoeForCausalLM'],
))

register_model(
ModelMeta(
LLMModelType.ernie_thinking,
[
], TemplateType.ernie),
ModelGroup([
Model('PaddlePaddle/ERNIE-4.5-21B-A3B-Thinking', 'baidu/ERNIE-4.5-21B-A3B-Thinking'),
]),
], TemplateType.ernie_thinking),
],
TemplateType.ernie_thinking,
get_model_tokenizer_with_flash_attn,
architectures=['Ernie4_5_ForCausalLM', 'Ernie4_5_MoeForCausalLM'],
))
Expand All @@ -51,8 +40,8 @@
Model('PaddlePaddle/PaddleOCR-VL', 'PaddlePaddle/PaddleOCR-VL'),
]),
],
TemplateType.paddle_ocr,
get_model_tokenizer_multimodal,
template=TemplateType.paddle_ocr,
model_arch=ModelArch.keye_vl,
architectures=['PaddleOCRVLForConditionalGeneration'],
))
Expand All @@ -76,24 +65,11 @@ def get_model_tokenizer_ernie_vl(model_dir, *args, **kwargs):
Model('PaddlePaddle/ERNIE-4.5-VL-424B-A47B-PT', 'baidu/ERNIE-4.5-VL-424B-A47B-PT'),
Model('PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Base-PT', 'baidu/ERNIE-4.5-VL-28B-A3B-Base-PT'),
Model('PaddlePaddle/ERNIE-4.5-VL-424B-A47B-Base-PT', 'baidu/ERNIE-4.5-VL-424B-A47B-Base-PT'),
]),
],
TemplateType.ernie_vl,
get_model_tokenizer_ernie_vl,
model_arch=ModelArch.ernie_vl,
architectures=['Ernie4_5_VLMoeForConditionalGeneration'],
requires=['transformers>=4.52', 'moviepy'],
))

register_model(
ModelMeta(
MLLMModelType.ernie_vl_thinking,
[
], TemplateType.ernie_vl),
ModelGroup([
Model('PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Thinking', 'baidu/ERNIE-4.5-VL-28B-A3B-Thinking'),
]),
], TemplateType.ernie_vl_thinking),
],
TemplateType.ernie_vl_thinking,
get_model_tokenizer_ernie_vl,
model_arch=ModelArch.ernie_vl,
architectures=['Ernie4_5_VLMoeForConditionalGeneration'],
Expand Down
19 changes: 2 additions & 17 deletions swift/llm/model/model/mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,28 +238,13 @@ def get_model_tokenizer_mistral_2512(model_dir: str,
Model('mistralai/Ministral-3-14B-Base-2512', 'mistralai/Ministral-3-14B-Base-2512'),
Model('mistralai/Ministral-3-14B-Instruct-2512', 'mistralai/Ministral-3-14B-Instruct-2512'),
Model('mistralai/Ministral-3-14B-Instruct-2512-BF16', 'mistralai/Ministral-3-14B-Instruct-2512-BF16'),
]),
],
TemplateType.mistral_2512,
get_model_tokenizer_mistral_2512,
architectures=['Mistral3ForConditionalGeneration'],
model_arch=ModelArch.llava_hf,
requires=['transformers>=5.0.0.dev0', 'mistral-common>=1.8.6'],
tags=['vision'],
ignore_patterns=[],
))

register_model(
ModelMeta(
MLLMModelType.mistral_2512_thinking,
[
], TemplateType.mistral_2512),
ModelGroup([
Model('mistralai/Ministral-3-3B-Reasoning-2512', 'mistralai/Ministral-3-3B-Reasoning-2512'),
Model('mistralai/Ministral-3-8B-Reasoning-2512', 'mistralai/Ministral-3-8B-Reasoning-2512'),
Model('mistralai/Ministral-3-14B-Reasoning-2512', 'mistralai/Ministral-3-14B-Reasoning-2512'),
]),
], TemplateType.mistral_2512_thinking),
],
TemplateType.mistral_2512_thinking,
get_model_tokenizer_mistral_2512,
architectures=['Mistral3ForConditionalGeneration'],
model_arch=ModelArch.llava_hf,
Expand Down
Loading
Loading