modelscope · Jintao-Huang · Jan 14, 2026 · Dec 29, 2025 · Dec 29, 2025 · Dec 29, 2025
diff --git a/docs/source/BestPractices/MLLM-Registration.md b/docs/source/BestPractices/MLLM-Registration.md
@@ -79,9 +79,9 @@ register_model(
                 Model('Qwen/Qwen2.5-Omni-7B', 'Qwen/Qwen2.5-Omni-7B'),
             ]),
         ],
-        'my_qwen2_5_omni',
         # 用来获取model和processor的函数。
-        get_model_tokenizer_qwen2_5_omni,
+        get_function=get_model_tokenizer_qwen2_5_omni,
+        template='my_qwen2_5_omni',
         is_multimodal=True,  # 是否是多模态模型
         model_arch='my_qwen2_5_omni',  # 通常只为多模态模型设置
         # 用于model_type的自动匹配

diff --git a/docs/source/Customization/Custom-model.md b/docs/source/Customization/Custom-model.md
@@ -13,8 +13,8 @@ ms-swift内置的模型，你可以直接通过指定model_id或者model_path来
 register_model会在`MODEL_MAPPING`中注册模型，调用函数`register_model(model_meta)`即可完成模型注册，其中model_meta将存储模型的元信息。ModelMeta的参数列表如下：
 - model_type: 必填项。模型类型，也是唯一ID。
 - model_groups: 必填项。罗列ModelScope/HuggingFace的模型id和模型本地路径。运行[run_model_info.py](https://github.com/modelscope/ms-swift/blob/main/scripts/utils/run_model_info.py)文件将自动产生[支持的模型文档](https://swift.readthedocs.io/zh-cn/latest/Instruction/Supported-models-and-datasets.html)以及自动根据`--model`后缀匹配model_type。
-- template: 必填项。命令行不额外指定`--template`时的默认template类型。
 - get_function: 必填项。模型和tokenizer/processor（多模态模型）的加载函数。LLM通常设置为`get_model_tokenizer_with_flash_attn`即可。
+- template: 命令行不额外指定`--template`时的默认template类型。默认为None。
 - model_arch: 模型架构。默认为None。多模态模型训练需要设置该参数来确定llm/vit/aligner的前缀。
 - architectures: config.json中的architectures项，用于自动匹配模型对应的model_type。默认为`[]`。
 - additional_saved_files: 全参数训练和merge-lora时需要额外保存的文件。默认为`[]`。

diff --git a/docs/source_en/BestPractices/MLLM-Registration.md b/docs/source_en/BestPractices/MLLM-Registration.md
@@ -79,9 +79,9 @@ register_model(
                 Model('Qwen/Qwen2.5-Omni-7B', 'Qwen/Qwen2.5-Omni-7B'),
             ]),
         ],
-        'my_qwen2_5_omni',
         # Function to get model and processor.
-        get_model_tokenizer_qwen2_5_omni,
+        get_function=get_model_tokenizer_qwen2_5_omni,
+        template='my_qwen2_5_omni',
         is_multimodal=True,  # Whether it's a multimodal model
         model_arch='my_qwen2_5_omni',  # Usually set only for multimodal models
         # Used for automatic model_type matching

diff --git a/docs/source_en/Customization/Custom-model.md b/docs/source_en/Customization/Custom-model.md
@@ -14,8 +14,8 @@ The `register_model` function registers a model in the `MODEL_MAPPING`. You can
 
 - model_type: Required. The model type, which is also the unique ID.
 - model_groups: Required. Lists the ModelScope/HuggingFace model IDs and local paths. Running the [run_model_info.py](https://github.com/modelscope/ms-swift/blob/main/scripts/utils/run_model_info.py) file will automatically generate the [supported models documentation](https://swift.readthedocs.io/en/latest/Instruction/Supported-models-and-datasets.html) and automatically match the model_type based on the `--model` suffix.
-- template: Required. The default template type when `--template` is not specified in the command line.
 - get_function: Required. The loading function for the model and tokenizer/processor (for multi-modal models). LLM is typically set to `get_model_tokenizer_with_flash_attn`.
+- template: The default template type used when `--template` is not specified on the command line. Defaults to None.
 - model_arch: The model architecture. Defaults to None. Multi-modal model training requires setting this parameter to determine the prefix for llm/vit/aligner.
 - architectures: The architectures item in config.json, used to automatically match the model with its model_type. Defaults to `[]`.
 - additional_saved_files: Files that need to be additionally saved during full parameter training and merge-lora. Defaults to `[]`.

diff --git a/examples/custom/model.py b/examples/custom/model.py
@@ -15,8 +15,8 @@
         model_groups=[
             ModelGroup([Model('AI-ModelScope/Nemotron-Mini-4B-Instruct', 'nvidia/Nemotron-Mini-4B-Instruct')])
         ],
-        template='custom',
         get_function=get_model_tokenizer_with_flash_attn,
+        template='custom',
         ignore_patterns=['nemo'],
         is_multimodal=False,
     ))

diff --git a/examples/custom/model_hf.py b/examples/custom/model_hf.py
@@ -42,8 +42,8 @@ def get_function(model_dir: str,
         model_groups=[
             ModelGroup([Model('AI-ModelScope/Nemotron-Mini-4B-Instruct', 'nvidia/Nemotron-Mini-4B-Instruct')])
         ],
-        template='custom',
         get_function=get_function,
+        template='custom',
         ignore_patterns=['nemo'],
         is_multimodal=False,
     ))

diff --git a/examples/custom/my_qwen2_5_omni/my_register.py b/examples/custom/my_qwen2_5_omni/my_register.py
@@ -74,9 +74,9 @@ def get_model_tokenizer_qwen2_5_omni(model_dir, *args, **kwargs):
                 Model('Qwen/Qwen2.5-Omni-7B', 'Qwen/Qwen2.5-Omni-7B'),
             ]),
         ],
-        'my_qwen2_5_omni',
         # Function to get model and processor.
-        get_model_tokenizer_qwen2_5_omni,
+        get_function=get_model_tokenizer_qwen2_5_omni,
+        template='my_qwen2_5_omni',
         is_multimodal=True,  # Whether it's a multimodal model
         model_arch='my_qwen2_5_omni',  # Usually set only for multimodal models
         # Used for automatic model_type matching

diff --git a/swift/llm/model/constant.py b/swift/llm/model/constant.py
@@ -8,19 +8,10 @@ class LLMModelType:
     qwen = 'qwen'
     qwen2 = 'qwen2'
     qwen2_5 = 'qwen2_5'
-    qwen2_5_math = 'qwen2_5_math'
     qwen2_moe = 'qwen2_moe'
-    qwq_preview = 'qwq_preview'
-    qwq = 'qwq'
     qwen3 = 'qwen3'
-    qwen3_guard = 'qwen3_guard'
-    qwen3_thinking = 'qwen3_thinking'
-    qwen3_nothinking = 'qwen3_nothinking'
-    qwen3_coder = 'qwen3_coder'
     qwen3_moe = 'qwen3_moe'
-    qwen3_moe_thinking = 'qwen3_moe_thinking'
     qwen3_next = 'qwen3_next'
-    qwen3_next_thinking = 'qwen3_next_thinking'
     qwen3_emb = 'qwen3_emb'
 
     qwen2_gte = 'qwen2_gte'
@@ -36,8 +27,6 @@ class LLMModelType:
     reflection = 'reflection'
     megrez = 'megrez'
     yi = 'yi'
-    yi_coder = 'yi_coder'
-    sus = 'sus'
     gpt_oss = 'gpt_oss'
     seed_oss = 'seed_oss'
 
@@ -140,7 +129,6 @@ class LLMModelType:
     hunyuan = 'hunyuan'
     ernie = 'ernie'
     gemma_emb = 'gemma_emb'
-    ernie_thinking = 'ernie_thinking'
     longchat = 'longchat'
     minimind = 'minimind'
 
@@ -220,7 +208,6 @@ class MLLMModelType:
     llava_onevision_hf = 'llava_onevision_hf'
     yi_vl = 'yi_vl'
     ernie_vl = 'ernie_vl'
-    ernie_vl_thinking = 'ernie_vl_thinking'
 
     llava_llama3_1_hf = 'llava_llama3_1_hf'  # DaozeZhang
     llava_llama3_hf = 'llava_llama3_hf'  # xtuner
@@ -279,7 +266,6 @@ class MLLMModelType:
     mistral_2503 = 'mistral_2503'
     mistral_2506 = 'mistral_2506'
     mistral_2512 = 'mistral_2512'
-    mistral_2512_thinking = 'mistral_2512_thinking'
     paddle_ocr = 'paddle_ocr'
     hunyuan_ocr = 'hunyuan_ocr'
 

diff --git a/swift/llm/model/model/baai.py b/swift/llm/model/model/baai.py
@@ -45,8 +45,8 @@ def get_model_tokenizer_emu3_gen(model_dir: str,
                 Model('BAAI/Emu3-Gen', 'BAAI/Emu3-Gen'),
             ]),
         ],
-        TemplateType.emu3_gen,
         get_model_tokenizer_emu3_gen,
+        template=TemplateType.emu3_gen,
         architectures=['Emu3ForCausalLM'],
         model_arch=ModelArch.emu3_chat,
         tags=['t2i'],
@@ -87,8 +87,8 @@ def get_model_tokenizer_emu3_chat(model_dir: str,
                 Model('BAAI/Emu3-Chat', 'BAAI/Emu3-Chat'),
             ]),
         ],
-        TemplateType.emu3_chat,
         get_model_tokenizer_emu3_chat,
+        template=TemplateType.emu3_chat,
         architectures=['Emu3ForCausalLM'],
         model_arch=ModelArch.emu3_chat,
         tags=['vision'],
@@ -111,7 +111,7 @@ def get_model_tokenizer_bge_reranker(*args, **kwargs):
                 Model('BAAI/bge-reranker-large', 'BAAI/bge-reranker-large'),
             ]),
         ],
-        TemplateType.bge_reranker,
         get_model_tokenizer_bge_reranker,
+        template=TemplateType.bge_reranker,
         architectures=['XLMRobertaForSequenceClassification'],
     ))
diff --git a/swift/llm/model/model/baichuan.py b/swift/llm/model/model/baichuan.py
@@ -41,8 +41,8 @@ def get_model_tokenizer_baichuan(model_dir: str,
                 Model('baichuan-inc/baichuan-7B', 'baichuan-inc/Baichuan-7B'),
             ]),
         ],
-        TemplateType.baichuan,
         get_model_tokenizer_baichuan,
+        template=TemplateType.baichuan,
         architectures=['BaichuanForCausalLM', 'BaiChuanForCausalLM'],
         model_arch=ModelArch.baichuan,
         requires=['transformers<4.34']))
@@ -75,8 +75,8 @@ def _new_forward(self, q, k, seqlen_offset=None, cu_seqlens=None, max_seqlen=Non
                 Model('baichuan-inc/Baichuan-M1-14B-Instruct', 'baichuan-inc/Baichuan-M1-14B-Instruct'),
             ]),
         ],
-        TemplateType.baichuan_m1,
         get_model_tokenizer_baichuan_m1,
+        template=TemplateType.baichuan_m1,
         architectures=['BaichuanM1ForCausalLM'],
         model_arch=ModelArch.baichuan,
         requires=['transformers>=4.48']))
@@ -140,8 +140,8 @@ def get_model_tokenizer_baichuan2(model_dir: str,
             ],
                        requires=['bitsandbytes<0.41.2', 'accelerate<0.26'])
         ],
-        TemplateType.baichuan,
         get_model_tokenizer_baichuan2,
+        template=TemplateType.baichuan,
         architectures=['BaichuanForCausalLM', 'BaiChuanForCausalLM'],
         model_arch=ModelArch.baichuan,
     ))
diff --git a/swift/llm/model/model/baidu.py b/swift/llm/model/model/baidu.py
@@ -17,28 +17,17 @@
             ModelGroup([
                 Model('PaddlePaddle/ERNIE-4.5-0.3B-Base-PT', 'baidu/ERNIE-4.5-0.3B-PT'),
                 Model('PaddlePaddle/ERNIE-4.5-0.3B-PT', 'baidu/ERNIE-4.5-0.3B-PT'),
-            ]),
+            ], TemplateType.ernie),
             ModelGroup([
                 Model('PaddlePaddle/ERNIE-4.5-21B-A3B-Base-PT', 'baidu/ERNIE-4.5-21B-A3B-Base-PT'),
                 Model('PaddlePaddle/ERNIE-4.5-21B-A3B-PT', 'baidu/ERNIE-4.5-21B-A3B-PT'),
                 Model('PaddlePaddle/ERNIE-4.5-300B-A47B-Base-PT', 'baidu/ERNIE-4.5-300B-A47B-Base-PT'),
                 Model('PaddlePaddle/ERNIE-4.5-300B-A47B-PT', 'baidu/ERNIE-4.5-300B-A47B-PT'),
-            ]),
-        ],
-        TemplateType.ernie,
-        get_model_tokenizer_with_flash_attn,
-        architectures=['Ernie4_5_ForCausalLM', 'Ernie4_5_MoeForCausalLM'],
-    ))
-
-register_model(
-    ModelMeta(
-        LLMModelType.ernie_thinking,
-        [
+            ], TemplateType.ernie),
             ModelGroup([
                 Model('PaddlePaddle/ERNIE-4.5-21B-A3B-Thinking', 'baidu/ERNIE-4.5-21B-A3B-Thinking'),
-            ]),
+            ], TemplateType.ernie_thinking),
         ],
-        TemplateType.ernie_thinking,
         get_model_tokenizer_with_flash_attn,
         architectures=['Ernie4_5_ForCausalLM', 'Ernie4_5_MoeForCausalLM'],
     ))
@@ -51,8 +40,8 @@
                 Model('PaddlePaddle/PaddleOCR-VL', 'PaddlePaddle/PaddleOCR-VL'),
             ]),
         ],
-        TemplateType.paddle_ocr,
         get_model_tokenizer_multimodal,
+        template=TemplateType.paddle_ocr,
         model_arch=ModelArch.keye_vl,
         architectures=['PaddleOCRVLForConditionalGeneration'],
     ))
@@ -76,24 +65,11 @@ def get_model_tokenizer_ernie_vl(model_dir, *args, **kwargs):
                 Model('PaddlePaddle/ERNIE-4.5-VL-424B-A47B-PT', 'baidu/ERNIE-4.5-VL-424B-A47B-PT'),
                 Model('PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Base-PT', 'baidu/ERNIE-4.5-VL-28B-A3B-Base-PT'),
                 Model('PaddlePaddle/ERNIE-4.5-VL-424B-A47B-Base-PT', 'baidu/ERNIE-4.5-VL-424B-A47B-Base-PT'),
-            ]),
-        ],
-        TemplateType.ernie_vl,
-        get_model_tokenizer_ernie_vl,
-        model_arch=ModelArch.ernie_vl,
-        architectures=['Ernie4_5_VLMoeForConditionalGeneration'],
-        requires=['transformers>=4.52', 'moviepy'],
-    ))
-
-register_model(
-    ModelMeta(
-        MLLMModelType.ernie_vl_thinking,
-        [
+            ], TemplateType.ernie_vl),
             ModelGroup([
                 Model('PaddlePaddle/ERNIE-4.5-VL-28B-A3B-Thinking', 'baidu/ERNIE-4.5-VL-28B-A3B-Thinking'),
-            ]),
+            ], TemplateType.ernie_vl_thinking),
         ],
-        TemplateType.ernie_vl_thinking,
         get_model_tokenizer_ernie_vl,
         model_arch=ModelArch.ernie_vl,
         architectures=['Ernie4_5_VLMoeForConditionalGeneration'],

diff --git a/swift/llm/model/model/mistral.py b/swift/llm/model/model/mistral.py
@@ -238,28 +238,13 @@ def get_model_tokenizer_mistral_2512(model_dir: str,
                 Model('mistralai/Ministral-3-14B-Base-2512', 'mistralai/Ministral-3-14B-Base-2512'),
                 Model('mistralai/Ministral-3-14B-Instruct-2512', 'mistralai/Ministral-3-14B-Instruct-2512'),
                 Model('mistralai/Ministral-3-14B-Instruct-2512-BF16', 'mistralai/Ministral-3-14B-Instruct-2512-BF16'),
-            ]),
-        ],
-        TemplateType.mistral_2512,
-        get_model_tokenizer_mistral_2512,
-        architectures=['Mistral3ForConditionalGeneration'],
-        model_arch=ModelArch.llava_hf,
-        requires=['transformers>=5.0.0.dev0', 'mistral-common>=1.8.6'],
-        tags=['vision'],
-        ignore_patterns=[],
-    ))
-
-register_model(
-    ModelMeta(
-        MLLMModelType.mistral_2512_thinking,
-        [
+            ], TemplateType.mistral_2512),
             ModelGroup([
                 Model('mistralai/Ministral-3-3B-Reasoning-2512', 'mistralai/Ministral-3-3B-Reasoning-2512'),
                 Model('mistralai/Ministral-3-8B-Reasoning-2512', 'mistralai/Ministral-3-8B-Reasoning-2512'),
                 Model('mistralai/Ministral-3-14B-Reasoning-2512', 'mistralai/Ministral-3-14B-Reasoning-2512'),
-            ]),
+            ], TemplateType.mistral_2512_thinking),
         ],
-        TemplateType.mistral_2512_thinking,
         get_model_tokenizer_mistral_2512,
         architectures=['Mistral3ForConditionalGeneration'],
         model_arch=ModelArch.llava_hf,