diff --git a/docs/source/Instruction/Supported-models-and-datasets.md b/docs/source/Instruction/Supported-models-and-datasets.md index 8af4d53176..33593ab3ab 100644 --- a/docs/source/Instruction/Supported-models-and-datasets.md +++ b/docs/source/Instruction/Supported-models-and-datasets.md @@ -647,6 +647,8 @@ |[PaddlePaddle/ERNIE-4.5-21B-A3B-Thinking](https://modelscope.cn/models/PaddlePaddle/ERNIE-4.5-21B-A3B-Thinking)|ernie_thinking|ernie_thinking|-|✔|-|[baidu/ERNIE-4.5-21B-A3B-Thinking](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Thinking)| |[meituan-longcat/LongCat-Flash-Chat](https://modelscope.cn/models/meituan-longcat/LongCat-Flash-Chat)|longchat|longchat|transformers>=4.54,<4.56|✘|-|[meituan-longcat/LongCat-Flash-Chat](https://huggingface.co/meituan-longcat/LongCat-Flash-Chat)| |[meituan-longcat/LongCat-Flash-Chat-FP8](https://modelscope.cn/models/meituan-longcat/LongCat-Flash-Chat-FP8)|longchat|longchat|transformers>=4.54,<4.56|✘|-|[meituan-longcat/LongCat-Flash-Chat-FP8](https://huggingface.co/meituan-longcat/LongCat-Flash-Chat-FP8)| +|[XiaomiMiMo/MiMo-V2-Flash](https://modelscope.cn/models/XiaomiMiMo/MiMo-V2-Flash)|mimo_v2|mimo_v2|-|✘|-|[XiaomiMiMo/MiMo-V2-Flash](https://huggingface.co/XiaomiMiMo/MiMo-V2-Flash)| +|[XiaomiMiMo/MiMo-V2-Flash-Base](https://modelscope.cn/models/XiaomiMiMo/MiMo-V2-Flash-Base)|mimo_v2|mimo_v2|-|✘|-|[XiaomiMiMo/MiMo-V2-Flash-Base](https://huggingface.co/XiaomiMiMo/MiMo-V2-Flash-Base)| |[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|transformers>=4.48|✘|bert|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)| |[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|transformers>=4.48|✘|bert|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)| |[iic/gte-modernbert-base](https://modelscope.cn/models/iic/gte-modernbert-base)|modern_bert_gte|dummy|transformers>=4.48|✘|bert, embedding|[Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base)| diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md index 7af54ce879..8a93631869 100644 --- a/docs/source_en/Instruction/Supported-models-and-datasets.md +++ b/docs/source_en/Instruction/Supported-models-and-datasets.md @@ -649,6 +649,8 @@ The table below introduces the models integrated with ms-swift: |[PaddlePaddle/ERNIE-4.5-21B-A3B-Thinking](https://modelscope.cn/models/PaddlePaddle/ERNIE-4.5-21B-A3B-Thinking)|ernie_thinking|ernie_thinking|-|✔|-|[baidu/ERNIE-4.5-21B-A3B-Thinking](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Thinking)| |[meituan-longcat/LongCat-Flash-Chat](https://modelscope.cn/models/meituan-longcat/LongCat-Flash-Chat)|longchat|longchat|transformers>=4.54,<4.56|✘|-|[meituan-longcat/LongCat-Flash-Chat](https://huggingface.co/meituan-longcat/LongCat-Flash-Chat)| |[meituan-longcat/LongCat-Flash-Chat-FP8](https://modelscope.cn/models/meituan-longcat/LongCat-Flash-Chat-FP8)|longchat|longchat|transformers>=4.54,<4.56|✘|-|[meituan-longcat/LongCat-Flash-Chat-FP8](https://huggingface.co/meituan-longcat/LongCat-Flash-Chat-FP8)| +|[XiaomiMiMo/MiMo-V2-Flash](https://modelscope.cn/models/XiaomiMiMo/MiMo-V2-Flash)|mimo_v2|mimo_v2|-|✘|-|[XiaomiMiMo/MiMo-V2-Flash](https://huggingface.co/XiaomiMiMo/MiMo-V2-Flash)| +|[XiaomiMiMo/MiMo-V2-Flash-Base](https://modelscope.cn/models/XiaomiMiMo/MiMo-V2-Flash-Base)|mimo_v2|mimo_v2|-|✘|-|[XiaomiMiMo/MiMo-V2-Flash-Base](https://huggingface.co/XiaomiMiMo/MiMo-V2-Flash-Base)| |[answerdotai/ModernBERT-base](https://modelscope.cn/models/answerdotai/ModernBERT-base)|modern_bert|dummy|transformers>=4.48|✘|bert|[answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base)| |[answerdotai/ModernBERT-large](https://modelscope.cn/models/answerdotai/ModernBERT-large)|modern_bert|dummy|transformers>=4.48|✘|bert|[answerdotai/ModernBERT-large](https://huggingface.co/answerdotai/ModernBERT-large)| |[iic/gte-modernbert-base](https://modelscope.cn/models/iic/gte-modernbert-base)|modern_bert_gte|dummy|transformers>=4.48|✘|bert, embedding|[Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base)| diff --git a/swift/llm/model/constant.py b/swift/llm/model/constant.py index fa3b20b8e9..44b3f6c727 100644 --- a/swift/llm/model/constant.py +++ b/swift/llm/model/constant.py @@ -141,6 +141,7 @@ class LLMModelType: gemma_emb = 'gemma_emb' ernie_thinking = 'ernie_thinking' longchat = 'longchat' + mimo_v2 = 'mimo_v2' class BertModelType: diff --git a/swift/llm/model/model/llm.py b/swift/llm/model/model/llm.py index 0b7a308fc9..fe3b05539c 100644 --- a/swift/llm/model/model/llm.py +++ b/swift/llm/model/model/llm.py @@ -397,3 +397,17 @@ def get_model_tokenizer_yuan(model_dir: str, get_model_tokenizer_with_flash_attn, architectures=['BailingMoeV2ForCausalLM'], )) + +register_model( + ModelMeta( + LLMModelType.mimo_v2, + [ + ModelGroup([ + Model('XiaomiMiMo/MiMo-V2-Flash', 'XiaomiMiMo/MiMo-V2-Flash'), + Model('XiaomiMiMo/MiMo-V2-Flash-Base', 'XiaomiMiMo/MiMo-V2-Flash-Base'), + ]) + ], + TemplateType.mimo_v2, + get_model_tokenizer_with_flash_attn, + architectures=['MiMoV2FlashForCausalLM'], + )) diff --git a/swift/llm/template/constant.py b/swift/llm/template/constant.py index 8c7601c187..d228221a27 100644 --- a/swift/llm/template/constant.py +++ b/swift/llm/template/constant.py @@ -104,6 +104,7 @@ class LLMTemplateType: ernie = 'ernie' ernie_thinking = 'ernie_thinking' longchat = 'longchat' + mimo_v2 = 'mimo_v2' aya = 'aya' c4ai = 'c4ai' diff --git a/swift/llm/template/template/llm.py b/swift/llm/template/template/llm.py index 24da73fb56..f6c8402a54 100644 --- a/swift/llm/template/template/llm.py +++ b/swift/llm/template/template/llm.py @@ -424,3 +424,10 @@ class GptOssTemplateMeta(TemplateMeta): is_thinking=True, thinking_prefix='\n', )) + +register_template( + ChatmlTemplateMeta( + LLMTemplateType.mimo_v2, + default_system='You are MiMo, a helpful AI assistant engineered by Xiaomi.', + response_prefix='', + ))