File tree Expand file tree Collapse file tree 14 files changed +45
-1
lines changed
tests/test_align/test_template Expand file tree Collapse file tree 14 files changed +45
-1
lines changed Original file line number Diff line number Diff line change 231231| [ Qwen/Qwen3-235B-A22B-Thinking-2507] ( https://modelscope.cn/models/Qwen/Qwen3-235B-A22B-Thinking-2507 ) | qwen3_moe_thinking| qwen3_thinking| transformers>=4.51| ✔ | -| [ Qwen/Qwen3-235B-A22B-Thinking-2507] ( https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507 ) |
232232| [ Qwen/Qwen3-235B-A22B-Thinking-2507-FP8] ( https://modelscope.cn/models/Qwen/Qwen3-235B-A22B-Thinking-2507-FP8 ) | qwen3_moe_thinking| qwen3_thinking| transformers>=4.51| ✘ | -| [ Qwen/Qwen3-235B-A22B-Thinking-2507-FP8] ( https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507-FP8 ) |
233233| [ swift/Qwen3-235B-A22B-Thinking-2507-AWQ] ( https://modelscope.cn/models/swift/Qwen3-235B-A22B-Thinking-2507-AWQ ) | qwen3_moe_thinking| qwen3_thinking| transformers>=4.51| ✘ | -| -|
234+ | [ Qwen/Qwen3-Next-80B-A3B-Instruct] ( https://modelscope.cn/models/Qwen/Qwen3-Next-80B-A3B-Instruct ) | qwen3_next| qwen3_nothinking| transformers>=4.57.0.dev| ✘ | -| -|
235+ | [ Qwen/Qwen3-Next-80B-A3B-Thinking] ( https://modelscope.cn/models/Qwen/Qwen3-Next-80B-A3B-Thinking ) | qwen3_next_thinking| qwen3_thinking| transformers>=4.57.0.dev| ✘ | -| -|
234236| [ Qwen/Qwen3-Embedding-0.6B] ( https://modelscope.cn/models/Qwen/Qwen3-Embedding-0.6B ) | qwen3_emb| qwen3_emb| -| ✘ | -| [ Qwen/Qwen3-Embedding-0.6B] ( https://huggingface.co/Qwen/Qwen3-Embedding-0.6B ) |
235237| [ Qwen/Qwen3-Embedding-4B] ( https://modelscope.cn/models/Qwen/Qwen3-Embedding-4B ) | qwen3_emb| qwen3_emb| -| ✘ | -| [ Qwen/Qwen3-Embedding-4B] ( https://huggingface.co/Qwen/Qwen3-Embedding-4B ) |
236238| [ Qwen/Qwen3-Embedding-8B] ( https://modelscope.cn/models/Qwen/Qwen3-Embedding-8B ) | qwen3_emb| qwen3_emb| -| ✘ | -| [ Qwen/Qwen3-Embedding-8B] ( https://huggingface.co/Qwen/Qwen3-Embedding-8B ) |
Original file line number Diff line number Diff line change @@ -231,6 +231,8 @@ The table below introduces the models integrated with ms-swift:
231231| [ Qwen/Qwen3-235B-A22B-Thinking-2507] ( https://modelscope.cn/models/Qwen/Qwen3-235B-A22B-Thinking-2507 ) | qwen3_moe_thinking| qwen3_thinking| transformers>=4.51| ✔ | -| [ Qwen/Qwen3-235B-A22B-Thinking-2507] ( https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507 ) |
232232| [ Qwen/Qwen3-235B-A22B-Thinking-2507-FP8] ( https://modelscope.cn/models/Qwen/Qwen3-235B-A22B-Thinking-2507-FP8 ) | qwen3_moe_thinking| qwen3_thinking| transformers>=4.51| ✘ | -| [ Qwen/Qwen3-235B-A22B-Thinking-2507-FP8] ( https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507-FP8 ) |
233233| [ swift/Qwen3-235B-A22B-Thinking-2507-AWQ] ( https://modelscope.cn/models/swift/Qwen3-235B-A22B-Thinking-2507-AWQ ) | qwen3_moe_thinking| qwen3_thinking| transformers>=4.51| ✘ | -| -|
234+ | [ Qwen/Qwen3-Next-80B-A3B-Instruct] ( https://modelscope.cn/models/Qwen/Qwen3-Next-80B-A3B-Instruct ) | qwen3_next| qwen3_nothinking| transformers>=4.57.0.dev| ✘ | -| -|
235+ | [ Qwen/Qwen3-Next-80B-A3B-Thinking] ( https://modelscope.cn/models/Qwen/Qwen3-Next-80B-A3B-Thinking ) | qwen3_next_thinking| qwen3_thinking| transformers>=4.57.0.dev| ✘ | -| -|
234236| [ Qwen/Qwen3-Embedding-0.6B] ( https://modelscope.cn/models/Qwen/Qwen3-Embedding-0.6B ) | qwen3_emb| qwen3_emb| -| ✘ | -| [ Qwen/Qwen3-Embedding-0.6B] ( https://huggingface.co/Qwen/Qwen3-Embedding-0.6B ) |
235237| [ Qwen/Qwen3-Embedding-4B] ( https://modelscope.cn/models/Qwen/Qwen3-Embedding-4B ) | qwen3_emb| qwen3_emb| -| ✘ | -| [ Qwen/Qwen3-Embedding-4B] ( https://huggingface.co/Qwen/Qwen3-Embedding-4B ) |
236238| [ Qwen/Qwen3-Embedding-8B] ( https://modelscope.cn/models/Qwen/Qwen3-Embedding-8B ) | qwen3_emb| qwen3_emb| -| ✘ | -| [ Qwen/Qwen3-Embedding-8B] ( https://huggingface.co/Qwen/Qwen3-Embedding-8B ) |
Original file line number Diff line number Diff line change @@ -17,6 +17,8 @@ class LLMModelType:
1717 qwen3_nothinking = 'qwen3_nothinking'
1818 qwen3_moe = 'qwen3_moe'
1919 qwen3_moe_thinking = 'qwen3_moe_thinking'
20+ qwen3_next = 'qwen3_next'
21+ qwen3_next_thinking = 'qwen3_next_thinking'
2022 qwen3_emb = 'qwen3_emb'
2123 qwen3_reranker = 'qwen3_reranker'
2224
Original file line number Diff line number Diff line change @@ -624,6 +624,26 @@ def _get_cast_dtype(self) -> torch.dtype:
624624 requires = ['transformers>=4.51' ],
625625 ))
626626
627+ register_model (
628+ ModelMeta (
629+ LLMModelType .qwen3_next ,
630+ [ModelGroup ([Model ('Qwen/Qwen3-Next-80B-A3B-Instruct' )])],
631+ TemplateType .qwen3_nothinking ,
632+ get_model_tokenizer_with_flash_attn ,
633+ architectures = ['Qwen3NextForCausalLM' ],
634+ requires = ['transformers>=4.57.0.dev' ],
635+ ))
636+
637+ register_model (
638+ ModelMeta (
639+ LLMModelType .qwen3_next_thinking ,
640+ [ModelGroup ([Model ('Qwen/Qwen3-Next-80B-A3B-Thinking' )])],
641+ TemplateType .qwen3_thinking ,
642+ get_model_tokenizer_with_flash_attn ,
643+ architectures = ['Qwen3NextForCausalLM' ],
644+ requires = ['transformers>=4.57.0.dev' ],
645+ ))
646+
627647
628648def patch_qwen_vl_utils (vision_process ):
629649 if hasattr (vision_process , '_patch' ):
Original file line number Diff line number Diff line change 1+ # Copyright (c) Alibaba, Inc. and its affiliates.
12from typing import Any , Dict
23
34from ..config import convert_hf_config
Original file line number Diff line number Diff line change 1+ # Copyright (c) Alibaba, Inc. and its affiliates.
12from . import glm , internvl , qwen
Original file line number Diff line number Diff line change 1+ # Copyright (c) Alibaba, Inc. and its affiliates.
12from megatron .training import get_args
23
34from swift .llm import ModelType , Template
Original file line number Diff line number Diff line change 1+ # Copyright (c) Alibaba, Inc. and its affiliates.
12import torch
23
34from swift .llm import ModelType
Original file line number Diff line number Diff line change 1+ # Copyright (c) Alibaba, Inc. and its affiliates.
12import torch
23from megatron .training import get_args , get_tokenizer
34from PIL import Image
Original file line number Diff line number Diff line change 1+ # Copyright (c) Alibaba, Inc. and its affiliates.
12from abc import ABC , abstractmethod
23from contextlib import contextmanager
34from dataclasses import dataclass
You can’t perform that action at this time.
0 commit comments