[model] support qwenlong L1.5 (#7237)

Jintao-Huang · web-flow · commit b47f451bb483 · 2025-12-29T16:54:52.000+08:00
diff --git a/docs/source/Instruction/Supported-models-and-datasets.md b/docs/source/Instruction/Supported-models-and-datasets.md
@@ -211,6 +211,7 @@
 |[Qwen/Qwen3Guard-Gen-8B](https://modelscope.cn/models/Qwen/Qwen3Guard-Gen-8B)|qwen3_guard|qwen3_guard|transformers>=4.51|&#x2718;|-|[Qwen/Qwen3Guard-Gen-8B](https://huggingface.co/Qwen/Qwen3Guard-Gen-8B)|
 |[Qwen/Qwen3-4B-Thinking-2507](https://modelscope.cn/models/Qwen/Qwen3-4B-Thinking-2507)|qwen3_thinking|qwen3_thinking|transformers>=4.51|&#x2714;|-|[Qwen/Qwen3-4B-Thinking-2507](https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507)|
 |[Qwen/Qwen3-4B-Thinking-2507-FP8](https://modelscope.cn/models/Qwen/Qwen3-4B-Thinking-2507-FP8)|qwen3_thinking|qwen3_thinking|transformers>=4.51|&#x2714;|-|[Qwen/Qwen3-4B-Thinking-2507-FP8](https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507-FP8)|
+|[iic/QwenLong-L1.5-30B-A3B](https://modelscope.cn/models/iic/QwenLong-L1.5-30B-A3B)|qwen3_thinking|qwen3_thinking|transformers>=4.51|&#x2714;|-|[Tongyi-Zhiwen/QwenLong-L1.5-30B-A3B](https://huggingface.co/Tongyi-Zhiwen/QwenLong-L1.5-30B-A3B)|
 |[Qwen/Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507)|qwen3_nothinking|qwen3_nothinking|transformers>=4.51|&#x2714;|-|[Qwen/Qwen3-30B-A3B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507)|
 |[Qwen/Qwen3-30B-A3B-Instruct-2507-FP8](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507-FP8)|qwen3_nothinking|qwen3_nothinking|transformers>=4.51|&#x2714;|-|[Qwen/Qwen3-30B-A3B-Instruct-2507-FP8](https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507-FP8)|
 |[Qwen/Qwen3-235B-A22B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-235B-A22B-Instruct-2507)|qwen3_nothinking|qwen3_nothinking|transformers>=4.51|&#x2714;|-|[Qwen/Qwen3-235B-A22B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507)|
diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md
@@ -212,6 +212,7 @@ The table below introduces the models integrated with ms-swift:
 |[Qwen/Qwen3Guard-Gen-8B](https://modelscope.cn/models/Qwen/Qwen3Guard-Gen-8B)|qwen3_guard|qwen3_guard|transformers>=4.51|&#x2718;|-|[Qwen/Qwen3Guard-Gen-8B](https://huggingface.co/Qwen/Qwen3Guard-Gen-8B)|
 |[Qwen/Qwen3-4B-Thinking-2507](https://modelscope.cn/models/Qwen/Qwen3-4B-Thinking-2507)|qwen3_thinking|qwen3_thinking|transformers>=4.51|&#x2714;|-|[Qwen/Qwen3-4B-Thinking-2507](https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507)|
 |[Qwen/Qwen3-4B-Thinking-2507-FP8](https://modelscope.cn/models/Qwen/Qwen3-4B-Thinking-2507-FP8)|qwen3_thinking|qwen3_thinking|transformers>=4.51|&#x2714;|-|[Qwen/Qwen3-4B-Thinking-2507-FP8](https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507-FP8)|
+|[iic/QwenLong-L1.5-30B-A3B](https://modelscope.cn/models/iic/QwenLong-L1.5-30B-A3B)|qwen3_thinking|qwen3_thinking|transformers>=4.51|&#x2714;|-|[Tongyi-Zhiwen/QwenLong-L1.5-30B-A3B](https://huggingface.co/Tongyi-Zhiwen/QwenLong-L1.5-30B-A3B)|
 |[Qwen/Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507)|qwen3_nothinking|qwen3_nothinking|transformers>=4.51|&#x2714;|-|[Qwen/Qwen3-30B-A3B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507)|
 |[Qwen/Qwen3-30B-A3B-Instruct-2507-FP8](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507-FP8)|qwen3_nothinking|qwen3_nothinking|transformers>=4.51|&#x2714;|-|[Qwen/Qwen3-30B-A3B-Instruct-2507-FP8](https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507-FP8)|
 |[Qwen/Qwen3-235B-A22B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-235B-A22B-Instruct-2507)|qwen3_nothinking|qwen3_nothinking|transformers>=4.51|&#x2714;|-|[Qwen/Qwen3-235B-A22B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507)|
diff --git a/swift/llm/model/model/qwen.py b/swift/llm/model/model/qwen.py
@@ -592,6 +592,9 @@ def _get_cast_dtype(self) -> torch.dtype:
                 Model('Qwen/Qwen3-4B-Thinking-2507', 'Qwen/Qwen3-4B-Thinking-2507'),
                 Model('Qwen/Qwen3-4B-Thinking-2507-FP8', 'Qwen/Qwen3-4B-Thinking-2507-FP8'),
             ]),
+            ModelGroup([
+                Model('iic/QwenLong-L1.5-30B-A3B', 'Tongyi-Zhiwen/QwenLong-L1.5-30B-A3B'),
+            ]),
         ],
         TemplateType.qwen3_thinking,
         get_model_tokenizer_with_flash_attn,