Skip to content

Commit c232caa

Browse files
committed
[model] support Qwen/Qwen3Guard-Gen-0.6B series (#6189)
1 parent df9508f commit c232caa

File tree

6 files changed

+50
-0
lines changed

6 files changed

+50
-0
lines changed

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -205,6 +205,9 @@
205205
|[Qwen/Qwen3-14B-AWQ](https://modelscope.cn/models/Qwen/Qwen3-14B-AWQ)|qwen3|qwen3|transformers>=4.51|✘|-|[Qwen/Qwen3-14B-AWQ](https://huggingface.co/Qwen/Qwen3-14B-AWQ)|
206206
|[Qwen/Qwen3-32B-AWQ](https://modelscope.cn/models/Qwen/Qwen3-32B-AWQ)|qwen3|qwen3|transformers>=4.51|✘|-|[Qwen/Qwen3-32B-AWQ](https://huggingface.co/Qwen/Qwen3-32B-AWQ)|
207207
|[swift/Qwen3-32B-AWQ](https://modelscope.cn/models/swift/Qwen3-32B-AWQ)|qwen3|qwen3|transformers>=4.51|✘|-|-|
208+
|[Qwen/Qwen3Guard-Gen-0.6B](https://modelscope.cn/models/Qwen/Qwen3Guard-Gen-0.6B)|qwen3_guard|qwen3_guard|transformers>=4.51|✘|-|[Qwen/Qwen3Guard-Gen-0.6B](https://huggingface.co/Qwen/Qwen3Guard-Gen-0.6B)|
209+
|[Qwen/Qwen3Guard-Gen-4B](https://modelscope.cn/models/Qwen/Qwen3Guard-Gen-4B)|qwen3_guard|qwen3_guard|transformers>=4.51|✘|-|[Qwen/Qwen3Guard-Gen-4B](https://huggingface.co/Qwen/Qwen3Guard-Gen-4B)|
210+
|[Qwen/Qwen3Guard-Gen-8B](https://modelscope.cn/models/Qwen/Qwen3Guard-Gen-8B)|qwen3_guard|qwen3_guard|transformers>=4.51|✘|-|[Qwen/Qwen3Guard-Gen-8B](https://huggingface.co/Qwen/Qwen3Guard-Gen-8B)|
208211
|[Qwen/Qwen3-4B-Thinking-2507](https://modelscope.cn/models/Qwen/Qwen3-4B-Thinking-2507)|qwen3_thinking|qwen3_thinking|transformers>=4.51|✔|-|[Qwen/Qwen3-4B-Thinking-2507](https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507)|
209212
|[Qwen/Qwen3-4B-Thinking-2507-FP8](https://modelscope.cn/models/Qwen/Qwen3-4B-Thinking-2507-FP8)|qwen3_thinking|qwen3_thinking|transformers>=4.51|✘|-|[Qwen/Qwen3-4B-Thinking-2507-FP8](https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507-FP8)|
210213
|[Qwen/Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507)|qwen3_nothinking|qwen3_nothinking|transformers>=4.51|✔|-|[Qwen/Qwen3-30B-A3B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507)|

docs/source_en/Instruction/Supported-models-and-datasets.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -205,6 +205,9 @@ The table below introduces the models integrated with ms-swift:
205205
|[Qwen/Qwen3-14B-AWQ](https://modelscope.cn/models/Qwen/Qwen3-14B-AWQ)|qwen3|qwen3|transformers>=4.51|✘|-|[Qwen/Qwen3-14B-AWQ](https://huggingface.co/Qwen/Qwen3-14B-AWQ)|
206206
|[Qwen/Qwen3-32B-AWQ](https://modelscope.cn/models/Qwen/Qwen3-32B-AWQ)|qwen3|qwen3|transformers>=4.51|✘|-|[Qwen/Qwen3-32B-AWQ](https://huggingface.co/Qwen/Qwen3-32B-AWQ)|
207207
|[swift/Qwen3-32B-AWQ](https://modelscope.cn/models/swift/Qwen3-32B-AWQ)|qwen3|qwen3|transformers>=4.51|✘|-|-|
208+
|[Qwen/Qwen3Guard-Gen-0.6B](https://modelscope.cn/models/Qwen/Qwen3Guard-Gen-0.6B)|qwen3_guard|qwen3_guard|transformers>=4.51|✘|-|[Qwen/Qwen3Guard-Gen-0.6B](https://huggingface.co/Qwen/Qwen3Guard-Gen-0.6B)|
209+
|[Qwen/Qwen3Guard-Gen-4B](https://modelscope.cn/models/Qwen/Qwen3Guard-Gen-4B)|qwen3_guard|qwen3_guard|transformers>=4.51|✘|-|[Qwen/Qwen3Guard-Gen-4B](https://huggingface.co/Qwen/Qwen3Guard-Gen-4B)|
210+
|[Qwen/Qwen3Guard-Gen-8B](https://modelscope.cn/models/Qwen/Qwen3Guard-Gen-8B)|qwen3_guard|qwen3_guard|transformers>=4.51|✘|-|[Qwen/Qwen3Guard-Gen-8B](https://huggingface.co/Qwen/Qwen3Guard-Gen-8B)|
208211
|[Qwen/Qwen3-4B-Thinking-2507](https://modelscope.cn/models/Qwen/Qwen3-4B-Thinking-2507)|qwen3_thinking|qwen3_thinking|transformers>=4.51|✔|-|[Qwen/Qwen3-4B-Thinking-2507](https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507)|
209212
|[Qwen/Qwen3-4B-Thinking-2507-FP8](https://modelscope.cn/models/Qwen/Qwen3-4B-Thinking-2507-FP8)|qwen3_thinking|qwen3_thinking|transformers>=4.51|✘|-|[Qwen/Qwen3-4B-Thinking-2507-FP8](https://huggingface.co/Qwen/Qwen3-4B-Thinking-2507-FP8)|
210213
|[Qwen/Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507)|qwen3_nothinking|qwen3_nothinking|transformers>=4.51|✔|-|[Qwen/Qwen3-30B-A3B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507)|

swift/llm/model/constant.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ class LLMModelType:
1313
qwq_preview = 'qwq_preview'
1414
qwq = 'qwq'
1515
qwen3 = 'qwen3'
16+
qwen3_guard = 'qwen3_guard'
1617
qwen3_thinking = 'qwen3_thinking'
1718
qwen3_nothinking = 'qwen3_nothinking'
1819
qwen3_coder = 'qwen3_coder'

swift/llm/model/model/qwen.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -565,6 +565,22 @@ def _get_cast_dtype(self) -> torch.dtype:
565565
requires=['transformers>=4.51'],
566566
))
567567

568+
# Register the Qwen3Guard-Gen checkpoints (0.6B / 4B / 8B) under the
# `qwen3_guard` model type and pair them with the `qwen3_guard` template.
# Each `Model(...)` receives the same repository id twice; presumably these are
# the ModelScope id and the Hugging Face id, in that order (the documentation
# tables in this commit link both hubs with identical names) -- confirm against
# the `Model` definition.
register_model(
    ModelMeta(
        LLMModelType.qwen3_guard,
        [
            ModelGroup([
                Model('Qwen/Qwen3Guard-Gen-0.6B', 'Qwen/Qwen3Guard-Gen-0.6B'),
                Model('Qwen/Qwen3Guard-Gen-4B', 'Qwen/Qwen3Guard-Gen-4B'),
                Model('Qwen/Qwen3Guard-Gen-8B', 'Qwen/Qwen3Guard-Gen-8B'),
            ])
        ],
        TemplateType.qwen3_guard,
        get_model_tokenizer_with_flash_attn,
        # Same architecture string and minimum transformers version as the
        # plain Qwen3 rows listed in the supported-models tables.
        architectures=['Qwen3ForCausalLM'],
        requires=['transformers>=4.51'],
    ))
583+
568584
register_model(
569585
ModelMeta(
570586
LLMModelType.qwen3_thinking,

swift/llm/template/constant.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ class LLMTemplateType:
1313
qwen2_5_math = 'qwen2_5_math'
1414
qwen2_5_math_prm = 'qwen2_5_math_prm'
1515
qwen3 = 'qwen3'
16+
qwen3_guard = 'qwen3_guard'
1617
qwen3_thinking = 'qwen3_thinking'
1718
qwen3_nothinking = 'qwen3_nothinking'
1819
qwen3_coder = 'qwen3_coder'

swift/llm/template/template/qwen.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,32 @@ class Qwen3Template(ThinkingTemplate):
6262

6363
# Register the base `qwen3` template: no default system prompt, rendered by
# Qwen3Template.
register_template(
    QwenTemplateMeta(
        LLMTemplateType.qwen3,
        default_system=None,
        template_cls=Qwen3Template,
    ))
6464

65+
# Prompt text for the Qwen3Guard-Gen models. The whole moderation instruction
# (task statement, safety policy, unsafe-content categories, required output
# format) is packed into a single `user` turn, followed by the opening of the
# `assistant` turn. `{{QUERY}}` appears exactly once and marks where the text
# to be assessed goes; presumably the template machinery substitutes it --
# confirm against the template implementation.
# The pieces below are adjacent string literals that Python concatenates at
# compile time; quoting alternates only to avoid escaping apostrophes.
QWEN3_GUARD_TEMPLATE = (
    '<|im_start|>user\n'
    "# Task:\n1) Evaluate whether THE LAST USER's query belongs to safe or unsafe content based on our "
    'safety policy.\n2) Identify the category of unsafe content found.\n\n<BEGIN SAFETY POLICY>\n'
    "**Safety: Safe**: The USER's query does not contain any prohibited content or content that could "
    "lead to prohibited content.\n**Safety: Unsafe**: The USER's query contains prohibited content.\n"
    "**Safety: Controversial**: The USER's query does not explicitly contain prohibited content, but "
    'its intent, context, or potential responses could be misinterpreted, misused, or exploited to '
    'generate unsafe content under certain conditions.\n<END SAFETY POLICY>\n\n<BEGIN UNSAFE CONTENT '
    'CATEGORIES>\nViolent.\nNon-violent Illegal Acts.\nSexual Content or Sexual Acts.\nPII.\nSuicide '
    '& Self-Harm.\nUnethical Acts.\nPolitically Sensitive Topics.\nCopyright Violation.\nJailbreak.\n'
    '<END UNSAFE CONTENT CATEGORIES>\n\n<BEGIN CONVERSATION>\n\nUSER: {{QUERY}}\n\n<END CONVERSATION>'
    "\n\nProvide your safety assessment for ONLY THE LAST **USER's query** in the above conversation:\n"
    " - The first line must be one of: 'Safety: Safe', 'Safety: Unsafe', 'Safety: Controversial'.\n - "
    "The second line should start with 'Categories:' followed by a list of any unsafe content "
    "categories, separated by commas. If the content is safe, use 'Categories: None'."
    '<|im_end|>\n<|im_start|>assistant\n')
82+
83+
# Register the `qwen3_guard` template. It reuses Qwen3Template (a
# ThinkingTemplate subclass) but replaces the prompt with the fixed moderation
# instruction in QWEN3_GUARD_TEMPLATE.
register_template(
    QwenTemplateMeta(
        LLMTemplateType.qwen3_guard,
        default_system=None,
        template_cls=Qwen3Template,
        prompt=[QWEN3_GUARD_TEMPLATE],
        # The prefix is an empty <think></think> block; presumably this makes
        # the response begin directly with the safety verdict -- confirm
        # against how `response_prefix` is applied.
        response_prefix='<think>\n\n</think>\n\n'))
90+
6591
register_template(
6692
QwenTemplateMeta(
6793
LLMTemplateType.qwen3_thinking, default_system=None, response_prefix='<think>\n',

0 commit comments

Comments
 (0)