
Commit 2181147: support qwq-32b-preview (#2520)

1 parent 1156a57

File tree: 7 files changed, +36 -2 lines

README.md

Lines changed: 1 addition & 0 deletions
@@ -55,6 +55,7 @@ You can contact us and communicate with us by adding our group:
 <img src="asset/discord_qr.jpg" width="200" height="200"> | <img src="asset/wechat.png" width="200" height="200">

 ## 🎉 News
+- 2024.11.28: Support for the models qwq-32b-preview and marco-o1, and for the dataset open-o1. Experience it using `swift infer --model_type qwq-32b-preview`.
 - 2024.11.12: Support for training and deployment of the qwen2.5-coder series models: 0.5b, 3b, 14b, and 32b. Experience it using `swift infer --model_type qwen2_5-coder-3b-instruct`.
 - 2024.10.26: Support for training and deployment of the aya-expanse series models. Experience it using `swift infer --model_type aya-expanse-32b`.
 - 2024.10.23: Support for training and deployment of emu3-chat. Experience it using `swift infer --model_type emu3-chat`.

README_CN.md

Lines changed: 1 addition & 0 deletions
@@ -56,6 +56,7 @@ SWIFT has rich and comprehensive documentation; please see our documentation website:


 ## 🎉 News
+- 2024.11.28: Support for the models qwq-32b-preview and marco-o1, and for the dataset open-o1. Experience it using `swift infer --model_type qwq-32b-preview`.
 - 2024.11.12: Support for training and deployment of the qwen2.5-coder series models: 0.5b, 3b, 14b, and 32b. Experience it using `swift infer --model_type qwen2_5-coder-3b-instruct`.
 - 2024.10.26: Support for training and deployment of the aya-expanse series models. Experience it using `swift infer --model_type aya-expanse-32b`.
 - 2024.10.23: Support for training and deployment of emu3-chat. Experience it using `swift infer --model_type emu3-chat`.

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 4 additions & 1 deletion
@@ -180,6 +180,7 @@
 |qwen2_5-coder-32b-instruct-gptq-int4|[qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|&#x2714;|&#x2714;|&#x2714;|&#x2718;|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4)|
 |qwen2_5-coder-32b-instruct-gptq-int8|[qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|&#x2714;|&#x2714;|&#x2718;|&#x2718;|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8)|
 |qwen2_5-coder-32b-instruct-awq|[qwen/Qwen2.5-Coder-32B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-Coder-32B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-32B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-AWQ)|
+|qwq-32b-preview|[Qwen/QwQ-32B-Preview](https://modelscope.cn/models/Qwen/QwQ-32B-Preview/summary)|q_proj, k_proj, v_proj|qwq|&#x2714;|&#x2714;|&#x2714;|&#x2714;|transformers>=4.37|-|[Qwen/QwQ-32B-Preview](https://huggingface.co/Qwen/QwQ-32B-Preview)|
 |marco-o1|[AIDC-AI/Marco-o1](https://modelscope.cn/models/AIDC-AI/Marco-o1/summary)|q_proj, k_proj, v_proj|marco_o1|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.37|-|[AIDC-AI/Marco-o1](https://huggingface.co/AIDC-AI/Marco-o1)|
 |chatglm2-6b|[ZhipuAI/chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b/summary)|query_key_value|chatglm2|&#x2718;|&#x2714;|&#x2718;|&#x2718;|transformers<4.42|-|[THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b)|
 |chatglm2-6b-32k|[ZhipuAI/chatglm2-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm2-6b-32k/summary)|query_key_value|chatglm2|&#x2718;|&#x2714;|&#x2718;|&#x2718;|transformers<4.42|-|[THUDM/chatglm2-6b-32k](https://huggingface.co/THUDM/chatglm2-6b-32k)|
@@ -541,6 +542,7 @@
 |mplug-owl3-1b-chat|[iic/mPLUG-Owl3-1B-241014](https://modelscope.cn/models/iic/mPLUG-Owl3-1B-241014/summary)|^(language_model\|vision2text_model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|mplug_owl3|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.36, icecream|vision, video|[mPLUG/mPLUG-Owl3-1B-241014](https://huggingface.co/mPLUG/mPLUG-Owl3-1B-241014)|
 |mplug-owl3-2b-chat|[iic/mPLUG-Owl3-2B-241014](https://modelscope.cn/models/iic/mPLUG-Owl3-2B-241014/summary)|^(language_model\|vision2text_model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|mplug_owl3|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.36, icecream|vision, video|[mPLUG/mPLUG-Owl3-2B-241014](https://huggingface.co/mPLUG/mPLUG-Owl3-2B-241014)|
 |mplug-owl3-7b-chat|[iic/mPLUG-Owl3-7B-240728](https://modelscope.cn/models/iic/mPLUG-Owl3-7B-240728/summary)|^(language_model\|vision2text_model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|mplug_owl3|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.36, icecream|vision, video|[mPLUG/mPLUG-Owl3-7B-240728](https://huggingface.co/mPLUG/mPLUG-Owl3-7B-240728)|
+|mplug-owl3v-7b-chat|[iic/mPLUG-Owl3-7B-241101](https://modelscope.cn/models/iic/mPLUG-Owl3-7B-241101/summary)|^(language_model\|vision2text_model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|mplug_owl3v|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.36, icecream|vision, video|[mPLUG/mPLUG-Owl3-7B-241101](https://huggingface.co/mPLUG/mPLUG-Owl3-7B-241101)|
 |phi3-vision-128k-instruct|[LLM-Research/Phi-3-vision-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-vision-128k-instruct/summary)|^(model.layers\|model.vision_embed_tokens.img_projection)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|phi3-vl|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.36|vision|[microsoft/Phi-3-vision-128k-instruct](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct)|
 |phi3_5-vision-instruct|[LLM-Research/Phi-3.5-vision-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-vision-instruct/summary)|^(model.layers\|model.vision_embed_tokens.img_projection)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|phi3-vl|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.36|vision|[microsoft/Phi-3.5-vision-instruct](https://huggingface.co/microsoft/Phi-3.5-vision-instruct)|
 |cogvlm-17b-chat|[ZhipuAI/cogvlm-chat](https://modelscope.cn/models/ZhipuAI/cogvlm-chat/summary)|^(model.layers)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|cogvlm|&#x2718;|&#x2718;|&#x2718;|&#x2718;|transformers<4.42|vision|[THUDM/cogvlm-chat-hf](https://huggingface.co/THUDM/cogvlm-chat-hf)|
@@ -637,7 +639,7 @@
 |coco-en-2|[modelscope/coco_2014_caption](https://modelscope.cn/datasets/modelscope/coco_2014_caption/summary)|coco_2014_caption|454617|36.8±2.8, min=32, max=89|chat, multi-modal, vision|-|
 |🔥coco-en-2-mini|[modelscope/coco_2014_caption](https://modelscope.cn/datasets/modelscope/coco_2014_caption/summary)|coco_2014_caption|40504|36.8±2.6, min=32, max=75|chat, multi-modal, vision|-|
 |capcha-images|[AI-ModelScope/captcha-images](https://modelscope.cn/datasets/AI-ModelScope/captcha-images/summary)||8000|31.0±0.0, min=31, max=31|chat, multi-modal, vision|-|
-|latex-ocr-print|[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR/summary)|full|17918|362.7±34.8, min=294, max=528|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)|
+|latex-ocr-print|[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR/summary)|default|17918|362.7±34.8, min=294, max=528|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)|
 |latex-ocr-handwrite|[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR/summary)|synthetic_handwrite|95424|375.1±59.4, min=292, max=2115|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)|
 |aishell1-zh|[speech_asr/speech_asr_aishell1_trainsets](https://modelscope.cn/datasets/speech_asr/speech_asr_aishell1_trainsets/summary)||141600|152.2±36.8, min=63, max=419|chat, multi-modal, audio|-|
 |🔥aishell1-zh-mini|[speech_asr/speech_asr_aishell1_trainsets](https://modelscope.cn/datasets/speech_asr/speech_asr_aishell1_trainsets/summary)||14526|152.2±35.6, min=74, max=359|chat, multi-modal, audio|-|
@@ -713,6 +715,7 @@
 |moondream2-coyo-5M|[swift/moondream2-coyo-5M-captions](https://modelscope.cn/datasets/swift/moondream2-coyo-5M-captions/summary)||-|Dataset is too huge, please click the original link to view the dataset stat.|caption, pretrain, quality|[isidentical/moondream2-coyo-5M-captions](https://huggingface.co/datasets/isidentical/moondream2-coyo-5M-captions)|
 |no-robots|[swift/no_robots](https://modelscope.cn/datasets/swift/no_robots/summary)||9485|298.7±246.4, min=40, max=6739|multi-task, quality, human-annotated|[HuggingFaceH4/no_robots](https://huggingface.co/datasets/HuggingFaceH4/no_robots)|
 |open-hermes|[swift/OpenHermes-2.5](https://modelscope.cn/datasets/swift/OpenHermes-2.5/summary)||-|Dataset is too huge, please click the original link to view the dataset stat.|cot, en, quality|[teknium/OpenHermes-2.5](https://huggingface.co/datasets/teknium/OpenHermes-2.5)|
+|open-o1|[AI-ModelScope/OpenO1-SFT](https://modelscope.cn/datasets/AI-ModelScope/OpenO1-SFT/summary)|default|203579|615.5±659.6, min=11, max=27509|chat, general, o1|[O1-OPEN/OpenO1-SFT](https://huggingface.co/datasets/O1-OPEN/OpenO1-SFT)|
 |open-orca-chinese|[AI-ModelScope/OpenOrca-Chinese](https://modelscope.cn/datasets/AI-ModelScope/OpenOrca-Chinese/summary)||-|Dataset is too huge, please click the original link to view the dataset stat.|QA, zh, general, quality|[yys/OpenOrca-Chinese](https://huggingface.co/datasets/yys/OpenOrca-Chinese)|
 |orca_dpo_pairs|[swift/orca_dpo_pairs](https://modelscope.cn/datasets/swift/orca_dpo_pairs/summary)||12859|366.9±251.9, min=30, max=2010|rlhf, quality|[Intel/orca_dpo_pairs](https://huggingface.co/datasets/Intel/orca_dpo_pairs)|
 |path-vqa|[swift/path-vqa](https://modelscope.cn/datasets/swift/path-vqa/summary)||19654|34.8±7.3, min=27, max=85|multi-modal, vqa, medical|[flaviagiammarino/path-vqa](https://huggingface.co/datasets/flaviagiammarino/path-vqa)|

docs/source_en/Instruction/Supported-models-datasets.md

Lines changed: 4 additions & 1 deletion
@@ -180,6 +180,7 @@ The table below introduces all models supported by SWIFT:
 |qwen2_5-coder-32b-instruct-gptq-int4|[qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int4/summary)|q_proj, k_proj, v_proj|qwen2_5|&#x2714;|&#x2714;|&#x2714;|&#x2718;|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4)|
 |qwen2_5-coder-32b-instruct-gptq-int8|[qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-Int8/summary)|q_proj, k_proj, v_proj|qwen2_5|&#x2714;|&#x2714;|&#x2718;|&#x2718;|auto_gptq>=0.5, transformers>=4.37|-|[Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-GPTQ-Int8)|
 |qwen2_5-coder-32b-instruct-awq|[qwen/Qwen2.5-Coder-32B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2.5-Coder-32B-Instruct-AWQ/summary)|q_proj, k_proj, v_proj|qwen2_5|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.37, autoawq|-|[Qwen/Qwen2.5-32B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct-AWQ)|
+|qwq-32b-preview|[Qwen/QwQ-32B-Preview](https://modelscope.cn/models/Qwen/QwQ-32B-Preview/summary)|q_proj, k_proj, v_proj|qwq|&#x2714;|&#x2714;|&#x2714;|&#x2714;|transformers>=4.37|-|[Qwen/QwQ-32B-Preview](https://huggingface.co/Qwen/QwQ-32B-Preview)|
 |marco-o1|[AIDC-AI/Marco-o1](https://modelscope.cn/models/AIDC-AI/Marco-o1/summary)|q_proj, k_proj, v_proj|marco_o1|&#x2714;|&#x2714;|&#x2714;|&#x2718;|transformers>=4.37|-|[AIDC-AI/Marco-o1](https://huggingface.co/AIDC-AI/Marco-o1)|
 |chatglm2-6b|[ZhipuAI/chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b/summary)|query_key_value|chatglm2|&#x2718;|&#x2714;|&#x2718;|&#x2718;|transformers<4.42|-|[THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b)|
 |chatglm2-6b-32k|[ZhipuAI/chatglm2-6b-32k](https://modelscope.cn/models/ZhipuAI/chatglm2-6b-32k/summary)|query_key_value|chatglm2|&#x2718;|&#x2714;|&#x2718;|&#x2718;|transformers<4.42|-|[THUDM/chatglm2-6b-32k](https://huggingface.co/THUDM/chatglm2-6b-32k)|
@@ -541,6 +542,7 @@ The table below introduces all models supported by SWIFT:
 |mplug-owl3-1b-chat|[iic/mPLUG-Owl3-1B-241014](https://modelscope.cn/models/iic/mPLUG-Owl3-1B-241014/summary)|^(language_model\|vision2text_model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|mplug_owl3|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.36, icecream|vision, video|[mPLUG/mPLUG-Owl3-1B-241014](https://huggingface.co/mPLUG/mPLUG-Owl3-1B-241014)|
 |mplug-owl3-2b-chat|[iic/mPLUG-Owl3-2B-241014](https://modelscope.cn/models/iic/mPLUG-Owl3-2B-241014/summary)|^(language_model\|vision2text_model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|mplug_owl3|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.36, icecream|vision, video|[mPLUG/mPLUG-Owl3-2B-241014](https://huggingface.co/mPLUG/mPLUG-Owl3-2B-241014)|
 |mplug-owl3-7b-chat|[iic/mPLUG-Owl3-7B-240728](https://modelscope.cn/models/iic/mPLUG-Owl3-7B-240728/summary)|^(language_model\|vision2text_model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|mplug_owl3|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.36, icecream|vision, video|[mPLUG/mPLUG-Owl3-7B-240728](https://huggingface.co/mPLUG/mPLUG-Owl3-7B-240728)|
+|mplug-owl3v-7b-chat|[iic/mPLUG-Owl3-7B-241101](https://modelscope.cn/models/iic/mPLUG-Owl3-7B-241101/summary)|^(language_model\|vision2text_model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|mplug_owl3v|&#x2714;|&#x2718;|&#x2718;|&#x2718;|transformers>=4.36, icecream|vision, video|[mPLUG/mPLUG-Owl3-7B-241101](https://huggingface.co/mPLUG/mPLUG-Owl3-7B-241101)|
 |phi3-vision-128k-instruct|[LLM-Research/Phi-3-vision-128k-instruct](https://modelscope.cn/models/LLM-Research/Phi-3-vision-128k-instruct/summary)|^(model.layers\|model.vision_embed_tokens.img_projection)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|phi3-vl|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.36|vision|[microsoft/Phi-3-vision-128k-instruct](https://huggingface.co/microsoft/Phi-3-vision-128k-instruct)|
 |phi3_5-vision-instruct|[LLM-Research/Phi-3.5-vision-instruct](https://modelscope.cn/models/LLM-Research/Phi-3.5-vision-instruct/summary)|^(model.layers\|model.vision_embed_tokens.img_projection)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|phi3-vl|&#x2714;|&#x2714;|&#x2718;|&#x2718;|transformers>=4.36|vision|[microsoft/Phi-3.5-vision-instruct](https://huggingface.co/microsoft/Phi-3.5-vision-instruct)|
 |cogvlm-17b-chat|[ZhipuAI/cogvlm-chat](https://modelscope.cn/models/ZhipuAI/cogvlm-chat/summary)|^(model.layers)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|cogvlm|&#x2718;|&#x2718;|&#x2718;|&#x2718;|transformers<4.42|vision|[THUDM/cogvlm-chat-hf](https://huggingface.co/THUDM/cogvlm-chat-hf)|
@@ -637,7 +639,7 @@ The table below introduces the datasets supported by SWIFT:
 |coco-en-2|[modelscope/coco_2014_caption](https://modelscope.cn/datasets/modelscope/coco_2014_caption/summary)|coco_2014_caption|454617|36.8±2.8, min=32, max=89|chat, multi-modal, vision|-|
 |🔥coco-en-2-mini|[modelscope/coco_2014_caption](https://modelscope.cn/datasets/modelscope/coco_2014_caption/summary)|coco_2014_caption|40504|36.8±2.6, min=32, max=75|chat, multi-modal, vision|-|
 |capcha-images|[AI-ModelScope/captcha-images](https://modelscope.cn/datasets/AI-ModelScope/captcha-images/summary)||8000|31.0±0.0, min=31, max=31|chat, multi-modal, vision|-|
-|latex-ocr-print|[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR/summary)|full|17918|362.7±34.8, min=294, max=528|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)|
+|latex-ocr-print|[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR/summary)|default|17918|362.7±34.8, min=294, max=528|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)|
 |latex-ocr-handwrite|[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR/summary)|synthetic_handwrite|95424|375.1±59.4, min=292, max=2115|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)|
 |aishell1-zh|[speech_asr/speech_asr_aishell1_trainsets](https://modelscope.cn/datasets/speech_asr/speech_asr_aishell1_trainsets/summary)||141600|152.2±36.8, min=63, max=419|chat, multi-modal, audio|-|
 |🔥aishell1-zh-mini|[speech_asr/speech_asr_aishell1_trainsets](https://modelscope.cn/datasets/speech_asr/speech_asr_aishell1_trainsets/summary)||14526|152.2±35.6, min=74, max=359|chat, multi-modal, audio|-|
@@ -713,6 +715,7 @@ The table below introduces the datasets supported by SWIFT:
 |moondream2-coyo-5M|[swift/moondream2-coyo-5M-captions](https://modelscope.cn/datasets/swift/moondream2-coyo-5M-captions/summary)||-|Dataset is too huge, please click the original link to view the dataset stat.|caption, pretrain, quality|[isidentical/moondream2-coyo-5M-captions](https://huggingface.co/datasets/isidentical/moondream2-coyo-5M-captions)|
 |no-robots|[swift/no_robots](https://modelscope.cn/datasets/swift/no_robots/summary)||9485|298.7±246.4, min=40, max=6739|multi-task, quality, human-annotated|[HuggingFaceH4/no_robots](https://huggingface.co/datasets/HuggingFaceH4/no_robots)|
 |open-hermes|[swift/OpenHermes-2.5](https://modelscope.cn/datasets/swift/OpenHermes-2.5/summary)||-|Dataset is too huge, please click the original link to view the dataset stat.|cot, en, quality|[teknium/OpenHermes-2.5](https://huggingface.co/datasets/teknium/OpenHermes-2.5)|
+|open-o1|[AI-ModelScope/OpenO1-SFT](https://modelscope.cn/datasets/AI-ModelScope/OpenO1-SFT/summary)|default|203579|615.5±659.6, min=11, max=27509|chat, general, o1|[O1-OPEN/OpenO1-SFT](https://huggingface.co/datasets/O1-OPEN/OpenO1-SFT)|
 |open-orca-chinese|[AI-ModelScope/OpenOrca-Chinese](https://modelscope.cn/datasets/AI-ModelScope/OpenOrca-Chinese/summary)||-|Dataset is too huge, please click the original link to view the dataset stat.|QA, zh, general, quality|[yys/OpenOrca-Chinese](https://huggingface.co/datasets/yys/OpenOrca-Chinese)|
 |orca_dpo_pairs|[swift/orca_dpo_pairs](https://modelscope.cn/datasets/swift/orca_dpo_pairs/summary)||12859|366.9±251.9, min=30, max=2010|rlhf, quality|[Intel/orca_dpo_pairs](https://huggingface.co/datasets/Intel/orca_dpo_pairs)|
 |path-vqa|[swift/path-vqa](https://modelscope.cn/datasets/swift/path-vqa/summary)||19654|34.8±7.3, min=27, max=85|multi-modal, vqa, medical|[flaviagiammarino/path-vqa](https://huggingface.co/datasets/flaviagiammarino/path-vqa)|

swift/llm/data/dataset_info.json

Lines changed: 6 additions & 0 deletions
@@ -1,4 +1,10 @@
 {
+    "open-o1": {
+        "dataset_id": "AI-ModelScope/OpenO1-SFT",
+        "hf_dataset_id": "O1-OPEN/OpenO1-SFT",
+        "subsets": ["default"],
+        "tags": ["chat", "general", "o1"]
+    },
     "multi-alpaca": {
         "dataset_id": "damo/nlp_polylm_multialpaca_sft",
         "subsets": ["ar", "de", "es", "fr", "id", "ja", "ko", "pt", "ru", "th", "vi"],

swift/llm/utils/model.py

Lines changed: 13 additions & 0 deletions
@@ -218,6 +218,8 @@ class ModelType:
     qwen2_5_coder_32b_instruct_gptq_int8 = 'qwen2_5-coder-32b-instruct-gptq-int8'
     qwen2_5_coder_32b_instruct_awq = 'qwen2_5-coder-32b-instruct-awq'

+    qwq_32b_preview = 'qwq-32b-preview'
+
     marco_o1 = 'marco-o1'

     # qwen-vl
@@ -3345,6 +3347,17 @@ def rotary_emb(self, query_states, key_states, **kwargs):
     return model, tokenizer


+@register_model(
+    ModelType.qwq_32b_preview,
+    'Qwen/QwQ-32B-Preview',
+    LoRATM.llama,
+    TemplateType.qwq,
+    support_flash_attn=True,
+    support_vllm=True,
+    support_lmdeploy=True,
+    support_megatron=True,
+    requires=['transformers>=4.37'],
+    hf_model_id='Qwen/QwQ-32B-Preview')
 @register_model(
     ModelType.qwen2_math_1_5b_instruct,
     'qwen/Qwen2-Math-1.5B-Instruct',
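The registration wires the new ModelType into checkpoint download, template lookup, and the capability flags above, so the model works from the Python API as well as the `swift infer` CLI shown in the news entry. A minimal inference sketch, assuming the swift 2.x helpers (`get_model_tokenizer`, `get_template`, `inference`) behave as in the existing docs; the query string is illustrative:

```python
import torch
from swift.llm import (ModelType, get_default_template_type,
                       get_model_tokenizer, get_template, inference)

model_type = ModelType.qwq_32b_preview
template_type = get_default_template_type(model_type)  # expected: 'qwq'

# Downloads Qwen/QwQ-32B-Preview and loads it in bf16; the flags above
# enable the flash-attn, vLLM, and LMDeploy code paths for this model.
model, tokenizer = get_model_tokenizer(model_type, torch.bfloat16,
                                       model_kwargs={'device_map': 'auto'})
model.generation_config.max_new_tokens = 512

template = get_template(template_type, tokenizer)
response, history = inference(model, template, 'How many r letters are in "strawberry"?')
print(response)
```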

swift/llm/utils/template.py

Lines changed: 7 additions & 0 deletions
@@ -49,6 +49,7 @@ class TemplateType:
     default = 'default'
     qwen = 'qwen'
     qwen2_5 = 'qwen2_5'
+    qwq = 'qwq'
     marco_o1 = 'marco_o1'
     qwen_vl = 'qwen-vl'
     qwen_audio = 'qwen-audio'
@@ -1423,8 +1424,14 @@ class Qwen2_5Template(QwenTemplate):
     system = 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.'


+class QwqTemplate(QwenTemplate):
+    system = ('You are a helpful and harmless assistant. You are Qwen developed by Alibaba. '
+              'You should think step-by-step.')
+
+
 register_template(TemplateType.qwen, QwenTemplate())
 register_template(TemplateType.qwen2_5, Qwen2_5Template())
+register_template(TemplateType.qwq, QwqTemplate())


 class MarcoO1Template(QwenTemplate):
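QwqTemplate overrides only the default system prompt; the ChatML layout is inherited from QwenTemplate. A sketch of what a single-turn query should render to (the encode call and the exact rendering are assumptions based on how the other Qwen templates are exercised in swift 2.x):

```python
from swift.llm import get_model_tokenizer, get_template

# load_model=False fetches only the tokenizer/config, not the 32B weights.
_, tokenizer = get_model_tokenizer('qwq-32b-preview', load_model=False)
template = get_template('qwq', tokenizer)
inputs, _ = template.encode({'query': 'Hello'})
print(tokenizer.decode(inputs['input_ids']))
# Expected (assumed) ChatML rendering:
# <|im_start|>system
# You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.<|im_end|>
# <|im_start|>user
# Hello<|im_end|>
# <|im_start|>assistant
```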
