Skip to content

Commit c1f10f4

Browse files
authored
support AI-ModelScope/Skywork-o1-Open-Llama-3.1-8B (#2739)
1 parent f17ca92 commit c1f10f4

File tree

8 files changed: 61 additions (+61) and 9 deletions (-9)

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,9 @@
418418
|[LLM-Research/gemma-2-9b-it](https://modelscope.cn/models/LLM-Research/gemma-2-9b-it)|gemma2|gemma|transformers>=4.42|-|[google/gemma-2-9b-it](https://huggingface.co/google/gemma-2-9b-it)|
419419
|[LLM-Research/gemma-2-27b](https://modelscope.cn/models/LLM-Research/gemma-2-27b)|gemma2|gemma|transformers>=4.42|-|[google/gemma-2-27b](https://huggingface.co/google/gemma-2-27b)|
420420
|[LLM-Research/gemma-2-27b-it](https://modelscope.cn/models/LLM-Research/gemma-2-27b-it)|gemma2|gemma|transformers>=4.42|-|[google/gemma-2-27b-it](https://huggingface.co/google/gemma-2-27b-it)|
421+
|[skywork/Skywork-13B-base](https://modelscope.cn/models/skywork/Skywork-13B-base)|skywork|skywork|-|-|[skywork/Skywork-13B-base](https://huggingface.co/skywork/Skywork-13B-base)|
422+
|[skywork/Skywork-13B-chat](https://modelscope.cn/models/skywork/Skywork-13B-chat)|skywork|skywork|-|-|-|
423+
|[AI-ModelScope/Skywork-o1-Open-Llama-3.1-8B](https://modelscope.cn/models/AI-ModelScope/Skywork-o1-Open-Llama-3.1-8B)|skywork_o1|skywork_o1|transformers>=4.43|-|[Skywork/Skywork-o1-Open-Llama-3.1-8B](https://huggingface.co/Skywork/Skywork-o1-Open-Llama-3.1-8B)|
421424
|[IEITYuan/Yuan2.0-2B-hf](https://modelscope.cn/models/IEITYuan/Yuan2.0-2B-hf)|yuan2|yuan|-|-|[IEITYuan/Yuan2-2B-hf](https://huggingface.co/IEITYuan/Yuan2-2B-hf)|
422425
|[IEITYuan/Yuan2.0-51B-hf](https://modelscope.cn/models/IEITYuan/Yuan2.0-51B-hf)|yuan2|yuan|-|-|[IEITYuan/Yuan2-51B-hf](https://huggingface.co/IEITYuan/Yuan2-51B-hf)|
423426
|[IEITYuan/Yuan2.0-102B-hf](https://modelscope.cn/models/IEITYuan/Yuan2.0-102B-hf)|yuan2|yuan|-|-|[IEITYuan/Yuan2-102B-hf](https://huggingface.co/IEITYuan/Yuan2-102B-hf)|
@@ -451,8 +454,6 @@
451454
|[AI-ModelScope/mamba-1.4b-hf](https://modelscope.cn/models/AI-ModelScope/mamba-1.4b-hf)|mamba|default|transformers>=4.39.0|-|[state-spaces/mamba-1.4b-hf](https://huggingface.co/state-spaces/mamba-1.4b-hf)|
452455
|[AI-ModelScope/mamba-2.8b-hf](https://modelscope.cn/models/AI-ModelScope/mamba-2.8b-hf)|mamba|default|transformers>=4.39.0|-|[state-spaces/mamba-2.8b-hf](https://huggingface.co/state-spaces/mamba-2.8b-hf)|
453456
|[damo/nlp_polylm_13b_text_generation](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation)|polylm|default|-|-|[DAMO-NLP-MT/polylm-13b](https://huggingface.co/DAMO-NLP-MT/polylm-13b)|
454-
|[skywork/Skywork-13B-base](https://modelscope.cn/models/skywork/Skywork-13B-base)|skywork|skywork|-|-|-|
455-
|[skywork/Skywork-13B-chat](https://modelscope.cn/models/skywork/Skywork-13B-chat)|skywork|skywork|-|-|-|
456457
|[AI-ModelScope/aya-expanse-8b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-8b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-8b](https://huggingface.co/CohereForAI/aya-expanse-8b)|
457458
|[AI-ModelScope/aya-expanse-32b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-32b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-32b](https://huggingface.co/CohereForAI/aya-expanse-32b)|
458459

docs/source_en/Instruction/Supported-models-and-datasets.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,9 @@ The table below introduces the models integrated with ms-swift:
418418
|[LLM-Research/gemma-2-9b-it](https://modelscope.cn/models/LLM-Research/gemma-2-9b-it)|gemma2|gemma|transformers>=4.42|-|[google/gemma-2-9b-it](https://huggingface.co/google/gemma-2-9b-it)|
419419
|[LLM-Research/gemma-2-27b](https://modelscope.cn/models/LLM-Research/gemma-2-27b)|gemma2|gemma|transformers>=4.42|-|[google/gemma-2-27b](https://huggingface.co/google/gemma-2-27b)|
420420
|[LLM-Research/gemma-2-27b-it](https://modelscope.cn/models/LLM-Research/gemma-2-27b-it)|gemma2|gemma|transformers>=4.42|-|[google/gemma-2-27b-it](https://huggingface.co/google/gemma-2-27b-it)|
421+
|[skywork/Skywork-13B-base](https://modelscope.cn/models/skywork/Skywork-13B-base)|skywork|skywork|-|-|[skywork/Skywork-13B-base](https://huggingface.co/skywork/Skywork-13B-base)|
422+
|[skywork/Skywork-13B-chat](https://modelscope.cn/models/skywork/Skywork-13B-chat)|skywork|skywork|-|-|-|
423+
|[AI-ModelScope/Skywork-o1-Open-Llama-3.1-8B](https://modelscope.cn/models/AI-ModelScope/Skywork-o1-Open-Llama-3.1-8B)|skywork_o1|skywork_o1|transformers>=4.43|-|[Skywork/Skywork-o1-Open-Llama-3.1-8B](https://huggingface.co/Skywork/Skywork-o1-Open-Llama-3.1-8B)|
421424
|[IEITYuan/Yuan2.0-2B-hf](https://modelscope.cn/models/IEITYuan/Yuan2.0-2B-hf)|yuan2|yuan|-|-|[IEITYuan/Yuan2-2B-hf](https://huggingface.co/IEITYuan/Yuan2-2B-hf)|
422425
|[IEITYuan/Yuan2.0-51B-hf](https://modelscope.cn/models/IEITYuan/Yuan2.0-51B-hf)|yuan2|yuan|-|-|[IEITYuan/Yuan2-51B-hf](https://huggingface.co/IEITYuan/Yuan2-51B-hf)|
423426
|[IEITYuan/Yuan2.0-102B-hf](https://modelscope.cn/models/IEITYuan/Yuan2.0-102B-hf)|yuan2|yuan|-|-|[IEITYuan/Yuan2-102B-hf](https://huggingface.co/IEITYuan/Yuan2-102B-hf)|
@@ -451,8 +454,6 @@ The table below introduces the models integrated with ms-swift:
451454
|[AI-ModelScope/mamba-1.4b-hf](https://modelscope.cn/models/AI-ModelScope/mamba-1.4b-hf)|mamba|default|transformers>=4.39.0|-|[state-spaces/mamba-1.4b-hf](https://huggingface.co/state-spaces/mamba-1.4b-hf)|
452455
|[AI-ModelScope/mamba-2.8b-hf](https://modelscope.cn/models/AI-ModelScope/mamba-2.8b-hf)|mamba|default|transformers>=4.39.0|-|[state-spaces/mamba-2.8b-hf](https://huggingface.co/state-spaces/mamba-2.8b-hf)|
453456
|[damo/nlp_polylm_13b_text_generation](https://modelscope.cn/models/damo/nlp_polylm_13b_text_generation)|polylm|default|-|-|[DAMO-NLP-MT/polylm-13b](https://huggingface.co/DAMO-NLP-MT/polylm-13b)|
454-
|[skywork/Skywork-13B-base](https://modelscope.cn/models/skywork/Skywork-13B-base)|skywork|skywork|-|-|-|
455-
|[skywork/Skywork-13B-chat](https://modelscope.cn/models/skywork/Skywork-13B-chat)|skywork|skywork|-|-|-|
456457
|[AI-ModelScope/aya-expanse-8b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-8b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-8b](https://huggingface.co/CohereForAI/aya-expanse-8b)|
457458
|[AI-ModelScope/aya-expanse-32b](https://modelscope.cn/models/AI-ModelScope/aya-expanse-32b)|aya|aya|transformers>=4.44.0|-|[CohereForAI/aya-expanse-32b](https://huggingface.co/CohereForAI/aya-expanse-32b)|
458459

swift/llm/model/constant.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ class LLMModelType:
7878
gemma = 'gemma'
7979
gemma2 = 'gemma2'
8080

81+
skywork = 'skywork'
82+
skywork_o1 = 'skywork_o1'
83+
8184
yuan2 = 'yuan2'
8285
orion = 'orion'
8386
xverse = 'xverse'
@@ -89,7 +92,6 @@ class LLMModelType:
8992
grok = 'grok'
9093
mamba = 'mamba'
9194
polylm = 'polylm'
92-
skywork = 'skywork'
9395
aya = 'aya'
9496

9597

swift/llm/model/model/llm.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def get_skywork_model_tokenizer(model_dir: str,
8080
LLMModelType.skywork,
8181
[
8282
ModelGroup([
83-
Model('skywork/Skywork-13B-base'),
83+
Model('skywork/Skywork-13B-base', 'skywork/Skywork-13B-base'),
8484
Model('skywork/Skywork-13B-chat'),
8585
]),
8686
],
@@ -90,6 +90,21 @@ def get_skywork_model_tokenizer(model_dir: str,
9090
model_arch=ModelArch.llama,
9191
))
9292

93+
register_model(
94+
ModelMeta(
95+
LLMModelType.skywork_o1,
96+
[
97+
ModelGroup([
98+
Model('AI-ModelScope/Skywork-o1-Open-Llama-3.1-8B', 'Skywork/Skywork-o1-Open-Llama-3.1-8B'),
99+
]),
100+
],
101+
TemplateType.skywork_o1,
102+
get_model_tokenizer_with_flash_attn,
103+
architectures=['LlamaForCausalLM'],
104+
requires=['transformers>=4.43'],
105+
model_arch=ModelArch.llama,
106+
))
107+
93108

94109
def get_model_tokenizer_yuan(model_dir: str,
95110
model_info: ModelInfo,

swift/llm/template/constant.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ class LLMTemplateType:
5050
codefuse = 'codefuse'
5151
codefuse_codellama = 'codefuse_codellama'
5252

53+
skywork = 'skywork'
54+
skywork_o1 = 'skywork_o1'
55+
5356
mistral_nemo = 'mistral_nemo'
5457
zephyr = 'zephyr'
5558
wizardlm2 = 'wizardlm2'
@@ -59,7 +62,6 @@ class LLMTemplateType:
5962

6063
yuan = 'yuan'
6164
xverse = 'xverse'
62-
skywork = 'skywork'
6365
bluelm = 'bluelm'
6466
orion = 'orion'
6567

swift/llm/template/template/llm.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from ..constant import LLMTemplateType
66
from ..register import TemplateMeta, register_template
77
from ..utils import Prompt
8+
from .llama import Llama3_2TemplateMeta
89
from .utils import DEFAULT_SYSTEM, ChatmlTemplateMeta
910

1011
register_template(
@@ -70,6 +71,17 @@
7071
chat_sep=None,
7172
suffix=['[SEP]</s>']))
7273

74+
register_template(
75+
Llama3_2TemplateMeta(
76+
LLMTemplateType.skywork_o1,
77+
default_system=(
78+
'You are Skywork-o1, a thinking model developed by Skywork AI, specializing in solving complex problems '
79+
"involving mathematics, coding, and logical reasoning through deep thought. When faced with a user's "
80+
'request, you first engage in a lengthy and in-depth thinking process to explore possible solutions to '
81+
'the problem. After completing your thoughts, you then provide a detailed explanation of the solution '
82+
'process in your response.'),
83+
))
84+
7385
register_template(
7486
TemplateMeta(
7587
LLMTemplateType.bluelm,

swift/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Make sure to modify __release_datetime__ to release time when making official release.
2-
__version__ = '3.0.0.dev0'
2+
__version__ = '3.1.0.dev0'
33
# default release datetime for branches under active development is set
44
# to be a time far-far-away-into-the-future
55
__release_datetime__ = '2099-10-13 08:56:12'

tests/test_align/test_template/test_llm.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,24 @@ def test_megrez():
140140
assert res == res2, f'res: {res}, res2: {res2}'
141141

142142

143+
def test_skywork_o1():
144+
pt_engine = PtEngine('AI-ModelScope/Skywork-o1-Open-Llama-3.1-8B')
145+
res = _infer_model(
146+
pt_engine,
147+
messages=[{
148+
'role':
149+
'user',
150+
'content':
151+
('Jane has 12 apples. She gives 4 apples to her friend Mark, then buys 1 more apple, and finally splits '
152+
'all her apples equally among herself and her 2 siblings. How many apples does each person get?')
153+
}])
154+
assert res == ("To solve the problem, let's break it down into a series of logical steps:\n\n1. **Initial Number "
155+
'of Apples**: Jane starts with 12 apples.\n2. **Apples Given Away**: Jane gives 4 apples to her '
156+
'friend Mark. So, the number of apples she has now is:\n \\[\n 12 - 4 = 8\n \\]\n3. **Apples '
157+
'Bought**: Jane then buys 1 more apple. So, the number of apples she has now is:\n \\[\n '
158+
'8 + 1 = 9\n \\]\n4. **Apples Split Equally')
159+
160+
143161
if __name__ == '__main__':
144162
from swift.llm import PtEngine, RequestConfig, get_template, get_model_tokenizer, VllmEngine
145163
from swift.utils import get_logger, seed_everything
@@ -158,4 +176,5 @@ def test_megrez():
158176
# test_glm_edge()
159177
# test_llama()
160178
# test_openbuddy()
161-
test_megrez()
179+
# test_megrez()
180+
test_skywork_o1()

Comments (0): 0 commit comments