Skip to content

Commit f0a0317

Browse files
authored
support deepseek_prover_v2 (#4184)
1 parent cb05876 commit f0a0317

File tree

4 files changed

+19
-2
lines changed

4 files changed

+19
-2
lines changed

docs/source/Instruction/支持的模型和数据集.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,8 @@
394394
|[deepseek-ai/DeepSeek-V3-0324](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-0324)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|✘|-|[deepseek-ai/DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324)|
395395
|[cognitivecomputations/DeepSeek-V3-awq](https://modelscope.cn/models/cognitivecomputations/DeepSeek-V3-awq)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|✘|-|[cognitivecomputations/DeepSeek-V3-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-V3-AWQ)|
396396
|[cognitivecomputations/DeepSeek-V3-0324-AWQ](https://modelscope.cn/models/cognitivecomputations/DeepSeek-V3-0324-AWQ)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|✘|-|[cognitivecomputations/DeepSeek-V3-0324-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-V3-0324-AWQ)|
397+
|[deepseek-ai/DeepSeek-Prover-V2-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-7B)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|✘|-|[deepseek-ai/DeepSeek-Prover-V2-7B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-7B)|
398+
|[deepseek-ai/DeepSeek-Prover-V2-671B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-671B)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|✘|-|[deepseek-ai/DeepSeek-Prover-V2-671B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-671B)|
397399
|[deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1)|deepseek_r1|deepseek_r1|transformers>=4.39.3|✘|-|[deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)|
398400
|[deepseek-ai/DeepSeek-R1-Zero](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Zero)|deepseek_r1|deepseek_r1|transformers>=4.39.3|✘|-|[deepseek-ai/DeepSeek-R1-Zero](https://huggingface.co/deepseek-ai/DeepSeek-R1-Zero)|
399401
|[cognitivecomputations/DeepSeek-R1-awq](https://modelscope.cn/models/cognitivecomputations/DeepSeek-R1-awq)|deepseek_r1|deepseek_r1|transformers>=4.39.3|✘|-|[cognitivecomputations/DeepSeek-R1-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-R1-AWQ)|

docs/source_en/Instruction/Supported-models-and-datasets.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,8 @@ The table below introduces the models integrated with ms-swift:
394394
|[deepseek-ai/DeepSeek-V3-0324](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-0324)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|✘|-|[deepseek-ai/DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324)|
395395
|[cognitivecomputations/DeepSeek-V3-awq](https://modelscope.cn/models/cognitivecomputations/DeepSeek-V3-awq)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|✘|-|[cognitivecomputations/DeepSeek-V3-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-V3-AWQ)|
396396
|[cognitivecomputations/DeepSeek-V3-0324-AWQ](https://modelscope.cn/models/cognitivecomputations/DeepSeek-V3-0324-AWQ)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|✘|-|[cognitivecomputations/DeepSeek-V3-0324-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-V3-0324-AWQ)|
397+
|[deepseek-ai/DeepSeek-Prover-V2-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-7B)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|✘|-|[deepseek-ai/DeepSeek-Prover-V2-7B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-7B)|
398+
|[deepseek-ai/DeepSeek-Prover-V2-671B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-671B)|deepseek_v2_5|deepseek_v2_5|transformers>=4.39.3|✘|-|[deepseek-ai/DeepSeek-Prover-V2-671B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-671B)|
397399
|[deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1)|deepseek_r1|deepseek_r1|transformers>=4.39.3|✘|-|[deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)|
398400
|[deepseek-ai/DeepSeek-R1-Zero](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Zero)|deepseek_r1|deepseek_r1|transformers>=4.39.3|✘|-|[deepseek-ai/DeepSeek-R1-Zero](https://huggingface.co/deepseek-ai/DeepSeek-R1-Zero)|
399401
|[cognitivecomputations/DeepSeek-R1-awq](https://modelscope.cn/models/cognitivecomputations/DeepSeek-R1-awq)|deepseek_r1|deepseek_r1|transformers>=4.39.3|✘|-|[cognitivecomputations/DeepSeek-R1-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-R1-AWQ)|

swift/llm/model/model/deepseek.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,11 @@ def get_model_tokenizer_deepseek_moe(model_dir: str,
110110
ModelGroup([
111111
Model('cognitivecomputations/DeepSeek-V3-awq', 'cognitivecomputations/DeepSeek-V3-AWQ'),
112112
Model('cognitivecomputations/DeepSeek-V3-0324-AWQ', 'cognitivecomputations/DeepSeek-V3-0324-AWQ')
113-
])
113+
]),
114+
ModelGroup([
115+
Model('deepseek-ai/DeepSeek-Prover-V2-7B', 'deepseek-ai/DeepSeek-Prover-V2-7B'),
116+
Model('deepseek-ai/DeepSeek-Prover-V2-671B', 'deepseek-ai/DeepSeek-Prover-V2-671B'),
117+
]),
114118
],
115119
TemplateType.deepseek_v2_5,
116120
get_model_tokenizer_deepseek_moe,

tests/test_align/test_template/test_llm.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,14 @@ def test_deepseek_r1_distill():
302302
assert res == res2, f'res: {res}, res2: {res2}'
303303

304304

305+
def test_deepseek_prover_v2():
306+
pt_engine = PtEngine('deepseek-ai/DeepSeek-Prover-V2-7B')
307+
res = _infer_model(pt_engine)
308+
pt_engine.default_template.template_backend = 'jinja'
309+
res2 = _infer_model(pt_engine)
310+
assert res == res2, f'res: {res}, res2: {res2}'
311+
312+
305313
def test_qwen2_5_prm():
306314
pt_engine = PtEngine('Qwen/Qwen2.5-Math-7B-PRM800K')
307315
data = {
@@ -418,6 +426,7 @@ def test_mimo():
418426
# test_phi4_mini()
419427
# test_internlm3()
420428
# test_deepseek_r1_distill()
429+
test_deepseek_prover_v2()
421430
# test_qwen2_5_prm()
422431
# test_mistral_small()
423432
# test_baichuan_m1()
@@ -426,4 +435,4 @@ def test_mimo():
426435
# test_gemma3()
427436
# test_glm4_0414()
428437
# test_qwen3()
429-
test_mimo()
438+
# test_mimo()

0 commit comments

Comments
 (0)