File tree Expand file tree Collapse file tree 4 files changed +19
-2
lines changed
tests/test_align/test_template Expand file tree Collapse file tree 4 files changed +19
-2
lines changed Original file line number Diff line number Diff line change 394394| [deepseek-ai/DeepSeek-V3-0324](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-0324) | deepseek_v2_5| deepseek_v2_5| transformers>=4.39.3| ✘ | -| [deepseek-ai/DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324) |
395395| [cognitivecomputations/DeepSeek-V3-awq](https://modelscope.cn/models/cognitivecomputations/DeepSeek-V3-awq) | deepseek_v2_5| deepseek_v2_5| transformers>=4.39.3| ✘ | -| [cognitivecomputations/DeepSeek-V3-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-V3-AWQ) |
396396| [cognitivecomputations/DeepSeek-V3-0324-AWQ](https://modelscope.cn/models/cognitivecomputations/DeepSeek-V3-0324-AWQ) | deepseek_v2_5| deepseek_v2_5| transformers>=4.39.3| ✘ | -| [cognitivecomputations/DeepSeek-V3-0324-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-V3-0324-AWQ) |
397+ | [deepseek-ai/DeepSeek-Prover-V2-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-7B) | deepseek_v2_5| deepseek_v2_5| transformers>=4.39.3| ✘ | -| [deepseek-ai/DeepSeek-Prover-V2-7B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-7B) |
398+ | [deepseek-ai/DeepSeek-Prover-V2-671B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-671B) | deepseek_v2_5| deepseek_v2_5| transformers>=4.39.3| ✘ | -| [deepseek-ai/DeepSeek-Prover-V2-671B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-671B) |
397399| [deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1) | deepseek_r1| deepseek_r1| transformers>=4.39.3| ✘ | -| [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1) |
398400| [deepseek-ai/DeepSeek-R1-Zero](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Zero) | deepseek_r1| deepseek_r1| transformers>=4.39.3| ✘ | -| [deepseek-ai/DeepSeek-R1-Zero](https://huggingface.co/deepseek-ai/DeepSeek-R1-Zero) |
399401| [cognitivecomputations/DeepSeek-R1-awq](https://modelscope.cn/models/cognitivecomputations/DeepSeek-R1-awq) | deepseek_r1| deepseek_r1| transformers>=4.39.3| ✘ | -| [cognitivecomputations/DeepSeek-R1-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-R1-AWQ) |
Original file line number Diff line number Diff line change @@ -394,6 +394,8 @@ The table below introduces the models integrated with ms-swift:
394394| [deepseek-ai/DeepSeek-V3-0324](https://modelscope.cn/models/deepseek-ai/DeepSeek-V3-0324) | deepseek_v2_5| deepseek_v2_5| transformers>=4.39.3| ✘ | -| [deepseek-ai/DeepSeek-V3-0324](https://huggingface.co/deepseek-ai/DeepSeek-V3-0324) |
395395| [cognitivecomputations/DeepSeek-V3-awq](https://modelscope.cn/models/cognitivecomputations/DeepSeek-V3-awq) | deepseek_v2_5| deepseek_v2_5| transformers>=4.39.3| ✘ | -| [cognitivecomputations/DeepSeek-V3-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-V3-AWQ) |
396396| [cognitivecomputations/DeepSeek-V3-0324-AWQ](https://modelscope.cn/models/cognitivecomputations/DeepSeek-V3-0324-AWQ) | deepseek_v2_5| deepseek_v2_5| transformers>=4.39.3| ✘ | -| [cognitivecomputations/DeepSeek-V3-0324-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-V3-0324-AWQ) |
397+ | [deepseek-ai/DeepSeek-Prover-V2-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-7B) | deepseek_v2_5| deepseek_v2_5| transformers>=4.39.3| ✘ | -| [deepseek-ai/DeepSeek-Prover-V2-7B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-7B) |
398+ | [deepseek-ai/DeepSeek-Prover-V2-671B](https://modelscope.cn/models/deepseek-ai/DeepSeek-Prover-V2-671B) | deepseek_v2_5| deepseek_v2_5| transformers>=4.39.3| ✘ | -| [deepseek-ai/DeepSeek-Prover-V2-671B](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V2-671B) |
397399| [deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1) | deepseek_r1| deepseek_r1| transformers>=4.39.3| ✘ | -| [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1) |
398400| [deepseek-ai/DeepSeek-R1-Zero](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Zero) | deepseek_r1| deepseek_r1| transformers>=4.39.3| ✘ | -| [deepseek-ai/DeepSeek-R1-Zero](https://huggingface.co/deepseek-ai/DeepSeek-R1-Zero) |
399401| [cognitivecomputations/DeepSeek-R1-awq](https://modelscope.cn/models/cognitivecomputations/DeepSeek-R1-awq) | deepseek_r1| deepseek_r1| transformers>=4.39.3| ✘ | -| [cognitivecomputations/DeepSeek-R1-AWQ](https://huggingface.co/cognitivecomputations/DeepSeek-R1-AWQ) |
Original file line number Diff line number Diff line change @@ -110,7 +110,11 @@ def get_model_tokenizer_deepseek_moe(model_dir: str,
110110 ModelGroup ([
111111 Model ('cognitivecomputations/DeepSeek-V3-awq' , 'cognitivecomputations/DeepSeek-V3-AWQ' ),
112112 Model ('cognitivecomputations/DeepSeek-V3-0324-AWQ' , 'cognitivecomputations/DeepSeek-V3-0324-AWQ' )
113- ])
113+ ]),
114+ ModelGroup ([
115+ Model ('deepseek-ai/DeepSeek-Prover-V2-7B' , 'deepseek-ai/DeepSeek-Prover-V2-7B' ),
116+ Model ('deepseek-ai/DeepSeek-Prover-V2-671B' , 'deepseek-ai/DeepSeek-Prover-V2-671B' ),
117+ ]),
114118 ],
115119 TemplateType .deepseek_v2_5 ,
116120 get_model_tokenizer_deepseek_moe ,
Original file line number Diff line number Diff line change @@ -302,6 +302,14 @@ def test_deepseek_r1_distill():
302302 assert res == res2 , f'res: { res } , res2: { res2 } '
303303
304304
# Consistency test for 'deepseek-ai/DeepSeek-Prover-V2-7B': runs _infer_model twice on the
# same PtEngine and asserts that both runs return equal results.
305+ def test_deepseek_prover_v2 ():
306+ pt_engine = PtEngine ('deepseek-ai/DeepSeek-Prover-V2-7B' )
# First run uses whatever template backend the engine was constructed with.
307+ res = _infer_model (pt_engine )
# Switch the engine's default template to the 'jinja' backend, then run again.
308+ pt_engine .default_template .template_backend = 'jinja'
309+ res2 = _infer_model (pt_engine )
# Both outputs are included in the failure message to make a mismatch easy to inspect.
310+ assert res == res2 , f'res: { res } , res2: { res2 } '
311+
312+
305313def test_qwen2_5_prm ():
306314 pt_engine = PtEngine ('Qwen/Qwen2.5-Math-7B-PRM800K' )
307315 data = {
@@ -418,6 +426,7 @@ def test_mimo():
418426 # test_phi4_mini()
419427 # test_internlm3()
420428 # test_deepseek_r1_distill()
429+ test_deepseek_prover_v2 ()
421430 # test_qwen2_5_prm()
422431 # test_mistral_small()
423432 # test_baichuan_m1()
@@ -426,4 +435,4 @@ def test_mimo():
426435 # test_gemma3()
427436 # test_glm4_0414()
428437 # test_qwen3()
429- test_mimo ()
438+ # test_mimo()
You can’t perform that action at this time.
0 commit comments