3131from .common import (PluginOptions , convert_weights , get_mmlu_accuracy ,
3232 prune_checkpoint , quantize_data , refit_model ,
3333 venv_check_call )
34- from .conftest import (get_device_count , llm_models_root , skip_no_sm120 ,
35- skip_nvlink_inactive , skip_post_blackwell , skip_pre_ada ,
36- skip_pre_blackwell , skip_pre_hopper , tests_path ,
37- unittest_path )
34+ from .conftest import (get_device_count , get_sm_version , llm_models_root ,
35+ skip_no_sm120 , skip_nvlink_inactive , skip_post_blackwell ,
36+ skip_pre_ada , skip_pre_blackwell , skip_pre_hopper ,
37+ tests_path , unittest_path )
3838
# Put the example apps directory on the import path.
# The trailing components must be relative: os.path.join() throws away every
# earlier component as soon as it meets an absolute one, so the previous
# '/../examples/apps' argument ignored tests_path() and pointed at the
# filesystem root instead.
# NOTE(review): assumes the apps live in ../examples/apps relative to
# tests_path() — confirm against the repository layout.
sys.path.append(os.path.join(str(tests_path()), '..', 'examples', 'apps'))
4040
@@ -2195,7 +2195,6 @@ def test_ptp_quickstart_advanced_deepseek_r1_8gpus(llm_root, llm_venv,
21952195 _check_mem_usage (running_log , [106.3 , 0 , 0 , 0 ], 8 )
21962196
21972197
2198- @skip_post_blackwell
21992198@pytest .mark .skip_less_device_memory (110000 )
22002199@pytest .mark .skip_less_device (8 )
22012200@pytest .mark .parametrize ("model_name,model_path" , [
@@ -2206,6 +2205,7 @@ def test_relaxed_acceptance_quickstart_advanced_deepseek_r1_8gpus(
22062205 llm_root , llm_venv , model_name , model_path ):
22072206 print (f"Testing { model_name } ." )
22082207 example_root = Path (os .path .join (llm_root , "examples" , "llm-api" ))
2208+ is_blackwell = get_sm_version () > 90
22092209 with tempfile .NamedTemporaryFile (mode = 'w+t' ,
22102210 suffix = f".{ model_name } .log" ,
22112211 dir = "./" ,
@@ -2219,7 +2219,7 @@ def test_relaxed_acceptance_quickstart_advanced_deepseek_r1_8gpus(
22192219 "--moe_ep_size=8" ,
22202220 "--tp_size=8" ,
22212221 "--use_cuda_graph" ,
2222- f"--kv_cache_fraction={ _MEM_FRACTION_95 } " ,
2222+ f"--kv_cache_fraction={ _MEM_FRACTION_50 if is_blackwell else _MEM_FRACTION_95 } " ,
22232223 "--max_batch_size=1" ,
22242224 "--max_seq_len=3000" ,
22252225 "--disable_kv_cache_reuse" ,
@@ -2232,6 +2232,8 @@ def test_relaxed_acceptance_quickstart_advanced_deepseek_r1_8gpus(
22322232 "--relaxed_delta=0.5" ,
22332233 "--enable_attention_dp" ,
22342234 "--use_one_model" ,
2235+ "--moe_backend" ,
2236+ "DEEPGEMM" if is_blackwell else "CUTLASS" ,
22352237 ],
22362238 stdout = running_log )
22372239 _check_mem_usage (running_log , [85.6 , 0 , 0 , 0 ], 8 )
0 commit comments