@@ -1903,7 +1903,7 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
19031903
19041904 @skip_pre_blackwell
19051905 @pytest .mark .parametrize (
1906- "tp_size,pp_size,ep_size,mtp_nextn,fp8kv,attention_dp,cuda_graph,overlap_scheduler,max_batch_size,moe_backend" ,
1906+ "tp_size,pp_size,ep_size,mtp_nextn,fp8kv,attention_dp,enable_lm_head_tp_in_adp, cuda_graph,overlap_scheduler,max_batch_size,moe_backend" ,
19071907 [
19081908 # Use a larger batch_size to speed up the tests
19091909 pytest .param (8 ,
@@ -1912,6 +1912,7 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
19121912 3 ,
19131913 False ,
19141914 False ,
1915+ False ,
19151916 True ,
19161917 True ,
19171918 32 ,
@@ -1923,6 +1924,31 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
19231924 3 ,
19241925 False ,
19251926 False ,
1927+ False ,
1928+ True ,
1929+ True ,
1930+ 32 ,
1931+ "TRTLLM" ,
1932+ marks = pytest .mark .skip_less_mpi_world_size (8 )),
1933+ pytest .param (8 ,
1934+ 1 ,
1935+ 4 ,
1936+ 3 ,
1937+ False ,
1938+ True ,
1939+ True ,
1940+ True ,
1941+ True ,
1942+ 32 ,
1943+ "CUTLASS" ,
1944+ marks = pytest .mark .skip_less_mpi_world_size (8 )),
1945+ pytest .param (8 ,
1946+ 1 ,
1947+ 4 ,
1948+ 3 ,
1949+ False ,
1950+ True ,
1951+ True ,
19261952 True ,
19271953 True ,
19281954 32 ,
@@ -1934,6 +1960,7 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
19341960 0 ,
19351961 True ,
19361962 True ,
1963+ False ,
19371964 True ,
19381965 True ,
19391966 32 ,
@@ -1945,6 +1972,7 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
19451972 0 ,
19461973 True ,
19471974 True ,
1975+ False ,
19481976 True ,
19491977 True ,
19501978 32 ,
@@ -1956,6 +1984,7 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
19561984 0 ,
19571985 True ,
19581986 True ,
1987+ False ,
19591988 True ,
19601989 True ,
19611990 16 ,
@@ -1967,6 +1996,7 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
19671996 1 ,
19681997 True ,
19691998 True ,
1999+ False ,
19702000 True ,
19712001 True ,
19722002 32 ,
@@ -1978,19 +2008,22 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
19782008 1 ,
19792009 True ,
19802010 True ,
2011+ False ,
19812012 True ,
19822013 True ,
19832014 8 ,
19842015 "CUTLASS" ,
19852016 marks = pytest .mark .skip_less_mpi_world_size (8 )),
19862017 ],
19872018 ids = [
1988- "latency" , "latency_trtllmgen" , "throughput" , "throughput_tp8" ,
2019+ "latency" , "latency_trtllmgen" , "latency_adp_lmtp" ,
2020+ "latency_trtllmgen_adp_lmtp" , "throughput" , "throughput_tp8" ,
19892021 "throughput_tp4" , "throughput_mtp" , "throughput_bs8_mtp"
19902022 ])
19912023 def test_nvfp4_multi_gpus (self , tp_size , pp_size , ep_size , mtp_nextn , fp8kv ,
1992- attention_dp , cuda_graph , overlap_scheduler ,
1993- max_batch_size , moe_backend ):
2024+ attention_dp , enable_lm_head_tp_in_adp ,
2025+ cuda_graph , overlap_scheduler , max_batch_size ,
2026+ moe_backend ):
19942027 if moe_backend == "TRTLLM" and (get_sm_version () == 120
19952028 or get_sm_version () == 121 ):
19962029 pytest .skip (
@@ -2016,6 +2049,7 @@ def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
20162049 kv_cache_config = kv_cache_config ,
20172050 ** pytorch_config ,
20182051 enable_attention_dp = attention_dp ,
2052+ enable_lm_head_tp_in_adp = enable_lm_head_tp_in_adp ,
20192053 speculative_config = mtp_config ) as llm :
20202054
20212055 assert llm .args .moe_config .backend == moe_backend
0 commit comments