Commit 5c607c5

StanleySun639 and fredricz-20070104 authored and committed
[TRTLLM-8113][test] Add pytorch workflow e2e tests with pp enabled (NVIDIA#8357)
Signed-off-by: Stanley Sun <[email protected]>
Signed-off-by: Mike Iovine <[email protected]>
Signed-off-by: FredricZ-2007 <[email protected]>
1 parent d4dd0f3 commit 5c607c5

File tree

3 files changed: +40 -0 lines changed


tests/integration/defs/test_e2e.py

Lines changed: 34 additions & 0 deletions
@@ -2332,6 +2332,40 @@ def test_ptp_quickstart_advanced_multi_gpus(llm_root, llm_venv, model_name,
                                             gpu_count)
 
 
+@pytest.mark.skip_less_device_memory(80000)
+@pytest.mark.parametrize("cuda_graph", [False, True])
+@pytest.mark.parametrize("tp_size, pp_size", [
+    pytest.param(2, 2, marks=pytest.mark.skip_less_device(4)),
+    pytest.param(2, 4, marks=pytest.mark.skip_less_device(8)),
+])
+@pytest.mark.parametrize("model_name,model_path", [
+    pytest.param('Llama3.3-70B-FP8',
+                 'llama-3.3-models/Llama-3.3-70B-Instruct-FP8',
+                 marks=skip_pre_hopper),
+])
+def test_ptp_quickstart_advanced_pp_enabled(llm_root, llm_venv, model_name,
+                                            model_path, cuda_graph, tp_size,
+                                            pp_size):
+    print(f"Testing {model_name} on {tp_size * pp_size} GPUs.")
+    example_root = Path(os.path.join(llm_root, "examples", "llm-api"))
+    cmd = [
+        str(example_root / "quickstart_advanced.py"),
+        "--enable_chunked_prefill",
+        "--model_dir",
+        f"{llm_models_root()}/{model_path}",
+        f"--tp_size={tp_size}",
+        f"--pp_size={pp_size}",
+        "--moe_ep_size=1",
+        "--kv_cache_fraction=0.5",
+    ]
+    if cuda_graph:
+        cmd.extend([
+            "--use_cuda_graph",
+            "--cuda_graph_padding_enabled",
+        ])
+    llm_venv.run_cmd(cmd)
+
+
 @skip_pre_hopper
 @pytest.mark.skip_less_device(8)
 @pytest.mark.parametrize("cuda_graph", [False, True])
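For reference, a minimal sketch (not part of the commit) of the argument list the new test assembles for one parameter combination. The placeholder <LLM_MODELS_ROOT> stands in for whatever llm_models_root() resolves to; note that tp_size * pp_size is the GPU count a run needs, which is what the skip_less_device(4) and skip_less_device(8) marks on the tp/pp parameters encode.

# Illustration only: the argv built for tp_size=2, pp_size=4, cuda_graph=True,
# before llm_venv.run_cmd launches it in the test virtualenv.
tp_size, pp_size = 2, 4  # requires tp_size * pp_size = 8 GPUs
cmd = [
    "examples/llm-api/quickstart_advanced.py",
    "--enable_chunked_prefill",
    "--model_dir",
    "<LLM_MODELS_ROOT>/llama-3.3-models/Llama-3.3-70B-Instruct-FP8",
    f"--tp_size={tp_size}",
    f"--pp_size={pp_size}",
    "--moe_ep_size=1",               # expert parallelism disabled (size 1)
    "--kv_cache_fraction=0.5",
    "--use_cuda_graph",              # these two appear only when cuda_graph=True
    "--cuda_graph_padding_enabled",
]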

tests/integration/test_lists/qa/llm_function_core.txt

Lines changed: 5 additions & 0 deletions
@@ -417,6 +417,7 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search[
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search[enable_cuda_graph=True-enable_padding=True-disable_overlap_scheduler=False]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search[enable_cuda_graph=True-enable_padding=True-disable_overlap_scheduler=True]
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
+accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp2pp2
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=True]
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False]
@@ -692,6 +693,10 @@ test_e2e.py::test_ptp_quickstart_multimodal_multiturn[phi4-multimodal-instruct-f
 test_e2e.py::test_ptp_quickstart_multimodal_multiturn[phi4-multimodal-instruct-fp4-multimodals/Phi-4-multimodal-instruct-FP4]
 test_e2e.py::test_ptp_quickstart_bert[VANILLA-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
 test_e2e.py::test_ptp_quickstart_bert[TRTLLM-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
+test_e2e.py::test_ptp_quickstart_advanced_pp_enabled[Llama3.3-70B-FP8-llama-3.3-models/Llama-3.3-70B-Instruct-FP8-2-2-False]
+test_e2e.py::test_ptp_quickstart_advanced_pp_enabled[Llama3.3-70B-FP8-llama-3.3-models/Llama-3.3-70B-Instruct-FP8-2-2-True]
+test_e2e.py::test_ptp_quickstart_advanced_pp_enabled[Llama3.3-70B-FP8-llama-3.3-models/Llama-3.3-70B-Instruct-FP8-2-4-False]
+test_e2e.py::test_ptp_quickstart_advanced_pp_enabled[Llama3.3-70B-FP8-llama-3.3-models/Llama-3.3-70B-Instruct-FP8-2-4-True]
 test_e2e.py::test_ptp_star_attention_example[Llama3.1-8B-BF16-llama-3.1-model/Meta-Llama-3.1-8B]
 test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-hf-nvfp4-False-False]
 test_e2e.py::test_ptp_scaffolding[DeepSeek-R1-Distill-Qwen-7B-DeepSeek-R1/DeepSeek-R1-Distill-Qwen-7B]
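The four new entries above are the node IDs pytest derives from the parametrize stack in test_e2e.py: parameters from the decorator closest to the function (model_name, model_path) come first, then tp_size and pp_size, then cuda_graph. A minimal sketch, assuming plain pytest parametrize ID semantics rather than any project-specific tooling:

# Illustration only: reconstruct the four node IDs added above.
from itertools import product

models = [("Llama3.3-70B-FP8", "llama-3.3-models/Llama-3.3-70B-Instruct-FP8")]
tp_pp = [(2, 2), (2, 4)]       # (tp_size, pp_size)
cuda_graphs = [False, True]

for (name, path), (tp, pp), cg in product(models, tp_pp, cuda_graphs):
    print(f"test_e2e.py::test_ptp_quickstart_advanced_pp_enabled"
          f"[{name}-{path}-{tp}-{pp}-{cg}]")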

tests/integration/test_lists/qa/llm_function_core_sanity.txt

Lines changed: 1 addition & 0 deletions
@@ -129,6 +129,7 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestLlama3_2_1B::test_fp8_prequantized
 accuracy/test_llm_api_pytorch.py::TestLlama3_2_3B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestLlama3_2_3B::test_fp8_prequantized
+accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp2pp2
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=True]
