Skip to content

Commit 14bfb5e

Browse files
authored
test: FIX test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus (#4283)
* Update test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus
  Signed-off-by: xinhe-nv <[email protected]>
* Skip llava-v1.6-mistral-7b-hf-vision-trtllm on L40S
  Signed-off-by: xinhe-nv <[email protected]>
---------
Signed-off-by: xinhe-nv <[email protected]>
1 parent 97bc680 commit 14bfb5e

File tree

3 files changed

+19
-29
lines changed

3 files changed

+19
-29
lines changed

tests/integration/defs/examples/test_multimodal.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,9 @@ def _test_llm_multimodal_general(llm_venv,
611611
'blip2-flan-t5-xl',
612612
'llava-1.5-7b-hf',
613613
'llava-v1.6-mistral-7b-hf',
614-
'llava-v1.6-mistral-7b-hf-vision-trtllm',
614+
pytest.param('llava-v1.6-mistral-7b-hf-vision-trtllm',
615+
marks=pytest.mark.skipif(get_device_memory() < 50000,
616+
reason="Skip due to low memory")),
615617
'llava-onevision-qwen2-7b-ov-hf',
616618
'llava-onevision-qwen2-7b-ov-hf-video',
617619
'nougat-base',

tests/integration/defs/test_e2e.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1349,24 +1349,22 @@ def test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus(
13491349
# "RCCA https://nvbugs/5163844"
13501350
print(f"Testing {model_name}.")
13511351
example_root = Path(os.path.join(llm_root, "examples", "pytorch"))
1352-
with tempfile.NamedTemporaryFile(mode='w+t',
1353-
suffix=f".{model_name}.log",
1354-
dir="./",
1355-
delete=True,
1356-
delete_on_close=True) as running_log:
1357-
llm_venv.run_cmd([
1358-
str(example_root / "quickstart_advanced.py"),
1359-
"--model_dir",
1360-
f"{llm_models_root()}/{model_path}",
1361-
"--moe_ep_size=8",
1362-
"--tp_size=16",
1363-
"--use_cuda_graph",
1364-
f"--kv_cache_fraction={_MEM_FRACTION_50}",
1365-
"--max_batch_size=32",
1366-
"--max_num_tokens=2048",
1367-
],
1368-
running_log=running_log)
1369-
# _check_mem_usage(running_log, [56.30, 0, 0, 0])
1352+
run_cmd = [
1353+
"trtllm-llmapi-launch",
1354+
"python3",
1355+
str(example_root / "quickstart_advanced.py"),
1356+
"--enable_overlap_scheduler",
1357+
"--model_dir",
1358+
f"{llm_models_root()}/{model_path}",
1359+
"--moe_ep_size=8",
1360+
"--tp_size=16",
1361+
"--use_cuda_graph",
1362+
f"--kv_cache_fraction={_MEM_FRACTION_50}",
1363+
"--max_batch_size=32",
1364+
"--max_num_tokens=2048",
1365+
"--disable_kv_cache_reuse",
1366+
]
1367+
check_call(" ".join(run_cmd), shell=True, env=llm_venv._new_env)
13701368

13711369

13721370
@pytest.mark.parametrize("model_name,model_path,eagle_model_path", [

tests/integration/test_lists/qa/llm_multinodes_function_test.txt

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,5 @@
11
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-build]
22
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-8b-disable_fp8-tp16pp1-infer]
3-
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-70b-disable_fp8-tp8pp2-build]
4-
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-70b-disable_fp8-tp8pp2-infer]
5-
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-70b-enable_fp8-tp8pp2-build]
6-
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-70b-enable_fp8-tp8pp2-infer]
7-
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-405b-enable_fp8-tp8pp2-build]
8-
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-405b-enable_fp8-tp8pp2-infer]
9-
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-405b-disable_fp8-tp8pp2-build]
10-
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-405b-disable_fp8-tp8pp2-infer]
11-
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-405b-fp8-disable_fp8-tp8pp2-build]
12-
examples/test_llama.py::test_llm_llama_v3_1_2nodes_8gpus[llama-3.1-405b-fp8-disable_fp8-tp8pp2-infer]
133
examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-build]
144
examples/test_mixtral.py::test_llm_mixtral_2nodes_8gpus[Mixtral-8x22B-v0.1-plugin-renormalize-tensor_parallel-infer]
155
test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_2nodes_8gpus[DeepSeek-V3-DeepSeek-V3]

0 commit comments

Comments (0)