Skip to content

Commit 611ca65

Browse files
authored
[None][test] add Perf sanity gb200 test into QA test db (NVIDIA#11882)
Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>
1 parent 298b6c8 commit 611ca65

File tree

2 files changed

+27
-5
lines changed

2 files changed

+27
-5
lines changed

jenkins/scripts/perf/local/submit.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -321,7 +321,7 @@ def generate_srun_args(args, runtime_mode, timestamp):
321321

322322

323323
def generate_pytest_command(
324-
llm_src, work_dir, config_file_base_name, select_pattern, runtime_mode, benchmark_mode
324+
test_prefix, work_dir, config_file_base_name, select_pattern, runtime_mode, benchmark_mode
325325
):
326326
"""Generate pytest command and test list."""
327327
# Generate test list content based on runtime_mode and benchmark_mode
@@ -344,8 +344,8 @@ def generate_pytest_command(
344344
test_list_path = os.path.join(work_dir, "test_list.txt")
345345

346346
pytest_command = (
347-
f"pytest -v -s "
348-
f"--test-prefix={llm_src}/tests/integration/defs "
347+
f"pytest -v "
348+
f"--test-prefix={test_prefix} "
349349
f"--test-list={test_list_path} "
350350
f"--output-dir={work_dir} "
351351
f"-o junit_logging=out-err"
@@ -423,6 +423,7 @@ def main():
423423
default="1-100",
424424
help="Nsys start-stop range for generation workers in disaggregated mode (default: 1-100)",
425425
)
426+
parser.add_argument("--test-prefix", default="", help="Test prefix")
426427

427428
args = parser.parse_args()
428429

@@ -480,6 +481,8 @@ def main():
480481
work_dir = os.path.join(llm_src, "jenkins", "scripts", "perf", "local", timestamp)
481482
os.makedirs(work_dir, exist_ok=True)
482483

484+
test_prefix = args.test_prefix if args.test_prefix else f"{llm_src}/tests/integration/defs"
485+
483486
# Determine paths
484487
launch_sh = args.launch_sh if args.launch_sh else os.path.join(work_dir, "slurm_launch.sh")
485488
run_sh = (
@@ -521,7 +524,7 @@ def main():
521524

522525
# Generate pytest command
523526
pytest_command, test_list_content, test_list_path = generate_pytest_command(
524-
llm_src, work_dir, config_file_base_name, select_pattern, runtime_mode, benchmark_mode
527+
test_prefix, work_dir, config_file_base_name, select_pattern, runtime_mode, benchmark_mode
525528
)
526529

527530
# Write test list file

tests/integration/test_lists/qa/llm_perf_multinode.yml

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,30 @@ llm_perf_multinode:
55
- condition:
66
wildcards:
77
gpu:
8-
- '*b200*'
8+
- 'b200'
99
tests:
1010
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-b200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120)
1111
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-b200_deepseek-r1-fp4_1k1k_con2048_ctx1_dep4_gen1_dep8_eplb0_mtp1_ccb-UCX] TIMEOUT (120)
1212
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-b200_deepseek-r1-fp4_1k1k_con256_ctx1_dep4_gen1_dep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120)
1313
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-b200_deepseek-r1-fp4_8k1k_con1536_ctx1_dep4_gen1_dep8_eplb0_mtp1_ccb-UCX] TIMEOUT (120)
1414
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-b200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120)
1515
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-b200_deepseek-r1-fp4_8k1k_con256_ctx1_dep4_gen1_dep8_eplb0_mtp1_ccb-UCX] TIMEOUT (120)
16+
17+
# 2: GB200 test cases
18+
- condition:
19+
wildcards:
20+
gpu:
21+
- 'gb200'
22+
tests:
23+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_gpt-oss-120b-fp4_1k1k_con2048_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-UCX] TIMEOUT (120)
24+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_gpt-oss-120b-fp4_1k1k_con512_ctx1_tp1_gen1_dep2_eplb0_mtp0_ccb-UCX] TIMEOUT (120)
25+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_gpt-oss-120b-fp4_1k1k_con64_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-UCX] TIMEOUT (120)
26+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_gpt-oss-120b-fp4_8k1k_con128_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-UCX] TIMEOUT (120)
27+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_gpt-oss-120b-fp4_8k1k_con4_ctx1_tp1_gen1_tp4_eplb0_mtp0_ccb-UCX] TIMEOUT (120)
28+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_deepseek-r1-fp4_1k1k_con3072_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-UCX] TIMEOUT (120)
29+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_deepseek-v32-fp4_1k1k_con2048_ctx1_dep4_gen1_dep4_eplb0_mtp1_ccb-UCX] TIMEOUT (120)
30+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_kimi-k2-thinking-fp4_1k1k_con4_ctx1_dep4_gen1_tep4_eplb0_mtp0_ccb-UCX] TIMEOUT (120)
31+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_deepseek-r1-fp4_1k1k_con1024_ctx1_dep4_gen1_dep8_eplb0_mtp0_ccb-UCX] TIMEOUT (120)
32+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_deepseek-r1-fp4_1k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120)
33+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_deepseek-r1-fp4_8k1k_con1_ctx1_dep4_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120)
34+
- perf/test_perf_sanity.py::test_e2e[disagg-gen_only-gb200_deepseek-r1-fp4_128k8k_con1_ctx1_pp8_gen1_tep8_eplb0_mtp3_ccb-UCX] TIMEOUT (120)

0 commit comments

Comments
 (0)