Skip to content

Commit 3a412ec

Browse files
committed
Move some of the tests from b200 to gb200 to avoid a model-loading issue
Signed-off-by: Yi Zhang <187001205+yizhang-nv@users.noreply.github.com>
1 parent bde69dd commit 3a412ec

File tree

3 files changed

+13
-6
lines changed

3 files changed

+13
-6
lines changed

jenkins/L0_Test.groovy

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2777,7 +2777,8 @@ def launchTestJobs(pipeline, testFilter)
27772777
"DGX_B200-4_GPUs-PyTorch-1": ["b200-x4", "l0_dgx_b200", 1, 2, 4],
27782778
"DGX_B200-4_GPUs-PyTorch-2": ["b200-x4", "l0_dgx_b200", 2, 2, 4],
27792779
"DGX_B200-4_GPUs-PyTorch-Ray-1": ["b200-x4", "l0_dgx_b200", 1, 1, 4],
2780-
"DGX_B200-8_GPUs-PyTorch-1": ["b200-x8", "l0_dgx_b200", 1, 1, 8],
2780+
// Moved DGX_B200-8_GPUs-PyTorch-1 due to a model-loading issue on these nodes.
2781+
// "DGX_B200-8_GPUs-PyTorch-1": ["b200-x8", "l0_dgx_b200", 1, 1, 8],
27812782
"DGX_B200-4_GPUs-PyTorch-Post-Merge-1": ["b200-trtllm", "l0_dgx_b200", 1, 1, 4, 1, true],
27822783
"DGX_B300-4_GPUs-PyTorch-Post-Merge-1": ["b300-x4", "l0_dgx_b300", 1, 1, 4],
27832784
// Perf sanity post merge test
@@ -2819,8 +2820,10 @@ def launchTestJobs(pipeline, testFilter)
28192820
multiNodesSBSAConfigs = [
28202821
// Each testcase uses 8 GPUs and 2 nodes.
28212822
// https://nvbugs/5598863 (uncorrectable NVLink error detected during the execution) may not exist in OCI machines.
2822-
"GB200-8_GPUs-2_Nodes-PyTorch-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 2, 8, 2],
2823-
"GB200-8_GPUs-2_Nodes-PyTorch-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 2, 8, 2],
2823+
"GB200-8_GPUs-2_Nodes-PyTorch-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 3, 8, 2],
2824+
"GB200-8_GPUs-2_Nodes-PyTorch-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 3, 8, 2],
2825+
// Request 1 more node for tests that were moved from B200 to GB200
2826+
"GB200-8_GPUs-2_Nodes-PyTorch-3": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 3, 3, 8, 2],
28242827
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-1": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 1, 3, 8, 2],
28252828
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-2": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 2, 3, 8, 2],
28262829
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-3": ["gb200-oci-trtllm", "l0_gb200_multi_nodes", 3, 3, 8, 2],

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2426,7 +2426,7 @@ def test_nvfp4_multi_gpus_chunked_prefill(self, tp_size, pp_size, ep_size,
24262426
task.evaluate(llm)
24272427

24282428
@skip_pre_blackwell
2429-
@pytest.mark.skip_less_device(8)
2429+
@pytest.mark.skip_less_mpi_world_size(8)
24302430
def test_nvfp4_multi_gpus_corner_case(self):
24312431
"""
24322432
This test is used to test the corner case of the NVFP4 model.
@@ -2575,7 +2575,6 @@ class TestDeepSeekV32(LlmapiAccuracyTestHarness):
25752575
MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3.2-Exp-hf"
25762576

25772577
@pytest.mark.skip_less_mpi_world_size(8)
2578-
@pytest.mark.skip_less_device(8)
25792578
@skip_pre_hopper
25802579
@pytest.mark.skip_less_device_memory(140000)
25812580
@pytest.mark.parametrize(
@@ -2650,7 +2649,6 @@ def test_fp8_blockscale(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
26502649
task.evaluate(llm)
26512650

26522651
@pytest.mark.skip_less_mpi_world_size(8)
2653-
@pytest.mark.skip_less_device(8)
26542652
@skip_pre_blackwell
26552653
@pytest.mark.parametrize(
26562654
"tp_size,pp_size,ep_size,mtp_nextn,fp8kv,attention_dp,cuda_graph,overlap_scheduler,max_batch_size,moe_backend",

tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,15 @@ l0_gb200_multi_nodes:
1515
tests:
1616
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput] TIMEOUT (180)
1717
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_mtp] TIMEOUT (180)
18+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_bs8_mtp] TIMEOUT (180) # Temporarily added to GB200 to avoid the model-loading issue
1819
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] TIMEOUT (180)
1920
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp] TIMEOUT (180)
2021
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp_trtllm] TIMEOUT (180)
22+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[baseline] TIMEOUT (180) # Temporarily added to GB200 to avoid the model-loading issue
23+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_fp8_blockscale[baseline_mtp1] TIMEOUT (180) # Temporarily added to GB200 to avoid the model-loading issue
24+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_nvfp4_multi_gpus[baseline] TIMEOUT (180) # Temporarily added to GB200 to avoid the model-loading issue
25+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV32::test_nvfp4_multi_gpus[baseline_mtp1] TIMEOUT (180) # Temporarily added to GB200 to avoid the model-loading issue
26+
- accuracy/test_disaggregated_serving.py::TestDeepSeekV32Exp::test_auto_dtype[False] TIMEOUT (360) # Temporarily added to GB200 to avoid the model-loading issue
2127
- condition:
2228
ranges:
2329
# 2 nodes with each node has 4 GPUs

0 commit comments

Comments
 (0)