NVIDIA · pcastonguay · Feb 20, 2026
@@ -3290,10 +3290,9 @@ def launchTestJobs(pipeline, testFilter)
         "DGX_B300-4_GPUs-PyTorch-Post-Merge-1": ["b300-x4", "l0_dgx_b300", 1, 2, 4],
         "DGX_B300-4_GPUs-PyTorch-Post-Merge-2": ["b300-x4", "l0_dgx_b300", 2, 2, 4],
         // PerfSanity post-merge tests
-        "DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-1": ["auto:dgx-b200-flex", "l0_b200_multi_gpus_perf_sanity", 1, 4, 8, 1, true],
-        "DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-2": ["auto:dgx-b200-flex", "l0_b200_multi_gpus_perf_sanity", 2, 4, 8, 1, true],
-        "DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-3": ["auto:dgx-b200-flex", "l0_b200_multi_gpus_perf_sanity", 3, 4, 8, 1, true],
-        "DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-4": ["auto:dgx-b200-flex", "l0_b200_multi_gpus_perf_sanity", 4, 4, 8, 1, true],
+        "DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-1": ["auto:dgx-b200-flex", "l0_dgx_b200_perf_sanity", 1, 3, 8, 1, true],
+        "DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-2": ["auto:dgx-b200-flex", "l0_dgx_b200_perf_sanity", 2, 3, 8, 1, true],
+        "DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-3": ["auto:dgx-b200-flex", "l0_dgx_b200_perf_sanity", 3, 3, 8, 1, true],
     ]
     fullSet += x86SlurmTestConfigs.keySet()
 
@@ -3329,13 +3328,9 @@ def launchTestJobs(pipeline, testFilter)
         // PerfSanity pre-merge tests
         "GB200-4_GPUs-PyTorch-PerfSanity-1": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 1, 1, 4],
         // PerfSanity post-merge tests
-        "GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-1": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 1, 7, 4],
-        "GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-2": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 2, 7, 4],
-        "GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-3": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 3, 7, 4],
-        "GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-4": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 4, 7, 4],
-        "GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-5": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 5, 7, 4],
-        "GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-6": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 6, 7, 4],
-        "GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-7": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 7, 7, 4],
+        "GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-1": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 1, 3, 4],
+        "GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-2": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 2, 3, 4],
+        "GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-3": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 3, 3, 4],
     ]
     fullSet += SBSASlurmTestConfigs.keySet()
 
@@ -3348,64 +3343,52 @@ def launchTestJobs(pipeline, testFilter)
         "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-2": ["auto:gb200-flex", "l0_gb200_multi_nodes", 2, 3, 8, 2],
         "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-3": ["auto:gb200-flex", "l0_gb200_multi_nodes", 3, 3, 8, 2],
     ]
-    // PerfSanity post-merge aggregated
-    // 2 Nodes
+    // PerfSanity post-merge aggr tests
     multiNodesSBSAConfigs += buildStageConfigs(
-        "GB200-8_GPUs-2_Nodes-PyTorch-PerfSanity-Node2-GPU8-Post-Merge",
+        "GB200-8_GPUs-2_Nodes-PyTorch-Aggr-PerfSanity-Node2-GPU8-Post-Merge",
         "auto:gb200-flex",
-        "l0_gb200_multi_nodes_perf_sanity_node2_gpu8",
-        7,
-        8,
-        2
-    )
-    // PerfSanity post-merge disaggregated
-    // 2 Nodes
-    multiNodesSBSAConfigs += buildStageConfigs(
-        "GB200-8_GPUs-2_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU1-GEN1-NODE1-GPU2-Post-Merge",
-        "auto:gb200-flex",
-        "l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu1_gen1_node1_gpu2",
-        3,
+        "l0_gb200_multi_nodes_aggr_perf_sanity_node2_gpu8",
+        5,
         8,
         2
     )
+    // PerfSanity post-merge disagg tests
     multiNodesSBSAConfigs += buildStageConfigs(
         "GB200-8_GPUs-2_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU1-GEN1-NODE1-GPU4-Post-Merge",
         "auto:gb200-flex",
-        "l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu1_gen1_node1_gpu4",
-        4,
+        "l0_gb200_multi_nodes_disagg_perf_sanity_ctx1_node1_gpu1_gen1_node1_gpu4",
+        1,
         8,
         2
     )
     multiNodesSBSAConfigs += buildStageConfigs(
         "GB200-8_GPUs-2_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU4-GEN1-NODE1-GPU4-Post-Merge",
         "auto:gb200-flex",
-        "l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node1_gpu4",
+        "l0_gb200_multi_nodes_disagg_perf_sanity_ctx1_node1_gpu4_gen1_node1_gpu4",
         3,
         8,
         2
     )
-    // 3 Nodes
     multiNodesSBSAConfigs += buildStageConfigs(
         "GB200-12_GPUs-3_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU1-GEN1-NODE2-GPU8-Post-Merge",
         "auto:gb200-flex",
-        "l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu1_gen1_node2_gpu8",
+        "l0_gb200_multi_nodes_disagg_perf_sanity_ctx1_node1_gpu1_gen1_node2_gpu8",
         1,
         12,
         3
     )
     multiNodesSBSAConfigs += buildStageConfigs(
         "GB200-12_GPUs-3_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU4-GEN1-NODE2-GPU8-Post-Merge",
         "auto:gb200-flex",
-        "l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node2_gpu8",
-        3,
+        "l0_gb200_multi_nodes_disagg_perf_sanity_ctx1_node1_gpu4_gen1_node2_gpu8",
+        5,
         12,
         3
     )
-    // 4 Nodes
     multiNodesSBSAConfigs += buildStageConfigs(
         "GB200-16_GPUs-4_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE2-GPU8-GEN1-NODE2-GPU8-Post-Merge",
         "auto:gb200-flex",
-        "l0_gb200_multi_nodes_perf_sanity_ctx1_node2_gpu8_gen1_node2_gpu8",
+        "l0_gb200_multi_nodes_disagg_perf_sanity_ctx1_node2_gpu8_gen1_node2_gpu8",
         1,
         16,
         4

@@ -6,7 +6,10 @@ summaries to Slack.
 
 ## Basic Usage
 
-This script is run by the Jenkins pipeline. Inputs are configured in `jenkins/runPerfSanityTriage.groovy`:
+This script is run by the Jenkins pipeline:
+<https://prod.blsm.nvidia.com/sw-tensorrt-top-1/job/LLM/job/TRTLLM-Perf/job/PerfSanityTriage/>
+
+Inputs are configured in `jenkins/runPerfSanityTriage.groovy`:
 
 - `BRANCH`: repo branch to checkout
 - `OPEN_SEARCH_PROJECT_NAME`: OpenSearch project name

@@ -19,33 +19,33 @@ echo "Installation completed on all nodes"
 # Start gen servers
 echo "Starting gen servers..."
 for i in $(seq 0 $((numGenServers - 1))); do
-    gen_world_size=$((nodesPerGenServer * gpusPerNodePerGenServer))
+    gen_world_size=$((nodesPerGenServer * gpusPerfNodePerfGenServer))
     export DISAGG_SERVING_TYPE="GEN_$i"
     export pytestCommand="$pytestCommandWorker"
     srun "${srunArgs[@]}" --kill-on-bad-exit=1 \
         -N $nodesPerGenServer \
         --ntasks=$gen_world_size \
-        --ntasks-per-node=$gpusPerNodePerGenServer \
+        --ntasks-per-node=$gpusPerfNodePerfGenServer \
         $runScript &> $jobWorkspace/gen_server_$i.log &
     echo "Started gen server $i"
 done
 
-# Start ctx servers (skip if gen_only_no_context mode)
+# Start ctx servers (skip if gen_only mode)
 if [ "${TRTLLM_DISAGG_BENCHMARK_GEN_ONLY:-0}" != "1" ]; then
     echo "Starting ctx servers..."
     for i in $(seq 0 $((numCtxServers - 1))); do
-        ctx_world_size=$((nodesPerCtxServer * gpusPerNodePerCtxServer))
+        ctx_world_size=$((nodesPerCtxServer * gpusPerfNodePerfCtxServer))
         export DISAGG_SERVING_TYPE="CTX_$i"
         export pytestCommand="$pytestCommandWorker"
         srun "${srunArgs[@]}" --kill-on-bad-exit=1 \
             -N $nodesPerCtxServer \
         --ntasks=$ctx_world_size \
-        --ntasks-per-node=$gpusPerNodePerCtxServer \
+        --ntasks-per-node=$gpusPerfNodePerfCtxServer \
             $runScript &> $jobWorkspace/ctx_server_$i.log &
         echo "Started ctx server $i"
     done
 else
-    echo "Skipping ctx servers (gen_only_no_context mode)"
+    echo "Skipping ctx servers (gen_only mode)"
 fi
 
 

@@ -4,16 +4,12 @@
 
 import yaml
 
-DISAGG_CONFIG_FOLDER = "tests/integration/defs/perf/disagg/test_configs/disagg/perf-sanity"
-
 
 def get_hardware_config(config, benchmark_mode):
     hardware = config.get("hardware", {})
     worker_config = config.get("worker_config", {})
 
-    num_ctx_servers = (
-        0 if "gen_only_no_context" in benchmark_mode else hardware.get("num_ctx_servers")
-    )
+    num_ctx_servers = 0 if "gen_only" in benchmark_mode else hardware.get("num_ctx_servers")
     num_gen_servers = hardware.get("num_gen_servers")
     gpus_per_node = hardware.get("gpus_per_node")
 
@@ -97,10 +93,12 @@ def get_env_config(config):
 def get_benchmark_config(config):
     benchmark = config.get("benchmark", {})
 
+    mode = benchmark.get("mode", "e2e")  # falls back to "e2e" when the config has no "mode" key
     concurrency_str = benchmark.get("concurrency_list", "1")
     concurrency = int(concurrency_str) if isinstance(concurrency_str, str) else concurrency_str
 
     return {
+        "mode": mode,  # returned as read from the config; not validated here
         "concurrency": concurrency,
     }
 
@@ -222,17 +220,7 @@ def is_output_file_part(part):
     )
 
 
-def parse_test_case_name(test_list_path, llm_src):
-    """Parse test list to get config yaml path and benchmark mode.
-
-    Test formats for disagg:
-    - Disagg e2e: disagg_upload-e2e-{config_base}
-    - Disagg gen_only: disagg_upload-gen_only-{config_base}
-
-    Returns:
-        tuple: (config_yaml_path, benchmark_mode)
-            - benchmark_mode: "e2e" or "gen_only"
-    """
+def get_config_yaml(test_list_path, llm_src):
     with open(test_list_path, "r") as f:
         first_line = f.readline().strip()
 
@@ -242,33 +230,32 @@ def parse_test_case_name(test_list_path, llm_src):
         )
     bracket_content = first_line.split("[")[-1].split("]")[0]
     parts = bracket_content.split("-")
-
-    if len(parts) < 3:
+    if len(parts) < 2:
         raise ValueError(
-            f"Invalid disagg test format. Expected: disagg-{{mode}}-{{config}}, "
-            f"got: {bracket_content}"
+            f"Invalid test name format. Expected format: prefix-config_name, got: {bracket_content}"
         )
 
-    # parts[0] is the prefix, parts[1] is benchmark_mode, parts[2:] is the config name
+    # parts[0] is the prefix, parts[1:] is the config name
     if "disagg" not in parts[0]:
         raise ValueError(
-            f"Invalid test name format. Expected format: disagg-mode-config_name, "
-            f"got: {bracket_content}"
+            f"Invalid test name format. Expected format: disagg-config_name, got: {bracket_content}"
         )
-
-    benchmark_mode = parts[1]  # e2e or gen_only
-    if benchmark_mode not in ("e2e", "gen_only"):
-        raise ValueError(
-            f"Invalid benchmark_mode for disagg: {benchmark_mode}. Expected 'e2e' or 'gen_only'."
-        )
-
-    config_base_name = "-".join(parts[2:])
-    config_yaml_path = os.path.join(llm_src, DISAGG_CONFIG_FOLDER, f"{config_base_name}.yaml")
-
+    config_base_name = "-".join(parts[1:])
+    config_yaml_path = os.path.join(
+        llm_src,
+        "tests",
+        "integration",
+        "defs",
+        "perf",
+        "disagg",
+        "test_configs",
+        "disagg",
+        "perf-sanity",
+        f"{config_base_name}.yaml",
+    )
     if not os.path.exists(config_yaml_path):
         raise FileNotFoundError(f"Config file not found: {config_yaml_path}")
-
-    return config_yaml_path, benchmark_mode
+    return config_yaml_path
 
 
 def main():
@@ -306,7 +293,7 @@ def main():
 
     args = parser.parse_args()
 
-    config_yaml, benchmark_mode = parse_test_case_name(args.test_list, args.llm_src)
+    config_yaml = get_config_yaml(args.test_list, args.llm_src)
 
     with open(config_yaml, "r") as f:
         config = yaml.safe_load(f)
@@ -319,6 +306,7 @@ def main():
 
     benchmark_config = get_benchmark_config(config)
     print(f"Benchmark configuration: {benchmark_config}")
+    benchmark_mode = benchmark_config["mode"]
 
     hardware_config = get_hardware_config(config, benchmark_mode)
     print(f"Hardware configuration: {hardware_config}")
@@ -344,18 +332,16 @@ def main():
     # Build worker env vars, add extra env vars for gen_only mode
     worker_env_vars = env_config["worker_env_var"]
     server_env_vars = env_config["server_env_var"]
-    # Handle gen only mode
-    if "gen_only_no_context" in benchmark_mode:
-        worker_env_vars = f"TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1 {worker_env_vars}"
-        server_env_vars = f"TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1 {server_env_vars}"
-        script_prefix_lines.append("export TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1")
-        srun_args_lines.append("--container-env=TRTLLM_DISAGG_BENCHMARK_GEN_ONLY")
-    elif "gen_only" in benchmark_mode:
-        concurrency = benchmark_config.get("concurrency", 1)
+    if "gen_only" in benchmark_config["mode"]:
+        concurrency = benchmark_config["concurrency"]
         worker_env_vars = (
+            "TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1 "
             f"TRTLLM_DISABLE_KV_CACHE_TRANSFER_OVERLAP=1 "
             f"TLLM_BENCHMARK_REQ_QUEUES_SIZE={concurrency} {worker_env_vars}"
         )
+        server_env_vars = f"TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1 {server_env_vars}"
+        script_prefix_lines.append("export TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1")
+        srun_args_lines.append("--container-env=TRTLLM_DISAGG_BENCHMARK_GEN_ONLY")
 
     script_prefix_lines.extend(
         [
@@ -375,8 +361,8 @@ def main():
             f"export gpusPerGenServer={hardware_config['gpus_per_gen_server']}",
             f"export nodesPerCtxServer={hardware_config['nodes_per_ctx_server']}",
             f"export nodesPerGenServer={hardware_config['nodes_per_gen_server']}",
-            f"export gpusPerNodePerCtxServer={hardware_config['gpus_per_node_per_ctx_server']}",
-            f"export gpusPerNodePerGenServer={hardware_config['gpus_per_node_per_gen_server']}",
+            f"export gpusPerfNodePerfCtxServer={hardware_config['gpus_per_node_per_ctx_server']}",
+            f"export gpusPerfNodePerfGenServer={hardware_config['gpus_per_node_per_gen_server']}",
             f"export totalNodes={hardware_config['total_nodes']}",
             f"export totalGpus={hardware_config['total_gpus']}",
         ]