Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 18 additions & 35 deletions jenkins/L0_Test.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -3290,10 +3290,9 @@ def launchTestJobs(pipeline, testFilter)
"DGX_B300-4_GPUs-PyTorch-Post-Merge-1": ["b300-x4", "l0_dgx_b300", 1, 2, 4],
"DGX_B300-4_GPUs-PyTorch-Post-Merge-2": ["b300-x4", "l0_dgx_b300", 2, 2, 4],
// PerfSanity post-merge tests
"DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-1": ["auto:dgx-b200-flex", "l0_b200_multi_gpus_perf_sanity", 1, 4, 8, 1, true],
"DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-2": ["auto:dgx-b200-flex", "l0_b200_multi_gpus_perf_sanity", 2, 4, 8, 1, true],
"DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-3": ["auto:dgx-b200-flex", "l0_b200_multi_gpus_perf_sanity", 3, 4, 8, 1, true],
"DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-4": ["auto:dgx-b200-flex", "l0_b200_multi_gpus_perf_sanity", 4, 4, 8, 1, true],
"DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-1": ["auto:dgx-b200-flex", "l0_dgx_b200_perf_sanity", 1, 3, 8, 1, true],
"DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-2": ["auto:dgx-b200-flex", "l0_dgx_b200_perf_sanity", 2, 3, 8, 1, true],
"DGX_B200-8_GPUs-PyTorch-PerfSanity-Post-Merge-3": ["auto:dgx-b200-flex", "l0_dgx_b200_perf_sanity", 3, 3, 8, 1, true],
]
fullSet += x86SlurmTestConfigs.keySet()

Expand Down Expand Up @@ -3329,13 +3328,9 @@ def launchTestJobs(pipeline, testFilter)
// PerfSanity pre-merge tests
"GB200-4_GPUs-PyTorch-PerfSanity-1": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 1, 1, 4],
// PerfSanity post-merge tests
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-1": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 1, 7, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-2": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 2, 7, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-3": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 3, 7, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-4": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 4, 7, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-5": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 5, 7, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-6": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 6, 7, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-7": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 7, 7, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-1": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 1, 3, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-2": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 2, 3, 4],
"GB200-4_GPUs-PyTorch-PerfSanity-Post-Merge-3": ["auto:gb200-x4", "l0_gb200_multi_gpus_perf_sanity", 3, 3, 4],
]
fullSet += SBSASlurmTestConfigs.keySet()

Expand All @@ -3348,64 +3343,52 @@ def launchTestJobs(pipeline, testFilter)
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-2": ["auto:gb200-flex", "l0_gb200_multi_nodes", 2, 3, 8, 2],
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-3": ["auto:gb200-flex", "l0_gb200_multi_nodes", 3, 3, 8, 2],
]
// PerfSanity post-merge aggregated
// 2 Nodes
// PerfSanity post-merge aggr tests
multiNodesSBSAConfigs += buildStageConfigs(
"GB200-8_GPUs-2_Nodes-PyTorch-PerfSanity-Node2-GPU8-Post-Merge",
"GB200-8_GPUs-2_Nodes-PyTorch-Aggr-PerfSanity-Node2-GPU8-Post-Merge",
"auto:gb200-flex",
"l0_gb200_multi_nodes_perf_sanity_node2_gpu8",
7,
8,
2
)
// PerfSanity post-merge disaggregated
// 2 Nodes
multiNodesSBSAConfigs += buildStageConfigs(
"GB200-8_GPUs-2_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU1-GEN1-NODE1-GPU2-Post-Merge",
"auto:gb200-flex",
"l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu1_gen1_node1_gpu2",
3,
"l0_gb200_multi_nodes_aggr_perf_sanity_node2_gpu8",
5,
8,
2
)
// PerfSanity post-merge disagg tests
multiNodesSBSAConfigs += buildStageConfigs(
"GB200-8_GPUs-2_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU1-GEN1-NODE1-GPU4-Post-Merge",
"auto:gb200-flex",
"l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu1_gen1_node1_gpu4",
4,
"l0_gb200_multi_nodes_disagg_perf_sanity_ctx1_node1_gpu1_gen1_node1_gpu4",
1,
8,
2
)
multiNodesSBSAConfigs += buildStageConfigs(
"GB200-8_GPUs-2_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU4-GEN1-NODE1-GPU4-Post-Merge",
"auto:gb200-flex",
"l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node1_gpu4",
"l0_gb200_multi_nodes_disagg_perf_sanity_ctx1_node1_gpu4_gen1_node1_gpu4",
3,
8,
2
)
// 3 Nodes
multiNodesSBSAConfigs += buildStageConfigs(
"GB200-12_GPUs-3_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU1-GEN1-NODE2-GPU8-Post-Merge",
"auto:gb200-flex",
"l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu1_gen1_node2_gpu8",
"l0_gb200_multi_nodes_disagg_perf_sanity_ctx1_node1_gpu1_gen1_node2_gpu8",
1,
12,
3
)
multiNodesSBSAConfigs += buildStageConfigs(
"GB200-12_GPUs-3_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE1-GPU4-GEN1-NODE2-GPU8-Post-Merge",
"auto:gb200-flex",
"l0_gb200_multi_nodes_perf_sanity_ctx1_node1_gpu4_gen1_node2_gpu8",
3,
"l0_gb200_multi_nodes_disagg_perf_sanity_ctx1_node1_gpu4_gen1_node2_gpu8",
5,
12,
3
)
// 4 Nodes
multiNodesSBSAConfigs += buildStageConfigs(
"GB200-16_GPUs-4_Nodes-PyTorch-Disagg-PerfSanity-CTX1-NODE2-GPU8-GEN1-NODE2-GPU8-Post-Merge",
"auto:gb200-flex",
"l0_gb200_multi_nodes_perf_sanity_ctx1_node2_gpu8_gen1_node2_gpu8",
"l0_gb200_multi_nodes_disagg_perf_sanity_ctx1_node2_gpu8_gen1_node2_gpu8",
1,
16,
4
Expand Down
5 changes: 4 additions & 1 deletion jenkins/scripts/perf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ summaries to Slack.

## Basic Usage

This script is run by the Jenkins pipeline. Inputs are configured in `jenkins/runPerfSanityTriage.groovy`:
This script is run by the Jenkins pipeline:
https://prod.blsm.nvidia.com/sw-tensorrt-top-1/job/LLM/job/TRTLLM-Perf/job/PerfSanityTriage/

Inputs are configured in `jenkins/runPerfSanityTriage.groovy`:

- `BRANCH`: repo branch to checkout
- `OPEN_SEARCH_PROJECT_NAME`: OpenSearch project name
Expand Down
23 changes: 0 additions & 23 deletions jenkins/scripts/perf/aggregated/slurm_launch_draft.sh

This file was deleted.

12 changes: 6 additions & 6 deletions jenkins/scripts/perf/disaggregated/slurm_launch_draft.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,33 +19,33 @@ echo "Installation completed on all nodes"
# Start gen servers
echo "Starting gen servers..."
for i in $(seq 0 $((numGenServers - 1))); do
gen_world_size=$((nodesPerGenServer * gpusPerNodePerGenServer))
gen_world_size=$((nodesPerGenServer * gpusPerfNodePerfGenServer))
export DISAGG_SERVING_TYPE="GEN_$i"
export pytestCommand="$pytestCommandWorker"
srun "${srunArgs[@]}" --kill-on-bad-exit=1 \
-N $nodesPerGenServer \
--ntasks=$gen_world_size \
--ntasks-per-node=$gpusPerNodePerGenServer \
--ntasks-per-node=$gpusPerfNodePerfGenServer \
$runScript &> $jobWorkspace/gen_server_$i.log &
echo "Started gen server $i"
done

# Start ctx servers (skip if gen_only_no_context mode)
# Start ctx servers (skip if gen_only mode)
if [ "${TRTLLM_DISAGG_BENCHMARK_GEN_ONLY:-0}" != "1" ]; then
echo "Starting ctx servers..."
for i in $(seq 0 $((numCtxServers - 1))); do
ctx_world_size=$((nodesPerCtxServer * gpusPerNodePerCtxServer))
ctx_world_size=$((nodesPerCtxServer * gpusPerfNodePerfCtxServer))
export DISAGG_SERVING_TYPE="CTX_$i"
export pytestCommand="$pytestCommandWorker"
srun "${srunArgs[@]}" --kill-on-bad-exit=1 \
-N $nodesPerCtxServer \
--ntasks=$ctx_world_size \
--ntasks-per-node=$gpusPerNodePerCtxServer \
--ntasks-per-node=$gpusPerfNodePerfCtxServer \
$runScript &> $jobWorkspace/ctx_server_$i.log &
echo "Started ctx server $i"
done
else
echo "Skipping ctx servers (gen_only_no_context mode)"
echo "Skipping ctx servers (gen_only mode)"
fi


Expand Down
78 changes: 32 additions & 46 deletions jenkins/scripts/perf/disaggregated/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,12 @@

import yaml

DISAGG_CONFIG_FOLDER = "tests/integration/defs/perf/disagg/test_configs/disagg/perf-sanity"


def get_hardware_config(config, benchmark_mode):
hardware = config.get("hardware", {})
worker_config = config.get("worker_config", {})

num_ctx_servers = (
0 if "gen_only_no_context" in benchmark_mode else hardware.get("num_ctx_servers")
)
num_ctx_servers = 0 if "gen_only" in benchmark_mode else hardware.get("num_ctx_servers")
num_gen_servers = hardware.get("num_gen_servers")
gpus_per_node = hardware.get("gpus_per_node")

Expand Down Expand Up @@ -97,10 +93,12 @@ def get_env_config(config):
def get_benchmark_config(config):
    """Extract the benchmark settings from a parsed test config.

    Args:
        config: Mapping loaded from the test YAML. Its optional
            ``benchmark`` section may provide ``mode`` and
            ``concurrency_list``.

    Returns:
        dict: Two keys:
            - ``"mode"``: the value of ``benchmark.mode``, or ``"e2e"``
              when it is missing or null.
            - ``"concurrency"``: ``benchmark.concurrency_list`` converted
              with ``int()`` when it is a string and passed through
              unchanged otherwise; ``1`` when it is missing or null.

    Raises:
        ValueError: If ``concurrency_list`` is a string that ``int()``
            cannot parse.
    """
    # An empty ``benchmark:`` key is loaded from YAML as None rather than {},
    # so a plain ``config.get("benchmark", {})`` would hand back None here.
    benchmark = config.get("benchmark") or {}

    # Null values fall back to the same defaults as missing keys, so the
    # returned mode is always usable in substring checks and the concurrency
    # never renders as the literal text "None".
    mode = benchmark.get("mode")
    if mode is None:
        mode = "e2e"

    raw_concurrency = benchmark.get("concurrency_list")
    if raw_concurrency is None:
        raw_concurrency = "1"

    # Only string values are converted; non-string values are returned as-is.
    if isinstance(raw_concurrency, str):
        concurrency = int(raw_concurrency)
    else:
        concurrency = raw_concurrency

    return {
        "mode": mode,
        "concurrency": concurrency,
    }

Expand Down Expand Up @@ -222,17 +220,7 @@ def is_output_file_part(part):
)


def parse_test_case_name(test_list_path, llm_src):
"""Parse test list to get config yaml path and benchmark mode.

Test formats for disagg:
- Disagg e2e: disagg_upload-e2e-{config_base}
- Disagg gen_only: disagg_upload-gen_only-{config_base}

Returns:
tuple: (config_yaml_path, benchmark_mode)
- benchmark_mode: "e2e" or "gen_only"
"""
def get_config_yaml(test_list_path, llm_src):
with open(test_list_path, "r") as f:
first_line = f.readline().strip()

Expand All @@ -242,33 +230,32 @@ def parse_test_case_name(test_list_path, llm_src):
)
bracket_content = first_line.split("[")[-1].split("]")[0]
parts = bracket_content.split("-")

if len(parts) < 3:
if len(parts) < 2:
raise ValueError(
f"Invalid disagg test format. Expected: disagg-{{mode}}-{{config}}, "
f"got: {bracket_content}"
f"Invalid test name format. Expected format: prefix-config_name, got: {bracket_content}"
)

# parts[0] is the prefix, parts[1] is benchmark_mode, parts[2:] is the config name
# parts[0] is the prefix, parts[1:] is the config name
if "disagg" not in parts[0]:
raise ValueError(
f"Invalid test name format. Expected format: disagg-mode-config_name, "
f"got: {bracket_content}"
f"Invalid test name format. Expected format: disagg-config_name, got: {bracket_content}"
)

benchmark_mode = parts[1] # e2e or gen_only
if benchmark_mode not in ("e2e", "gen_only"):
raise ValueError(
f"Invalid benchmark_mode for disagg: {benchmark_mode}. Expected 'e2e' or 'gen_only'."
)

config_base_name = "-".join(parts[2:])
config_yaml_path = os.path.join(llm_src, DISAGG_CONFIG_FOLDER, f"{config_base_name}.yaml")

config_base_name = "-".join(parts[1:])
config_yaml_path = os.path.join(
llm_src,
"tests",
"integration",
"defs",
"perf",
"disagg",
"test_configs",
"disagg",
"perf-sanity",
f"{config_base_name}.yaml",
)
if not os.path.exists(config_yaml_path):
raise FileNotFoundError(f"Config file not found: {config_yaml_path}")

return config_yaml_path, benchmark_mode
return config_yaml_path


def main():
Expand Down Expand Up @@ -306,7 +293,7 @@ def main():

args = parser.parse_args()

config_yaml, benchmark_mode = parse_test_case_name(args.test_list, args.llm_src)
config_yaml = get_config_yaml(args.test_list, args.llm_src)

with open(config_yaml, "r") as f:
config = yaml.safe_load(f)
Expand All @@ -319,6 +306,7 @@ def main():

benchmark_config = get_benchmark_config(config)
print(f"Benchmark configuration: {benchmark_config}")
benchmark_mode = benchmark_config["mode"]

hardware_config = get_hardware_config(config, benchmark_mode)
print(f"Hardware configuration: {hardware_config}")
Expand All @@ -344,18 +332,16 @@ def main():
# Build worker env vars, add extra env vars for gen_only mode
worker_env_vars = env_config["worker_env_var"]
server_env_vars = env_config["server_env_var"]
# Handle gen only mode
if "gen_only_no_context" in benchmark_mode:
worker_env_vars = f"TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1 {worker_env_vars}"
server_env_vars = f"TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1 {server_env_vars}"
script_prefix_lines.append("export TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1")
srun_args_lines.append("--container-env=TRTLLM_DISAGG_BENCHMARK_GEN_ONLY")
elif "gen_only" in benchmark_mode:
concurrency = benchmark_config.get("concurrency", 1)
if "gen_only" in benchmark_config["mode"]:
concurrency = benchmark_config["concurrency"]
worker_env_vars = (
"TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1 "
f"TRTLLM_DISABLE_KV_CACHE_TRANSFER_OVERLAP=1 "
f"TLLM_BENCHMARK_REQ_QUEUES_SIZE={concurrency} {worker_env_vars}"
)
server_env_vars = f"TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1 {server_env_vars}"
script_prefix_lines.append("export TRTLLM_DISAGG_BENCHMARK_GEN_ONLY=1")
srun_args_lines.append("--container-env=TRTLLM_DISAGG_BENCHMARK_GEN_ONLY")

script_prefix_lines.extend(
[
Expand All @@ -375,8 +361,8 @@ def main():
f"export gpusPerGenServer={hardware_config['gpus_per_gen_server']}",
f"export nodesPerCtxServer={hardware_config['nodes_per_ctx_server']}",
f"export nodesPerGenServer={hardware_config['nodes_per_gen_server']}",
f"export gpusPerNodePerCtxServer={hardware_config['gpus_per_node_per_ctx_server']}",
f"export gpusPerNodePerGenServer={hardware_config['gpus_per_node_per_gen_server']}",
f"export gpusPerfNodePerfCtxServer={hardware_config['gpus_per_node_per_ctx_server']}",
f"export gpusPerfNodePerfGenServer={hardware_config['gpus_per_node_per_gen_server']}",
f"export totalNodes={hardware_config['total_nodes']}",
f"export totalGpus={hardware_config['total_gpus']}",
]
Expand Down
Loading
Loading