Skip to content

Commit 0467fcd

Browse files
committed
[None][fix] Add run_benchmark_aiperf.sh for aiperf-based disagg benchmarking
Script used by submit.py when use_aiperf=true. Installs the correct aiperf version with trust_remote_code support and runs mooncake_trace dataset benchmarks against the disagg serving endpoint. Signed-off-by: Lizhi Zhou <1432185+reasonsolo@users.noreply.github.com>
1 parent 1e21fd1 commit 0467fcd

File tree

1 file changed

+102
-0
lines changed

1 file changed

+102
-0
lines changed
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#!/bin/bash

# aiperf-based benchmark script for disaggregated serving.
# Invoked by submit.py when use_aiperf=true; runs mooncake_trace dataset
# benchmarks against the disagg serving endpoint.
#
# Args: model_name dataset_file multi_round num_gen_servers concurrency_list
#       streaming log_path hostname port ucx_warmup_requests

# Strict mode: exit on error, error on unset variables, and fail pipelines
# when any stage fails (pipefail was missing from the original set -e/-u).
set -euo pipefail
trap 'echo "Error occurred at line $LINENO"; exit 1' ERR

if [ "$#" -lt 10 ]; then
    # "$*" joins the args into one word for the message; "$@" inside a
    # string mixes string and array (ShellCheck SC2145).
    echo "Error: Missing required arguments, got $# arguments, args: $*"
    echo "Usage: $0 model_name dataset_file multi_round num_gen_servers concurrency_list streaming log_path hostname port ucx_warmup_requests"
    exit 1
fi
15+
16+
# Positional arguments (see usage header above).
model_name="$1"              # served model identifier
dataset_file="$2"            # mooncake_trace dataset file
multi_round="$3"             # rounds per concurrency level
num_gen_servers="$4"         # number of generation servers
concurrency_list="$5"        # comma-separated concurrency levels
streaming="$6"               # streaming mode flag
log_path="$7"                # directory for logs and benchmark artifacts
hostname="$8"                # disagg serving endpoint host
port="$9"                    # disagg serving endpoint port
ucx_warmup_requests="${10}"  # number of warmup requests for UCX connections
26+
27+
# Only SLURM rank 0 runs the load generator; every other rank exits
# cleanly so the benchmark is driven by a single process.
if [[ "${SLURM_PROCID}" != "0" ]]; then
    echo "Process id is ${SLURM_PROCID} for loadgen, exiting"
    exit 0
fi
32+
33+
# The container may ship an older aiperf (a parallel_decode.py that lacks
# trust_remote_code support), so always reinstall the pinned commit that
# carries the fix.
echo "Installing aiperf..."
pip install --no-deps --force-reinstall \
    'aiperf @ git+https://github.com/ai-dynamo/aiperf.git@ac3d91652e5e024bfb4ac38d48603423aad666bc'
37+
38+
# Warm up UCX connections with a handful of tiny random requests so the
# first measured requests do not pay UCX connection-setup cost.
# All expansions are quoted (ShellCheck SC2086): a model name or host
# containing spaces/globs would otherwise be word-split.
if [ "${ucx_warmup_requests}" -gt 0 ]; then
    echo "warming up ucx connections with small requests... ${ucx_warmup_requests}"
    python -m tensorrt_llm.serve.scripts.benchmark_serving \
        --model "${model_name}" \
        --dataset-name random \
        --random-ids \
        --random-input-len 100 \
        --random-output-len 10 \
        --num-prompts "${ucx_warmup_requests}" \
        --host "${hostname}" \
        --port "${port}" \
        --ignore-eos \
        --trust-remote-code \
        --non-streaming
    echo "UCX warmup done"
fi
55+
56+
# Trust remote code globally so parallel tokenizer workers can load
# custom tokenizer code from the HF hub.
export HF_HUB_TRUST_REMOTE_CODE=1

echo "Hostname: ${hostname}, Port: ${port}"
echo "Starting aiperf benchmark..."

# Turn the comma-separated list into space-separated words for the loop
# below. Bash pattern substitution replaces the original
# $(echo ... | tr ',' ' '), avoiding a subshell and two forks.
concurrency_list=${concurrency_list//,/ }
63+
# ${concurrency_list} is intentionally unquoted: it is a space-separated
# list and must word-split into individual concurrency values.
for concurrency in ${concurrency_list}; do
    # Arithmetic normalization: strips whitespace/leading zeros and fails
    # fast on non-numeric values.
    concurrency=$((concurrency))
    request_count=$((concurrency * multi_round))
    # Budget 20 minutes (1200 s) of wall time per round.
    benchmark_duration=$((multi_round * 1200))
    echo "Benchmarking with concurrency ${concurrency} ... ${request_count} requests, duration ${benchmark_duration}s"
    # Quoted (SC2086): log_path may contain spaces.
    mkdir -p "${log_path}/concurrency_${concurrency}"

    aiperf profile \
        -m "${model_name}" \
        --tokenizer "${model_name}" \
        --tokenizer-trust-remote-code \
        --url "http://${hostname}:${port}" \
        --streaming \
        --ui simple \
        --input-file "${dataset_file}" \
        --artifact-dir "${log_path}/concurrency_${concurrency}" \
        --concurrency "${concurrency}" \
        --concurrency-ramp-duration 60 \
        --custom-dataset-type mooncake_trace \
        --benchmark-duration "${benchmark_duration}" \
        --benchmark-grace-period 60 \
        --workers-max 200 \
        --request-timeout-seconds 1200 \
        --profile-export-level records \
        --extra-inputs ignore_eos:true \
        --request-count "${request_count}" \
        --record-processors 8

    echo "Benchmark with concurrency ${concurrency} done"
done
94+
95+
# Fetch aggregated perf metrics from the disagg server for offline analysis.
echo "Fetching perf metrics from http://${hostname}:${port}/perf_metrics ..."
# Best-effort: older servers may not expose /perf_metrics, hence '|| true'.
# Fix: the original redirected curl's stderr (2>&1) into the JSON file, so
# a failed fetch could leave error text in perf_metrics.json and make the
# '-s' non-empty check below pass on garbage. Keep stderr out of the file.
curl -s "http://${hostname}:${port}/perf_metrics" > "${log_path}/perf_metrics.json" 2>/dev/null || true
if [ -s "${log_path}/perf_metrics.json" ]; then
    echo "Perf metrics saved to ${log_path}/perf_metrics.json"
else
    echo "Warning: perf_metrics response was empty or endpoint not available"
fi

0 commit comments

Comments
 (0)