Skip to content

Commit 1190704

Browse files
russellb and Yuqi Zhang
authored and committed
[Benchmarks] Refactor run_structured_output_benchmarks.sh (vllm-project#17722)
Signed-off-by: Russell Bryant <[email protected]> Signed-off-by: Yuqi Zhang <[email protected]>
1 parent fb8cefa commit 1190704

File tree

1 file changed

+85
-16
lines changed

1 file changed

+85
-16
lines changed

benchmarks/run_structured_output_benchmark.sh

Lines changed: 85 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,98 @@
11
#!/bin/bash
22

3-
# Define the model to use
4-
MODEL=${1:-"Qwen/Qwen2.5-7B-Instruct"}
5-
6-
# Define the backend to use
7-
BACKEND=${2:-"vllm"}
8-
9-
# Define the dataset to use
10-
DATASET=${3:-"xgrammar_bench"}
11-
3+
# default values
4+
MODEL=${MODEL:-"Qwen/Qwen2.5-7B-Instruct"}
5+
BACKEND=${BACKEND:-"vllm"}
6+
DATASET=${DATASET:-"xgrammar_bench"}
127
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
13-
OUTPUT_DIR=${4:-"$SCRIPT_DIR/structured_output_benchmark_results"}
8+
OUTPUT_DIR=${OUTPUT_DIR:-"$SCRIPT_DIR/structured_output_benchmark_results"}
9+
PORT=${PORT:-8000}
10+
STRUCTURED_OUTPUT_RATIO=${STRUCTURED_OUTPUT_RATIO:-1}
11+
TOTAL_SECONDS=${TOTAL_SECONDS:-90}
12+
MAX_NEW_TOKENS=${MAX_NEW_TOKENS:-300}
13+
TOKENIZER_MODE=${TOKENIZER_MODE:-"auto"}
1414

15-
GUIDED_RATIO=${5:-0.5}
15+
usage() {
16+
echo "Usage: $0 [options]"
17+
echo "Options:"
18+
echo " --model MODEL Model to benchmark (default: $MODEL)"
19+
echo " --backend BACKEND Backend to use (default: $BACKEND)"
20+
echo " --dataset DATASET Dataset to use (default: $DATASET)"
21+
echo " --max-new-tokens N Maximum number of tokens to generate (default: $MAX_NEW_TOKENS)"
22+
echo " --output-dir DIR Output directory for results (default: $OUTPUT_DIR)"
23+
echo " --port PORT Port to use (default: $PORT)"
24+
echo " --structured-output-ratio N Ratio of structured outputs (default: $STRUCTURED_OUTPUT_RATIO)"
25+
echo " --tokenizer-mode MODE Tokenizer mode to use (default: $TOKENIZER_MODE)"
26+
echo " --total-seconds N Total seconds to run the benchmark (default: $TOTAL_SECONDS)"
27+
echo " -h, --help Show this help message and exit"
28+
exit 0
29+
}
30+
31+
# parse command line arguments
32+
while [[ $# -gt 0 ]]; do
33+
case $1 in
34+
--model)
35+
MODEL="$2"
36+
shift 2
37+
;;
38+
--backend)
39+
BACKEND="$2"
40+
shift 2
41+
;;
42+
--dataset)
43+
DATASET="$2"
44+
shift 2
45+
;;
46+
--max-new-tokens)
47+
MAX_NEW_TOKENS="$2"
48+
shift 2
49+
;;
50+
--output-dir)
51+
OUTPUT_DIR="$2"
52+
shift 2
53+
;;
54+
--port)
55+
PORT="$2"
56+
shift 2
57+
;;
58+
--structured-output-ratio)
59+
STRUCTURED_OUTPUT_RATIO="$2"
60+
shift 2
61+
;;
62+
--tokenizer-mode)
63+
TOKENIZER_MODE="$2"
64+
shift 2
65+
;;
66+
--total-seconds)
67+
TOTAL_SECONDS="$2"
68+
shift 2
69+
;;
70+
-h|--help)
71+
usage
72+
;;
73+
*)
74+
echo "Unknown argument: $1\n"
75+
usage
76+
;;
77+
esac
78+
done
1679

1780
# Create output directory if it doesn't exist
1881
mkdir -p "$OUTPUT_DIR"
1982

2083
# Define QPS values to test
21-
QPS_VALUES=(70 60 50 25 20 15 10)
84+
QPS_VALUES=(25 20 15 10 5 1)
2285

2386
# Common parameters
2487
COMMON_PARAMS="--backend $BACKEND \
2588
--model $MODEL \
2689
--dataset $DATASET \
27-
--structured-output-ratio $GUIDED_RATIO \
90+
--structured-output-ratio $STRUCTURED_OUTPUT_RATIO \
2891
--save-results \
29-
--result-dir $OUTPUT_DIR"
92+
--result-dir $OUTPUT_DIR \
93+
--output-len $MAX_NEW_TOKENS \
94+
--port $PORT \
95+
--tokenizer-mode $TOKENIZER_MODE"
3096

3197
echo "Starting structured output benchmark with model: $MODEL"
3298
echo "Backend: $BACKEND"
@@ -45,12 +111,15 @@ for qps in "${QPS_VALUES[@]}"; do
45111
# Construct filename for this run
46112
FILENAME="${BACKEND}_${qps}qps_$(basename $MODEL)_${DATASET}_${GIT_HASH}.json"
47113

114+
NUM_PROMPTS=$(echo "$TOTAL_SECONDS * $qps" | bc)
115+
NUM_PROMPTS=${NUM_PROMPTS%.*} # Remove fractional part
116+
echo "Running benchmark with $NUM_PROMPTS prompts"
117+
48118
# Run the benchmark
49119
python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \
50120
--request-rate $qps \
51121
--result-filename "$FILENAME" \
52-
--tokenizer-mode ${TOKENIZER_MODE:-"auto"} \
53-
--port ${PORT:-8000}
122+
--num-prompts $NUM_PROMPTS
54123

55124
echo "Completed benchmark with QPS: $qps"
56125
echo "----------------------------------------"

0 commit comments

Comments
 (0)