#!/bin/bash

- # Define the model to use
- MODEL=${1:-"Qwen/Qwen2.5-7B-Instruct"}
-
- # Define the backend to use
- BACKEND=${2:-"vllm"}
-
- # Define the dataset to use
- DATASET=${3:-"xgrammar_bench"}
-
+ # default values
+ MODEL=${MODEL:-"Qwen/Qwen2.5-7B-Instruct"}
+ BACKEND=${BACKEND:-"vllm"}
+ DATASET=${DATASET:-"xgrammar_bench"}
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
- OUTPUT_DIR=${4:-"$SCRIPT_DIR/structured_output_benchmark_results"}
+ OUTPUT_DIR=${OUTPUT_DIR:-"$SCRIPT_DIR/structured_output_benchmark_results"}
+ PORT=${PORT:-8000}
+ STRUCTURED_OUTPUT_RATIO=${STRUCTURED_OUTPUT_RATIO:-1}
+ TOTAL_SECONDS=${TOTAL_SECONDS:-90}
+ MAX_NEW_TOKENS=${MAX_NEW_TOKENS:-300}
+ TOKENIZER_MODE=${TOKENIZER_MODE:-"auto"}
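+ # Note: each default above can also be pre-set via an environment variable of the same name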

- GUIDED_RATIO=${5:-0.5}
+ usage() {
+   echo "Usage: $0 [options]"
+   echo "Options:"
+   echo "  --model MODEL                 Model to benchmark (default: $MODEL)"
+   echo "  --backend BACKEND             Backend to use (default: $BACKEND)"
+   echo "  --dataset DATASET             Dataset to use (default: $DATASET)"
+   echo "  --max-new-tokens N            Maximum number of tokens to generate (default: $MAX_NEW_TOKENS)"
+   echo "  --output-dir DIR              Output directory for results (default: $OUTPUT_DIR)"
+   echo "  --port PORT                   Port to use (default: $PORT)"
+   echo "  --structured-output-ratio N   Ratio of structured outputs (default: $STRUCTURED_OUTPUT_RATIO)"
+   echo "  --tokenizer-mode MODE         Tokenizer mode to use (default: $TOKENIZER_MODE)"
+   echo "  --total-seconds N             Total seconds to run the benchmark (default: $TOTAL_SECONDS)"
+   echo "  -h, --help                    Show this help message and exit"
+   exit 0
+ }
+
+ # parse command line arguments
+ while [[ $# -gt 0 ]]; do
+   case $1 in
+     --model)
+       MODEL="$2"
+       shift 2
+       ;;
+     --backend)
+       BACKEND="$2"
+       shift 2
+       ;;
+     --dataset)
+       DATASET="$2"
+       shift 2
+       ;;
+     --max-new-tokens)
+       MAX_NEW_TOKENS="$2"
+       shift 2
+       ;;
+     --output-dir)
+       OUTPUT_DIR="$2"
+       shift 2
+       ;;
+     --port)
+       PORT="$2"
+       shift 2
+       ;;
+     --structured-output-ratio)
+       STRUCTURED_OUTPUT_RATIO="$2"
+       shift 2
+       ;;
+     --tokenizer-mode)
+       TOKENIZER_MODE="$2"
+       shift 2
+       ;;
+     --total-seconds)
+       TOTAL_SECONDS="$2"
+       shift 2
+       ;;
+     -h|--help)
+       usage
+       ;;
+     *)
+       echo -e "Unknown argument: $1\n"
+       usage
+       ;;
+   esac
+ done
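+
+ # Example invocation (script name/path illustrative):
+ #   bash run_structured_output_benchmark.sh --model Qwen/Qwen2.5-7B-Instruct --backend vllm --total-seconds 90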

# Create output directory if it doesn't exist
mkdir -p "$OUTPUT_DIR"

# Define QPS values to test
- QPS_VALUES=(70 60 50 25 20 15 10)
+ QPS_VALUES=(25 20 15 10 5 1)

# Common parameters
COMMON_PARAMS="--backend $BACKEND \
  --model $MODEL \
  --dataset $DATASET \
-   --structured-output-ratio $GUIDED_RATIO \
+   --structured-output-ratio $STRUCTURED_OUTPUT_RATIO \
  --save-results \
-   --result-dir $OUTPUT_DIR"
+   --result-dir $OUTPUT_DIR \
+   --output-len $MAX_NEW_TOKENS \
+   --port $PORT \
+   --tokenizer-mode $TOKENIZER_MODE"

echo "Starting structured output benchmark with model: $MODEL"
echo "Backend: $BACKEND"
@@ -45,12 +111,15 @@ for qps in "${QPS_VALUES[@]}"; do
  # Construct filename for this run
  FILENAME="${BACKEND}_${qps}qps_$(basename $MODEL)_${DATASET}_${GIT_HASH}.json"
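+   # Size the prompt count to the request rate so each QPS level runs for roughly TOTAL_SECONDS seconds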
+   NUM_PROMPTS=$(echo "$TOTAL_SECONDS * $qps" | bc)
+   NUM_PROMPTS=${NUM_PROMPTS%.*}  # Remove fractional part
+   echo "Running benchmark with $NUM_PROMPTS prompts"
+
  # Run the benchmark
  python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \
    --request-rate $qps \
    --result-filename "$FILENAME" \
-     --tokenizer-mode ${TOKENIZER_MODE:-"auto"} \
-     --port ${PORT:-8000}
+     --num-prompts $NUM_PROMPTS

  echo "Completed benchmark with QPS: $qps"
  echo "----------------------------------------"