# SAMPLES_PER_CATEGORY=5 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
# Long run:
# SAMPLES_PER_CATEGORY=100 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
# To test only router:
# BENCHMARK_ROUTER_ONLY=true ./run_bench.sh

# Trace every command (-x) and abort on the first failure (-e); pipefail added
# so a failure in any pipeline stage is not masked by a later stage.
set -x -e -o pipefail
# Benchmark configuration — every knob can be overridden from the caller's
# environment; the ${VAR:-default} form keeps an explicit setting intact.
export ROUTER_MODELS="${ROUTER_MODELS:-auto}"
export VLLM_MODELS="${VLLM_MODELS:-openai/gpt-oss-20b}"
export SAMPLES_PER_CATEGORY="${SAMPLES_PER_CATEGORY:-5}"
export CONCURRENT_REQUESTS="${CONCURRENT_REQUESTS:-4}"
# When "true", skip the vLLM leg and benchmark only the router.
export BENCHMARK_ROUTER_ONLY="${BENCHMARK_ROUTER_ONLY:-false}"
# Run the benchmark. In router-only mode all --vllm-* flags are omitted so the
# harness never contacts the vLLM endpoint; otherwise both legs run.
if [ "${BENCHMARK_ROUTER_ONLY}" = "true" ]; then
  echo "Running router-only benchmark"
  python bench/router_reason_bench.py \
    --run-router \
    --router-endpoint "$ROUTER_ENDPOINT" \
    --router-api-key "$ROUTER_API_KEY" \
    --router-models "$ROUTER_MODELS" \
    --samples-per-category "$SAMPLES_PER_CATEGORY" \
    --concurrent-requests "$CONCURRENT_REQUESTS" \
    --output-dir results/reasonbench
else
  echo "Running full benchmark (router + vLLM)..."
  python bench/router_reason_bench.py \
    --run-router \
    --router-endpoint "$ROUTER_ENDPOINT" \
    --router-api-key "$ROUTER_API_KEY" \
    --router-models "$ROUTER_MODELS" \
    --run-vllm \
    --vllm-endpoint "$VLLM_ENDPOINT" \
    --vllm-api-key "$VLLM_API_KEY" \
    --vllm-models "$VLLM_MODELS" \
    --samples-per-category "$SAMPLES_PER_CATEGORY" \
    --vllm-exec-modes NR XC \
    --concurrent-requests "$CONCURRENT_REQUESTS" \
    --output-dir results/reasonbench
fi
# Generate plots if summary files exist.
echo "Checking for plot generation..."
echo "VLLM_MODELS: $VLLM_MODELS"
echo "ROUTER_MODELS: $ROUTER_MODELS"
# NOTE(review): VLLM_SUMMARY / ROUTER_SUMMARY are computed in a section of the
# script not shown here — confirm they are set before this point.
echo "  VLLM: $VLLM_SUMMARY"
echo "  Router: $ROUTER_SUMMARY"
5672
57- python bench_plot.py \
58- --summary " $VLLM_SUMMARY " \
59- --router-summary " $ROUTER_SUMMARY "
73+ # Check if at least one summary file exists and generate plots
74+ if [ -f " $ROUTER_SUMMARY " ]; then
75+ echo " Found router summary, generating plots..."
76+ if [ -f " $VLLM_SUMMARY " ]; then
77+ echo " Found both summaries, generating comparison plots..."
78+ python bench/bench_plot.py \
79+ --summary " $VLLM_SUMMARY " \
80+ --router-summary " $ROUTER_SUMMARY "
81+ else
82+ echo " vLLM summary not found, generating router-only plots..."
83+ python bench/bench_plot.py \
84+ --router-summary " $ROUTER_SUMMARY "
85+ fi
86+ else
87+ echo " No router summary found, skipping plot generation"
88+ fi