|
#!/bin/bash
#
# Benchmark driver: reasoning router vs. a raw vLLM backend.
#
# Example usage:
# Quick run:
# SAMPLES_PER_CATEGORY=5 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
# Long run:
# SAMPLES_PER_CATEGORY=100 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
# To test only router:
# BENCHMARK_ROUTER_ONLY=true ./run_bench.sh

# Strict mode: -e abort on first failing command, -u error on unset vars,
# -x trace every command (useful for benchmark logs), pipefail makes a
# pipeline fail if any stage fails.
set -euxo pipefail

# Every setting is overridable from the caller's environment; the values
# below are local-development defaults (endpoints assume the router on
# :8801 and vLLM on :8000, both on localhost).
export ROUTER_API_KEY="${ROUTER_API_KEY:-1234567890}"
export VLLM_API_KEY="${VLLM_API_KEY:-1234567890}"
export ROUTER_ENDPOINT="${ROUTER_ENDPOINT:-http://localhost:8801/v1}"
export VLLM_ENDPOINT="${VLLM_ENDPOINT:-http://localhost:8000/v1}"
export ROUTER_MODELS="${ROUTER_MODELS:-auto}"
export VLLM_MODELS="${VLLM_MODELS:-openai/gpt-oss-20b}"
export SAMPLES_PER_CATEGORY="${SAMPLES_PER_CATEGORY:-5}"
export CONCURRENT_REQUESTS="${CONCURRENT_REQUESTS:-4}"
export BENCHMARK_ROUTER_ONLY="${BENCHMARK_ROUTER_ONLY:-false}"
# Run the benchmark.
# The router flags, sample count, concurrency and output dir are identical in
# both modes, so they are collected once in an array; building the command
# this way keeps the two modes from drifting apart and stays safe for values
# containing spaces.
bench_args=(
  --run-router
  --router-endpoint "$ROUTER_ENDPOINT"
  --router-api-key "$ROUTER_API_KEY"
  --router-models "$ROUTER_MODELS"
  --samples-per-category "$SAMPLES_PER_CATEGORY"
  --concurrent-requests "$CONCURRENT_REQUESTS"
  --output-dir results/reasonbench
)

if [ "${BENCHMARK_ROUTER_ONLY}" = "true" ]; then
  echo "Running router-only benchmark"
else
  echo "Running full benchmark (router + vLLM)..."
  # Full mode additionally exercises the vLLM backend directly, in both
  # NR and XC execution modes.
  bench_args+=(
    --run-vllm
    --vllm-endpoint "$VLLM_ENDPOINT"
    --vllm-api-key "$VLLM_API_KEY"
    --vllm-models "$VLLM_MODELS"
    --vllm-exec-modes NR XC
  )
fi

python bench/router_reason_bench.py "${bench_args[@]}"
| 50 | + |
# Generate plots if summary files exist
echo "Checking for plot generation..."
echo "VLLM_MODELS: $VLLM_MODELS"
echo "ROUTER_MODELS: $ROUTER_MODELS"

# First whitespace-separated word of a space-delimited model list.
# Pure parameter expansion — no echo|cut subshell needed.
first_word() {
  printf '%s\n' "${1%% *}"
}

# Replace every '/' with '_' so a model name is usable as a path component.
# Pure parameter expansion — no echo|tr subshell needed.
path_safe() {
  printf '%s\n' "${1//\//_}"
}

# Result directories are keyed by the first model of each list.
VLLM_MODEL_FIRST=$(first_word "$VLLM_MODELS")
ROUTER_MODEL_FIRST=$(first_word "$ROUTER_MODELS")
echo "First models: VLLM=$VLLM_MODEL_FIRST, Router=$ROUTER_MODEL_FIRST"

VLLM_MODELS_SAFE=$(path_safe "$VLLM_MODEL_FIRST")
ROUTER_MODELS_SAFE=$(path_safe "$ROUTER_MODEL_FIRST")
echo "Safe paths: VLLM=$VLLM_MODELS_SAFE, Router=$ROUTER_MODELS_SAFE"

# Construct the full paths (the '::' separator is part of the benchmark's
# output-directory naming scheme).
VLLM_SUMMARY="results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json"
ROUTER_SUMMARY="results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json"
echo "Looking for summaries at:"
echo "VLLM: $VLLM_SUMMARY"
echo "Router: $ROUTER_SUMMARY"

# The router summary is required for any plot; the vLLM summary upgrades the
# output to a comparison plot.
if [ -f "$ROUTER_SUMMARY" ]; then
  echo "Found router summary, generating plots..."
  if [ -f "$VLLM_SUMMARY" ]; then
    echo "Found both summaries, generating comparison plots..."
    python bench/bench_plot.py \
      --summary "$VLLM_SUMMARY" \
      --router-summary "$ROUTER_SUMMARY"
  else
    echo "vLLM summary not found, generating router-only plots..."
    python bench/bench_plot.py \
      --router-summary "$ROUTER_SUMMARY"
  fi
else
  echo "No router summary found, skipping plot generation"
fi
0 commit comments