Skip to content

Commit 589692e

Browse files
authored
Merge branch 'main' into feat/add-batch-metrics
2 parents c805974 + 99736b0 commit 589692e

File tree

2 files changed

+103
-34
lines changed

2 files changed

+103
-34
lines changed

Makefile

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,35 @@ build: rust build-router
1111

1212
# Build the Rust library (installs the Rust toolchain via rustup first if rustc is not found)
1313
rust:
14-
@echo "Building Rust library..."
15-
cd candle-binding && cargo build --release
14+
@echo "Ensuring rust is installed..."
15+
@bash -c 'if ! command -v rustc >/dev/null 2>&1; then \
16+
echo "rustc not found, installing..."; \
17+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
18+
fi && \
19+
if [ -f "$$HOME/.cargo/env" ]; then \
20+
echo "Loading Rust environment from $$HOME/.cargo/env..." && \
21+
. $$HOME/.cargo/env; \
22+
fi && \
23+
if ! command -v cargo >/dev/null 2>&1; then \
24+
echo "Error: cargo not found in PATH" && exit 1; \
25+
fi && \
26+
echo "Building Rust library..." && \
27+
cd candle-binding && cargo build --release'
1628

1729
# Build router
1830
build-router: rust
1931
@echo "Building router..."
2032
@mkdir -p bin
2133
@cd src/semantic-router && go build -o ../../bin/router cmd/main.go
2234

35+
# Config file path with default
36+
CONFIG_FILE ?= config/config.yaml
37+
2338
# Run the router
24-
run-router: build-router
25-
@echo "Running router..."
39+
run-router: build-router download-models
40+
@echo "Running router with config: ${CONFIG_FILE}"
2641
@export LD_LIBRARY_PATH=${PWD}/candle-binding/target/release && \
27-
./bin/router -config=config/config.yaml
42+
./bin/router -config=${CONFIG_FILE}
2843

2944
# Prepare Envoy
3045
prepare-envoy:

bench/run_bench.sh

100644100755
Lines changed: 83 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,88 @@
11
#!/bin/bash
22

3-
set -x
3+
# Example usage (run from the repository root; the script calls bench/router_reason_bench.py by relative path):
4+
# Quick run:
5+
# SAMPLES_PER_CATEGORY=5 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./bench/run_bench.sh
6+
# Long run:
7+
# SAMPLES_PER_CATEGORY=100 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./bench/run_bench.sh
8+
# To test only router:
9+
# BENCHMARK_ROUTER_ONLY=true ./bench/run_bench.sh
410

5-
export ROUTER_API_KEY="1234567890"
6-
export VLLM_API_KEY="1234567890"
7-
export ROUTER_ENDPOINT="http://localhost:8801/v1"
8-
export VLLM_ENDPOINT="http://localhost:8000/v1"
9-
export ROUTER_MODELS="auto"
10-
export VLLM_MODELS="openai/gpt-oss-20b"
11+
set -x -e
12+
13+
export ROUTER_API_KEY="${ROUTER_API_KEY:-1234567890}"
14+
export VLLM_API_KEY="${VLLM_API_KEY:-1234567890}"
15+
export ROUTER_ENDPOINT="${ROUTER_ENDPOINT:-http://localhost:8801/v1}"
16+
export VLLM_ENDPOINT="${VLLM_ENDPOINT:-http://localhost:8000/v1}"
17+
export ROUTER_MODELS="${ROUTER_MODELS:-auto}"
18+
export VLLM_MODELS="${VLLM_MODELS:-openai/gpt-oss-20b}"
19+
export SAMPLES_PER_CATEGORY="${SAMPLES_PER_CATEGORY:-5}"
20+
export CONCURRENT_REQUESTS="${CONCURRENT_REQUESTS:-4}"
21+
export BENCHMARK_ROUTER_ONLY="${BENCHMARK_ROUTER_ONLY:-false}"
1122

1223
# Run the benchmark
13-
python router_reason_bench.py \
14-
--run-router \
15-
--router-endpoint "$ROUTER_ENDPOINT" \
16-
--router-api-key "$ROUTER_API_KEY" \
17-
--router-models "$ROUTER_MODELS" \
18-
--run-vllm \
19-
--vllm-endpoint "$VLLM_ENDPOINT" \
20-
--vllm-api-key "$VLLM_API_KEY" \
21-
--vllm-models "$VLLM_MODELS" \
22-
--samples-per-category 5 \
23-
--vllm-exec-modes NR XC \
24-
--concurrent-requests 4 \
25-
--output-dir results/reasonbench
26-
27-
# Generate plots
28-
VLLM_MODEL_FIRST="${VLLM_MODELS%% *}"
29-
ROUTER_MODEL_FIRST="${ROUTER_MODELS%% *}"
30-
VLLM_MODELS_SAFE="${VLLM_MODEL_FIRST//\//_}"
31-
ROUTER_MODELS_SAFE="${ROUTER_MODEL_FIRST//\//_}"
32-
python bench_plot.py \
33-
--summary "results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json" \
34-
--router-summary "results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json"
24+
if [ "${BENCHMARK_ROUTER_ONLY}" = "true" ]; then
25+
echo "Running router-only benchmark"
26+
python bench/router_reason_bench.py \
27+
--run-router \
28+
--router-endpoint "$ROUTER_ENDPOINT" \
29+
--router-api-key "$ROUTER_API_KEY" \
30+
--router-models "$ROUTER_MODELS" \
31+
--samples-per-category "$SAMPLES_PER_CATEGORY" \
32+
--concurrent-requests "$CONCURRENT_REQUESTS" \
33+
--output-dir results/reasonbench
34+
else
35+
echo "Running full benchmark (router + vLLM)..."
36+
python bench/router_reason_bench.py \
37+
--run-router \
38+
--router-endpoint "$ROUTER_ENDPOINT" \
39+
--router-api-key "$ROUTER_API_KEY" \
40+
--router-models "$ROUTER_MODELS" \
41+
--run-vllm \
42+
--vllm-endpoint "$VLLM_ENDPOINT" \
43+
--vllm-api-key "$VLLM_API_KEY" \
44+
--vllm-models "$VLLM_MODELS" \
45+
--samples-per-category "$SAMPLES_PER_CATEGORY" \
46+
--vllm-exec-modes NR XC \
47+
--concurrent-requests "$CONCURRENT_REQUESTS" \
48+
--output-dir results/reasonbench
49+
fi
50+
51+
# Generate plots if summary files exist
52+
echo "Checking for plot generation..."
53+
echo "VLLM_MODELS: $VLLM_MODELS"
54+
echo "ROUTER_MODELS: $ROUTER_MODELS"
55+
56+
# Get first model name and make it path-safe
57+
VLLM_MODEL_FIRST=$(echo "$VLLM_MODELS" | cut -d' ' -f1)
58+
ROUTER_MODEL_FIRST=$(echo "$ROUTER_MODELS" | cut -d' ' -f1)
59+
echo "First models: VLLM=$VLLM_MODEL_FIRST, Router=$ROUTER_MODEL_FIRST"
60+
61+
# Replace / with _ for path safety
62+
VLLM_MODELS_SAFE=$(echo "$VLLM_MODEL_FIRST" | tr '/' '_')
63+
ROUTER_MODELS_SAFE=$(echo "$ROUTER_MODEL_FIRST" | tr '/' '_')
64+
echo "Safe paths: VLLM=$VLLM_MODELS_SAFE, Router=$ROUTER_MODELS_SAFE"
65+
66+
# Construct the full paths
67+
VLLM_SUMMARY="results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json"
68+
ROUTER_SUMMARY="results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json"
69+
echo "Looking for summaries at:"
70+
echo "VLLM: $VLLM_SUMMARY"
71+
echo "Router: $ROUTER_SUMMARY"
72+
73+
# Generate plots only when the router summary exists; the vLLM summary is optional (comparison plots when present, router-only plots otherwise)
74+
if [ -f "$ROUTER_SUMMARY" ]; then
75+
echo "Found router summary, generating plots..."
76+
if [ -f "$VLLM_SUMMARY" ]; then
77+
echo "Found both summaries, generating comparison plots..."
78+
python bench/bench_plot.py \
79+
--summary "$VLLM_SUMMARY" \
80+
--router-summary "$ROUTER_SUMMARY"
81+
else
82+
echo "vLLM summary not found, generating router-only plots..."
83+
python bench/bench_plot.py \
84+
--router-summary "$ROUTER_SUMMARY"
85+
fi
86+
else
87+
echo "No router summary found, skipping plot generation"
88+
fi

0 commit comments

Comments
 (0)