From a837ba330d24df0d1679ba83cfb9c59693a2455e Mon Sep 17 00:00:00 2001
From: Huamin Chen
Date: Thu, 4 Sep 2025 17:21:51 +0000
Subject: [PATCH 1/5] chore: install rust if not present

Signed-off-by: Huamin Chen
---
 Makefile | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index f3cff9cc..08c3f67f 100644
--- a/Makefile
+++ b/Makefile
@@ -11,8 +11,15 @@ build: rust build-router
 
 # Build the Rust library
 rust:
-	@echo "Building Rust library..."
-	cd candle-binding && cargo build --release
+	@echo "Ensuring rust is installed..."
+	@bash -c 'if ! command -v rustc >/dev/null 2>&1; then \
+		echo "rustc not found, installing..."; \
+		curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
+	fi && \
+	echo "Loading Rust environment..." && \
+	. $$HOME/.cargo/env && \
+	echo "Building Rust library..." && \
+	cd candle-binding && cargo build --release'
 
 # Build router
 build-router: rust

From 1064b0f4ed4668e2dd03574711c59b093fa1cd1e Mon Sep 17 00:00:00 2001
From: Huamin Chen
Date: Thu, 4 Sep 2025 17:34:38 +0000
Subject: [PATCH 2/5] ensure rust env works for different configurations

Signed-off-by: Huamin Chen
---
 Makefile | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 08c3f67f..6b85d62f 100644
--- a/Makefile
+++ b/Makefile
@@ -16,8 +16,13 @@ rust:
 		echo "rustc not found, installing..."; \
 		curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
 	fi && \
-	echo "Loading Rust environment..." && \
-	. $$HOME/.cargo/env && \
+	if [ -f "$$HOME/.cargo/env" ]; then \
+		echo "Loading Rust environment from $$HOME/.cargo/env..." && \
+		. $$HOME/.cargo/env; \
+	fi && \
+	if ! command -v cargo >/dev/null 2>&1; then \
+		echo "Error: cargo not found in PATH" && exit 1; \
+	fi && \
 	echo "Building Rust library..." && \
 	cd candle-binding && cargo build --release'
 
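The bootstrap logic that patches 1 and 2 fold into the `rust` recipe can also be exercised by hand, outside make. A minimal standalone sketch of the same sequence, assuming the recipe's steps are the whole contract (the script itself is illustrative and not part of the series):

    #!/bin/bash
    # Install rustup when rustc is absent, load cargo's environment,
    # then verify the toolchain -- the same steps the Makefile recipe runs.
    set -euo pipefail

    if ! command -v rustc >/dev/null 2>&1; then
        echo "rustc not found, installing..."
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
    fi

    # rustup writes an env file; source it so cargo lands on PATH in this shell.
    if [ -f "$HOME/.cargo/env" ]; then
        . "$HOME/.cargo/env"
    fi

    if ! command -v cargo >/dev/null 2>&1; then
        echo "Error: cargo not found in PATH" >&2
        exit 1
    fi

    cd candle-binding && cargo build --release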
From ba3ba40c32f396f7e10d91d3f1bc09436601566d Mon Sep 17 00:00:00 2001
From: Huamin Chen
Date: Thu, 4 Sep 2025 19:30:26 +0000
Subject: [PATCH 3/5] chore: allow custom config.yaml in make run-router

Signed-off-by: Huamin Chen
---
 Makefile | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 6b85d62f..4d547304 100644
--- a/Makefile
+++ b/Makefile
@@ -32,11 +32,14 @@ build-router: rust
 	@mkdir -p bin
 	@cd src/semantic-router && go build -o ../../bin/router cmd/main.go
 
+# Config file path with default
+CONFIG_FILE ?= config/config.yaml
+
 # Run the router
-run-router: build-router
-	@echo "Running router..."
+run-router: build-router download-models
+	@echo "Running router with config: ${CONFIG_FILE}"
 	@export LD_LIBRARY_PATH=${PWD}/candle-binding/target/release && \
-		./bin/router -config=config/config.yaml
+		./bin/router -config=${CONFIG_FILE}
 
 # Prepare Envoy
 prepare-envoy:
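Because `CONFIG_FILE` is assigned with `?=`, it can be overridden per invocation without editing the Makefile, either on the make command line or from the environment (`?=` only assigns when the variable is not already set). A usage sketch; the custom path below is a placeholder:

    # default: config/config.yaml
    make run-router

    # explicit override on the command line
    make run-router CONFIG_FILE=config/my-router.yaml

    # equivalent override via the environment
    CONFIG_FILE=config/my-router.yaml make run-router

The recipe also gains a `download-models` prerequisite, so required models are fetched before the router starts.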
+echo "VLLM_MODELS: $VLLM_MODELS" +echo "ROUTER_MODELS: $ROUTER_MODELS" + +# Get first model name and make it path-safe +VLLM_MODEL_FIRST=$(echo "$VLLM_MODELS" | cut -d' ' -f1) +ROUTER_MODEL_FIRST=$(echo "$ROUTER_MODELS" | cut -d' ' -f1) +echo "First models: VLLM=$VLLM_MODEL_FIRST, Router=$ROUTER_MODEL_FIRST" + +# Replace / with _ for path safety +VLLM_MODELS_SAFE=$(echo "$VLLM_MODEL_FIRST" | tr '/' '_') +ROUTER_MODELS_SAFE=$(echo "$ROUTER_MODEL_FIRST" | tr '/' '_') +echo "Safe paths: VLLM=$VLLM_MODELS_SAFE, Router=$ROUTER_MODELS_SAFE" + +# Construct the full paths +VLLM_SUMMARY="results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json" +ROUTER_SUMMARY="results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json" +echo "Looking for summaries at:" +echo "VLLM: $VLLM_SUMMARY" +echo "Router: $ROUTER_SUMMARY" + python bench_plot.py \ - --summary "results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json" \ - --router-summary "results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json" + --summary "$VLLM_SUMMARY" \ + --router-summary "$ROUTER_SUMMARY" From 6f365e3767fb81d62a3ddcbb0413960951e988af Mon Sep 17 00:00:00 2001 From: Huamin Chen Date: Thu, 4 Sep 2025 22:41:10 +0000 Subject: [PATCH 5/5] chore: option to run router only bench Signed-off-by: Huamin Chen --- bench/run_bench.sh | 67 +++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/bench/run_bench.sh b/bench/run_bench.sh index 2da18eee..67877f51 100755 --- a/bench/run_bench.sh +++ b/bench/run_bench.sh @@ -5,6 +5,8 @@ # SAMPLES_PER_CATEGORY=5 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh # Long run: # SAMPLES_PER_CATEGORY=100 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh +# To test only router: +# BENCHMARK_ROUTER_ONLY=true ./run_bench.sh set -x -e @@ -16,24 +18,38 @@ export ROUTER_MODELS="${ROUTER_MODELS:-auto}" export VLLM_MODELS="${VLLM_MODELS:-openai/gpt-oss-20b}" export SAMPLES_PER_CATEGORY="${SAMPLES_PER_CATEGORY:-5}" export CONCURRENT_REQUESTS="${CONCURRENT_REQUESTS:-4}" +export BENCHMARK_ROUTER_ONLY="${BENCHMARK_ROUTER_ONLY:-false}" # Run the benchmark -python router_reason_bench.py \ - --run-router \ - --router-endpoint "$ROUTER_ENDPOINT" \ - --router-api-key "$ROUTER_API_KEY" \ - --router-models "$ROUTER_MODELS" \ - --run-vllm \ - --vllm-endpoint "$VLLM_ENDPOINT" \ - --vllm-api-key "$VLLM_API_KEY" \ - --vllm-models "$VLLM_MODELS" \ - --samples-per-category "$SAMPLES_PER_CATEGORY" \ - --vllm-exec-modes NR XC \ - --concurrent-requests "$CONCURRENT_REQUESTS" \ - --output-dir results/reasonbench - -# Generate plots -echo "Processing model paths..." +if [ "${BENCHMARK_ROUTER_ONLY}" = "true" ]; then + echo "Running router-only benchmark" + python bench/router_reason_bench.py \ + --run-router \ + --router-endpoint "$ROUTER_ENDPOINT" \ + --router-api-key "$ROUTER_API_KEY" \ + --router-models "$ROUTER_MODELS" \ + --samples-per-category "$SAMPLES_PER_CATEGORY" \ + --concurrent-requests "$CONCURRENT_REQUESTS" \ + --output-dir results/reasonbench +else + echo "Running full benchmark (router + vLLM)..." 
From 6f365e3767fb81d62a3ddcbb0413960951e988af Mon Sep 17 00:00:00 2001
From: Huamin Chen
Date: Thu, 4 Sep 2025 22:41:10 +0000
Subject: [PATCH 5/5] chore: option to run router only bench

Signed-off-by: Huamin Chen
---
 bench/run_bench.sh | 67 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 48 insertions(+), 19 deletions(-)

diff --git a/bench/run_bench.sh b/bench/run_bench.sh
index 2da18eee..67877f51 100755
--- a/bench/run_bench.sh
+++ b/bench/run_bench.sh
@@ -5,6 +5,8 @@
 # SAMPLES_PER_CATEGORY=5 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
 # Long run:
 # SAMPLES_PER_CATEGORY=100 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
+# To test only router:
+# BENCHMARK_ROUTER_ONLY=true ./run_bench.sh
 
 set -x -e
 
@@ -16,24 +18,38 @@ export ROUTER_MODELS="${ROUTER_MODELS:-auto}"
 export VLLM_MODELS="${VLLM_MODELS:-openai/gpt-oss-20b}"
 export SAMPLES_PER_CATEGORY="${SAMPLES_PER_CATEGORY:-5}"
 export CONCURRENT_REQUESTS="${CONCURRENT_REQUESTS:-4}"
+export BENCHMARK_ROUTER_ONLY="${BENCHMARK_ROUTER_ONLY:-false}"
 
 # Run the benchmark
-python router_reason_bench.py \
-    --run-router \
-    --router-endpoint "$ROUTER_ENDPOINT" \
-    --router-api-key "$ROUTER_API_KEY" \
-    --router-models "$ROUTER_MODELS" \
-    --run-vllm \
-    --vllm-endpoint "$VLLM_ENDPOINT" \
-    --vllm-api-key "$VLLM_API_KEY" \
-    --vllm-models "$VLLM_MODELS" \
-    --samples-per-category "$SAMPLES_PER_CATEGORY" \
-    --vllm-exec-modes NR XC \
-    --concurrent-requests "$CONCURRENT_REQUESTS" \
-    --output-dir results/reasonbench
-
-# Generate plots
-echo "Processing model paths..."
+if [ "${BENCHMARK_ROUTER_ONLY}" = "true" ]; then
+    echo "Running router-only benchmark"
+    python bench/router_reason_bench.py \
+        --run-router \
+        --router-endpoint "$ROUTER_ENDPOINT" \
+        --router-api-key "$ROUTER_API_KEY" \
+        --router-models "$ROUTER_MODELS" \
+        --samples-per-category "$SAMPLES_PER_CATEGORY" \
+        --concurrent-requests "$CONCURRENT_REQUESTS" \
+        --output-dir results/reasonbench
+else
+    echo "Running full benchmark (router + vLLM)..."
+    python bench/router_reason_bench.py \
+        --run-router \
+        --router-endpoint "$ROUTER_ENDPOINT" \
+        --router-api-key "$ROUTER_API_KEY" \
+        --router-models "$ROUTER_MODELS" \
+        --run-vllm \
+        --vllm-endpoint "$VLLM_ENDPOINT" \
+        --vllm-api-key "$VLLM_API_KEY" \
+        --vllm-models "$VLLM_MODELS" \
+        --samples-per-category "$SAMPLES_PER_CATEGORY" \
+        --vllm-exec-modes NR XC \
+        --concurrent-requests "$CONCURRENT_REQUESTS" \
+        --output-dir results/reasonbench
+fi
+
+# Generate plots if summary files exist
+echo "Checking for plot generation..."
 echo "VLLM_MODELS: $VLLM_MODELS"
 echo "ROUTER_MODELS: $ROUTER_MODELS"
 
@@ -54,6 +70,19 @@ echo "Looking for summaries at:"
 echo "VLLM: $VLLM_SUMMARY"
 echo "Router: $ROUTER_SUMMARY"
 
-python bench_plot.py \
-    --summary "$VLLM_SUMMARY" \
-    --router-summary "$ROUTER_SUMMARY"
+# Check if at least one summary file exists and generate plots
+if [ -f "$ROUTER_SUMMARY" ]; then
+    echo "Found router summary, generating plots..."
+    if [ -f "$VLLM_SUMMARY" ]; then
+        echo "Found both summaries, generating comparison plots..."
+        python bench/bench_plot.py \
+            --summary "$VLLM_SUMMARY" \
+            --router-summary "$ROUTER_SUMMARY"
+    else
+        echo "vLLM summary not found, generating router-only plots..."
+        python bench/bench_plot.py \
+            --router-summary "$ROUTER_SUMMARY"
+    fi
+else
+    echo "No router summary found, skipping plot generation"
+fi
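With `BENCHMARK_ROUTER_ONLY` defaulting to false, the full router-plus-vLLM comparison stays the default path, and the plot step degrades gracefully when only the router summary exists. A router-only smoke run then needs a single variable; note the script now calls `bench/router_reason_bench.py` and `bench/bench_plot.py`, which appears to assume it is launched from the repository root (the sample count below is a placeholder):

    # from the repository root; no reachable vLLM endpoint required
    BENCHMARK_ROUTER_ONLY=true SAMPLES_PER_CATEGORY=10 ./bench/run_bench.sh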