From a837ba330d24df0d1679ba83cfb9c59693a2455e Mon Sep 17 00:00:00 2001
From: Huamin Chen
Date: Thu, 4 Sep 2025 17:21:51 +0000
Subject: [PATCH 1/5] chore: install rust if not present

Signed-off-by: Huamin Chen
---
 Makefile | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index f3cff9cc..08c3f67f 100644
--- a/Makefile
+++ b/Makefile
@@ -11,8 +11,15 @@ build: rust build-router
 
 # Build the Rust library
 rust:
-	@echo "Building Rust library..."
-	cd candle-binding && cargo build --release
+	@echo "Ensuring rust is installed..."
+	@bash -c 'if ! command -v rustc >/dev/null 2>&1; then \
+		echo "rustc not found, installing..."; \
+		curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
+	fi && \
+	echo "Loading Rust environment..." && \
+	. $$HOME/.cargo/env && \
+	echo "Building Rust library..." && \
+	cd candle-binding && cargo build --release'
 
 # Build router
 build-router: rust

From 1064b0f4ed4668e2dd03574711c59b093fa1cd1e Mon Sep 17 00:00:00 2001
From: Huamin Chen
Date: Thu, 4 Sep 2025 17:34:38 +0000
Subject: [PATCH 2/5] ensure rust env works for different configurations

Signed-off-by: Huamin Chen
---
 Makefile | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 08c3f67f..6b85d62f 100644
--- a/Makefile
+++ b/Makefile
@@ -16,8 +16,13 @@ rust:
 		echo "rustc not found, installing..."; \
 		curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
 	fi && \
-	echo "Loading Rust environment..." && \
-	. $$HOME/.cargo/env && \
+	if [ -f "$$HOME/.cargo/env" ]; then \
+		echo "Loading Rust environment from $$HOME/.cargo/env..." && \
+		. $$HOME/.cargo/env; \
+	fi && \
+	if ! command -v cargo >/dev/null 2>&1; then \
+		echo "Error: cargo not found in PATH" && exit 1; \
+	fi && \
 	echo "Building Rust library..." && \
 	cd candle-binding && cargo build --release'
 
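The bootstrap logic that patches 1 and 2 fold into the `rust` recipe can also be exercised by hand, outside make. A minimal standalone sketch of the same sequence, assuming the recipe's steps are the whole contract (the script itself is illustrative and not part of the series):

    #!/bin/bash
    # Install rustup when rustc is absent, load cargo's environment,
    # then verify the toolchain -- the same steps the Makefile recipe runs.
    set -euo pipefail

    if ! command -v rustc >/dev/null 2>&1; then
        echo "rustc not found, installing..."
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
    fi

    # rustup writes an env file; source it so cargo lands on PATH in this shell.
    if [ -f "$HOME/.cargo/env" ]; then
        . "$HOME/.cargo/env"
    fi

    if ! command -v cargo >/dev/null 2>&1; then
        echo "Error: cargo not found in PATH" >&2
        exit 1
    fi

    cd candle-binding && cargo build --release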
From ba3ba40c32f396f7e10d91d3f1bc09436601566d Mon Sep 17 00:00:00 2001
From: Huamin Chen
Date: Thu, 4 Sep 2025 19:30:26 +0000
Subject: [PATCH 3/5] chore: allow custom config.yaml in make run-router

Signed-off-by: Huamin Chen
---
 Makefile | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 6b85d62f..4d547304 100644
--- a/Makefile
+++ b/Makefile
@@ -32,11 +32,14 @@ build-router: rust
 	@mkdir -p bin
 	@cd src/semantic-router && go build -o ../../bin/router cmd/main.go
 
+# Config file path with default
+CONFIG_FILE ?= config/config.yaml
+
 # Run the router
-run-router: build-router
-	@echo "Running router..."
+run-router: build-router download-models
+	@echo "Running router with config: ${CONFIG_FILE}"
 	@export LD_LIBRARY_PATH=${PWD}/candle-binding/target/release && \
-		./bin/router -config=config/config.yaml
+		./bin/router -config=${CONFIG_FILE}
 
 # Prepare Envoy
 prepare-envoy:
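Because `CONFIG_FILE` is assigned with `?=`, it can be overridden per invocation without editing the Makefile, either on the make command line or from the environment (`?=` only assigns when the variable is not already set). A usage sketch; the custom path below is a placeholder:

    # default: config/config.yaml
    make run-router

    # explicit override on the command line
    make run-router CONFIG_FILE=config/my-router.yaml

    # equivalent override via the environment
    CONFIG_FILE=config/my-router.yaml make run-router

The recipe also gains a `download-models` prerequisite, so required models are fetched before the router starts.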
+echo "VLLM_MODELS: $VLLM_MODELS" +echo "ROUTER_MODELS: $ROUTER_MODELS" + +# Get first model name and make it path-safe +VLLM_MODEL_FIRST=$(echo "$VLLM_MODELS" | cut -d' ' -f1) +ROUTER_MODEL_FIRST=$(echo "$ROUTER_MODELS" | cut -d' ' -f1) +echo "First models: VLLM=$VLLM_MODEL_FIRST, Router=$ROUTER_MODEL_FIRST" + +# Replace / with _ for path safety +VLLM_MODELS_SAFE=$(echo "$VLLM_MODEL_FIRST" | tr '/' '_') +ROUTER_MODELS_SAFE=$(echo "$ROUTER_MODEL_FIRST" | tr '/' '_') +echo "Safe paths: VLLM=$VLLM_MODELS_SAFE, Router=$ROUTER_MODELS_SAFE" + +# Construct the full paths +VLLM_SUMMARY="results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json" +ROUTER_SUMMARY="results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json" +echo "Looking for summaries at:" +echo "VLLM: $VLLM_SUMMARY" +echo "Router: $ROUTER_SUMMARY" + python bench_plot.py \ - --summary "results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json" \ - --router-summary "results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json" + --summary "$VLLM_SUMMARY" \ + --router-summary "$ROUTER_SUMMARY" From 6f365e3767fb81d62a3ddcbb0413960951e988af Mon Sep 17 00:00:00 2001 From: Huamin Chen Date: Thu, 4 Sep 2025 22:41:10 +0000 Subject: [PATCH 5/5] chore: option to run router only bench Signed-off-by: Huamin Chen --- bench/run_bench.sh | 67 +++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/bench/run_bench.sh b/bench/run_bench.sh index 2da18eee..67877f51 100755 --- a/bench/run_bench.sh +++ b/bench/run_bench.sh @@ -5,6 +5,8 @@ # SAMPLES_PER_CATEGORY=5 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh # Long run: # SAMPLES_PER_CATEGORY=100 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh +# To test only router: +# BENCHMARK_ROUTER_ONLY=true ./run_bench.sh set -x -e @@ -16,24 +18,38 @@ export ROUTER_MODELS="${ROUTER_MODELS:-auto}" export VLLM_MODELS="${VLLM_MODELS:-openai/gpt-oss-20b}" export SAMPLES_PER_CATEGORY="${SAMPLES_PER_CATEGORY:-5}" export CONCURRENT_REQUESTS="${CONCURRENT_REQUESTS:-4}" +export BENCHMARK_ROUTER_ONLY="${BENCHMARK_ROUTER_ONLY:-false}" # Run the benchmark -python router_reason_bench.py \ - --run-router \ - --router-endpoint "$ROUTER_ENDPOINT" \ - --router-api-key "$ROUTER_API_KEY" \ - --router-models "$ROUTER_MODELS" \ - --run-vllm \ - --vllm-endpoint "$VLLM_ENDPOINT" \ - --vllm-api-key "$VLLM_API_KEY" \ - --vllm-models "$VLLM_MODELS" \ - --samples-per-category "$SAMPLES_PER_CATEGORY" \ - --vllm-exec-modes NR XC \ - --concurrent-requests "$CONCURRENT_REQUESTS" \ - --output-dir results/reasonbench - -# Generate plots -echo "Processing model paths..." +if [ "${BENCHMARK_ROUTER_ONLY}" = "true" ]; then + echo "Running router-only benchmark" + python bench/router_reason_bench.py \ + --run-router \ + --router-endpoint "$ROUTER_ENDPOINT" \ + --router-api-key "$ROUTER_API_KEY" \ + --router-models "$ROUTER_MODELS" \ + --samples-per-category "$SAMPLES_PER_CATEGORY" \ + --concurrent-requests "$CONCURRENT_REQUESTS" \ + --output-dir results/reasonbench +else + echo "Running full benchmark (router + vLLM)..." 
From 6f365e3767fb81d62a3ddcbb0413960951e988af Mon Sep 17 00:00:00 2001
From: Huamin Chen
Date: Thu, 4 Sep 2025 22:41:10 +0000
Subject: [PATCH 5/5] chore: option to run router only bench

Signed-off-by: Huamin Chen
---
 bench/run_bench.sh | 67 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 48 insertions(+), 19 deletions(-)

diff --git a/bench/run_bench.sh b/bench/run_bench.sh
index 2da18eee..67877f51 100755
--- a/bench/run_bench.sh
+++ b/bench/run_bench.sh
@@ -5,6 +5,8 @@
 # SAMPLES_PER_CATEGORY=5 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
 # Long run:
 # SAMPLES_PER_CATEGORY=100 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
+# To test only router:
+# BENCHMARK_ROUTER_ONLY=true ./run_bench.sh
 
 set -x -e
 
@@ -16,24 +18,38 @@ export ROUTER_MODELS="${ROUTER_MODELS:-auto}"
 export VLLM_MODELS="${VLLM_MODELS:-openai/gpt-oss-20b}"
 export SAMPLES_PER_CATEGORY="${SAMPLES_PER_CATEGORY:-5}"
 export CONCURRENT_REQUESTS="${CONCURRENT_REQUESTS:-4}"
+export BENCHMARK_ROUTER_ONLY="${BENCHMARK_ROUTER_ONLY:-false}"
 
 # Run the benchmark
-python router_reason_bench.py \
-    --run-router \
-    --router-endpoint "$ROUTER_ENDPOINT" \
-    --router-api-key "$ROUTER_API_KEY" \
-    --router-models "$ROUTER_MODELS" \
-    --run-vllm \
-    --vllm-endpoint "$VLLM_ENDPOINT" \
-    --vllm-api-key "$VLLM_API_KEY" \
-    --vllm-models "$VLLM_MODELS" \
-    --samples-per-category "$SAMPLES_PER_CATEGORY" \
-    --vllm-exec-modes NR XC \
-    --concurrent-requests "$CONCURRENT_REQUESTS" \
-    --output-dir results/reasonbench
-
-# Generate plots
-echo "Processing model paths..."
+if [ "${BENCHMARK_ROUTER_ONLY}" = "true" ]; then
+    echo "Running router-only benchmark"
+    python bench/router_reason_bench.py \
+        --run-router \
+        --router-endpoint "$ROUTER_ENDPOINT" \
+        --router-api-key "$ROUTER_API_KEY" \
+        --router-models "$ROUTER_MODELS" \
+        --samples-per-category "$SAMPLES_PER_CATEGORY" \
+        --concurrent-requests "$CONCURRENT_REQUESTS" \
+        --output-dir results/reasonbench
+else
+    echo "Running full benchmark (router + vLLM)..."
+    python bench/router_reason_bench.py \
+        --run-router \
+        --router-endpoint "$ROUTER_ENDPOINT" \
+        --router-api-key "$ROUTER_API_KEY" \
+        --router-models "$ROUTER_MODELS" \
+        --run-vllm \
+        --vllm-endpoint "$VLLM_ENDPOINT" \
+        --vllm-api-key "$VLLM_API_KEY" \
+        --vllm-models "$VLLM_MODELS" \
+        --samples-per-category "$SAMPLES_PER_CATEGORY" \
+        --vllm-exec-modes NR XC \
+        --concurrent-requests "$CONCURRENT_REQUESTS" \
+        --output-dir results/reasonbench
+fi
+
+# Generate plots if summary files exist
+echo "Checking for plot generation..."
 echo "VLLM_MODELS: $VLLM_MODELS"
 echo "ROUTER_MODELS: $ROUTER_MODELS"
 
@@ -54,6 +70,19 @@ echo "Looking for summaries at:"
 echo "VLLM: $VLLM_SUMMARY"
 echo "Router: $ROUTER_SUMMARY"
 
-python bench_plot.py \
-    --summary "$VLLM_SUMMARY" \
-    --router-summary "$ROUTER_SUMMARY"
+# Check if at least one summary file exists and generate plots
+if [ -f "$ROUTER_SUMMARY" ]; then
+    echo "Found router summary, generating plots..."
+    if [ -f "$VLLM_SUMMARY" ]; then
+        echo "Found both summaries, generating comparison plots..."
+        python bench/bench_plot.py \
+            --summary "$VLLM_SUMMARY" \
+            --router-summary "$ROUTER_SUMMARY"
+    else
+        echo "vLLM summary not found, generating router-only plots..."
+        python bench/bench_plot.py \
+            --router-summary "$ROUTER_SUMMARY"
+    fi
+else
+    echo "No router summary found, skipping plot generation"
+fi
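With `BENCHMARK_ROUTER_ONLY` defaulting to false, the full router-plus-vLLM comparison stays the default path, and the plot step degrades gracefully when only the router summary exists. A router-only smoke run then needs a single variable; note the script now calls `bench/router_reason_bench.py` and `bench/bench_plot.py`, which appears to assume it is launched from the repository root (the sample count below is a placeholder):

    # from the repository root; no reachable vLLM endpoint required
    BENCHMARK_ROUTER_ONLY=true SAMPLES_PER_CATEGORY=10 ./bench/run_bench.sh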