Merge branch 'main' into feat/add-batch-metrics

rootfs · web-flow · commit 589692ec4f42 · 2025-09-05T07:58:59.000-04:00
diff --git a/Makefile b/Makefile
@@ -11,20 +11,35 @@ build: rust build-router
 
 # Build the Rust library
 rust:
-	@echo "Building Rust library..."
-	cd candle-binding && cargo build --release
+	@echo "Ensuring rust is installed..."
+	@bash -c 'if ! command -v rustc >/dev/null 2>&1; then \
+		echo "rustc not found, installing..."; \
+		curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
+	fi && \
+	if [ -f "$$HOME/.cargo/env" ]; then \
+		echo "Loading Rust environment from $$HOME/.cargo/env..." && \
+		. $$HOME/.cargo/env; \
+	fi && \
+	if ! command -v cargo >/dev/null 2>&1; then \
+		echo "Error: cargo not found in PATH" && exit 1; \
+	fi && \
+	echo "Building Rust library..." && \
+	cd candle-binding && cargo build --release'
 
 # Build router
 build-router: rust
 	@echo "Building router..."
 	@mkdir -p bin
 	@cd src/semantic-router && go build -o ../../bin/router cmd/main.go
 
+# Config file path (default below; override with e.g. `make run-router CONFIG_FILE=path/to/config.yaml`)
+CONFIG_FILE ?= config/config.yaml
+
 # Run the router
-run-router: build-router
-	@echo "Running router..."
+run-router: build-router download-models
+	@echo "Running router with config: ${CONFIG_FILE}"
 	@export LD_LIBRARY_PATH=${PWD}/candle-binding/target/release && \
-		./bin/router -config=config/config.yaml
+		./bin/router -config=${CONFIG_FILE}
 
 # Prepare Envoy
 prepare-envoy:
diff --git a/bench/run_bench.sh b/bench/run_bench.sh
@@ -1,34 +1,88 @@
 #!/bin/bash
 
-set -x 
+# Example usage (run from the repository root; the script calls bench/*.py by relative path):
+# Quick run:
+# SAMPLES_PER_CATEGORY=5 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./bench/run_bench.sh
+# Long run:
+# SAMPLES_PER_CATEGORY=100 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./bench/run_bench.sh
+# To test only router:
+# BENCHMARK_ROUTER_ONLY=true ./bench/run_bench.sh
 
-export ROUTER_API_KEY="1234567890"
-export VLLM_API_KEY="1234567890"
-export ROUTER_ENDPOINT="http://localhost:8801/v1"
-export VLLM_ENDPOINT="http://localhost:8000/v1"
-export ROUTER_MODELS="auto"
-export VLLM_MODELS="openai/gpt-oss-20b"
+set -x -e
+
+export ROUTER_API_KEY="${ROUTER_API_KEY:-1234567890}"
+export VLLM_API_KEY="${VLLM_API_KEY:-1234567890}"
+export ROUTER_ENDPOINT="${ROUTER_ENDPOINT:-http://localhost:8801/v1}"
+export VLLM_ENDPOINT="${VLLM_ENDPOINT:-http://localhost:8000/v1}"
+export ROUTER_MODELS="${ROUTER_MODELS:-auto}"
+export VLLM_MODELS="${VLLM_MODELS:-openai/gpt-oss-20b}"
+export SAMPLES_PER_CATEGORY="${SAMPLES_PER_CATEGORY:-5}"
+export CONCURRENT_REQUESTS="${CONCURRENT_REQUESTS:-4}"
+export BENCHMARK_ROUTER_ONLY="${BENCHMARK_ROUTER_ONLY:-false}"
 
 # Run the benchmark
-python router_reason_bench.py \
-  --run-router \
-  --router-endpoint "$ROUTER_ENDPOINT" \
-  --router-api-key "$ROUTER_API_KEY" \
-  --router-models "$ROUTER_MODELS" \
-  --run-vllm \
-  --vllm-endpoint "$VLLM_ENDPOINT" \
-  --vllm-api-key "$VLLM_API_KEY" \
-  --vllm-models "$VLLM_MODELS" \
-  --samples-per-category 5 \
-  --vllm-exec-modes NR XC \
-  --concurrent-requests 4 \
-  --output-dir results/reasonbench
-
-# Generate plots
-VLLM_MODEL_FIRST="${VLLM_MODELS%% *}"
-ROUTER_MODEL_FIRST="${ROUTER_MODELS%% *}"
-VLLM_MODELS_SAFE="${VLLM_MODEL_FIRST//\//_}"
-ROUTER_MODELS_SAFE="${ROUTER_MODEL_FIRST//\//_}"
-python bench_plot.py \
-  --summary "results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json" \
-  --router-summary "results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json"
+if [ "${BENCHMARK_ROUTER_ONLY}" = "true" ]; then
+  echo "Running router-only benchmark"
+  python bench/router_reason_bench.py \
+    --run-router \
+    --router-endpoint "$ROUTER_ENDPOINT" \
+    --router-api-key "$ROUTER_API_KEY" \
+    --router-models "$ROUTER_MODELS" \
+    --samples-per-category "$SAMPLES_PER_CATEGORY" \
+    --concurrent-requests "$CONCURRENT_REQUESTS" \
+    --output-dir results/reasonbench
+else
+  echo "Running full benchmark (router + vLLM)..."
+  python bench/router_reason_bench.py \
+    --run-router \
+    --router-endpoint "$ROUTER_ENDPOINT" \
+    --router-api-key "$ROUTER_API_KEY" \
+    --router-models "$ROUTER_MODELS" \
+    --run-vllm \
+    --vllm-endpoint "$VLLM_ENDPOINT" \
+    --vllm-api-key "$VLLM_API_KEY" \
+    --vllm-models "$VLLM_MODELS" \
+    --samples-per-category "$SAMPLES_PER_CATEGORY" \
+    --vllm-exec-modes NR XC \
+    --concurrent-requests "$CONCURRENT_REQUESTS" \
+    --output-dir results/reasonbench
+fi
+
+# Generate plots when the router summary exists (the vLLM summary is optional)
+echo "Checking for plot generation..."
+echo "VLLM_MODELS: $VLLM_MODELS"
+echo "ROUTER_MODELS: $ROUTER_MODELS"
+
+# Get first model name and make it path-safe
+VLLM_MODEL_FIRST=$(echo "$VLLM_MODELS" | cut -d' ' -f1)
+ROUTER_MODEL_FIRST=$(echo "$ROUTER_MODELS" | cut -d' ' -f1)
+echo "First models: VLLM=$VLLM_MODEL_FIRST, Router=$ROUTER_MODEL_FIRST"
+
+# Replace / with _ for path safety
+VLLM_MODELS_SAFE=$(echo "$VLLM_MODEL_FIRST" | tr '/' '_')
+ROUTER_MODELS_SAFE=$(echo "$ROUTER_MODEL_FIRST" | tr '/' '_')
+echo "Safe paths: VLLM=$VLLM_MODELS_SAFE, Router=$ROUTER_MODELS_SAFE"
+
+# Construct the full paths
+VLLM_SUMMARY="results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json"
+ROUTER_SUMMARY="results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json"
+echo "Looking for summaries at:"
+echo "VLLM: $VLLM_SUMMARY"
+echo "Router: $ROUTER_SUMMARY"
+
+# Plot only if the router summary exists; add the vLLM comparison when its summary is present too
+if [ -f "$ROUTER_SUMMARY" ]; then
+  echo "Found router summary, generating plots..."
+  if [ -f "$VLLM_SUMMARY" ]; then
+    echo "Found both summaries, generating comparison plots..."
+    python bench/bench_plot.py \
+      --summary "$VLLM_SUMMARY" \
+      --router-summary "$ROUTER_SUMMARY"
+  else
+    echo "vLLM summary not found, generating router-only plots..."
+    python bench/bench_plot.py \
+      --router-summary "$ROUTER_SUMMARY"
+  fi
+else
+  echo "No router summary found, skipping plot generation"
+fi