diff --git a/Makefile b/Makefile
index f3cff9cc..4d547304 100644
--- a/Makefile
+++ b/Makefile
@@ -11,8 +11,20 @@ build: rust build-router
 
 # Build the Rust library
 rust:
-	@echo "Building Rust library..."
-	cd candle-binding && cargo build --release
+	@echo "Ensuring rust is installed..."
+	@bash -c 'if ! command -v rustc >/dev/null 2>&1; then \
+		echo "rustc not found, installing..."; \
+		curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
+	fi && \
+	if [ -f "$$HOME/.cargo/env" ]; then \
+		echo "Loading Rust environment from $$HOME/.cargo/env..." && \
+		. $$HOME/.cargo/env; \
+	fi && \
+	if ! command -v cargo >/dev/null 2>&1; then \
+		echo "Error: cargo not found in PATH" && exit 1; \
+	fi && \
+	echo "Building Rust library..." && \
+	cd candle-binding && cargo build --release'
 
 # Build router
 build-router: rust
@@ -20,11 +32,14 @@ build-router: rust
 	@mkdir -p bin
 	@cd src/semantic-router && go build -o ../../bin/router cmd/main.go
 
+# Config file path with default
+CONFIG_FILE ?= config/config.yaml
+
 # Run the router
-run-router: build-router
-	@echo "Running router..."
+run-router: build-router download-models
+	@echo "Running router with config: ${CONFIG_FILE}"
 	@export LD_LIBRARY_PATH=${PWD}/candle-binding/target/release && \
-		./bin/router -config=config/config.yaml
+		./bin/router -config=${CONFIG_FILE}
 
 # Prepare Envoy
 prepare-envoy:
diff --git a/bench/run_bench.sh b/bench/run_bench.sh
old mode 100644
new mode 100755
index d13c6917..67877f51
--- a/bench/run_bench.sh
+++ b/bench/run_bench.sh
@@ -1,34 +1,88 @@
 #!/bin/bash
-set -x
+# Example usage:
+# Quick run:
+#   SAMPLES_PER_CATEGORY=5 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
+# Long run:
+#   SAMPLES_PER_CATEGORY=100 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
+# To test only the router:
+#   BENCHMARK_ROUTER_ONLY=true ./run_bench.sh
 
-export ROUTER_API_KEY="1234567890"
-export VLLM_API_KEY="1234567890"
-export ROUTER_ENDPOINT="http://localhost:8801/v1"
-export VLLM_ENDPOINT="http://localhost:8000/v1"
-export ROUTER_MODELS="auto"
-export VLLM_MODELS="openai/gpt-oss-20b"
+set -x -e
+
+export ROUTER_API_KEY="${ROUTER_API_KEY:-1234567890}"
+export VLLM_API_KEY="${VLLM_API_KEY:-1234567890}"
+export ROUTER_ENDPOINT="${ROUTER_ENDPOINT:-http://localhost:8801/v1}"
+export VLLM_ENDPOINT="${VLLM_ENDPOINT:-http://localhost:8000/v1}"
+export ROUTER_MODELS="${ROUTER_MODELS:-auto}"
+export VLLM_MODELS="${VLLM_MODELS:-openai/gpt-oss-20b}"
+export SAMPLES_PER_CATEGORY="${SAMPLES_PER_CATEGORY:-5}"
+export CONCURRENT_REQUESTS="${CONCURRENT_REQUESTS:-4}"
+export BENCHMARK_ROUTER_ONLY="${BENCHMARK_ROUTER_ONLY:-false}"
 
 # Run the benchmark
-python router_reason_bench.py \
-    --run-router \
-    --router-endpoint "$ROUTER_ENDPOINT" \
-    --router-api-key "$ROUTER_API_KEY" \
-    --router-models "$ROUTER_MODELS" \
-    --run-vllm \
-    --vllm-endpoint "$VLLM_ENDPOINT" \
-    --vllm-api-key "$VLLM_API_KEY" \
-    --vllm-models "$VLLM_MODELS" \
-    --samples-per-category 5 \
-    --vllm-exec-modes NR XC \
-    --concurrent-requests 4 \
-    --output-dir results/reasonbench
-
-# Generate plots
-VLLM_MODEL_FIRST="${VLLM_MODELS%% *}"
-ROUTER_MODEL_FIRST="${ROUTER_MODELS%% *}"
-VLLM_MODELS_SAFE="${VLLM_MODEL_FIRST//\//_}"
-ROUTER_MODELS_SAFE="${ROUTER_MODEL_FIRST//\//_}"
-python bench_plot.py \
-    --summary "results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json" \
-    --router-summary "results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json"
+if [ "${BENCHMARK_ROUTER_ONLY}" = "true" ]; then
+    echo "Running router-only benchmark..."
+    python bench/router_reason_bench.py \
+        --run-router \
+        --router-endpoint "$ROUTER_ENDPOINT" \
+        --router-api-key "$ROUTER_API_KEY" \
+        --router-models "$ROUTER_MODELS" \
+        --samples-per-category "$SAMPLES_PER_CATEGORY" \
+        --concurrent-requests "$CONCURRENT_REQUESTS" \
+        --output-dir results/reasonbench
+else
+    echo "Running full benchmark (router + vLLM)..."
+    python bench/router_reason_bench.py \
+        --run-router \
+        --router-endpoint "$ROUTER_ENDPOINT" \
+        --router-api-key "$ROUTER_API_KEY" \
+        --router-models "$ROUTER_MODELS" \
+        --run-vllm \
+        --vllm-endpoint "$VLLM_ENDPOINT" \
+        --vllm-api-key "$VLLM_API_KEY" \
+        --vllm-models "$VLLM_MODELS" \
+        --samples-per-category "$SAMPLES_PER_CATEGORY" \
+        --vllm-exec-modes NR XC \
+        --concurrent-requests "$CONCURRENT_REQUESTS" \
+        --output-dir results/reasonbench
+fi
+
+# Generate plots if summary files exist
+echo "Checking for plot generation..."
+echo "VLLM_MODELS: $VLLM_MODELS"
+echo "ROUTER_MODELS: $ROUTER_MODELS"
+
+# Get first model name and make it path-safe
+VLLM_MODEL_FIRST=$(echo "$VLLM_MODELS" | cut -d' ' -f1)
+ROUTER_MODEL_FIRST=$(echo "$ROUTER_MODELS" | cut -d' ' -f1)
+echo "First models: VLLM=$VLLM_MODEL_FIRST, Router=$ROUTER_MODEL_FIRST"
+
+# Replace / with _ for path safety
+VLLM_MODELS_SAFE=$(echo "$VLLM_MODEL_FIRST" | tr '/' '_')
+ROUTER_MODELS_SAFE=$(echo "$ROUTER_MODEL_FIRST" | tr '/' '_')
+echo "Safe paths: VLLM=$VLLM_MODELS_SAFE, Router=$ROUTER_MODELS_SAFE"
+
+# Construct the full paths
+VLLM_SUMMARY="results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json"
+ROUTER_SUMMARY="results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json"
+echo "Looking for summaries at:"
+echo "VLLM: $VLLM_SUMMARY"
+echo "Router: $ROUTER_SUMMARY"
+
+# Check if at least one summary file exists and generate plots
+if [ -f "$ROUTER_SUMMARY" ]; then
+    echo "Found router summary, generating plots..."
+    if [ -f "$VLLM_SUMMARY" ]; then
+        echo "Found both summaries, generating comparison plots..."
+        python bench/bench_plot.py \
+            --summary "$VLLM_SUMMARY" \
+            --router-summary "$ROUTER_SUMMARY"
+    else
+        echo "vLLM summary not found, generating router-only plots..."
+        python bench/bench_plot.py \
+            --router-summary "$ROUTER_SUMMARY"
+    fi
+else
+    echo "No router summary found, skipping plot generation"
+fi