Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,35 @@ build: rust build-router

# Build the Rust library, bootstrapping the Rust toolchain first if absent.
# The whole recipe runs inside a single `bash -c` invocation so that the
# `. $HOME/.cargo/env` sourcing is visible to the subsequent cargo call
# (each Make recipe line otherwise gets its own shell).
# NOTE: '=https' relies on shell quote-splicing inside the outer single
# quotes; the spliced command curl receives is `--proto =https`, which is
# valid curl syntax.
rust:
	@echo "Ensuring rust is installed..."
	@bash -c 'if ! command -v rustc >/dev/null 2>&1; then \
		echo "rustc not found, installing..."; \
		curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
	fi && \
	if [ -f "$$HOME/.cargo/env" ]; then \
		echo "Loading Rust environment from $$HOME/.cargo/env..." && \
		. $$HOME/.cargo/env; \
	fi && \
	if ! command -v cargo >/dev/null 2>&1; then \
		echo "Error: cargo not found in PATH" && exit 1; \
	fi && \
	echo "Building Rust library..." && \
	cd candle-binding && cargo build --release'

# Build the router binary.
# Depends on `rust` so the candle-binding release library exists first.
# The go build output path (-o ../../bin/router) is relative to
# src/semantic-router after the `cd`, so the binary lands in repo-root bin/.
build-router: rust
	@echo "Building router..."
	@mkdir -p bin
	@cd src/semantic-router && go build -o ../../bin/router cmd/main.go

# Config file path with default; override per-invocation with
# `make run-router CONFIG_FILE=path/to/config.yaml`.
CONFIG_FILE ?= config/config.yaml

# Run the router with the configured config file.
# LD_LIBRARY_PATH must point at the Rust release artifacts so the router can
# dlopen the candle-binding shared library; the export and the run share one
# shell via the line continuation.
# NOTE(review): `download-models` is defined elsewhere in this Makefile —
# outside this chunk's view.
run-router: build-router download-models
	@echo "Running router with config: ${CONFIG_FILE}"
	@export LD_LIBRARY_PATH=${PWD}/candle-binding/target/release && \
	./bin/router -config=${CONFIG_FILE}

# Prepare Envoy
prepare-envoy:
Expand Down
112 changes: 83 additions & 29 deletions bench/run_bench.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,34 +1,88 @@
#!/bin/bash
# Benchmark driver: runs the router (and optionally vLLM) reasoning benchmark,
# then generates comparison plots from whichever summary files were produced.
#
# Example usage:
#   Quick run:
#     SAMPLES_PER_CATEGORY=5 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
#   Long run:
#     SAMPLES_PER_CATEGORY=100 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
#   To test only the router:
#     BENCHMARK_ROUTER_ONLY=true ./run_bench.sh
#
# NOTE(review): the python invocations use bench/-relative paths
# (bench/router_reason_bench.py), which assumes the script is launched from
# the repository root — confirm against the documented `./run_bench.sh` usage.

# -x: trace commands; -e: abort on the first failing command.
set -x -e

# Every knob is environment-overridable, with a sensible default.
export ROUTER_API_KEY="${ROUTER_API_KEY:-1234567890}"
export VLLM_API_KEY="${VLLM_API_KEY:-1234567890}"
export ROUTER_ENDPOINT="${ROUTER_ENDPOINT:-http://localhost:8801/v1}"
export VLLM_ENDPOINT="${VLLM_ENDPOINT:-http://localhost:8000/v1}"
export ROUTER_MODELS="${ROUTER_MODELS:-auto}"
export VLLM_MODELS="${VLLM_MODELS:-openai/gpt-oss-20b}"
export SAMPLES_PER_CATEGORY="${SAMPLES_PER_CATEGORY:-5}"
export CONCURRENT_REQUESTS="${CONCURRENT_REQUESTS:-4}"
export BENCHMARK_ROUTER_ONLY="${BENCHMARK_ROUTER_ONLY:-false}"

# Run the benchmark: router-only, or router + vLLM side-by-side.
if [ "${BENCHMARK_ROUTER_ONLY}" = "true" ]; then
    echo "Running router-only benchmark"
    python bench/router_reason_bench.py \
        --run-router \
        --router-endpoint "$ROUTER_ENDPOINT" \
        --router-api-key "$ROUTER_API_KEY" \
        --router-models "$ROUTER_MODELS" \
        --samples-per-category "$SAMPLES_PER_CATEGORY" \
        --concurrent-requests "$CONCURRENT_REQUESTS" \
        --output-dir results/reasonbench
else
    echo "Running full benchmark (router + vLLM)..."
    python bench/router_reason_bench.py \
        --run-router \
        --router-endpoint "$ROUTER_ENDPOINT" \
        --router-api-key "$ROUTER_API_KEY" \
        --router-models "$ROUTER_MODELS" \
        --run-vllm \
        --vllm-endpoint "$VLLM_ENDPOINT" \
        --vllm-api-key "$VLLM_API_KEY" \
        --vllm-models "$VLLM_MODELS" \
        --samples-per-category "$SAMPLES_PER_CATEGORY" \
        --vllm-exec-modes NR XC \
        --concurrent-requests "$CONCURRENT_REQUESTS" \
        --output-dir results/reasonbench
fi

# Generate plots if summary files exist
echo "Checking for plot generation..."
echo "VLLM_MODELS: $VLLM_MODELS"
echo "ROUTER_MODELS: $ROUTER_MODELS"

# Get first model name and make it path-safe
VLLM_MODEL_FIRST=$(echo "$VLLM_MODELS" | cut -d' ' -f1)
ROUTER_MODEL_FIRST=$(echo "$ROUTER_MODELS" | cut -d' ' -f1)
echo "First models: VLLM=$VLLM_MODEL_FIRST, Router=$ROUTER_MODEL_FIRST"

# Replace / with _ for path safety (model names like openai/gpt-oss-20b
# would otherwise create nested directories).
VLLM_MODELS_SAFE=$(echo "$VLLM_MODEL_FIRST" | tr '/' '_')
ROUTER_MODELS_SAFE=$(echo "$ROUTER_MODEL_FIRST" | tr '/' '_')
echo "Safe paths: VLLM=$VLLM_MODELS_SAFE, Router=$ROUTER_MODELS_SAFE"

# Construct the full paths
VLLM_SUMMARY="results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json"
ROUTER_SUMMARY="results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json"
echo "Looking for summaries at:"
echo "VLLM: $VLLM_SUMMARY"
echo "Router: $ROUTER_SUMMARY"

# Check if at least one summary file exists and generate plots.
# A router summary is required; the vLLM one is optional (router-only runs).
if [ -f "$ROUTER_SUMMARY" ]; then
    echo "Found router summary, generating plots..."
    if [ -f "$VLLM_SUMMARY" ]; then
        echo "Found both summaries, generating comparison plots..."
        python bench/bench_plot.py \
            --summary "$VLLM_SUMMARY" \
            --router-summary "$ROUTER_SUMMARY"
    else
        echo "vLLM summary not found, generating router-only plots..."
        python bench/bench_plot.py \
            --router-summary "$ROUTER_SUMMARY"
    fi
else
    echo "No router summary found, skipping plot generation"
fi
Loading