Commit 15a7437

Merge branch 'main' into feature/classifier-test-framework

2 parents: 39911f4 + 80f062b

File tree

29 files changed: +2466 additions, −289 deletions


CONTRIBUTING.md

Lines changed: 27 additions & 1 deletion

````diff
@@ -10,6 +10,7 @@ Thank you for your interest in contributing to the vLLM Semantic Router project!
 - [Running Tests](#running-tests)
 - [Development Workflow](#development-workflow)
 - [Code Style and Standards](#code-style-and-standards)
+- [Code Quality Checks](#code-quality-checks)
 - [Submitting Changes](#submitting-changes)
 - [Project Structure](#project-structure)

@@ -29,7 +30,7 @@ Before you begin, ensure you have the following installed:

 1. **Clone the repository:**
    ```bash
-   git clone <repository-url>
+   git clone https://github.com/vllm-project/semantic-router.git
    cd semantic-router
    ```

@@ -191,6 +192,31 @@ The test suite includes:

 ## Code Style and Standards

+### Code Quality Checks
+
+Before submitting a PR, please run the pre-commit hooks to ensure code quality and consistency. **These checks are mandatory** and will be automatically run on every commit once installed.
+
+**Step 1: Install pre-commit tool**
+```bash
+# Using pip (recommended)
+pip install pre-commit
+
+# Or using conda
+conda install -c conda-forge pre-commit
+
+# Or using homebrew (macOS)
+brew install pre-commit
+```
+
+**Step 2: Install pre-commit hooks for this repository**
+```bash
+# Install pre-commit hooks
+pre-commit install
+
+# Run all checks
+pre-commit run --all-files
+```
+
 ### Go Code
 - Follow standard Go formatting (`gofmt`)
 - Use meaningful variable and function names
````

Makefile

Lines changed: 20 additions & 5 deletions

````diff
@@ -11,20 +11,35 @@ build: rust build-router

 # Build the Rust library
 rust:
-	@echo "Building Rust library..."
-	cd candle-binding && cargo build --release
+	@echo "Ensuring rust is installed..."
+	@bash -c 'if ! command -v rustc >/dev/null 2>&1; then \
+		echo "rustc not found, installing..."; \
+		curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \
+	fi && \
+	if [ -f "$$HOME/.cargo/env" ]; then \
+		echo "Loading Rust environment from $$HOME/.cargo/env..." && \
+		. $$HOME/.cargo/env; \
+	fi && \
+	if ! command -v cargo >/dev/null 2>&1; then \
+		echo "Error: cargo not found in PATH" && exit 1; \
+	fi && \
+	echo "Building Rust library..." && \
+	cd candle-binding && cargo build --release'

 # Build router
 build-router: rust
 	@echo "Building router..."
 	@mkdir -p bin
 	@cd src/semantic-router && go build -o ../../bin/router cmd/main.go

+# Config file path with default
+CONFIG_FILE ?= config/config.yaml
+
 # Run the router
-run-router: build-router
-	@echo "Running router..."
+run-router: build-router download-models
+	@echo "Running router with config: ${CONFIG_FILE}"
 	@export LD_LIBRARY_PATH=${PWD}/candle-binding/target/release && \
-	./bin/router -config=config/config.yaml
+	./bin/router -config=${CONFIG_FILE}

 # Prepare Envoy
 prepare-envoy:
````
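The new `CONFIG_FILE ?= config/config.yaml` line uses Make's conditional assignment, so callers can override the config path from the environment or the command line (e.g. `make run-router CONFIG_FILE=config/other.yaml`; that file name is just illustrative). A minimal sketch of the same default-with-override behavior in POSIX shell:

```shell
#!/bin/sh
# Use the caller-supplied CONFIG_FILE if set and non-empty, otherwise
# fall back to the repo default -- the shell analogue of Make's
# `CONFIG_FILE ?= config/config.yaml`.
CONFIG_FILE="${CONFIG_FILE:-config/config.yaml}"
echo "config: $CONFIG_FILE"
```

Running it with `CONFIG_FILE` unset prints the default path; exporting `CONFIG_FILE` first makes the override win, just as with `make`.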

README.md

Lines changed: 7 additions & 1 deletion

````diff
@@ -1,6 +1,6 @@
 <div align="center">

-<img src="website/static/img/repo.png" alt="vLLM Semantic Router"/>
+<img src="website/static/img/repo.png" alt="vLLM Semantic Router" width="80%"/>

 [![Documentation](https://img.shields.io/badge/docs-read%20the%20docs-blue)](https://vllm-semantic-router.com)
 [![Hugging Face](https://img.shields.io/badge/🤗%20Hugging%20Face-Community-yellow)](https://huggingface.co/LLM-Semantic-Router)

@@ -81,3 +81,9 @@ If you find Semantic Router helpful in your research or projects, please conside
 howpublished={\url{https://github.com/vllm-project/semantic-router}},
 }
 ```
+
+## Star History 🔥
+
+We opened the project at Aug 31, 2025. We love open source and collaboration ❤️
+
+[![Star History Chart](https://api.star-history.com/svg?repos=vllm-project/semantic-router&type=Date)](https://www.star-history.com/#vllm-project/semantic-router&Date)
````

bench/run_bench.sh

mode changed: 100644 → 100755
Lines changed: 83 additions & 29 deletions

````diff
@@ -1,34 +1,88 @@
 #!/bin/bash

-set -x
+# Example usage:
+# Quick run:
+# SAMPLES_PER_CATEGORY=5 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
+# Long run:
+# SAMPLES_PER_CATEGORY=100 CONCURRENT_REQUESTS=4 VLLM_MODELS="openai/gpt-oss-20b" ROUTER_MODELS="auto" ./run_bench.sh
+# To test only router:
+# BENCHMARK_ROUTER_ONLY=true ./run_bench.sh

-export ROUTER_API_KEY="1234567890"
-export VLLM_API_KEY="1234567890"
-export ROUTER_ENDPOINT="http://localhost:8801/v1"
-export VLLM_ENDPOINT="http://localhost:8000/v1"
-export ROUTER_MODELS="auto"
-export VLLM_MODELS="openai/gpt-oss-20b"
+set -x -e
+
+export ROUTER_API_KEY="${ROUTER_API_KEY:-1234567890}"
+export VLLM_API_KEY="${VLLM_API_KEY:-1234567890}"
+export ROUTER_ENDPOINT="${ROUTER_ENDPOINT:-http://localhost:8801/v1}"
+export VLLM_ENDPOINT="${VLLM_ENDPOINT:-http://localhost:8000/v1}"
+export ROUTER_MODELS="${ROUTER_MODELS:-auto}"
+export VLLM_MODELS="${VLLM_MODELS:-openai/gpt-oss-20b}"
+export SAMPLES_PER_CATEGORY="${SAMPLES_PER_CATEGORY:-5}"
+export CONCURRENT_REQUESTS="${CONCURRENT_REQUESTS:-4}"
+export BENCHMARK_ROUTER_ONLY="${BENCHMARK_ROUTER_ONLY:-false}"

 # Run the benchmark
-python router_reason_bench.py \
-    --run-router \
-    --router-endpoint "$ROUTER_ENDPOINT" \
-    --router-api-key "$ROUTER_API_KEY" \
-    --router-models "$ROUTER_MODELS" \
-    --run-vllm \
-    --vllm-endpoint "$VLLM_ENDPOINT" \
-    --vllm-api-key "$VLLM_API_KEY" \
-    --vllm-models "$VLLM_MODELS" \
-    --samples-per-category 5 \
-    --vllm-exec-modes NR XC \
-    --concurrent-requests 4 \
-    --output-dir results/reasonbench
-
-# Generate plots
-VLLM_MODEL_FIRST="${VLLM_MODELS%% *}"
-ROUTER_MODEL_FIRST="${ROUTER_MODELS%% *}"
-VLLM_MODELS_SAFE="${VLLM_MODEL_FIRST//\//_}"
-ROUTER_MODELS_SAFE="${ROUTER_MODEL_FIRST//\//_}"
-python bench_plot.py \
-    --summary "results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json" \
-    --router-summary "results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json"
+if [ "${BENCHMARK_ROUTER_ONLY}" = "true" ]; then
+    echo "Running router-only benchmark"
+    python bench/router_reason_bench.py \
+        --run-router \
+        --router-endpoint "$ROUTER_ENDPOINT" \
+        --router-api-key "$ROUTER_API_KEY" \
+        --router-models "$ROUTER_MODELS" \
+        --samples-per-category "$SAMPLES_PER_CATEGORY" \
+        --concurrent-requests "$CONCURRENT_REQUESTS" \
+        --output-dir results/reasonbench
+else
+    echo "Running full benchmark (router + vLLM)..."
+    python bench/router_reason_bench.py \
+        --run-router \
+        --router-endpoint "$ROUTER_ENDPOINT" \
+        --router-api-key "$ROUTER_API_KEY" \
+        --router-models "$ROUTER_MODELS" \
+        --run-vllm \
+        --vllm-endpoint "$VLLM_ENDPOINT" \
+        --vllm-api-key "$VLLM_API_KEY" \
+        --vllm-models "$VLLM_MODELS" \
+        --samples-per-category "$SAMPLES_PER_CATEGORY" \
+        --vllm-exec-modes NR XC \
+        --concurrent-requests "$CONCURRENT_REQUESTS" \
+        --output-dir results/reasonbench
+fi
+
+# Generate plots if summary files exist
+echo "Checking for plot generation..."
+echo "VLLM_MODELS: $VLLM_MODELS"
+echo "ROUTER_MODELS: $ROUTER_MODELS"
+
+# Get first model name and make it path-safe
+VLLM_MODEL_FIRST=$(echo "$VLLM_MODELS" | cut -d' ' -f1)
+ROUTER_MODEL_FIRST=$(echo "$ROUTER_MODELS" | cut -d' ' -f1)
+echo "First models: VLLM=$VLLM_MODEL_FIRST, Router=$ROUTER_MODEL_FIRST"
+
+# Replace / with _ for path safety
+VLLM_MODELS_SAFE=$(echo "$VLLM_MODEL_FIRST" | tr '/' '_')
+ROUTER_MODELS_SAFE=$(echo "$ROUTER_MODEL_FIRST" | tr '/' '_')
+echo "Safe paths: VLLM=$VLLM_MODELS_SAFE, Router=$ROUTER_MODELS_SAFE"
+
+# Construct the full paths
+VLLM_SUMMARY="results/reasonbench/vllm::${VLLM_MODELS_SAFE}/summary.json"
+ROUTER_SUMMARY="results/reasonbench/router::${ROUTER_MODELS_SAFE}/summary.json"
+echo "Looking for summaries at:"
+echo "VLLM: $VLLM_SUMMARY"
+echo "Router: $ROUTER_SUMMARY"
+
+# Check if at least one summary file exists and generate plots
+if [ -f "$ROUTER_SUMMARY" ]; then
+    echo "Found router summary, generating plots..."
+    if [ -f "$VLLM_SUMMARY" ]; then
+        echo "Found both summaries, generating comparison plots..."
+        python bench/bench_plot.py \
+            --summary "$VLLM_SUMMARY" \
+            --router-summary "$ROUTER_SUMMARY"
+    else
+        echo "vLLM summary not found, generating router-only plots..."
+        python bench/bench_plot.py \
+            --router-summary "$ROUTER_SUMMARY"
+    fi
+else
+    echo "No router summary found, skipping plot generation"
+fi
````
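The summary paths above depend on turning the first model name into a filesystem-safe string; the old script did this with bash parameter expansion (`%% *`, `//\//_`), the new one with `cut` and `tr` so it also works under plain `sh`. A small sketch of that transformation:

```shell
#!/bin/sh
# Take the first (space-separated) model name and replace '/' with '_',
# mirroring how run_bench.sh derives its results directory names.
VLLM_MODELS="openai/gpt-oss-20b other/model"
first=$(echo "$VLLM_MODELS" | cut -d' ' -f1)
safe=$(echo "$first" | tr '/' '_')
echo "results/reasonbench/vllm::${safe}/summary.json"
# -> results/reasonbench/vllm::openai_gpt-oss-20b/summary.json
```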

config/config.yaml

Lines changed: 19 additions & 1 deletion

````diff
@@ -237,4 +237,22 @@ categories:
     - model: phi4
       score: 0.2
 default_model: mistral-small3.1
-default_reasoning_effort: medium  # Default reasoning effort level (low, medium, high)
+default_reasoning_effort: medium  # Default reasoning effort level (low, medium, high)
+
+# API Configuration
+api:
+  batch_classification:
+    max_batch_size: 100  # Maximum number of texts in a single batch
+    concurrency_threshold: 5  # Switch to concurrent processing when batch size > this value
+    max_concurrency: 8  # Maximum number of concurrent goroutines
+
+    # Metrics configuration for monitoring batch classification performance
+    metrics:
+      enabled: true  # Enable comprehensive metrics collection
+      detailed_goroutine_tracking: true  # Track individual goroutine lifecycle
+      high_resolution_timing: false  # Use nanosecond precision timing
+      sample_rate: 1.0  # Collect metrics for all requests (1.0 = 100%, 0.5 = 50%)
+
+      # Histogram buckets for metrics (directly configure what you need)
+      duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]
+      size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]
````
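The `sample_rate` field trades metric completeness for overhead: 1.0 records every request, 0.5 roughly half. A hypothetical sketch of how such a gate could be evaluated (the router's actual implementation is in Go and is not part of this diff; `awk` is used here only for the floating-point comparison):

```shell
#!/bin/sh
# Hypothetical sampling gate: record metrics when a uniform draw r in
# [0,1) falls below sample_rate. With sample_rate=1.0 every request is
# recorded; with 0.5 roughly half are.
sample_rate=1.0
r=0.42  # stand-in for a fresh per-request random draw
if awk -v r="$r" -v s="$sample_rate" 'BEGIN { exit !(r < s) }'; then
    echo "record metrics"
else
    echo "skip metrics"
fi
```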
