diff --git a/.github/workflows/router-e2e-test.yml b/.github/workflows/router-e2e-test.yml
index b689223ad..ab63b7c48 100644
--- a/.github/workflows/router-e2e-test.yml
+++ b/.github/workflows/router-e2e-test.yml
@@ -231,3 +231,62 @@ jobs:
           pkill -f "python3 -m src.vllm_router.app" || true

       - run: echo "🏁 Static discovery e2e test job status is ${{ job.status }}."
+
+  router-stress-test:
+    runs-on: self-hosted
+    needs: e2e-test
+    if: github.event.pull_request.draft == false
+    env:
+      LOG_DIR: /tmp/router-stress-test-${{ github.event.pull_request.number || 'main' }}
+
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+
+      - name: Install Apache Bench
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y apache2-utils
+
+      - name: Run Router Stress Test
+        env:
+          PYTHONPATH: ${{ github.workspace }}/src
+          VLLM_ROUTER_STRESS_TEST_MODE: true
+        run: |
+          echo "🧪 Running router stress test with mock backends"
+          chmod +x tests/e2e/router-stress-test.sh
+          ./tests/e2e/router-stress-test.sh \
+            --concurrent 20000 \
+            --requests 100000 \
+            --port 30080 \
+            --log-dir "$LOG_DIR" \
+            --model "facebook/opt-125m" \
+            --backend1-port 8000 \
+            --backend2-port 8001
+        timeout-minutes: 10
+
+      - name: Archive router stress test results and logs
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: router-stress-test-results-pr-${{ github.event.pull_request.number || 'main' }}
+          path: |
+            ${{ env.LOG_DIR }}/*
+
+      - name: Cleanup router processes
+        if: always()
+        run: |
+          echo "🧹 Cleaning up router processes"
+          pkill -f "python3 -m src.vllm_router.app" || true
+
+      - run: echo "🏁 Router stress test job status is ${{ job.status }}."
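For a local dry run of the "Run Router Stress Test" step above, a minimal sketch (assuming the repository root as the working directory and apache2-utils installed; the reduced --concurrent/--requests values are illustrative only, not the workflow's settings):

# Mirror the workflow environment, then invoke the script added in this PR with a small load.
export PYTHONPATH="$PWD/src"
export VLLM_ROUTER_STRESS_TEST_MODE=true
chmod +x tests/e2e/router-stress-test.sh
./tests/e2e/router-stress-test.sh \
  --concurrent 200 \
  --requests 2000 \
  --port 30080 \
  --log-dir /tmp/router-stress-test-local \
  --model "facebook/opt-125m" \
  --backend1-port 8000 \
  --backend2-port 8001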
diff --git a/src/vllm_router/services/request_service/request.py b/src/vllm_router/services/request_service/request.py
index 46969b2b2..4b130d6ab 100644
--- a/src/vllm_router/services/request_service/request.py
+++ b/src/vllm_router/services/request_service/request.py
@@ -95,6 +95,24 @@ async def process_request(
     request.app.state.request_stats_monitor.on_new_request(
         backend_url, request_id, start_time
     )
+
+    # Check if stress test mode is enabled; used by tests/e2e/router-stress-test.sh
+    if os.getenv("VLLM_ROUTER_STRESS_TEST_MODE", "false").lower() == "true":
+        # Mock response for stress testing - skip backend calls
+        mock_headers = {"content-type": "application/json", "x-request-id": request_id}
+        mock_response = b'{"id":"test","object":"chat.completion","choices":[{"message":{"role":"assistant","content":"Test"},"index":0,"finish_reason":"stop"}]}'
+
+        # Yield headers and mock response
+        yield mock_headers, 200
+        request.app.state.request_stats_monitor.on_request_response(
+            backend_url, request_id, time.time()
+        )
+        yield mock_response
+        request.app.state.request_stats_monitor.on_request_complete(
+            backend_url, request_id, time.time()
+        )
+        return
+
     # Check if this is a streaming request
     is_streaming = False
     try:
diff --git a/tests/e2e/router-stress-test.sh b/tests/e2e/router-stress-test.sh
new file mode 100755
index 000000000..107f64d92
--- /dev/null
+++ b/tests/e2e/router-stress-test.sh
@@ -0,0 +1,299 @@
+#!/bin/bash
+
+# Router Stress Test - Pure Router Logic Testing
+# Tests round-robin routing logic under high concurrent loads
+#
+# IMPORTANT NOTES:
+# - This test uses MOCK backends and MOCK responses (no real vLLM servers)
+# - Backend ports are dummy placeholders - no actual services run on them
+# - Model names are dummy placeholders - no real models are loaded
+# - When VLLM_ROUTER_STRESS_TEST_MODE=true, the router returns mock responses
+#   instead of forwarding requests to backends (see src/vllm_router/services/request_service/request.py)
+# - This test validates ONLY the router's routing logic, load balancing, and performance
+#   under high concurrent loads, not actual inference capabilities
+#
+# Purpose: Verify that the router correctly distributes requests using round-robin
+# logic and can handle high concurrency without routing logic failures.
+#
+# TODO: will add tests for prefix-aware and session-based routers later
+# TODO: will add performance comparison tests & thresholds later
+
+set -euo pipefail
+
+# Default values
+ROUTER_PORT=30080
+CONCURRENT=20000
+REQUESTS=100000
+LOG_DIR="/tmp/router-stress-logs"
+MODEL="facebook/opt-125m"
+BACKEND1_PORT=8000
+BACKEND2_PORT=8001
+BACKENDS_URL="http://localhost:$BACKEND1_PORT,http://localhost:$BACKEND2_PORT"
+
+# Colors for output
+GREEN='\033[0;32m'
+RED='\033[0;31m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+print_status() {
+    echo -e "${GREEN}[INFO]${NC} $1"
+}
+
+print_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+print_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+show_usage() {
+    cat << EOF
+Router Stress Test - Tests round-robin routing logic
+
+Usage: $0 [options]
+
+Options:
+  -c, --concurrent N       Concurrent requests (default: 20000)
+  -n, --requests N         Total requests (default: 100000)
+  -p, --port PORT          Router port (default: 30080)
+  -l, --log-dir DIR        Log directory (default: /tmp/router-stress-logs)
+  -m, --model MODEL        Model to use (default: facebook/opt-125m)
+  --backend1-port PORT     First backend port (default: 8000)
+  --backend2-port PORT     Second backend port (default: 8001)
+  -h, --help               Show this help
+
+Examples:
+  $0                                            # Default test (20000 concurrent, 100000 requests)
+  $0 -c 500 -n 20000                            # Reduced load test
+  $0 -p 8080 -c 100                             # Different port, lower load
+  $0 --backend1-port 9000 --backend2-port 9001  # Custom backend ports
+
+Prerequisites:
+  - Router must be started with VLLM_ROUTER_STRESS_TEST_MODE=true
+EOF
+}
+
+# Check if Apache Bench is available
+check_ab() {
+    if ! command -v ab >/dev/null 2>&1; then
+        print_error "Apache Bench (ab) not found!"
+        print_error "Install with: sudo apt-get install apache2-utils"
+        exit 1
+    fi
+}
+
+# Function to cleanup processes
+cleanup() {
+    print_status "Cleaning up router processes..."
+    pkill -f "python3 -m src.vllm_router.app" || true
+    sleep 2
+}
+
+# Function to start router
+start_router() {
+    local log_file="$LOG_DIR/router.log"
+
+    print_status "Starting router with round-robin routing (stress test mode)"
+
+    # Create log directory
+    mkdir -p "$(dirname "$log_file")"
+
+    # Set stress test mode
+    export VLLM_ROUTER_STRESS_TEST_MODE=true
+
+    # Start router with detailed logging
+    python3 -m src.vllm_router.app --port "$ROUTER_PORT" \
+        --service-discovery static \
+        --static-backends "$BACKENDS_URL" \
+        --static-models "$MODEL,$MODEL" \
+        --static-model-types "chat,chat" \
+        --routing-logic roundrobin \
+        --log-stats \
+        --log-stats-interval 5 > "$log_file" 2>&1 &
+
+    ROUTER_PID=$!
+    print_status "Router started with PID: $ROUTER_PID"
+
+    # Wait for router to be ready
+    print_status "Waiting for router to be ready..."
+    timeout 30 bash -c "until curl -s http://localhost:$ROUTER_PORT/v1/models > /dev/null 2>&1; do sleep 1; done" || {
+        print_error "Router failed to start within 30 seconds"
+        print_error "Router log:"
+        tail -20 "$log_file" || true
+        exit 1
+    }
+    print_status "Router is ready"
+}
+
+# Function to run stress test
+run_stress_test() {
+    print_status "Running stress test with Apache Bench"
+    print_status "Concurrent: $CONCURRENT, Total: $REQUESTS"
+
+    # Create payload file
+    local payload_file="/tmp/stress_payload.json"
+    cat > "$payload_file" << EOF
+{
+    "model": "$MODEL",
+    "messages": [
+        {"role": "user", "content": "Test message for stress testing"}
+    ],
+    "max_tokens": 10,
+    "temperature": 0.7
+}
+EOF
+
+    # Run Apache Bench
+    ab -c "$CONCURRENT" \
+        -n "$REQUESTS" \
+        -p "$payload_file" \
+        -T "application/json" \
+        -H "Authorization: Bearer test" \
+        -H "x-user-id: stress-test-user" \
+        "http://localhost:$ROUTER_PORT/v1/chat/completions"
+
+    # Clean up payload file
+    rm -f "$payload_file"
+
+    print_status "Stress test completed"
+
+    # Small delay to ensure all logs are written
+    sleep 2
+}
+
+# Function to check round-robin correctness
+check_roundrobin_correctness() {
+    local log_file="$LOG_DIR/router.log"
+
+    print_status "Checking round-robin routing correctness..."
+
+    if [ ! -f "$log_file" ]; then
+        print_error "Router log file not found: $log_file"
+        return 1
+    fi
+
+    # Extract backend routing decisions from logs
+    # Look for "Routing request ... to http://localhost:XXXX"
+    # Note: grep -c already prints 0 when there is no match; "|| true" only
+    # suppresses its non-zero exit status so set -e does not abort the script.
+    local backend1_count
+    backend1_count=$(grep -c "to http://localhost:$BACKEND1_PORT" "$log_file" || true)
+
+    local backend2_count
+    backend2_count=$(grep -c "to http://localhost:$BACKEND2_PORT" "$log_file" || true)
+
+    local total_routed=$((backend1_count + backend2_count))
+
+    print_status "Round-robin routing results:"
+    print_status "  Backend localhost:$BACKEND1_PORT: $backend1_count requests"
+    print_status "  Backend localhost:$BACKEND2_PORT: $backend2_count requests"
+    print_status "  Total routed: $total_routed requests"
+
+    if [ "$total_routed" -eq 0 ]; then
+        print_error "No routing decisions found in logs"
+        return 1
+    fi
+
+    # Calculate percentages
+    local backend1_pct=$((backend1_count * 100 / total_routed))
+    local backend2_pct=$((backend2_count * 100 / total_routed))
+
+    print_status "  Backend localhost:$BACKEND1_PORT: ${backend1_pct}%"
+    print_status "  Backend localhost:$BACKEND2_PORT: ${backend2_pct}%"
+
+    # Check if distribution is roughly even (within 20% tolerance)
+    local diff=$((backend1_pct > backend2_pct ? backend1_pct - backend2_pct : backend2_pct - backend1_pct))
+
+    if [ "$diff" -le 20 ]; then
+        print_status "✅ Round-robin routing is working correctly (${diff}% difference)"
+        return 0
+    else
+        print_error "❌ Round-robin routing appears uneven (${diff}% difference)"
+        print_status "Last 10 routing decisions from logs:"
+        grep "Routing request.*to http://localhost:" "$log_file" | tail -10 | sed 's/^/  /' || true
+        return 1
+    fi
+}
+
+# Function to show log summary
+show_log_summary() {
+    local log_file="$LOG_DIR/router.log"
+
+    if [ -f "$log_file" ]; then
+        print_status "Log summary (last 20 lines):"
+        tail -20 "$log_file" | sed 's/^/  /'
+    fi
+}
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        -c|--concurrent)
+            CONCURRENT="$2"
+            shift 2
+            ;;
+        -n|--requests)
+            REQUESTS="$2"
+            shift 2
+            ;;
+        -p|--port)
+            ROUTER_PORT="$2"
+            shift 2
+            ;;
+        -l|--log-dir)
+            LOG_DIR="$2"
+            shift 2
+            ;;
+        -m|--model)
+            MODEL="$2"
+            shift 2
+            ;;
+        --backend1-port)
+            BACKEND1_PORT="$2"
+            shift 2
+            ;;
+        --backend2-port)
+            BACKEND2_PORT="$2"
+            shift 2
+            ;;
+        -h|--help)
+            show_usage
+            exit 0
+            ;;
+        *)
+            print_error "Unknown option: $1"
+            show_usage
+            exit 1
+            ;;
+    esac
+done
+
+# Set trap for cleanup
+trap cleanup EXIT
+
+# Update backends URL with final port values
+BACKENDS_URL="http://localhost:$BACKEND1_PORT,http://localhost:$BACKEND2_PORT"
+
+# Check prerequisites
+print_status "Checking prerequisites..."
+check_ab
+
+print_status "Router stress test configuration:"
+print_status "  Concurrent requests: $CONCURRENT"
+print_status "  Total requests: $REQUESTS"
+print_status "  Router port: $ROUTER_PORT"
+print_status "  Backend ports: $BACKEND1_PORT, $BACKEND2_PORT"
+print_status "  Model: $MODEL"
+
+# Run test
+start_router
+run_stress_test
+
+# Check correctness and show results
+if check_roundrobin_correctness; then
+    print_status "Test completed successfully!"
+else
+    print_error "Test completed but round-robin routing correctness check failed!"
+    show_log_summary
+    exit 1
+fi
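As a quick spot check of the round-robin behavior outside a full Apache Bench run, a minimal sketch (assuming the router was started by start_router above with this script's defaults: port 30080, log directory /tmp/router-stress-logs, VLLM_ROUTER_STRESS_TEST_MODE=true, and routing decisions logged in the "Routing request ... to http://localhost:<port>" form that check_roundrobin_correctness greps for):

# Send a handful of requests through the router's OpenAI-compatible endpoint,
# then count how many were routed to each mock backend in the router log.
for i in $(seq 1 10); do
    curl -s -X POST "http://localhost:30080/v1/chat/completions" \
        -H "Content-Type: application/json" \
        -H "Authorization: Bearer test" \
        -d '{"model":"facebook/opt-125m","messages":[{"role":"user","content":"ping"}],"max_tokens":10}' > /dev/null
done
grep -c "to http://localhost:8000" /tmp/router-stress-logs/router.log || true
grep -c "to http://localhost:8001" /tmp/router-stress-logs/router.log || true
# With round-robin routing the two counts should be roughly equal (about 5 and 5 here).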