From d8e3124b19f8e2b5103094ebc4d1de33878902bc Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Thu, 19 Jun 2025 00:47:57 +0000 Subject: [PATCH 01/12] [CI] Add prefix aware routing test Signed-off-by: Rui Zhang --- .github/run-k8s-routing-test.sh | 368 +++++++++++++++++ .github/workflows/router-e2e-test.yml | 103 +++++ tests/e2e/test-prefix-aware-routing.py | 537 +++++++++++++++++++++++++ 3 files changed, 1008 insertions(+) create mode 100644 .github/run-k8s-routing-test.sh create mode 100644 tests/e2e/test-prefix-aware-routing.py diff --git a/.github/run-k8s-routing-test.sh b/.github/run-k8s-routing-test.sh new file mode 100644 index 000000000..a16a20413 --- /dev/null +++ b/.github/run-k8s-routing-test.sh @@ -0,0 +1,368 @@ +#!/bin/bash + +# Script to run k8s routing tests with different routing logic +# Usage: ./run-k8s-routing-test.sh [options] + +set -euo pipefail + +# Default values +TEST_TYPE="" +HELM_VALUES_FILE="" +TEST_SCRIPT="" +TEST_ARGS="" +MODEL="facebook/opt-125m" +NUM_ROUNDS=3 +NUM_REQUESTS_PER_SAMPLE=3 +CHUNK_SIZE=128 +VERBOSE="" +DEBUG="" +TIMEOUT_MINUTES=10 + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Helper functions +print_status() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +# Function to show usage +show_usage() { + cat << EOF +Usage: $0 [options] + +Test Type Options: + session - Test sticky routing with session management + prefixaware - Test prefix-aware routing + kvaware - Test KV-aware routing + disaggregated-prefill - Test disaggregated prefill routing + roundrobin - Test round-robin routing + all - Run all available tests sequentially + +Options: + -m, --model MODEL Model to use (default: facebook/opt-125m) + -n, --num-rounds N Number of rounds for sticky routing (default: 3) + -r, --num-requests N Number of requests per sample (default: 3) + -c, --chunk-size N Chunk size for prefix-aware routing (default: 128) + -v, --verbose Enable verbose output + -d, --debug Enable debug mode + -t, --timeout N Timeout in minutes (default: 10) + -h, --help Show this help message + +Examples: + $0 session --model "facebook/opt-125m" --num-rounds 5 --verbose + $0 prefixaware --model "facebook/opt-125m" --chunk-size 256 --debug + $0 session --verbose --debug + $0 all --verbose --debug + +EOF +} + +# Function to deploy helm chart +deploy_helm_chart() { + local values_file=$1 + print_status "๐Ÿš€ Deploying setup with helm using $values_file" + if helm list -q | grep -q "^vllm$"; then + print_status "๐Ÿ“ฆ Upgrading existing vllm deployment" + helm upgrade vllm ./helm -f "$values_file" + else + print_status "๐Ÿš€ Installing new vllm deployment" + helm install vllm ./helm -f "$values_file" + fi +} + +# Function to wait for pods +wait_for_pods() { + print_status "โณ Waiting for pods to be ready" + chmod +x .github/wait-for-pods.sh + ./.github/wait-for-pods.sh --pod-prefix vllm --timeout 300 --verbose +} + +# Function to run test +run_test() { + local test_type=$1 + local test_script=$2 + local test_args=$3 + + print_status "๐Ÿงช Running $test_type test" + + # Make test script executable + chmod +x "$test_script" + + # Build test command + local test_cmd="" + case $test_type in + "session") + test_cmd="./$test_script --model \"$MODEL\" --num-rounds $NUM_ROUNDS" + ;; + "prefixaware") + test_cmd="python $test_script --model \"$MODEL\" --num-requests-per-sample $NUM_REQUESTS_PER_SAMPLE --chunk-size $CHUNK_SIZE" + ;; + "roundrobin") + test_cmd="python $test_script --model \"$MODEL\" --num-requests-per-sample $NUM_REQUESTS_PER_SAMPLE" + ;; + "kvaware") + test_cmd="python $test_script --model \"$MODEL\" --num-requests-per-sample $NUM_REQUESTS_PER_SAMPLE" + ;; + "disaggregated-prefill") + test_cmd="python $test_script --model \"$MODEL\" --num-requests-per-sample $NUM_REQUESTS_PER_SAMPLE" + ;; + *) + test_cmd="python $test_script" + ;; + esac + + # Add common arguments + if [ "$VERBOSE" = "true" ]; then + test_cmd="$test_cmd --verbose" + fi + + if [ "$DEBUG" = "true" ]; then + test_cmd="$test_cmd --debug" + fi + + # Add custom test arguments + if [ -n "$test_args" ]; then + test_cmd="$test_cmd $test_args" + fi + + print_status "Executing: $test_cmd" + timeout ${TIMEOUT_MINUTES}m bash -c "$test_cmd" +} + +# Function to collect debug logs +collect_debug_logs() { + local test_type=$1 + print_status "๐Ÿ“‹ Collecting logs for debugging" + mkdir -p debug-logs + # Get router logs + kubectl logs -l app.kubernetes.io/component=router --tail=100 > debug-logs/router.log || true + # Get serving engine logs + kubectl logs -l app.kubernetes.io/component=serving-engine --tail=100 > debug-logs/serving-engines.log || true + # Get pod status + kubectl get pods -o wide > debug-logs/pod-status.txt || true + # Get services + kubectl get svc > debug-logs/services.txt || true +} + +# Function to cleanup resources +cleanup_resources() { + print_status "๐Ÿงน Cleaning up resources" + helm uninstall vllm || true + sudo docker image prune -f || true +} + +# Function to run complete test +run_complete_test() { + local test_type=$1 + local helm_values_file=$2 + local test_script=$3 + local test_args=$4 + + print_status "==========================================" + print_status "Starting $test_type test" + print_status "==========================================" + + # Deploy helm chart + deploy_helm_chart "$helm_values_file" + + # Wait for pods + wait_for_pods + + # Run test + if run_test "$test_type" "$test_script" "$test_args"; then + print_status "โœ… $test_type test completed successfully" + else + print_error "โŒ $test_type test failed" + return 1 + fi + + # Collect debug logs + collect_debug_logs "$test_type" + + print_status "==========================================" + print_status "$test_type test completed" + print_status "==========================================" +} + +# Function to run all tests +run_all_tests() { + print_status "๐Ÿš€ Starting all k8s routing tests" + + # Define all available test types + local all_test_types=("session" "prefixaware" "roundrobin" "kvaware" "disaggregated-prefill") + local failed_tests=() + local successful_tests=() + + for test_type in "${all_test_types[@]}"; do + print_status "==========================================" + print_status "Running $test_type test" + print_status "==========================================" + + # Set configuration for this test type + case $test_type in + "session") + local helm_values_file=".github/values-06-session-routing.yaml" + local test_script="tests/e2e/test-sticky-routing.sh" + ;; + "prefixaware") + local helm_values_file=".github/values-07-prefix-routing.yaml" + local test_script="tests/e2e/test-prefix-aware-routing.py" + ;; + "roundrobin") + local helm_values_file=".github/values-08-round-robin-routing.yaml" + local test_script="tests/e2e/test-round-robin-routing.py" + ;; + "kvaware") + local helm_values_file=".github/values-09-kv-aware-routing.yaml" + local test_script="tests/e2e/test-kv-aware-routing.py" + ;; + "disaggregated-prefill") + local helm_values_file=".github/values-10-disaggregated-prefill-routing.yaml" + local test_script="tests/e2e/test-disaggregated-prefill-routing.py" + ;; + *) + print_warning "Unknown test type: $test_type, skipping" + continue + ;; + esac + + # Run the test + if run_complete_test "$test_type" "$helm_values_file" "$test_script" ""; then + print_status "โœ… $test_type test passed" + successful_tests+=("$test_type") + else + print_error "โŒ $test_type test failed" + failed_tests+=("$test_type") + fi + + # Small delay between tests + sleep 5 + done + + # Report final results + print_status "==========================================" + print_status "All Tests Summary" + print_status "==========================================" + + if [ ${#successful_tests[@]} -gt 0 ]; then + print_status "โœ… Successful tests: ${successful_tests[*]}" + fi + + if [ ${#failed_tests[@]} -gt 0 ]; then + print_error "โŒ Failed tests: ${failed_tests[*]}" + fi + + if [ ${#failed_tests[@]} -eq 0 ]; then + print_status "๐ŸŽ‰ All tests passed!" + return 0 + else + print_error "๐Ÿ’ฅ Some tests failed" + return 1 + fi +} + +# Parse command line arguments +if [ $# -eq 0 ]; then + show_usage + exit 1 +fi + +# Check if first argument is help +if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then + show_usage + exit 0 +fi + +# Get test type +TEST_TYPE="$1" +shift + +# Parse remaining options +while [[ $# -gt 0 ]]; do + case $1 in + -m|--model) + MODEL="$2" + shift 2 + ;; + -n|--num-rounds) + NUM_ROUNDS="$2" + shift 2 + ;; + -r|--num-requests) + NUM_REQUESTS_PER_SAMPLE="$2" + shift 2 + ;; + -c|--chunk-size) + CHUNK_SIZE="$2" + shift 2 + ;; + -v|--verbose) + VERBOSE="true" + shift + ;; + -d|--debug) + DEBUG="true" + shift + ;; + -t|--timeout) + TIMEOUT_MINUTES="$2" + shift 2 + ;; + *) + print_error "Unknown option: $1" + show_usage + exit 1 + ;; + esac +done + +# Run tests based on test type +if [ "$TEST_TYPE" = "all" ]; then + # Run all tests + run_all_tests + cleanup_resources +else + # Validate test type and set configuration for single test + case $TEST_TYPE in + "session") + HELM_VALUES_FILE=".github/values-06-session-routing.yaml" + TEST_SCRIPT="tests/e2e/test-sticky-routing.sh" + ;; + "prefixaware") + HELM_VALUES_FILE=".github/values-07-prefix-routing.yaml" + TEST_SCRIPT="tests/e2e/test-prefix-aware-routing.py" + ;; + "kvaware") + HELM_VALUES_FILE=".github/values-09-kv-aware-routing.yaml" + TEST_SCRIPT="tests/e2e/test-kv-aware-routing.py" + ;; + "disaggregated-prefill") + print_warning "Disaggregated prefill routing test not yet implemented" + exit 1 + ;; + "roundrobin") + HELM_VALUES_FILE=".github/values-08-round-robin-routing.yaml" + TEST_SCRIPT="tests/e2e/test-round-robin-routing.py" + ;; + *) + print_error "Invalid test type: $TEST_TYPE" + print_error "Valid options: session, prefixaware, kvaware, disaggregated-prefill, roundrobin, all" + exit 1 + ;; + esac + + # Run single test + run_complete_test "$TEST_TYPE" "$HELM_VALUES_FILE" "$TEST_SCRIPT" "" + cleanup_resources \ No newline at end of file diff --git a/.github/workflows/router-e2e-test.yml b/.github/workflows/router-e2e-test.yml index 85f7c4c47..7ad51f957 100644 --- a/.github/workflows/router-e2e-test.yml +++ b/.github/workflows/router-e2e-test.yml @@ -231,3 +231,106 @@ jobs: pkill -f "python3 -m src.vllm_router.app" || true - run: echo "๐Ÿ Static discovery e2e test job status is ${{ job.status }}." + + prefix-aware-e2e-test: + runs-on: self-hosted + needs: e2e-test + if: github.event.pull_request.draft == false + steps: + - name: Check out repository code + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r benchmarks/multi-round-qa/requirements.txt + pip install -e . + + - name: Setup minikube environment + env: + DOCKER_BUILDKIT: 1 + run: | + echo "๐Ÿ”ง Setting up minikube environment" + sudo sysctl fs.protected_regular=0 + # Verify minikube is running + minikube status + # Ensure kubectl is configured for minikube + kubectl config use-context minikube + + - name: Build and deploy router image + env: + DOCKER_BUILDKIT: 1 + run: | + echo "๐Ÿ”จ Building router docker image" + cd ${{ github.workspace }} + sudo docker build --build-arg INSTALL_OPTIONAL_DEP=default -t localhost:5000/git-act-router -f docker/Dockerfile . + sudo docker push localhost:5000/git-act-router + minikube image load localhost:5000/git-act-router + + - name: Deploy prefix-aware routing setup via helm charts + run: | + echo "๐Ÿš€ Deploying prefix-aware routing setup with helm" + cd ${{ github.workspace }} + helm install vllm ./helm -f .github/values-07-prefix-routing.yaml + + - name: Wait for pods to be ready + run: | + echo "โณ Making wait-for-pods script executable and running it" + chmod +x .github/wait-for-pods.sh + ./.github/wait-for-pods.sh --pod-prefix vllm --timeout 300 --verbose + + - name: Make test script executable + run: | + chmod +x tests/e2e/test-prefix-aware-routing.py + + - name: Run prefix-aware routing e2e test + run: | + echo "๐Ÿงช Running prefix-aware routing test" + cd ${{ github.workspace }} + # Set the model to match what's deployed in the helm values + # Enable debug mode to preserve temp files for artifact collection + python tests/e2e/test-prefix-aware-routing.py --model "facebook/opt-125m" --num-requests-per-sample 3 --verbose --debug --chunk-size 128 + timeout-minutes: 10 + + - name: Archive prefix-aware routing test results + uses: actions/upload-artifact@v4 + if: always() + with: + name: prefix-aware-routing-test-results-pr-${{ github.event.pull_request.number || 'main' }} + path: | + /tmp/prefix-aware-routing-results-* + + - name: Get router and pod logs for debugging + if: always() + run: | + echo "๐Ÿ“‹ Collecting logs for debugging" + mkdir -p debug-logs + # Get router logs + kubectl logs -l app.kubernetes.io/component=router --tail=100 > debug-logs/router.log || true + # Get serving engine logs + kubectl logs -l app.kubernetes.io/component=serving-engine --tail=100 > debug-logs/serving-engines.log || true + # Get pod status + kubectl get pods -o wide > debug-logs/pod-status.txt || true + # Get services + kubectl get svc > debug-logs/services.txt || true + + - name: Upload debug logs + uses: actions/upload-artifact@v4 + if: always() + with: + name: debug-logs-prefix-aware-pr-${{ github.event.pull_request.number || 'main' }} + path: debug-logs/ + + - name: Helm uninstall and cleanup + run: | + echo "๐Ÿงน Cleaning up resources" + helm uninstall vllm || true + sudo docker image prune -f || true + if: always() + + - run: echo "๐Ÿ Prefix-aware routing e2e test job status is ${{ job.status }}." diff --git a/tests/e2e/test-prefix-aware-routing.py b/tests/e2e/test-prefix-aware-routing.py new file mode 100644 index 000000000..701e39921 --- /dev/null +++ b/tests/e2e/test-prefix-aware-routing.py @@ -0,0 +1,537 @@ +#!/usr/bin/env python3 + +import argparse +import json +import logging +import os +import re +import shutil +import signal +import subprocess +import sys +import tempfile +import time +from concurrent.futures import ThreadPoolExecutor +from typing import List, Optional, Tuple + +import requests + +# Configure logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + + +# Colors for output +class Colors: + RED = "\033[0;31m" + GREEN = "\033[0;32m" + YELLOW = "\033[1;33m" + NC = "\033[0m" # No Color + + +def print_status(message: str): + """Print status message in green""" + print(f"{Colors.GREEN}[INFO]{Colors.NC} {message}") + + +def print_error(message: str): + """Print error message in red""" + print(f"{Colors.RED}[ERROR]{Colors.NC} {message}") + + +def print_warning(message: str): + """Print warning message in yellow""" + print(f"{Colors.YELLOW}[WARNING]{Colors.NC} {message}") + + +class PrefixAwareRoutingTest: + def __init__( + self, + base_url: str = "", + model: str = "facebook/opt-125m", + num_requests_per_sample: int = 3, + verbose: bool = False, + debug: bool = False, + chunk_size: int = 128, + ): + self.base_url = base_url + self.model = model + self.num_requests_per_sample = num_requests_per_sample + self.verbose = verbose + self.debug = debug + self.temp_dir = tempfile.mkdtemp() + self.results_dir = f"/tmp/prefix-aware-routing-results-{int(time.time())}" + self.port_forward_pid = None + self.chunk_size = chunk_size + + # Create results directory + os.makedirs(self.results_dir, exist_ok=True) + + # Load test prefixes + self.test_prefix_groups = self._load_test_prefix_groups() + + def _load_test_prefix_groups(self) -> List[Tuple[str, List[str]]]: + """Load test prefixes for routing testing""" + return [ + # Success case - should route to same endpoint + ( + "1", + [ + "1" * self.chunk_size, + "1" * self.chunk_size + "2" * self.chunk_size, + "1" * self.chunk_size + + "2" * self.chunk_size + + "3" * self.chunk_size, + ], + ), + ( + "2", + [ + "2" * self.chunk_size + + "3" * self.chunk_size + + "4" * self.chunk_size, + "2" * self.chunk_size + "3" * self.chunk_size, + "2" * self.chunk_size, + ], + ), + ( + "3", + [ + "5" * self.chunk_size, + "5" * self.chunk_size + "6" * self.chunk_size, + "5" * self.chunk_size + + "6" * self.chunk_size + + "7" * self.chunk_size, + ], + ), + ( + "4", + [ + "8" * self.chunk_size + + "9" * self.chunk_size + + "10" * self.chunk_size, + "8" * self.chunk_size, + "8" * self.chunk_size + "9" * self.chunk_size, + ], + ), + # Failure case - should route to different endpoints + ( + "5", + [ + "1" * self.chunk_size, + "2" * self.chunk_size, + "5" * self.chunk_size, + "8" * self.chunk_size, + ], + ), + ] + + def cleanup(self): + """Cleanup resources""" + if self.port_forward_pid: + print_status(f"Cleaning up port forwarding (PID: {self.port_forward_pid})") + try: + os.kill(self.port_forward_pid, signal.SIGTERM) + except ProcessLookupError: + pass + + if self.debug: + print_status(f"Debug mode: Preserving temp directory: {self.temp_dir}") + print_status(f"Debug mode: Results also saved to: {self.results_dir}") + # Copy all files to results directory + for file in os.listdir(self.temp_dir): + src = os.path.join(self.temp_dir, file) + dst = os.path.join(self.results_dir, file) + if os.path.isfile(src): + shutil.copy2(src, dst) + else: + # Copy specific files to results directory + for filename in ["router_logs.txt"]: + src = os.path.join(self.temp_dir, filename) + dst = os.path.join(self.results_dir, filename) + if os.path.exists(src): + shutil.copy2(src, dst) + + # Remove temp directory + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def setup_port_forwarding(self) -> bool: + """Set up port forwarding if base_url is not provided""" + if self.base_url: + return True + + # Check if vllm-router-service exists + try: + subprocess.run( + ["kubectl", "get", "svc", "vllm-router-service"], + capture_output=True, + text=True, + check=True, + ) + except subprocess.CalledProcessError: + print_error( + "vllm-router-service not found. Please ensure the service exists or provide --base-url" + ) + return False + + local_port = 30080 + print_status( + f"Setting up port forwarding to vllm-router-service on localhost:{local_port}" + ) + + # Start port forwarding + try: + process = subprocess.Popen( + [ + "kubectl", + "port-forward", + "svc/vllm-router-service", + f"{local_port}:80", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + self.port_forward_pid = process.pid + time.sleep(3) # Wait for port forwarding to establish + self.base_url = f"http://localhost:{local_port}/v1" + print_status(f"Using port forwarding: {self.base_url}") + return True + except Exception as e: + print_error(f"Failed to set up port forwarding: {e}") + return False + + def get_router_logs(self) -> Optional[str]: + """Get router logs from Kubernetes""" + print_status("Fetching router logs...") + + # Try multiple common router pod selectors + router_selectors = [ + "environment=router", + "release=router", + "app.kubernetes.io/component=router", + "app=vllmrouter-sample", + ] + + raw_log_file = os.path.join(self.temp_dir, "raw_router_logs.txt") + + for selector in router_selectors: + try: + # Check if pods exist with this selector + result = subprocess.run( + ["kubectl", "get", "pods", "-l", selector, "--no-headers"], + capture_output=True, + text=True, + check=True, + ) + + if result.stdout.strip(): + print_status(f"Found router pods with selector: {selector}") + + # Get logs + with open(raw_log_file, "w") as f: + subprocess.run( + ["kubectl", "logs", "-l", selector, "--tail=5000"], + stdout=f, + stderr=subprocess.PIPE, + check=True, + ) + return raw_log_file + + except subprocess.CalledProcessError: + continue + + print_error("Could not fetch router logs. Router log verification failed.") + return None + + def verify_routing_consistency(self) -> bool: + """Verify that routing is consistent based on discovered behavior""" + print_status("Verifying routing consistency based on discovered behavior...") + + raw_log_file = self.get_router_logs() + if not raw_log_file: + return False + + # Filter logs to only include routing decision logs + router_log_file = os.path.join(self.temp_dir, "router_logs.txt") + + try: + with open(raw_log_file, "r") as f: + content = f.read() + + # Filter for routing decisions + routing_lines = [] + for line in content.split("\n"): + if ( + re.search(r"Routing request.*to.*at.*process time", line) + and "/health" not in line + ): + routing_lines.append(line) + + # Write filtered logs + with open(router_log_file, "w") as f: + f.write("\n".join(routing_lines[-1000:])) # Last 1000 lines + + if not routing_lines: + print_error( + "No routing decision logs found. Router log verification failed." + ) + return False + + except Exception as e: + print_error(f"Error processing router logs: {e}") + return False + + # Get prefix -> endpoint mapping from logs + prefix_to_endpoints = {} + filter_routing_lines = routing_lines[-1000:] + prefix_group_ids = [prefix_group[0] for prefix_group in self.test_prefix_groups] + + for line in filter_routing_lines: + match = re.search( + r"Routing request ([^ ]*) with session id [^ ]* to ([^ ]*) at ", line + ) + if match: + prefix_group_id = match.group(1) + endpoint = match.group(2) + if prefix_group_id not in prefix_group_ids: + continue + if prefix_group_id not in prefix_to_endpoints: + prefix_to_endpoints[prefix_group_id] = set() + prefix_to_endpoints[prefix_group_id].add(endpoint) + + print_status(f"Prefix to endpoint mapping: {prefix_to_endpoints}") + + # Verify that all requests with the same prefix are routed to the same endpoint + prefix_with_issues = 0 + for prefix_group_id, endpoints in prefix_to_endpoints.items(): + # Failure case - should route to different endpoints + if prefix_group_id == "5" and len(endpoints) < 2: + print_error( + f"Prefix group '{prefix_group_id}' is routed to less than 2 endpoints: {endpoints}" + ) + prefix_with_issues += 1 + # Success case - should route to same endpoint + elif prefix_group_id != "5" and len(endpoints) > 1: + print_error( + f"Prefix group '{prefix_group_id}' is routed to multiple endpoints: {endpoints}" + ) + prefix_with_issues += 1 + + if prefix_with_issues > 0: + print_error( + f"โŒ Router verification failed: {prefix_with_issues} prefix groups have routing issues" + ) + return False + else: + print_status( + "โœ… Router verification passed: All prefix groups show consistent routing behavior" + ) + return True + + def send_request(self, request: str, prefix_group_id: str) -> bool: + """Send a single request""" + try: + prompt = f"This is request: {request}. Please respond briefly." + + payload = { + "model": self.model, + "prompt": prompt, + "temperature": 0.7, + "max_tokens": 10, + } + + headers = { + "Content-Type": "application/json", + "Authorization": "Bearer dummy", + "X-Request-Id": prefix_group_id, + } + + response = requests.post( + f"{self.base_url}/completions", + json=payload, + headers=headers, + timeout=30, + ) + + response.raise_for_status() + + # Verify response is valid JSON + response.json() + + if self.verbose: + print_status( + f"โœ… Response received for request {request} in prefix group {prefix_group_id}" + ) + + return True + + except requests.exceptions.RequestException as e: + print_error( + f"ERROR: Request failed for request {request} in prefix group {prefix_group_id}: {e}" + ) + return False + except json.JSONDecodeError as e: + print_error( + f"ERROR: Invalid JSON response for request {request} in prefix group {prefix_group_id}: {e}" + ) + return False + + def send_prefix_requests(self, prefix_group: Tuple[str, List[str]]) -> bool: + """Send multiple requests for a specific prefix""" + print_status( + f"[Prefix group: {prefix_group[0]}] Starting {len(prefix_group[1])} requests, repeated {self.num_requests_per_sample} times" + ) + + prefix_group_id = prefix_group[0] + requests = prefix_group[1] + + success_count = 0 + + # Send requests + for request_idx, request in enumerate(requests): + for i in range(1, self.num_requests_per_sample + 1): + if self.verbose: + print_status( + f"[Prefix group: {prefix_group_id}] Sending request {request_idx + 1}/{len(requests)} times {i}/{self.num_requests_per_sample}" + ) + if self.send_request(request, prefix_group_id): + success_count += 1 + else: + return False + time.sleep(0.5) # Small delay between requests + + if success_count == len(requests) * self.num_requests_per_sample: + print_status( + f"[Prefix group: {prefix_group_id}] โœ… All {len(requests)} requests completed successfully" + ) + return True + else: + print_error( + f"[Prefix group: {prefix_group_id}] โŒ Failed to send {success_count} requests" + ) + return False + + def send_all_prefix_requests(self) -> bool: + """Send requests for all prefixes""" + print_status(f"Sending requests for {len(self.test_prefix_groups)} prefixes") + + failed_prefixes = [] + + # Use ThreadPoolExecutor to run all prefixes in parallel + with ThreadPoolExecutor( + max_workers=min(len(self.test_prefix_groups), 10) + ) as executor: + # Submit all prefix requests + future_to_prefix = { + executor.submit(self.send_prefix_requests, prefix_group): prefix_group[ + 0 + ] + for prefix_group in self.test_prefix_groups + } + + # Collect results + for future in future_to_prefix: + prefix_group_id = future_to_prefix[future] + try: + if future.result(): + print_status( + f"โœ… Prefix group '{prefix_group_id}' completed successfully" + ) + else: + print_error(f"โŒ Prefix group '{prefix_group_id}' failed") + failed_prefixes.append(prefix_group_id) + except Exception as e: + print_error( + f"โŒ Prefix group '{prefix_group_id}' failed with exception: {e}" + ) + failed_prefixes.append(prefix_group_id) + + if failed_prefixes: + print_error(f"Failed prefixes: {len(failed_prefixes)}") + return False + + print_status( + f"โœ… All requests completed successfully across {len(self.test_prefix_groups)} prefix groups" + ) + return True + + def run_test(self) -> bool: + """Run the complete prefix-aware routing test""" + try: + print_status( + f"Starting prefix-aware routing test with {self.num_requests_per_sample} requests per sample" + ) + + # Set up port forwarding if needed + if not self.setup_port_forwarding(): + return False + + # Send all prefix requests + if not self.send_all_prefix_requests(): + return False + + print_status("โœ… Prefix request script completed successfully") + + # Verify router logs for prefix-based routing consistency + if not self.verify_routing_consistency(): + print_error("Router log verification failed!") + return False + + print_status("โœ… Prefix-aware routing test passed!") + print_status("Router logs confirm consistent prefix-based routing") + return True + + except KeyboardInterrupt: + print_error("Test interrupted by user") + return False + except Exception as e: + print_error(f"Unexpected error during test: {e}") + return False + + +def main(): + parser = argparse.ArgumentParser(description="Test prefix-aware routing") + parser.add_argument( + "--base-url", default="", help="Base URL for the vLLM router service" + ) + parser.add_argument( + "--model", default="facebook/opt-125m", help="Model to use for testing" + ) + parser.add_argument( + "--num-requests-per-sample", + type=int, + default=3, + help="Number of requests per sample", + ) + parser.add_argument("--verbose", action="store_true", help="Enable verbose output") + parser.add_argument( + "--debug", action="store_true", help="Enable debug mode (preserve temp files)" + ) + parser.add_argument( + "--chunk-size", type=int, default=128, help="Chunk size for prefixes" + ) + + args = parser.parse_args() + + # Create test instance + test = PrefixAwareRoutingTest( + base_url=args.base_url, + model=args.model, + num_requests_per_sample=args.num_requests_per_sample, + verbose=args.verbose, + debug=args.debug, + chunk_size=args.chunk_size, + ) + + # Ensure cleanup happens + try: + success = test.run_test() + sys.exit(0 if success else 1) + finally: + test.cleanup() + + +if __name__ == "__main__": + main() From a978e7c0ed56f7500b151c9b95eeba2aff28b5b7 Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Thu, 26 Jun 2025 00:45:28 +0000 Subject: [PATCH 02/12] [ci] refactor k8s discovery e2e test Signed-off-by: Rui Zhang --- .github/run-k8s-routing-test.sh | 368 ----------------- .github/workflows/router-e2e-test.yml | 103 ----- tests/e2e/test-prefix-aware-routing.py | 537 ------------------------- 3 files changed, 1008 deletions(-) delete mode 100644 .github/run-k8s-routing-test.sh delete mode 100644 tests/e2e/test-prefix-aware-routing.py diff --git a/.github/run-k8s-routing-test.sh b/.github/run-k8s-routing-test.sh deleted file mode 100644 index a16a20413..000000000 --- a/.github/run-k8s-routing-test.sh +++ /dev/null @@ -1,368 +0,0 @@ -#!/bin/bash - -# Script to run k8s routing tests with different routing logic -# Usage: ./run-k8s-routing-test.sh [options] - -set -euo pipefail - -# Default values -TEST_TYPE="" -HELM_VALUES_FILE="" -TEST_SCRIPT="" -TEST_ARGS="" -MODEL="facebook/opt-125m" -NUM_ROUNDS=3 -NUM_REQUESTS_PER_SAMPLE=3 -CHUNK_SIZE=128 -VERBOSE="" -DEBUG="" -TIMEOUT_MINUTES=10 - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color - -# Helper functions -print_status() { - echo -e "${GREEN}[INFO]${NC} $1" -} - -print_error() { - echo -e "${RED}[ERROR]${NC} $1" -} - -print_warning() { - echo -e "${YELLOW}[WARNING]${NC} $1" -} - -# Function to show usage -show_usage() { - cat << EOF -Usage: $0 [options] - -Test Type Options: - session - Test sticky routing with session management - prefixaware - Test prefix-aware routing - kvaware - Test KV-aware routing - disaggregated-prefill - Test disaggregated prefill routing - roundrobin - Test round-robin routing - all - Run all available tests sequentially - -Options: - -m, --model MODEL Model to use (default: facebook/opt-125m) - -n, --num-rounds N Number of rounds for sticky routing (default: 3) - -r, --num-requests N Number of requests per sample (default: 3) - -c, --chunk-size N Chunk size for prefix-aware routing (default: 128) - -v, --verbose Enable verbose output - -d, --debug Enable debug mode - -t, --timeout N Timeout in minutes (default: 10) - -h, --help Show this help message - -Examples: - $0 session --model "facebook/opt-125m" --num-rounds 5 --verbose - $0 prefixaware --model "facebook/opt-125m" --chunk-size 256 --debug - $0 session --verbose --debug - $0 all --verbose --debug - -EOF -} - -# Function to deploy helm chart -deploy_helm_chart() { - local values_file=$1 - print_status "๐Ÿš€ Deploying setup with helm using $values_file" - if helm list -q | grep -q "^vllm$"; then - print_status "๐Ÿ“ฆ Upgrading existing vllm deployment" - helm upgrade vllm ./helm -f "$values_file" - else - print_status "๐Ÿš€ Installing new vllm deployment" - helm install vllm ./helm -f "$values_file" - fi -} - -# Function to wait for pods -wait_for_pods() { - print_status "โณ Waiting for pods to be ready" - chmod +x .github/wait-for-pods.sh - ./.github/wait-for-pods.sh --pod-prefix vllm --timeout 300 --verbose -} - -# Function to run test -run_test() { - local test_type=$1 - local test_script=$2 - local test_args=$3 - - print_status "๐Ÿงช Running $test_type test" - - # Make test script executable - chmod +x "$test_script" - - # Build test command - local test_cmd="" - case $test_type in - "session") - test_cmd="./$test_script --model \"$MODEL\" --num-rounds $NUM_ROUNDS" - ;; - "prefixaware") - test_cmd="python $test_script --model \"$MODEL\" --num-requests-per-sample $NUM_REQUESTS_PER_SAMPLE --chunk-size $CHUNK_SIZE" - ;; - "roundrobin") - test_cmd="python $test_script --model \"$MODEL\" --num-requests-per-sample $NUM_REQUESTS_PER_SAMPLE" - ;; - "kvaware") - test_cmd="python $test_script --model \"$MODEL\" --num-requests-per-sample $NUM_REQUESTS_PER_SAMPLE" - ;; - "disaggregated-prefill") - test_cmd="python $test_script --model \"$MODEL\" --num-requests-per-sample $NUM_REQUESTS_PER_SAMPLE" - ;; - *) - test_cmd="python $test_script" - ;; - esac - - # Add common arguments - if [ "$VERBOSE" = "true" ]; then - test_cmd="$test_cmd --verbose" - fi - - if [ "$DEBUG" = "true" ]; then - test_cmd="$test_cmd --debug" - fi - - # Add custom test arguments - if [ -n "$test_args" ]; then - test_cmd="$test_cmd $test_args" - fi - - print_status "Executing: $test_cmd" - timeout ${TIMEOUT_MINUTES}m bash -c "$test_cmd" -} - -# Function to collect debug logs -collect_debug_logs() { - local test_type=$1 - print_status "๐Ÿ“‹ Collecting logs for debugging" - mkdir -p debug-logs - # Get router logs - kubectl logs -l app.kubernetes.io/component=router --tail=100 > debug-logs/router.log || true - # Get serving engine logs - kubectl logs -l app.kubernetes.io/component=serving-engine --tail=100 > debug-logs/serving-engines.log || true - # Get pod status - kubectl get pods -o wide > debug-logs/pod-status.txt || true - # Get services - kubectl get svc > debug-logs/services.txt || true -} - -# Function to cleanup resources -cleanup_resources() { - print_status "๐Ÿงน Cleaning up resources" - helm uninstall vllm || true - sudo docker image prune -f || true -} - -# Function to run complete test -run_complete_test() { - local test_type=$1 - local helm_values_file=$2 - local test_script=$3 - local test_args=$4 - - print_status "==========================================" - print_status "Starting $test_type test" - print_status "==========================================" - - # Deploy helm chart - deploy_helm_chart "$helm_values_file" - - # Wait for pods - wait_for_pods - - # Run test - if run_test "$test_type" "$test_script" "$test_args"; then - print_status "โœ… $test_type test completed successfully" - else - print_error "โŒ $test_type test failed" - return 1 - fi - - # Collect debug logs - collect_debug_logs "$test_type" - - print_status "==========================================" - print_status "$test_type test completed" - print_status "==========================================" -} - -# Function to run all tests -run_all_tests() { - print_status "๐Ÿš€ Starting all k8s routing tests" - - # Define all available test types - local all_test_types=("session" "prefixaware" "roundrobin" "kvaware" "disaggregated-prefill") - local failed_tests=() - local successful_tests=() - - for test_type in "${all_test_types[@]}"; do - print_status "==========================================" - print_status "Running $test_type test" - print_status "==========================================" - - # Set configuration for this test type - case $test_type in - "session") - local helm_values_file=".github/values-06-session-routing.yaml" - local test_script="tests/e2e/test-sticky-routing.sh" - ;; - "prefixaware") - local helm_values_file=".github/values-07-prefix-routing.yaml" - local test_script="tests/e2e/test-prefix-aware-routing.py" - ;; - "roundrobin") - local helm_values_file=".github/values-08-round-robin-routing.yaml" - local test_script="tests/e2e/test-round-robin-routing.py" - ;; - "kvaware") - local helm_values_file=".github/values-09-kv-aware-routing.yaml" - local test_script="tests/e2e/test-kv-aware-routing.py" - ;; - "disaggregated-prefill") - local helm_values_file=".github/values-10-disaggregated-prefill-routing.yaml" - local test_script="tests/e2e/test-disaggregated-prefill-routing.py" - ;; - *) - print_warning "Unknown test type: $test_type, skipping" - continue - ;; - esac - - # Run the test - if run_complete_test "$test_type" "$helm_values_file" "$test_script" ""; then - print_status "โœ… $test_type test passed" - successful_tests+=("$test_type") - else - print_error "โŒ $test_type test failed" - failed_tests+=("$test_type") - fi - - # Small delay between tests - sleep 5 - done - - # Report final results - print_status "==========================================" - print_status "All Tests Summary" - print_status "==========================================" - - if [ ${#successful_tests[@]} -gt 0 ]; then - print_status "โœ… Successful tests: ${successful_tests[*]}" - fi - - if [ ${#failed_tests[@]} -gt 0 ]; then - print_error "โŒ Failed tests: ${failed_tests[*]}" - fi - - if [ ${#failed_tests[@]} -eq 0 ]; then - print_status "๐ŸŽ‰ All tests passed!" - return 0 - else - print_error "๐Ÿ’ฅ Some tests failed" - return 1 - fi -} - -# Parse command line arguments -if [ $# -eq 0 ]; then - show_usage - exit 1 -fi - -# Check if first argument is help -if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then - show_usage - exit 0 -fi - -# Get test type -TEST_TYPE="$1" -shift - -# Parse remaining options -while [[ $# -gt 0 ]]; do - case $1 in - -m|--model) - MODEL="$2" - shift 2 - ;; - -n|--num-rounds) - NUM_ROUNDS="$2" - shift 2 - ;; - -r|--num-requests) - NUM_REQUESTS_PER_SAMPLE="$2" - shift 2 - ;; - -c|--chunk-size) - CHUNK_SIZE="$2" - shift 2 - ;; - -v|--verbose) - VERBOSE="true" - shift - ;; - -d|--debug) - DEBUG="true" - shift - ;; - -t|--timeout) - TIMEOUT_MINUTES="$2" - shift 2 - ;; - *) - print_error "Unknown option: $1" - show_usage - exit 1 - ;; - esac -done - -# Run tests based on test type -if [ "$TEST_TYPE" = "all" ]; then - # Run all tests - run_all_tests - cleanup_resources -else - # Validate test type and set configuration for single test - case $TEST_TYPE in - "session") - HELM_VALUES_FILE=".github/values-06-session-routing.yaml" - TEST_SCRIPT="tests/e2e/test-sticky-routing.sh" - ;; - "prefixaware") - HELM_VALUES_FILE=".github/values-07-prefix-routing.yaml" - TEST_SCRIPT="tests/e2e/test-prefix-aware-routing.py" - ;; - "kvaware") - HELM_VALUES_FILE=".github/values-09-kv-aware-routing.yaml" - TEST_SCRIPT="tests/e2e/test-kv-aware-routing.py" - ;; - "disaggregated-prefill") - print_warning "Disaggregated prefill routing test not yet implemented" - exit 1 - ;; - "roundrobin") - HELM_VALUES_FILE=".github/values-08-round-robin-routing.yaml" - TEST_SCRIPT="tests/e2e/test-round-robin-routing.py" - ;; - *) - print_error "Invalid test type: $TEST_TYPE" - print_error "Valid options: session, prefixaware, kvaware, disaggregated-prefill, roundrobin, all" - exit 1 - ;; - esac - - # Run single test - run_complete_test "$TEST_TYPE" "$HELM_VALUES_FILE" "$TEST_SCRIPT" "" - cleanup_resources \ No newline at end of file diff --git a/.github/workflows/router-e2e-test.yml b/.github/workflows/router-e2e-test.yml index 7ad51f957..85f7c4c47 100644 --- a/.github/workflows/router-e2e-test.yml +++ b/.github/workflows/router-e2e-test.yml @@ -231,106 +231,3 @@ jobs: pkill -f "python3 -m src.vllm_router.app" || true - run: echo "๐Ÿ Static discovery e2e test job status is ${{ job.status }}." - - prefix-aware-e2e-test: - runs-on: self-hosted - needs: e2e-test - if: github.event.pull_request.draft == false - steps: - - name: Check out repository code - uses: actions/checkout@v4 - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - - name: Install Python dependencies - run: | - python -m pip install --upgrade pip - pip install -r benchmarks/multi-round-qa/requirements.txt - pip install -e . - - - name: Setup minikube environment - env: - DOCKER_BUILDKIT: 1 - run: | - echo "๐Ÿ”ง Setting up minikube environment" - sudo sysctl fs.protected_regular=0 - # Verify minikube is running - minikube status - # Ensure kubectl is configured for minikube - kubectl config use-context minikube - - - name: Build and deploy router image - env: - DOCKER_BUILDKIT: 1 - run: | - echo "๐Ÿ”จ Building router docker image" - cd ${{ github.workspace }} - sudo docker build --build-arg INSTALL_OPTIONAL_DEP=default -t localhost:5000/git-act-router -f docker/Dockerfile . - sudo docker push localhost:5000/git-act-router - minikube image load localhost:5000/git-act-router - - - name: Deploy prefix-aware routing setup via helm charts - run: | - echo "๐Ÿš€ Deploying prefix-aware routing setup with helm" - cd ${{ github.workspace }} - helm install vllm ./helm -f .github/values-07-prefix-routing.yaml - - - name: Wait for pods to be ready - run: | - echo "โณ Making wait-for-pods script executable and running it" - chmod +x .github/wait-for-pods.sh - ./.github/wait-for-pods.sh --pod-prefix vllm --timeout 300 --verbose - - - name: Make test script executable - run: | - chmod +x tests/e2e/test-prefix-aware-routing.py - - - name: Run prefix-aware routing e2e test - run: | - echo "๐Ÿงช Running prefix-aware routing test" - cd ${{ github.workspace }} - # Set the model to match what's deployed in the helm values - # Enable debug mode to preserve temp files for artifact collection - python tests/e2e/test-prefix-aware-routing.py --model "facebook/opt-125m" --num-requests-per-sample 3 --verbose --debug --chunk-size 128 - timeout-minutes: 10 - - - name: Archive prefix-aware routing test results - uses: actions/upload-artifact@v4 - if: always() - with: - name: prefix-aware-routing-test-results-pr-${{ github.event.pull_request.number || 'main' }} - path: | - /tmp/prefix-aware-routing-results-* - - - name: Get router and pod logs for debugging - if: always() - run: | - echo "๐Ÿ“‹ Collecting logs for debugging" - mkdir -p debug-logs - # Get router logs - kubectl logs -l app.kubernetes.io/component=router --tail=100 > debug-logs/router.log || true - # Get serving engine logs - kubectl logs -l app.kubernetes.io/component=serving-engine --tail=100 > debug-logs/serving-engines.log || true - # Get pod status - kubectl get pods -o wide > debug-logs/pod-status.txt || true - # Get services - kubectl get svc > debug-logs/services.txt || true - - - name: Upload debug logs - uses: actions/upload-artifact@v4 - if: always() - with: - name: debug-logs-prefix-aware-pr-${{ github.event.pull_request.number || 'main' }} - path: debug-logs/ - - - name: Helm uninstall and cleanup - run: | - echo "๐Ÿงน Cleaning up resources" - helm uninstall vllm || true - sudo docker image prune -f || true - if: always() - - - run: echo "๐Ÿ Prefix-aware routing e2e test job status is ${{ job.status }}." diff --git a/tests/e2e/test-prefix-aware-routing.py b/tests/e2e/test-prefix-aware-routing.py deleted file mode 100644 index 701e39921..000000000 --- a/tests/e2e/test-prefix-aware-routing.py +++ /dev/null @@ -1,537 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import json -import logging -import os -import re -import shutil -import signal -import subprocess -import sys -import tempfile -import time -from concurrent.futures import ThreadPoolExecutor -from typing import List, Optional, Tuple - -import requests - -# Configure logging -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) -logger = logging.getLogger(__name__) - - -# Colors for output -class Colors: - RED = "\033[0;31m" - GREEN = "\033[0;32m" - YELLOW = "\033[1;33m" - NC = "\033[0m" # No Color - - -def print_status(message: str): - """Print status message in green""" - print(f"{Colors.GREEN}[INFO]{Colors.NC} {message}") - - -def print_error(message: str): - """Print error message in red""" - print(f"{Colors.RED}[ERROR]{Colors.NC} {message}") - - -def print_warning(message: str): - """Print warning message in yellow""" - print(f"{Colors.YELLOW}[WARNING]{Colors.NC} {message}") - - -class PrefixAwareRoutingTest: - def __init__( - self, - base_url: str = "", - model: str = "facebook/opt-125m", - num_requests_per_sample: int = 3, - verbose: bool = False, - debug: bool = False, - chunk_size: int = 128, - ): - self.base_url = base_url - self.model = model - self.num_requests_per_sample = num_requests_per_sample - self.verbose = verbose - self.debug = debug - self.temp_dir = tempfile.mkdtemp() - self.results_dir = f"/tmp/prefix-aware-routing-results-{int(time.time())}" - self.port_forward_pid = None - self.chunk_size = chunk_size - - # Create results directory - os.makedirs(self.results_dir, exist_ok=True) - - # Load test prefixes - self.test_prefix_groups = self._load_test_prefix_groups() - - def _load_test_prefix_groups(self) -> List[Tuple[str, List[str]]]: - """Load test prefixes for routing testing""" - return [ - # Success case - should route to same endpoint - ( - "1", - [ - "1" * self.chunk_size, - "1" * self.chunk_size + "2" * self.chunk_size, - "1" * self.chunk_size - + "2" * self.chunk_size - + "3" * self.chunk_size, - ], - ), - ( - "2", - [ - "2" * self.chunk_size - + "3" * self.chunk_size - + "4" * self.chunk_size, - "2" * self.chunk_size + "3" * self.chunk_size, - "2" * self.chunk_size, - ], - ), - ( - "3", - [ - "5" * self.chunk_size, - "5" * self.chunk_size + "6" * self.chunk_size, - "5" * self.chunk_size - + "6" * self.chunk_size - + "7" * self.chunk_size, - ], - ), - ( - "4", - [ - "8" * self.chunk_size - + "9" * self.chunk_size - + "10" * self.chunk_size, - "8" * self.chunk_size, - "8" * self.chunk_size + "9" * self.chunk_size, - ], - ), - # Failure case - should route to different endpoints - ( - "5", - [ - "1" * self.chunk_size, - "2" * self.chunk_size, - "5" * self.chunk_size, - "8" * self.chunk_size, - ], - ), - ] - - def cleanup(self): - """Cleanup resources""" - if self.port_forward_pid: - print_status(f"Cleaning up port forwarding (PID: {self.port_forward_pid})") - try: - os.kill(self.port_forward_pid, signal.SIGTERM) - except ProcessLookupError: - pass - - if self.debug: - print_status(f"Debug mode: Preserving temp directory: {self.temp_dir}") - print_status(f"Debug mode: Results also saved to: {self.results_dir}") - # Copy all files to results directory - for file in os.listdir(self.temp_dir): - src = os.path.join(self.temp_dir, file) - dst = os.path.join(self.results_dir, file) - if os.path.isfile(src): - shutil.copy2(src, dst) - else: - # Copy specific files to results directory - for filename in ["router_logs.txt"]: - src = os.path.join(self.temp_dir, filename) - dst = os.path.join(self.results_dir, filename) - if os.path.exists(src): - shutil.copy2(src, dst) - - # Remove temp directory - shutil.rmtree(self.temp_dir, ignore_errors=True) - - def setup_port_forwarding(self) -> bool: - """Set up port forwarding if base_url is not provided""" - if self.base_url: - return True - - # Check if vllm-router-service exists - try: - subprocess.run( - ["kubectl", "get", "svc", "vllm-router-service"], - capture_output=True, - text=True, - check=True, - ) - except subprocess.CalledProcessError: - print_error( - "vllm-router-service not found. Please ensure the service exists or provide --base-url" - ) - return False - - local_port = 30080 - print_status( - f"Setting up port forwarding to vllm-router-service on localhost:{local_port}" - ) - - # Start port forwarding - try: - process = subprocess.Popen( - [ - "kubectl", - "port-forward", - "svc/vllm-router-service", - f"{local_port}:80", - ], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - self.port_forward_pid = process.pid - time.sleep(3) # Wait for port forwarding to establish - self.base_url = f"http://localhost:{local_port}/v1" - print_status(f"Using port forwarding: {self.base_url}") - return True - except Exception as e: - print_error(f"Failed to set up port forwarding: {e}") - return False - - def get_router_logs(self) -> Optional[str]: - """Get router logs from Kubernetes""" - print_status("Fetching router logs...") - - # Try multiple common router pod selectors - router_selectors = [ - "environment=router", - "release=router", - "app.kubernetes.io/component=router", - "app=vllmrouter-sample", - ] - - raw_log_file = os.path.join(self.temp_dir, "raw_router_logs.txt") - - for selector in router_selectors: - try: - # Check if pods exist with this selector - result = subprocess.run( - ["kubectl", "get", "pods", "-l", selector, "--no-headers"], - capture_output=True, - text=True, - check=True, - ) - - if result.stdout.strip(): - print_status(f"Found router pods with selector: {selector}") - - # Get logs - with open(raw_log_file, "w") as f: - subprocess.run( - ["kubectl", "logs", "-l", selector, "--tail=5000"], - stdout=f, - stderr=subprocess.PIPE, - check=True, - ) - return raw_log_file - - except subprocess.CalledProcessError: - continue - - print_error("Could not fetch router logs. Router log verification failed.") - return None - - def verify_routing_consistency(self) -> bool: - """Verify that routing is consistent based on discovered behavior""" - print_status("Verifying routing consistency based on discovered behavior...") - - raw_log_file = self.get_router_logs() - if not raw_log_file: - return False - - # Filter logs to only include routing decision logs - router_log_file = os.path.join(self.temp_dir, "router_logs.txt") - - try: - with open(raw_log_file, "r") as f: - content = f.read() - - # Filter for routing decisions - routing_lines = [] - for line in content.split("\n"): - if ( - re.search(r"Routing request.*to.*at.*process time", line) - and "/health" not in line - ): - routing_lines.append(line) - - # Write filtered logs - with open(router_log_file, "w") as f: - f.write("\n".join(routing_lines[-1000:])) # Last 1000 lines - - if not routing_lines: - print_error( - "No routing decision logs found. Router log verification failed." - ) - return False - - except Exception as e: - print_error(f"Error processing router logs: {e}") - return False - - # Get prefix -> endpoint mapping from logs - prefix_to_endpoints = {} - filter_routing_lines = routing_lines[-1000:] - prefix_group_ids = [prefix_group[0] for prefix_group in self.test_prefix_groups] - - for line in filter_routing_lines: - match = re.search( - r"Routing request ([^ ]*) with session id [^ ]* to ([^ ]*) at ", line - ) - if match: - prefix_group_id = match.group(1) - endpoint = match.group(2) - if prefix_group_id not in prefix_group_ids: - continue - if prefix_group_id not in prefix_to_endpoints: - prefix_to_endpoints[prefix_group_id] = set() - prefix_to_endpoints[prefix_group_id].add(endpoint) - - print_status(f"Prefix to endpoint mapping: {prefix_to_endpoints}") - - # Verify that all requests with the same prefix are routed to the same endpoint - prefix_with_issues = 0 - for prefix_group_id, endpoints in prefix_to_endpoints.items(): - # Failure case - should route to different endpoints - if prefix_group_id == "5" and len(endpoints) < 2: - print_error( - f"Prefix group '{prefix_group_id}' is routed to less than 2 endpoints: {endpoints}" - ) - prefix_with_issues += 1 - # Success case - should route to same endpoint - elif prefix_group_id != "5" and len(endpoints) > 1: - print_error( - f"Prefix group '{prefix_group_id}' is routed to multiple endpoints: {endpoints}" - ) - prefix_with_issues += 1 - - if prefix_with_issues > 0: - print_error( - f"โŒ Router verification failed: {prefix_with_issues} prefix groups have routing issues" - ) - return False - else: - print_status( - "โœ… Router verification passed: All prefix groups show consistent routing behavior" - ) - return True - - def send_request(self, request: str, prefix_group_id: str) -> bool: - """Send a single request""" - try: - prompt = f"This is request: {request}. Please respond briefly." - - payload = { - "model": self.model, - "prompt": prompt, - "temperature": 0.7, - "max_tokens": 10, - } - - headers = { - "Content-Type": "application/json", - "Authorization": "Bearer dummy", - "X-Request-Id": prefix_group_id, - } - - response = requests.post( - f"{self.base_url}/completions", - json=payload, - headers=headers, - timeout=30, - ) - - response.raise_for_status() - - # Verify response is valid JSON - response.json() - - if self.verbose: - print_status( - f"โœ… Response received for request {request} in prefix group {prefix_group_id}" - ) - - return True - - except requests.exceptions.RequestException as e: - print_error( - f"ERROR: Request failed for request {request} in prefix group {prefix_group_id}: {e}" - ) - return False - except json.JSONDecodeError as e: - print_error( - f"ERROR: Invalid JSON response for request {request} in prefix group {prefix_group_id}: {e}" - ) - return False - - def send_prefix_requests(self, prefix_group: Tuple[str, List[str]]) -> bool: - """Send multiple requests for a specific prefix""" - print_status( - f"[Prefix group: {prefix_group[0]}] Starting {len(prefix_group[1])} requests, repeated {self.num_requests_per_sample} times" - ) - - prefix_group_id = prefix_group[0] - requests = prefix_group[1] - - success_count = 0 - - # Send requests - for request_idx, request in enumerate(requests): - for i in range(1, self.num_requests_per_sample + 1): - if self.verbose: - print_status( - f"[Prefix group: {prefix_group_id}] Sending request {request_idx + 1}/{len(requests)} times {i}/{self.num_requests_per_sample}" - ) - if self.send_request(request, prefix_group_id): - success_count += 1 - else: - return False - time.sleep(0.5) # Small delay between requests - - if success_count == len(requests) * self.num_requests_per_sample: - print_status( - f"[Prefix group: {prefix_group_id}] โœ… All {len(requests)} requests completed successfully" - ) - return True - else: - print_error( - f"[Prefix group: {prefix_group_id}] โŒ Failed to send {success_count} requests" - ) - return False - - def send_all_prefix_requests(self) -> bool: - """Send requests for all prefixes""" - print_status(f"Sending requests for {len(self.test_prefix_groups)} prefixes") - - failed_prefixes = [] - - # Use ThreadPoolExecutor to run all prefixes in parallel - with ThreadPoolExecutor( - max_workers=min(len(self.test_prefix_groups), 10) - ) as executor: - # Submit all prefix requests - future_to_prefix = { - executor.submit(self.send_prefix_requests, prefix_group): prefix_group[ - 0 - ] - for prefix_group in self.test_prefix_groups - } - - # Collect results - for future in future_to_prefix: - prefix_group_id = future_to_prefix[future] - try: - if future.result(): - print_status( - f"โœ… Prefix group '{prefix_group_id}' completed successfully" - ) - else: - print_error(f"โŒ Prefix group '{prefix_group_id}' failed") - failed_prefixes.append(prefix_group_id) - except Exception as e: - print_error( - f"โŒ Prefix group '{prefix_group_id}' failed with exception: {e}" - ) - failed_prefixes.append(prefix_group_id) - - if failed_prefixes: - print_error(f"Failed prefixes: {len(failed_prefixes)}") - return False - - print_status( - f"โœ… All requests completed successfully across {len(self.test_prefix_groups)} prefix groups" - ) - return True - - def run_test(self) -> bool: - """Run the complete prefix-aware routing test""" - try: - print_status( - f"Starting prefix-aware routing test with {self.num_requests_per_sample} requests per sample" - ) - - # Set up port forwarding if needed - if not self.setup_port_forwarding(): - return False - - # Send all prefix requests - if not self.send_all_prefix_requests(): - return False - - print_status("โœ… Prefix request script completed successfully") - - # Verify router logs for prefix-based routing consistency - if not self.verify_routing_consistency(): - print_error("Router log verification failed!") - return False - - print_status("โœ… Prefix-aware routing test passed!") - print_status("Router logs confirm consistent prefix-based routing") - return True - - except KeyboardInterrupt: - print_error("Test interrupted by user") - return False - except Exception as e: - print_error(f"Unexpected error during test: {e}") - return False - - -def main(): - parser = argparse.ArgumentParser(description="Test prefix-aware routing") - parser.add_argument( - "--base-url", default="", help="Base URL for the vLLM router service" - ) - parser.add_argument( - "--model", default="facebook/opt-125m", help="Model to use for testing" - ) - parser.add_argument( - "--num-requests-per-sample", - type=int, - default=3, - help="Number of requests per sample", - ) - parser.add_argument("--verbose", action="store_true", help="Enable verbose output") - parser.add_argument( - "--debug", action="store_true", help="Enable debug mode (preserve temp files)" - ) - parser.add_argument( - "--chunk-size", type=int, default=128, help="Chunk size for prefixes" - ) - - args = parser.parse_args() - - # Create test instance - test = PrefixAwareRoutingTest( - base_url=args.base_url, - model=args.model, - num_requests_per_sample=args.num_requests_per_sample, - verbose=args.verbose, - debug=args.debug, - chunk_size=args.chunk_size, - ) - - # Ensure cleanup happens - try: - success = test.run_test() - sys.exit(0 if success else 1) - finally: - test.cleanup() - - -if __name__ == "__main__": - main() From 65171cc338a847e91fec919149063d174ff3eb8d Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Fri, 20 Jun 2025 21:29:37 +0000 Subject: [PATCH 03/12] [CI] Refactor static discovery testing so that it can support multiple logic Signed-off-by: Rui Zhang --- .github/workflows/router-e2e-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/router-e2e-test.yml b/.github/workflows/router-e2e-test.yml index 85f7c4c47..29ec40145 100644 --- a/.github/workflows/router-e2e-test.yml +++ b/.github/workflows/router-e2e-test.yml @@ -219,7 +219,7 @@ jobs: uses: actions/upload-artifact@v4 if: always() with: - name: static-discovery-test-results-pr-${{ github.event.pull_request.number || 'main' }} + name: static-discovery-${{ matrix.strategy }}-test-results-pr-${{ github.event.pull_request.number || 'main' }} path: | ${{ env.LOG_DIR }}/* From 15f91672177689c6f336918c30319ce6db1d9399 Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Sat, 21 Jun 2025 01:29:46 +0000 Subject: [PATCH 04/12] [CI] Add static e2e test for prefixaware Signed-off-by: Rui Zhang --- .../disaggregated_prefill/routing_lines.txt | 83 ++ tests/e2e/disaggregated_prefill/router.log | 107 ++ tests/e2e/kvaware/router.log | 23 + .../prefixaware/prefixaware/routing_lines.txt | 241 ++++ tests/e2e/prefixaware/router.log | 1008 +++++++++++++++++ .../roundrobin/roundrobin/routing_lines.txt | 41 + tests/e2e/roundrobin/router.log | 191 ++++ 7 files changed, 1694 insertions(+) create mode 100644 tests/e2e/disaggregated_prefill/disaggregated_prefill/routing_lines.txt create mode 100644 tests/e2e/disaggregated_prefill/router.log create mode 100644 tests/e2e/kvaware/router.log create mode 100644 tests/e2e/prefixaware/prefixaware/routing_lines.txt create mode 100644 tests/e2e/prefixaware/router.log create mode 100644 tests/e2e/roundrobin/roundrobin/routing_lines.txt create mode 100644 tests/e2e/roundrobin/router.log diff --git a/tests/e2e/disaggregated_prefill/disaggregated_prefill/routing_lines.txt b/tests/e2e/disaggregated_prefill/disaggregated_prefill/routing_lines.txt new file mode 100644 index 000000000..012d45b3c --- /dev/null +++ b/tests/e2e/disaggregated_prefill/disaggregated_prefill/routing_lines.txt @@ -0,0 +1,83 @@ +[2025-06-27 00:37:47,943] INFO: Routing request aaa3f812-fa10-453b-9b1d-e1ddb2321f46 with session id None to http://localhost:8001 at 1750984667.9436514, process time = 0.0190 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:47,943] INFO: Routing request aaa3f812-fa10-453b-9b1d-e1ddb2321f46 with session id None to http://localhost:8002 at 1750984667.943875, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:47,995] INFO: Routing request fbd909f7-ef67-4207-8fce-c50cd8efe500 with session id None to http://localhost:8001 at 1750984667.9951248, process time = 0.0094 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:47,995] INFO: Routing request fbd909f7-ef67-4207-8fce-c50cd8efe500 with session id None to http://localhost:8002 at 1750984667.9953818, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,042] INFO: Routing request a2f307d1-7715-43da-b46f-a092387e95be with session id None to http://localhost:8001 at 1750984668.0424466, process time = 0.0096 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,042] INFO: Routing request a2f307d1-7715-43da-b46f-a092387e95be with session id None to http://localhost:8002 at 1750984668.0426898, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,088] INFO: Routing request 7d1fa1b6-1675-4ba9-8b7e-17f2597d1f4d with session id None to http://localhost:8001 at 1750984668.0880878, process time = 0.0085 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,088] INFO: Routing request 7d1fa1b6-1675-4ba9-8b7e-17f2597d1f4d with session id None to http://localhost:8002 at 1750984668.088294, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,134] INFO: Routing request 1e7786b0-fd7c-44e0-9c76-b334ad4ec9f4 with session id None to http://localhost:8001 at 1750984668.1339238, process time = 0.0092 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,134] INFO: Routing request 1e7786b0-fd7c-44e0-9c76-b334ad4ec9f4 with session id None to http://localhost:8002 at 1750984668.134152, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,180] INFO: Routing request 39473ab2-3d09-46e2-8d03-23045d97a3c1 with session id None to http://localhost:8001 at 1750984668.1798692, process time = 0.0088 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,180] INFO: Routing request 39473ab2-3d09-46e2-8d03-23045d97a3c1 with session id None to http://localhost:8002 at 1750984668.1800926, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,229] INFO: Routing request eb045054-acf2-4fd0-8ae1-26b9242aedb5 with session id None to http://localhost:8001 at 1750984668.2297182, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,229] INFO: Routing request eb045054-acf2-4fd0-8ae1-26b9242aedb5 with session id None to http://localhost:8002 at 1750984668.2299647, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,278] INFO: Routing request 3edc6ddc-aa45-4a73-b4af-efd4a4b6dad1 with session id None to http://localhost:8001 at 1750984668.2783382, process time = 0.0103 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,278] INFO: Routing request 3edc6ddc-aa45-4a73-b4af-efd4a4b6dad1 with session id None to http://localhost:8002 at 1750984668.2786064, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,327] INFO: Routing request 00f2cada-fc33-4e9b-aa38-3ee578f76507 with session id None to http://localhost:8001 at 1750984668.3270485, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,327] INFO: Routing request 00f2cada-fc33-4e9b-aa38-3ee578f76507 with session id None to http://localhost:8002 at 1750984668.3272886, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,375] INFO: Routing request 8e20a084-33a8-430b-b611-c31104080fdd with session id None to http://localhost:8001 at 1750984668.3751562, process time = 0.0099 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,375] INFO: Routing request 8e20a084-33a8-430b-b611-c31104080fdd with session id None to http://localhost:8002 at 1750984668.3754344, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,415] INFO: Routing request ac2179d5-488b-4b11-8a65-cd03cf5c321c with session id None to http://localhost:8001 at 1750984668.415038, process time = 0.0095 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,415] INFO: Routing request ac2179d5-488b-4b11-8a65-cd03cf5c321c with session id None to http://localhost:8002 at 1750984668.4152763, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,462] INFO: Routing request be04eb3a-93b8-4985-a280-2eb49cace340 with session id None to http://localhost:8001 at 1750984668.4622283, process time = 0.0091 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,462] INFO: Routing request be04eb3a-93b8-4985-a280-2eb49cace340 with session id None to http://localhost:8002 at 1750984668.462454, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,509] INFO: Routing request ddaf6e5f-5f8a-4195-9f6f-532e761f3595 with session id None to http://localhost:8001 at 1750984668.5089805, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,509] INFO: Routing request ddaf6e5f-5f8a-4195-9f6f-532e761f3595 with session id None to http://localhost:8002 at 1750984668.5092068, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,555] INFO: Routing request 3de0a304-7c65-48d7-8774-c9f3e6a39d20 with session id None to http://localhost:8001 at 1750984668.5551498, process time = 0.0090 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,555] INFO: Routing request 3de0a304-7c65-48d7-8774-c9f3e6a39d20 with session id None to http://localhost:8002 at 1750984668.555379, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,600] INFO: Routing request 3548b2ac-39e4-4f7d-b90f-c9466c73f8d9 with session id None to http://localhost:8001 at 1750984668.6000633, process time = 0.0087 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,600] INFO: Routing request 3548b2ac-39e4-4f7d-b90f-c9466c73f8d9 with session id None to http://localhost:8002 at 1750984668.6002853, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,646] INFO: Routing request f8a677f7-4536-4091-b29a-47024e583d26 with session id None to http://localhost:8001 at 1750984668.646061, process time = 0.0088 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,646] INFO: Routing request f8a677f7-4536-4091-b29a-47024e583d26 with session id None to http://localhost:8002 at 1750984668.6462815, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,692] INFO: Routing request 869a0887-ceaf-495f-9b87-f54ea9491ba8 with session id None to http://localhost:8001 at 1750984668.6927145, process time = 0.0093 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,692] INFO: Routing request 869a0887-ceaf-495f-9b87-f54ea9491ba8 with session id None to http://localhost:8002 at 1750984668.692946, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,737] INFO: Routing request 74817c90-8acc-412e-b275-ef2e796ea421 with session id None to http://localhost:8001 at 1750984668.737268, process time = 0.0084 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,737] INFO: Routing request 74817c90-8acc-412e-b275-ef2e796ea421 with session id None to http://localhost:8002 at 1750984668.7374759, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,782] INFO: Routing request f9281b4b-ec15-426e-b046-7e039bf669ae with session id None to http://localhost:8001 at 1750984668.782778, process time = 0.0094 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,783] INFO: Routing request f9281b4b-ec15-426e-b046-7e039bf669ae with session id None to http://localhost:8002 at 1750984668.782998, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,828] INFO: Routing request 7508d53f-289d-4e3b-858e-ec75e98cd928 with session id None to http://localhost:8001 at 1750984668.8283942, process time = 0.0086 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,828] INFO: Routing request 7508d53f-289d-4e3b-858e-ec75e98cd928 with session id None to http://localhost:8002 at 1750984668.828631, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,875] INFO: Routing request 9d618679-5490-4959-b7a7-d2660b1f9c6b with session id None to http://localhost:8001 at 1750984668.875015, process time = 0.0092 (request.py:373:vllm_router.services.request_service.request) + +[2025-06-27 00:37:48,875] INFO: Routing request 9d618679-5490-4959-b7a7-d2660b1f9c6b with session id None to http://localhost:8002 at 1750984668.8752494, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) diff --git a/tests/e2e/disaggregated_prefill/router.log b/tests/e2e/disaggregated_prefill/router.log new file mode 100644 index 000000000..f140371c3 --- /dev/null +++ b/tests/e2e/disaggregated_prefill/router.log @@ -0,0 +1,107 @@ +/home/zrfishnoodles/production-stack/src/vllm_router/parsers/parser.py:20: RuntimeWarning: Failed to read commit hash: +No module named 'vllm_router._version' + from vllm_router.version import __version__ +[2025-06-27 00:37:47,240] INFO: Initializing disaggregated prefill routing logic (routing_logic.py:505:vllm_router.routers.routing_logic) +INFO: Started server process [2126868] +INFO: Waiting for application startup. +[2025-06-27 00:37:47,321] INFO: httpx AsyncClient instantiated. Id 140506988436000 (httpx_client.py:31:vllm_router.httpx_client) +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:30080 (Press CTRL+C to quit) +[2025-06-27 00:37:47,331] INFO: Scraping metrics from 2 serving engine(s) (engine_stats.py:146:vllm_router.stats.engine_stats) +INFO: 127.0.0.1:58408 - "GET / HTTP/1.1" 404 Not Found +INFO: 127.0.0.1:58412 - "GET /health HTTP/1.1" 200 OK +INFO: 127.0.0.1:58418 - "GET /v1/models HTTP/1.1" 200 OK +[2025-06-27 00:37:47,924] DEBUG: No cache hit, forwarding request to backend (main_router.py:69:vllm_router.routers.main_router) +[2025-06-27 00:37:47,943] INFO: aaa3f812-fa10-453b-9b1d-e1ddb2321f46 prefill time (TTFT): 0.0189 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:47,943] INFO: Routing request aaa3f812-fa10-453b-9b1d-e1ddb2321f46 with session id None to http://localhost:8001 at 1750984667.9436514, process time = 0.0190 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:47,943] INFO: Routing request aaa3f812-fa10-453b-9b1d-e1ddb2321f46 with session id None to http://localhost:8002 at 1750984667.943875, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58420 - "POST /v1/chat/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:47,995] INFO: fbd909f7-ef67-4207-8fce-c50cd8efe500 prefill time (TTFT): 0.0093 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:47,995] INFO: Routing request fbd909f7-ef67-4207-8fce-c50cd8efe500 with session id None to http://localhost:8001 at 1750984667.9951248, process time = 0.0094 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:47,995] INFO: Routing request fbd909f7-ef67-4207-8fce-c50cd8efe500 with session id None to http://localhost:8002 at 1750984667.9953818, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58430 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,042] INFO: a2f307d1-7715-43da-b46f-a092387e95be prefill time (TTFT): 0.0095 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,042] INFO: Routing request a2f307d1-7715-43da-b46f-a092387e95be with session id None to http://localhost:8001 at 1750984668.0424466, process time = 0.0096 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,042] INFO: Routing request a2f307d1-7715-43da-b46f-a092387e95be with session id None to http://localhost:8002 at 1750984668.0426898, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58436 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,088] INFO: 7d1fa1b6-1675-4ba9-8b7e-17f2597d1f4d prefill time (TTFT): 0.0084 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,088] INFO: Routing request 7d1fa1b6-1675-4ba9-8b7e-17f2597d1f4d with session id None to http://localhost:8001 at 1750984668.0880878, process time = 0.0085 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,088] INFO: Routing request 7d1fa1b6-1675-4ba9-8b7e-17f2597d1f4d with session id None to http://localhost:8002 at 1750984668.088294, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58448 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,133] INFO: 1e7786b0-fd7c-44e0-9c76-b334ad4ec9f4 prefill time (TTFT): 0.0091 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,134] INFO: Routing request 1e7786b0-fd7c-44e0-9c76-b334ad4ec9f4 with session id None to http://localhost:8001 at 1750984668.1339238, process time = 0.0092 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,134] INFO: Routing request 1e7786b0-fd7c-44e0-9c76-b334ad4ec9f4 with session id None to http://localhost:8002 at 1750984668.134152, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58450 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,179] INFO: 39473ab2-3d09-46e2-8d03-23045d97a3c1 prefill time (TTFT): 0.0088 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,180] INFO: Routing request 39473ab2-3d09-46e2-8d03-23045d97a3c1 with session id None to http://localhost:8001 at 1750984668.1798692, process time = 0.0088 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,180] INFO: Routing request 39473ab2-3d09-46e2-8d03-23045d97a3c1 with session id None to http://localhost:8002 at 1750984668.1800926, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58466 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,229] INFO: eb045054-acf2-4fd0-8ae1-26b9242aedb5 prefill time (TTFT): 0.0097 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,229] INFO: Routing request eb045054-acf2-4fd0-8ae1-26b9242aedb5 with session id None to http://localhost:8001 at 1750984668.2297182, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,229] INFO: Routing request eb045054-acf2-4fd0-8ae1-26b9242aedb5 with session id None to http://localhost:8002 at 1750984668.2299647, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58482 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,278] INFO: 3edc6ddc-aa45-4a73-b4af-efd4a4b6dad1 prefill time (TTFT): 0.0103 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,278] INFO: Routing request 3edc6ddc-aa45-4a73-b4af-efd4a4b6dad1 with session id None to http://localhost:8001 at 1750984668.2783382, process time = 0.0103 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,278] INFO: Routing request 3edc6ddc-aa45-4a73-b4af-efd4a4b6dad1 with session id None to http://localhost:8002 at 1750984668.2786064, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58494 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,327] INFO: 00f2cada-fc33-4e9b-aa38-3ee578f76507 prefill time (TTFT): 0.0096 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,327] INFO: Routing request 00f2cada-fc33-4e9b-aa38-3ee578f76507 with session id None to http://localhost:8001 at 1750984668.3270485, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,327] INFO: Routing request 00f2cada-fc33-4e9b-aa38-3ee578f76507 with session id None to http://localhost:8002 at 1750984668.3272886, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58500 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,375] INFO: 8e20a084-33a8-430b-b611-c31104080fdd prefill time (TTFT): 0.0098 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,375] INFO: Routing request 8e20a084-33a8-430b-b611-c31104080fdd with session id None to http://localhost:8001 at 1750984668.3751562, process time = 0.0099 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,375] INFO: Routing request 8e20a084-33a8-430b-b611-c31104080fdd with session id None to http://localhost:8002 at 1750984668.3754344, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58504 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,415] INFO: ac2179d5-488b-4b11-8a65-cd03cf5c321c prefill time (TTFT): 0.0094 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,415] INFO: Routing request ac2179d5-488b-4b11-8a65-cd03cf5c321c with session id None to http://localhost:8001 at 1750984668.415038, process time = 0.0095 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,415] INFO: Routing request ac2179d5-488b-4b11-8a65-cd03cf5c321c with session id None to http://localhost:8002 at 1750984668.4152763, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58510 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,462] INFO: be04eb3a-93b8-4985-a280-2eb49cace340 prefill time (TTFT): 0.0091 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,462] INFO: Routing request be04eb3a-93b8-4985-a280-2eb49cace340 with session id None to http://localhost:8001 at 1750984668.4622283, process time = 0.0091 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,462] INFO: Routing request be04eb3a-93b8-4985-a280-2eb49cace340 with session id None to http://localhost:8002 at 1750984668.462454, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58524 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,509] INFO: ddaf6e5f-5f8a-4195-9f6f-532e761f3595 prefill time (TTFT): 0.0097 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,509] INFO: Routing request ddaf6e5f-5f8a-4195-9f6f-532e761f3595 with session id None to http://localhost:8001 at 1750984668.5089805, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,509] INFO: Routing request ddaf6e5f-5f8a-4195-9f6f-532e761f3595 with session id None to http://localhost:8002 at 1750984668.5092068, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58540 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,555] INFO: 3de0a304-7c65-48d7-8774-c9f3e6a39d20 prefill time (TTFT): 0.0089 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,555] INFO: Routing request 3de0a304-7c65-48d7-8774-c9f3e6a39d20 with session id None to http://localhost:8001 at 1750984668.5551498, process time = 0.0090 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,555] INFO: Routing request 3de0a304-7c65-48d7-8774-c9f3e6a39d20 with session id None to http://localhost:8002 at 1750984668.555379, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58544 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,600] INFO: 3548b2ac-39e4-4f7d-b90f-c9466c73f8d9 prefill time (TTFT): 0.0087 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,600] INFO: Routing request 3548b2ac-39e4-4f7d-b90f-c9466c73f8d9 with session id None to http://localhost:8001 at 1750984668.6000633, process time = 0.0087 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,600] INFO: Routing request 3548b2ac-39e4-4f7d-b90f-c9466c73f8d9 with session id None to http://localhost:8002 at 1750984668.6002853, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58546 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,646] INFO: f8a677f7-4536-4091-b29a-47024e583d26 prefill time (TTFT): 0.0088 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,646] INFO: Routing request f8a677f7-4536-4091-b29a-47024e583d26 with session id None to http://localhost:8001 at 1750984668.646061, process time = 0.0088 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,646] INFO: Routing request f8a677f7-4536-4091-b29a-47024e583d26 with session id None to http://localhost:8002 at 1750984668.6462815, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58548 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,692] INFO: 869a0887-ceaf-495f-9b87-f54ea9491ba8 prefill time (TTFT): 0.0092 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,692] INFO: Routing request 869a0887-ceaf-495f-9b87-f54ea9491ba8 with session id None to http://localhost:8001 at 1750984668.6927145, process time = 0.0093 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,692] INFO: Routing request 869a0887-ceaf-495f-9b87-f54ea9491ba8 with session id None to http://localhost:8002 at 1750984668.692946, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58564 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,737] INFO: 74817c90-8acc-412e-b275-ef2e796ea421 prefill time (TTFT): 0.0083 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,737] INFO: Routing request 74817c90-8acc-412e-b275-ef2e796ea421 with session id None to http://localhost:8001 at 1750984668.737268, process time = 0.0084 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,737] INFO: Routing request 74817c90-8acc-412e-b275-ef2e796ea421 with session id None to http://localhost:8002 at 1750984668.7374759, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58568 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,782] INFO: f9281b4b-ec15-426e-b046-7e039bf669ae prefill time (TTFT): 0.0093 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,782] INFO: Routing request f9281b4b-ec15-426e-b046-7e039bf669ae with session id None to http://localhost:8001 at 1750984668.782778, process time = 0.0094 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,783] INFO: Routing request f9281b4b-ec15-426e-b046-7e039bf669ae with session id None to http://localhost:8002 at 1750984668.782998, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58576 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,828] INFO: 7508d53f-289d-4e3b-858e-ec75e98cd928 prefill time (TTFT): 0.0085 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,828] INFO: Routing request 7508d53f-289d-4e3b-858e-ec75e98cd928 with session id None to http://localhost:8001 at 1750984668.8283942, process time = 0.0086 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,828] INFO: Routing request 7508d53f-289d-4e3b-858e-ec75e98cd928 with session id None to http://localhost:8002 at 1750984668.828631, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58592 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:48,875] INFO: 9d618679-5490-4959-b7a7-d2660b1f9c6b prefill time (TTFT): 0.0092 (request.py:372:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,875] INFO: Routing request 9d618679-5490-4959-b7a7-d2660b1f9c6b with session id None to http://localhost:8001 at 1750984668.875015, process time = 0.0092 (request.py:373:vllm_router.services.request_service.request) +[2025-06-27 00:37:48,875] INFO: Routing request 9d618679-5490-4959-b7a7-d2660b1f9c6b with session id None to http://localhost:8002 at 1750984668.8752494, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) +INFO: 127.0.0.1:58594 - "POST /v1/completions HTTP/1.1" 200 OK +INFO: Shutting down +INFO: Waiting for application shutdown. +[2025-06-27 00:37:49,128] INFO: httpx async_client.is_closed(): False - Now close it. Id (will be unchanged): 140506988436000 (httpx_client.py:35:vllm_router.httpx_client) +[2025-06-27 00:37:49,128] INFO: httpx async_client.is_closed(): True. Id (will be unchanged): 140506988436000 (httpx_client.py:39:vllm_router.httpx_client) +[2025-06-27 00:37:49,129] INFO: httpx AsyncClient closed (httpx_client.py:43:vllm_router.httpx_client) +INFO: Closing engine stats scraper +INFO: Closing service discovery module +INFO: Application shutdown complete. +INFO: Finished server process [2126868] diff --git a/tests/e2e/kvaware/router.log b/tests/e2e/kvaware/router.log new file mode 100644 index 000000000..0b94fb193 --- /dev/null +++ b/tests/e2e/kvaware/router.log @@ -0,0 +1,23 @@ +/home/zrfishnoodles/production-stack/src/vllm_router/parsers/parser.py:20: RuntimeWarning: Failed to read commit hash: +No module named 'vllm_router._version' + from vllm_router.version import __version__ +[2025-06-27 00:37:38,971] INFO: Initializing kvaware routing logic (routing_logic.py:493:vllm_router.routers.routing_logic) +[2025-06-27 00:37:38,971] INFO: Initializing KvawareRouter with port: 9000 (routing_logic.py:229:vllm_router.routers.routing_logic) +INFO: Started server process [2126604] +INFO: Waiting for application startup. +[2025-06-27 00:37:39,059] INFO: httpx AsyncClient instantiated. Id 140269717546144 (httpx_client.py:31:vllm_router.httpx_client) +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:30080 (Press CTRL+C to quit) +[2025-06-27 00:37:39,070] INFO: Scraping metrics from 2 serving engine(s) (engine_stats.py:146:vllm_router.stats.engine_stats) +INFO: 127.0.0.1:39940 - "GET / HTTP/1.1" 404 Not Found +INFO: 127.0.0.1:39946 - "GET /health HTTP/1.1" 200 OK +INFO: 127.0.0.1:39948 - "GET /v1/models HTTP/1.1" 200 OK +INFO: Shutting down +INFO: Waiting for application shutdown. +[2025-06-27 00:37:39,773] INFO: httpx async_client.is_closed(): False - Now close it. Id (will be unchanged): 140269717546144 (httpx_client.py:35:vllm_router.httpx_client) +[2025-06-27 00:37:39,778] INFO: httpx async_client.is_closed(): True. Id (will be unchanged): 140269717546144 (httpx_client.py:39:vllm_router.httpx_client) +[2025-06-27 00:37:39,779] INFO: httpx AsyncClient closed (httpx_client.py:43:vllm_router.httpx_client) +INFO: Closing engine stats scraper +INFO: Closing service discovery module +INFO: Application shutdown complete. +INFO: Finished server process [2126604] diff --git a/tests/e2e/prefixaware/prefixaware/routing_lines.txt b/tests/e2e/prefixaware/prefixaware/routing_lines.txt new file mode 100644 index 000000000..4793a276a --- /dev/null +++ b/tests/e2e/prefixaware/prefixaware/routing_lines.txt @@ -0,0 +1,241 @@ +[2025-06-27 00:37:18,048] INFO: Routing request de8f7549-da7e-4a04-921b-ca2a72860ac2 with session id None to http://localhost:8001 at 1750984638.0479178, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:18,161] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.1609552, process time = 0.0644 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:18,267] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.266841, process time = 0.0648 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:18,373] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.372828, process time = 0.0653 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:18,478] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.478203, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:18,582] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.5823739, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:18,688] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.6880844, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:18,794] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.7945032, process time = 0.0649 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:18,902] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.902532, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:19,008] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.0082219, process time = 0.0655 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:19,115] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.1151836, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:19,221] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.2210135, process time = 0.0650 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:19,328] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.328283, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:19,435] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.434806, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:19,541] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.5408177, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:19,641] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.6410813, process time = 0.0651 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:19,748] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.748613, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:19,860] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.860118, process time = 0.0702 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:19,971] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.9712284, process time = 0.0695 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:20,081] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.080897, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:20,191] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.1912558, process time = 0.0688 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:20,302] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.3020163, process time = 0.0682 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:20,411] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.411141, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:20,520] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.5200288, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:20,627] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.627369, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:20,736] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.7357795, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:20,845] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.845095, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:20,954] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.9543989, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:21,064] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.0637932, process time = 0.0686 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:21,172] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.1724975, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:21,280] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.280093, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:21,387] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.3867934, process time = 0.0662 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:21,495] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.495012, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:21,603] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.603595, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:21,710] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.7101386, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:21,817] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.8173587, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:21,927] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.9270127, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:22,037] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.037251, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:22,146] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.146045, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:22,255] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.25482, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:22,363] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.3632615, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:22,471] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.471359, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:22,579] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.5792356, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:22,686] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.6863146, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:22,795] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.7948356, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:22,910] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.9097764, process time = 0.0699 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:23,015] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.015756, process time = 0.0655 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:23,124] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.1244655, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:23,231] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.2315497, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:23,342] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.3425603, process time = 0.0703 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:23,450] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.4502356, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:23,558] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.5582733, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:23,666] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.6666589, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:23,774] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.7745495, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:23,882] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.8826053, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:23,991] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.991657, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:24,102] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.1023266, process time = 0.0676 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:24,215] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.2154505, process time = 0.0724 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:24,324] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.3241663, process time = 0.0678 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:24,433] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.4332342, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:24,540] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.5406127, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:24,649] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.6492922, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:24,758] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.7583642, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:24,866] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.8657682, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:24,975] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.975641, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:25,084] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.0846174, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:25,193] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.1928189, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:25,303] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.3037314, process time = 0.0696 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:25,412] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.4126892, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:25,521] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.5211408, process time = 0.0675 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:25,632] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.6324995, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:25,741] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.7407722, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:25,851] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.8508508, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:25,960] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.9604592, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:26,074] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984646.0739088, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:26,190] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984646.1897907, process time = 0.0675 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:26,304] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.3046403, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:26,413] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.4135015, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:26,522] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.521984, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:26,631] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.6308997, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:26,738] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.73849, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:26,846] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.8459172, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:26,956] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.956389, process time = 0.0699 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:27,064] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.0638897, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:27,170] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.1705747, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:27,281] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.2812662, process time = 0.0698 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:27,421] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.4211028, process time = 0.0977 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:27,541] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.5415916, process time = 0.0779 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:27,648] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.6481113, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:27,755] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.7556157, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:27,864] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.8643925, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:27,973] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984647.9731848, process time = 0.0657 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:28,084] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.0844905, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:28,196] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.196575, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:28,307] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.307035, process time = 0.0683 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:28,416] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.4163585, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:28,528] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.5286276, process time = 0.0717 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:28,638] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.6378303, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:28,751] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.7509508, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:28,858] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.8584132, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:28,967] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.9675782, process time = 0.0676 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:29,075] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.0751295, process time = 0.0664 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:29,181] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.1816075, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:29,291] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.2909973, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:29,400] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.4001827, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:29,508] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.5087323, process time = 0.0678 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:29,617] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.6168947, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:29,725] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.7254033, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:29,839] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.8394322, process time = 0.0697 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:29,954] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.954238, process time = 0.0731 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:30,062] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.062516, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:30,169] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.1692052, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:30,279] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.2792509, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:30,388] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.3883035, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:30,496] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.4963098, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:30,604] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.6043844, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:30,714] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.7143903, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:30,826] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.826696, process time = 0.0690 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:30,939] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.939221, process time = 0.0681 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:31,054] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984651.054538, process time = 0.0703 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:31,169] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984651.1689696, process time = 0.0706 (request.py:292:vllm_router.services.request_service.request) diff --git a/tests/e2e/prefixaware/router.log b/tests/e2e/prefixaware/router.log new file mode 100644 index 000000000..6e2b1ceb9 --- /dev/null +++ b/tests/e2e/prefixaware/router.log @@ -0,0 +1,1008 @@ +/home/zrfishnoodles/production-stack/src/vllm_router/parsers/parser.py:20: RuntimeWarning: Failed to read commit hash: +No module named 'vllm_router._version' + from vllm_router.version import __version__ +[2025-06-27 00:37:17,262] INFO: Initializing prefix-aware routing logic (routing_logic.py:502:vllm_router.routers.routing_logic) +INFO: Started server process [2126015] +INFO: Waiting for application startup. +[2025-06-27 00:37:17,352] INFO: httpx AsyncClient instantiated. Id 140498136262800 (httpx_client.py:31:vllm_router.httpx_client) +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:30080 (Press CTRL+C to quit) +[2025-06-27 00:37:17,360] INFO: Scraping metrics from 2 serving engine(s) (engine_stats.py:146:vllm_router.stats.engine_stats) +INFO: 127.0.0.1:57032 - "GET / HTTP/1.1" 404 Not Found +INFO: 127.0.0.1:57044 - "GET /health HTTP/1.1" 200 OK +INFO: 127.0.0.1:57058 - "GET /v1/models HTTP/1.1" 200 OK +[2025-06-27 00:37:17,981] DEBUG: No cache hit, forwarding request to backend (main_router.py:69:vllm_router.routers.main_router) +[2025-06-27 00:37:17,982] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,047] DEBUG: Routing request de8f7549-da7e-4a04-921b-ca2a72860ac2 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,047] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,048] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,048] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'content-length': '197'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,048] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,048] INFO: Routing request de8f7549-da7e-4a04-921b-ca2a72860ac2 with session id None to http://localhost:8001 at 1750984638.0479178, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57064 - "POST /v1/chat/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:18,096] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,160] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,160] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,161] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,161] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,161] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,161] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.1609552, process time = 0.0644 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57072 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:18,202] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,266] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,266] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,266] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,266] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,267] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,267] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.266841, process time = 0.0648 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57086 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:18,307] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,372] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,372] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,372] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,372] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,373] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,373] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.372828, process time = 0.0653 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57102 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:18,413] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,477] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,478] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,478] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,478] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,478] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,478] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.478203, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57110 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:18,517] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,581] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,582] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,582] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,582] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,582] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,582] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.5823739, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57118 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:18,622] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,687] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,688] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,688] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,688] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,688] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,688] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.6880844, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57132 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:18,729] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,793] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,794] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,794] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,794] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,794] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,794] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.7945032, process time = 0.0649 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57134 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:18,838] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,902] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,902] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,902] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,902] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,902] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:18,902] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.902532, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57150 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:18,942] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,007] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,008] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,008] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,008] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,008] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,008] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.0082219, process time = 0.0655 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57164 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:19,049] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,114] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,115] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,115] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,115] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,115] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,115] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.1151836, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57180 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:19,156] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,220] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,221] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,221] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,221] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,221] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,221] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.2210135, process time = 0.0650 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57190 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:19,262] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,327] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,328] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,328] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,328] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,328] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,328] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.328283, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57202 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:19,369] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,434] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,434] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,434] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,434] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,435] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,435] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.434806, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57210 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:19,475] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,540] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,540] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,540] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,540] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,541] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,541] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.5408177, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57214 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:19,576] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,640] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,641] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,641] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,641] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,641] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,641] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.6410813, process time = 0.0651 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57216 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:19,682] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,748] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,748] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,748] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,748] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,748] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,748] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.748613, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57228 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:19,789] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,859] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,860] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,860] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,860] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,860] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,860] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.860118, process time = 0.0702 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57242 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:19,901] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,970] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,971] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,971] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,971] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,971] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:19,971] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.9712284, process time = 0.0695 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57248 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:20,012] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,080] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,080] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,080] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,081] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,081] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,081] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.080897, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57260 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:20,122] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,190] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,191] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,191] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,191] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,191] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,191] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.1912558, process time = 0.0688 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57266 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:20,233] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,301] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,302] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,302] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,302] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,302] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,302] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.3020163, process time = 0.0682 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57270 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:20,344] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,410] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,411] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,411] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,411] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,411] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,411] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.411141, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57282 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:20,453] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,519] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,520] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,520] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,520] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,520] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,520] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.5200288, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57290 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:20,560] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,626] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,627] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,627] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,627] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,627] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,627] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.627369, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57302 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:20,668] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,735] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,735] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,735] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,736] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,736] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,736] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.7357795, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57316 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:20,777] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,844] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,845] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,845] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,845] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,845] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,845] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.845095, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57328 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:20,888] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,953] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,954] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,954] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,954] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,954] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:20,954] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.9543989, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57344 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:20,995] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,063] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,063] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,063] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,063] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,064] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,064] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.0637932, process time = 0.0686 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57346 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:21,106] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,171] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,172] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,172] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,172] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,172] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,172] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.1724975, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:57358 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:21,212] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,279] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,280] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,280] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,280] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,280] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,280] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.280093, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47866 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:21,320] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,386] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,386] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,386] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,386] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,386] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,387] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.3867934, process time = 0.0662 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47868 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:21,428] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,494] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,495] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,495] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,495] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,495] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,495] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.495012, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47884 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:21,536] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,603] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,603] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,603] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,603] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,603] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,603] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.603595, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47898 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:21,643] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,709] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,710] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,710] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,710] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,710] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,710] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.7101386, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47900 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:21,751] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,816] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,817] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,817] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,817] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,817] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,817] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.8173587, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47912 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:21,861] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,926] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,927] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,927] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,927] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,927] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:21,927] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.9270127, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47926 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:21,969] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,036] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,037] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,037] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,037] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,037] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,037] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.037251, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47928 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:22,078] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,145] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,146] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,146] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,146] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,146] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,146] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.146045, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47940 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:22,188] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,254] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,254] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,254] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,254] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,255] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,255] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.25482, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47950 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:22,295] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,362] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,363] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,363] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,363] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,363] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,363] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.3632615, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47966 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:22,404] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,470] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,471] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,471] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,471] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,471] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,471] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.471359, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47968 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:22,512] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,578] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,579] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,579] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,579] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,579] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,579] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.5792356, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47982 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:22,620] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,685] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,686] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,686] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,686] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,686] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,686] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.6863146, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:47998 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:22,727] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,794] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,794] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,794] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,795] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,795] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,795] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.7948356, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48010 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:22,839] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,909] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,909] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,909] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,909] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,909] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:22,910] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.9097764, process time = 0.0699 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48018 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:22,950] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,015] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,015] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,015] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,015] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,015] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,015] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.015756, process time = 0.0655 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48028 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:23,058] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,123] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,124] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,124] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,124] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,124] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,124] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.1244655, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48030 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:23,165] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,231] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,231] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,231] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,231] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,231] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,231] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.2315497, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48040 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:23,272] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,341] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,342] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,342] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,342] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,342] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,342] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.3425603, process time = 0.0703 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48052 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:23,383] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,449] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,450] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,450] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,450] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,450] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,450] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.4502356, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48054 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:23,491] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,557] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,558] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,558] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,558] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,558] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,558] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.5582733, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48060 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:23,599] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,666] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,666] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,666] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,666] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,666] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,666] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.6666589, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48074 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:23,707] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,773] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,774] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,774] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,774] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,774] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,774] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.7745495, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48084 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:23,814] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,881] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,882] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,882] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,882] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,882] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,882] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.8826053, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48098 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:23,924] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,990] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,991] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,991] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,991] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,991] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:23,991] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.991657, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48112 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:24,034] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,101] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,102] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,102] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,102] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,102] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,102] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.1023266, process time = 0.0676 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48116 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:24,143] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,214] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,215] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,215] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,215] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,215] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,215] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.2154505, process time = 0.0724 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48118 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:24,256] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,323] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,324] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,324] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,324] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,324] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,324] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.3241663, process time = 0.0678 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48124 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:24,366] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,432] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,433] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,433] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,433] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,433] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,433] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.4332342, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48132 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:24,473] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,539] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,540] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,540] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,540] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,540] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,540] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.5406127, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48144 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:24,582] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,648] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,649] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,649] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,649] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,649] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,649] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.6492922, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48152 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:24,690] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,757] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,758] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,758] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,758] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,758] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,758] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.7583642, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48168 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:24,799] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,865] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,865] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,865] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,865] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,865] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,866] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.8657682, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48184 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:24,907] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,974] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,975] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,975] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,975] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,975] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:24,975] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.975641, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48200 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:25,017] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,083] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,084] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,084] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,084] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,084] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,084] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.0846174, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48214 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:25,125] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,192] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,192] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,192] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,192] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,193] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,193] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.1928189, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48228 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:25,234] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,303] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,303] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,303] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,303] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,303] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,303] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.3037314, process time = 0.0696 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48238 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:25,345] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,411] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,412] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,412] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,412] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,412] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,412] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.4126892, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48244 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:25,453] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,520] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,521] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,521] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,521] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,521] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,521] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.5211408, process time = 0.0675 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48256 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:25,564] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,631] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,632] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,632] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,632] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,632] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,632] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.6324995, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48258 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:25,673] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,739] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,740] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,740] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,740] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,740] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,741] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.7407722, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48270 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:25,783] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,850] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,850] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,850] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,851] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,851] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,851] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.8508508, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48276 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:25,893] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,959] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,960] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,960] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,960] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,960] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:25,960] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.9604592, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48292 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:26,007] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,073] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,073] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,074] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,074] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,074] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,074] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984646.0739088, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48296 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:26,122] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,188] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,189] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,189] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,189] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,190] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,190] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984646.1897907, process time = 0.0675 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48312 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:26,237] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,303] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,304] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,304] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,304] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,304] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,304] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.3046403, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48328 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:26,346] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,412] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,413] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,413] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,413] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,413] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,413] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.4135015, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48344 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:26,454] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,521] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,522] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,522] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,522] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,522] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,522] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.521984, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48354 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:26,564] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,630] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,630] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,630] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,631] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,631] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,631] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.6308997, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48364 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:26,671] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,737] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,738] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,738] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,738] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,738] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,738] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.73849, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48380 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:26,779] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,845] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,845] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,846] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,846] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,846] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,846] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.8459172, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48392 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:26,886] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,955] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,956] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,956] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,956] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,956] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:26,956] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.956389, process time = 0.0699 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48398 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:26,997] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,063] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,063] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,063] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,064] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,064] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,064] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.0638897, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48400 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:27,104] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,169] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,170] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,170] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,170] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,170] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,170] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.1705747, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48406 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:27,211] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,280] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,281] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,281] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,281] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,281] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,281] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.2812662, process time = 0.0698 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48418 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:27,323] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,364] INFO: +================================================== +Server: http://localhost:8001 +Models: + - facebook/opt-125m + Engine Stats: Running Requests: 0.0, Queued Requests: 0.0, GPU Cache Hit Rate: 0.00 + Request Stats: QPS: 6.10, Avg Latency: 0.04, Avg ITL: -1, Prefill Requests: 0, Decoding Requests: 0, Swapped Requests: 0, Finished: 61, Uptime: 9.32 sec +-------------------------------------------------- +Server: http://localhost:8002 +Models: + - facebook/opt-125m + Engine Stats: Running Requests: 0.0, Queued Requests: 0.0, GPU Cache Hit Rate: 0.00 + Request Stats: QPS: 2.50, Avg Latency: 0.04, Avg ITL: -1, Prefill Requests: 0, Decoding Requests: 0, Swapped Requests: 0, Finished: 25, Uptime: 9.32 sec +-------------------------------------------------- +================================================== + (log_stats.py:115:vllm_router.stats.log_stats) +[2025-06-27 00:37:27,420] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,421] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,421] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,421] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,421] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,421] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.4211028, process time = 0.0977 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48432 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:27,463] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,484] INFO: Scraping metrics from 2 serving engine(s) (engine_stats.py:146:vllm_router.stats.engine_stats) +[2025-06-27 00:37:27,541] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,541] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,541] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,541] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,541] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,541] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.5415916, process time = 0.0779 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48434 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:27,582] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,647] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,648] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,648] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,648] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,648] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,648] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.6481113, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48436 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:27,689] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,754] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,755] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,755] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,755] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,755] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,755] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.7556157, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48446 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:27,797] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,863] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,864] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,864] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,864] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,864] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,864] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.8643925, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48456 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:27,907] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,972] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,973] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,973] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,973] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,973] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:27,973] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984647.9731848, process time = 0.0657 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48466 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:28,016] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,084] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,084] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,084] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,084] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,084] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,084] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.0844905, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48480 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:28,130] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,196] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,196] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,196] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,196] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,196] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,196] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.196575, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48486 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:28,238] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,306] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,307] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,307] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,307] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,307] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,307] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.307035, process time = 0.0683 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48488 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:28,349] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,415] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,416] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,416] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,416] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,416] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,416] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.4163585, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48502 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:28,457] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,528] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,528] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,528] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,528] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,528] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,528] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.5286276, process time = 0.0717 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48508 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:28,571] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,637] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,637] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,637] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,637] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,638] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,638] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.6378303, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48524 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:28,682] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,750] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,750] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,751] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,751] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,751] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,751] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.7509508, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48530 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:28,791] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,857] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,858] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,858] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,858] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,858] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,858] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.8584132, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48544 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:28,900] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,967] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,967] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,967] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,967] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,967] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:28,967] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.9675782, process time = 0.0676 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48560 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:29,008] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,074] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,075] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,075] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,075] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,075] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,075] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.0751295, process time = 0.0664 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48574 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:29,115] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,181] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,181] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,181] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,181] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,181] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,181] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.1816075, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48586 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:29,222] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,290] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,291] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,291] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,291] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,291] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,291] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.2909973, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48588 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:29,333] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,399] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,400] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,400] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,400] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,400] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,400] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.4001827, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48592 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:29,440] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,508] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,508] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,508] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,508] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,508] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,508] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.5087323, process time = 0.0678 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48606 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:29,549] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,616] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,616] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,616] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,617] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,617] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,617] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.6168947, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48614 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:29,657] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,725] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,725] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,725] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,725] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,725] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,725] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.7254033, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48622 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:29,769] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,838] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,839] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,839] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,839] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,839] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,839] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.8394322, process time = 0.0697 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48624 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:29,881] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,953] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,954] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,954] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,954] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,954] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:29,954] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.954238, process time = 0.0731 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48634 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:29,996] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,061] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,062] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,062] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,062] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,062] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,062] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.062516, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48644 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:30,102] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,168] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,169] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,169] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,169] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,169] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,169] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.1692052, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48646 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:30,212] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,278] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,279] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,279] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,279] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,279] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,279] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.2792509, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48662 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:30,320] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,387] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,388] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,388] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,388] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,388] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,388] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.3883035, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48678 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:30,429] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,495] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,496] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,496] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,496] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,496] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,496] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.4963098, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48684 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:30,536] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,603] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,604] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,604] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,604] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,604] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,604] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.6043844, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48696 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:30,646] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,713] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,714] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,714] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,714] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,714] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,714] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.7143903, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48706 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:30,757] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,825] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,826] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,826] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,826] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,826] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,826] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.826696, process time = 0.0690 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48712 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:30,871] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,938] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,939] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,939] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,939] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,939] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:30,939] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.939221, process time = 0.0681 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48722 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:30,984] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,053] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,054] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,054] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,054] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,054] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,054] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984651.054538, process time = 0.0703 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48736 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:31,098] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,168] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,168] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,169] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,169] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,169] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:31,169] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984651.1689696, process time = 0.0706 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:48750 - "POST /v1/completions HTTP/1.1" 200 OK +INFO: Shutting down +INFO: Waiting for application shutdown. +[2025-06-27 00:37:31,370] INFO: httpx async_client.is_closed(): False - Now close it. Id (will be unchanged): 140498136262800 (httpx_client.py:35:vllm_router.httpx_client) +[2025-06-27 00:37:31,370] INFO: httpx async_client.is_closed(): True. Id (will be unchanged): 140498136262800 (httpx_client.py:39:vllm_router.httpx_client) +[2025-06-27 00:37:31,370] INFO: httpx AsyncClient closed (httpx_client.py:43:vllm_router.httpx_client) +INFO: Closing engine stats scraper +INFO: Closing service discovery module +INFO: Application shutdown complete. +INFO: Finished server process [2126015] diff --git a/tests/e2e/roundrobin/roundrobin/routing_lines.txt b/tests/e2e/roundrobin/roundrobin/routing_lines.txt new file mode 100644 index 000000000..aaa5d6723 --- /dev/null +++ b/tests/e2e/roundrobin/roundrobin/routing_lines.txt @@ -0,0 +1,41 @@ +[2025-06-27 00:37:07,409] INFO: Routing request 67b4cb15-0604-4684-9aae-2bb756c68422 with session id None to http://localhost:8001 at 1750984627.4094923, process time = 0.0664 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:07,526] INFO: Routing request 657d70a1-6230-489a-ad5f-ac43fe0bff10 with session id None to http://localhost:8002 at 1750984627.5264473, process time = 0.0651 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:07,634] INFO: Routing request 5aadda92-f1fc-44d6-b9c6-e184c998dd2e with session id None to http://localhost:8001 at 1750984627.6342344, process time = 0.0654 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:07,746] INFO: Routing request 716ee5b3-267f-4f5f-8b25-cea3ea81b51f with session id None to http://localhost:8002 at 1750984627.7457457, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:07,854] INFO: Routing request 87517d4c-c4c1-49db-9d8b-58190d90cb56 with session id None to http://localhost:8001 at 1750984627.8541684, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:07,967] INFO: Routing request 3b2c6662-a083-4b22-87dd-48f8bcaf624d with session id None to http://localhost:8002 at 1750984627.9669526, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:08,074] INFO: Routing request ee73b7d3-68e9-45d0-82e0-7739d58679e3 with session id None to http://localhost:8001 at 1750984628.073846, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:08,180] INFO: Routing request 9ef6aa12-e06e-42e4-8444-2784413ea2ec with session id None to http://localhost:8002 at 1750984628.1804473, process time = 0.0653 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:08,286] INFO: Routing request 5e2c6e06-c4dc-44e5-8bf1-ec1e90de5493 with session id None to http://localhost:8001 at 1750984628.2863476, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:08,395] INFO: Routing request f3122044-e477-4dd1-ba4e-e544f76bfdfb with session id None to http://localhost:8002 at 1750984628.3953238, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:08,502] INFO: Routing request c735651b-3a68-4afe-bf9b-7ff20d971deb with session id None to http://localhost:8001 at 1750984628.5027213, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:08,610] INFO: Routing request 3a32598e-1a3a-4349-a600-ad1166511c27 with session id None to http://localhost:8002 at 1750984628.6098235, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:08,720] INFO: Routing request 270828df-e7a2-4da0-b5f8-319ee407deeb with session id None to http://localhost:8001 at 1750984628.7205312, process time = 0.0693 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:08,829] INFO: Routing request feac60a9-b5d3-4674-9a4a-48330609e3ee with session id None to http://localhost:8002 at 1750984628.829479, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:08,938] INFO: Routing request db4933f7-fbdf-477c-99fa-748b4de10cc0 with session id None to http://localhost:8001 at 1750984628.9385927, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:09,046] INFO: Routing request d4640044-ad26-40e4-acbc-9a782910ccb3 with session id None to http://localhost:8002 at 1750984629.0460603, process time = 0.0662 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:09,156] INFO: Routing request 0bbea844-2d4d-42a9-a872-131e888f5d93 with session id None to http://localhost:8001 at 1750984629.156086, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:09,265] INFO: Routing request 31a1f251-cc77-4593-900c-61f0038f8f95 with session id None to http://localhost:8002 at 1750984629.2656531, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:09,374] INFO: Routing request 95e0279b-8bb5-4219-8617-94ef3591b16b with session id None to http://localhost:8001 at 1750984629.3738842, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:09,482] INFO: Routing request d2571a45-72d0-48d4-ae05-199ba0f4340c with session id None to http://localhost:8002 at 1750984629.4817224, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) + +[2025-06-27 00:37:09,590] INFO: Routing request 9a777dfa-b6af-4709-b807-8cb33e1029db with session id None to http://localhost:8001 at 1750984629.5900624, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) diff --git a/tests/e2e/roundrobin/router.log b/tests/e2e/roundrobin/router.log new file mode 100644 index 000000000..1fe4c35d5 --- /dev/null +++ b/tests/e2e/roundrobin/router.log @@ -0,0 +1,191 @@ +/home/zrfishnoodles/production-stack/src/vllm_router/parsers/parser.py:20: RuntimeWarning: Failed to read commit hash: +No module named 'vllm_router._version' + from vllm_router.version import __version__ +[2025-06-27 00:37:06,692] INFO: Initializing round-robin routing logic (routing_logic.py:487:vllm_router.routers.routing_logic) +INFO: Started server process [2125604] +INFO: Waiting for application startup. +[2025-06-27 00:37:06,777] INFO: httpx AsyncClient instantiated. Id 139673895638912 (httpx_client.py:31:vllm_router.httpx_client) +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:30080 (Press CTRL+C to quit) +[2025-06-27 00:37:06,787] INFO: Scraping metrics from 2 serving engine(s) (engine_stats.py:146:vllm_router.stats.engine_stats) +INFO: 127.0.0.1:49218 - "GET / HTTP/1.1" 404 Not Found +INFO: 127.0.0.1:49234 - "GET /health HTTP/1.1" 200 OK +INFO: 127.0.0.1:49246 - "GET /v1/models HTTP/1.1" 200 OK +[2025-06-27 00:37:07,342] DEBUG: No cache hit, forwarding request to backend (main_router.py:69:vllm_router.routers.main_router) +[2025-06-27 00:37:07,343] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,408] DEBUG: Routing request 67b4cb15-0604-4684-9aae-2bb756c68422 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,409] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,409] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,409] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'content-length': '197'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,409] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,409] INFO: Routing request 67b4cb15-0604-4684-9aae-2bb756c68422 with session id None to http://localhost:8001 at 1750984627.4094923, process time = 0.0664 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49252 - "POST /v1/chat/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:07,461] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,525] DEBUG: Routing request 657d70a1-6230-489a-ad5f-ac43fe0bff10 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,526] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,526] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,526] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '657d70a1-6230-489a-ad5f-ac43fe0bff10', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,526] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,526] INFO: Routing request 657d70a1-6230-489a-ad5f-ac43fe0bff10 with session id None to http://localhost:8002 at 1750984627.5264473, process time = 0.0651 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49264 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:07,568] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,633] DEBUG: Routing request 5aadda92-f1fc-44d6-b9c6-e184c998dd2e for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,634] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,634] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,634] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '5aadda92-f1fc-44d6-b9c6-e184c998dd2e', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,634] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,634] INFO: Routing request 5aadda92-f1fc-44d6-b9c6-e184c998dd2e with session id None to http://localhost:8001 at 1750984627.6342344, process time = 0.0654 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49274 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:07,678] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,745] DEBUG: Routing request 716ee5b3-267f-4f5f-8b25-cea3ea81b51f for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,745] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,745] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,745] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '716ee5b3-267f-4f5f-8b25-cea3ea81b51f', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,746] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,746] INFO: Routing request 716ee5b3-267f-4f5f-8b25-cea3ea81b51f with session id None to http://localhost:8002 at 1750984627.7457457, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49290 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:07,787] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,853] DEBUG: Routing request 87517d4c-c4c1-49db-9d8b-58190d90cb56 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,854] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,854] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,854] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '87517d4c-c4c1-49db-9d8b-58190d90cb56', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,854] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,854] INFO: Routing request 87517d4c-c4c1-49db-9d8b-58190d90cb56 with session id None to http://localhost:8001 at 1750984627.8541684, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49300 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:07,898] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,966] DEBUG: Routing request 3b2c6662-a083-4b22-87dd-48f8bcaf624d for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,966] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,967] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,967] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3b2c6662-a083-4b22-87dd-48f8bcaf624d', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,967] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:07,967] INFO: Routing request 3b2c6662-a083-4b22-87dd-48f8bcaf624d with session id None to http://localhost:8002 at 1750984627.9669526, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49312 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:08,007] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,073] DEBUG: Routing request ee73b7d3-68e9-45d0-82e0-7739d58679e3 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,073] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,073] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,074] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'ee73b7d3-68e9-45d0-82e0-7739d58679e3', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,074] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,074] INFO: Routing request ee73b7d3-68e9-45d0-82e0-7739d58679e3 with session id None to http://localhost:8001 at 1750984628.073846, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49322 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:08,115] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,179] DEBUG: Routing request 9ef6aa12-e06e-42e4-8444-2784413ea2ec for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,180] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,180] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,180] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '9ef6aa12-e06e-42e4-8444-2784413ea2ec', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,180] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,180] INFO: Routing request 9ef6aa12-e06e-42e4-8444-2784413ea2ec with session id None to http://localhost:8002 at 1750984628.1804473, process time = 0.0653 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49330 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:08,220] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,285] DEBUG: Routing request 5e2c6e06-c4dc-44e5-8bf1-ec1e90de5493 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,286] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,286] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,286] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '5e2c6e06-c4dc-44e5-8bf1-ec1e90de5493', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,286] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,286] INFO: Routing request 5e2c6e06-c4dc-44e5-8bf1-ec1e90de5493 with session id None to http://localhost:8001 at 1750984628.2863476, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49344 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:08,329] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,394] DEBUG: Routing request f3122044-e477-4dd1-ba4e-e544f76bfdfb for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,395] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,395] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,395] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'f3122044-e477-4dd1-ba4e-e544f76bfdfb', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,395] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,395] INFO: Routing request f3122044-e477-4dd1-ba4e-e544f76bfdfb with session id None to http://localhost:8002 at 1750984628.3953238, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49350 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:08,436] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,502] DEBUG: Routing request c735651b-3a68-4afe-bf9b-7ff20d971deb for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,502] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,502] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,502] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'c735651b-3a68-4afe-bf9b-7ff20d971deb', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,502] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,502] INFO: Routing request c735651b-3a68-4afe-bf9b-7ff20d971deb with session id None to http://localhost:8001 at 1750984628.5027213, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49360 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:08,544] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,609] DEBUG: Routing request 3a32598e-1a3a-4349-a600-ad1166511c27 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,609] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,609] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,609] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3a32598e-1a3a-4349-a600-ad1166511c27', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,610] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,610] INFO: Routing request 3a32598e-1a3a-4349-a600-ad1166511c27 with session id None to http://localhost:8002 at 1750984628.6098235, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49372 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:08,651] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,720] DEBUG: Routing request 270828df-e7a2-4da0-b5f8-319ee407deeb for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,720] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,720] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,720] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '270828df-e7a2-4da0-b5f8-319ee407deeb', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,720] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,720] INFO: Routing request 270828df-e7a2-4da0-b5f8-319ee407deeb with session id None to http://localhost:8001 at 1750984628.7205312, process time = 0.0693 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49388 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:08,761] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,828] DEBUG: Routing request feac60a9-b5d3-4674-9a4a-48330609e3ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,829] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,829] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,829] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'feac60a9-b5d3-4674-9a4a-48330609e3ee', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,829] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,829] INFO: Routing request feac60a9-b5d3-4674-9a4a-48330609e3ee with session id None to http://localhost:8002 at 1750984628.829479, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49394 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:08,872] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,938] DEBUG: Routing request db4933f7-fbdf-477c-99fa-748b4de10cc0 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,938] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,938] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,938] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'db4933f7-fbdf-477c-99fa-748b4de10cc0', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,938] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:08,938] INFO: Routing request db4933f7-fbdf-477c-99fa-748b4de10cc0 with session id None to http://localhost:8001 at 1750984628.9385927, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49398 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:08,979] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,045] DEBUG: Routing request d4640044-ad26-40e4-acbc-9a782910ccb3 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,046] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,046] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,046] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4640044-ad26-40e4-acbc-9a782910ccb3', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,046] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,046] INFO: Routing request d4640044-ad26-40e4-acbc-9a782910ccb3 with session id None to http://localhost:8002 at 1750984629.0460603, process time = 0.0662 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49400 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:09,087] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,155] DEBUG: Routing request 0bbea844-2d4d-42a9-a872-131e888f5d93 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,156] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,156] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,156] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '0bbea844-2d4d-42a9-a872-131e888f5d93', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,156] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,156] INFO: Routing request 0bbea844-2d4d-42a9-a872-131e888f5d93 with session id None to http://localhost:8001 at 1750984629.156086, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49402 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:09,198] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,265] DEBUG: Routing request 31a1f251-cc77-4593-900c-61f0038f8f95 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,265] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,265] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,265] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '31a1f251-cc77-4593-900c-61f0038f8f95', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,265] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,265] INFO: Routing request 31a1f251-cc77-4593-900c-61f0038f8f95 with session id None to http://localhost:8002 at 1750984629.2656531, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49414 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:09,306] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,373] DEBUG: Routing request 95e0279b-8bb5-4219-8617-94ef3591b16b for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,373] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,373] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,374] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '95e0279b-8bb5-4219-8617-94ef3591b16b', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,374] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,374] INFO: Routing request 95e0279b-8bb5-4219-8617-94ef3591b16b with session id None to http://localhost:8001 at 1750984629.3738842, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49416 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:09,415] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,481] DEBUG: Routing request d2571a45-72d0-48d4-ae05-199ba0f4340c for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,481] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,481] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,481] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd2571a45-72d0-48d4-ae05-199ba0f4340c', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,481] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,482] INFO: Routing request d2571a45-72d0-48d4-ae05-199ba0f4340c with session id None to http://localhost:8002 at 1750984629.4817224, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49418 - "POST /v1/completions HTTP/1.1" 200 OK +[2025-06-27 00:37:09,522] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,589] DEBUG: Routing request 9a777dfa-b6af-4709-b807-8cb33e1029db for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,590] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,590] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,590] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '9a777dfa-b6af-4709-b807-8cb33e1029db', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,590] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) +[2025-06-27 00:37:09,590] INFO: Routing request 9a777dfa-b6af-4709-b807-8cb33e1029db with session id None to http://localhost:8001 at 1750984629.5900624, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) +INFO: 127.0.0.1:49430 - "POST /v1/completions HTTP/1.1" 200 OK +INFO: Shutting down +INFO: Waiting for application shutdown. +[2025-06-27 00:37:09,792] INFO: httpx async_client.is_closed(): False - Now close it. Id (will be unchanged): 139673895638912 (httpx_client.py:35:vllm_router.httpx_client) +[2025-06-27 00:37:09,792] INFO: httpx async_client.is_closed(): True. Id (will be unchanged): 139673895638912 (httpx_client.py:39:vllm_router.httpx_client) +[2025-06-27 00:37:09,792] INFO: httpx AsyncClient closed (httpx_client.py:43:vllm_router.httpx_client) +INFO: Closing engine stats scraper +INFO: Closing service discovery module +INFO: Application shutdown complete. +INFO: Finished server process [2125604] From 2b3649da200a600f8787b10121ca0641320d1f7f Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Mon, 23 Jun 2025 20:28:58 +0000 Subject: [PATCH 05/12] refactor the code Signed-off-by: Rui Zhang --- .github/workflows/router-e2e-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/router-e2e-test.yml b/.github/workflows/router-e2e-test.yml index 29ec40145..85f7c4c47 100644 --- a/.github/workflows/router-e2e-test.yml +++ b/.github/workflows/router-e2e-test.yml @@ -219,7 +219,7 @@ jobs: uses: actions/upload-artifact@v4 if: always() with: - name: static-discovery-${{ matrix.strategy }}-test-results-pr-${{ github.event.pull_request.number || 'main' }} + name: static-discovery-test-results-pr-${{ github.event.pull_request.number || 'main' }} path: | ${{ env.LOG_DIR }}/* From 2a96fcd82aaf75596f56657af372478660add58a Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Fri, 27 Jun 2025 01:06:28 +0000 Subject: [PATCH 06/12] [CI] refactor Signed-off-by: Rui Zhang --- .../disaggregated_prefill/routing_lines.txt | 83 -- tests/e2e/disaggregated_prefill/router.log | 107 -- tests/e2e/kvaware/router.log | 23 - .../prefixaware/prefixaware/routing_lines.txt | 241 ---- tests/e2e/prefixaware/router.log | 1008 ----------------- .../roundrobin/roundrobin/routing_lines.txt | 41 - tests/e2e/roundrobin/router.log | 191 ---- 7 files changed, 1694 deletions(-) delete mode 100644 tests/e2e/disaggregated_prefill/disaggregated_prefill/routing_lines.txt delete mode 100644 tests/e2e/disaggregated_prefill/router.log delete mode 100644 tests/e2e/kvaware/router.log delete mode 100644 tests/e2e/prefixaware/prefixaware/routing_lines.txt delete mode 100644 tests/e2e/prefixaware/router.log delete mode 100644 tests/e2e/roundrobin/roundrobin/routing_lines.txt delete mode 100644 tests/e2e/roundrobin/router.log diff --git a/tests/e2e/disaggregated_prefill/disaggregated_prefill/routing_lines.txt b/tests/e2e/disaggregated_prefill/disaggregated_prefill/routing_lines.txt deleted file mode 100644 index 012d45b3c..000000000 --- a/tests/e2e/disaggregated_prefill/disaggregated_prefill/routing_lines.txt +++ /dev/null @@ -1,83 +0,0 @@ -[2025-06-27 00:37:47,943] INFO: Routing request aaa3f812-fa10-453b-9b1d-e1ddb2321f46 with session id None to http://localhost:8001 at 1750984667.9436514, process time = 0.0190 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:47,943] INFO: Routing request aaa3f812-fa10-453b-9b1d-e1ddb2321f46 with session id None to http://localhost:8002 at 1750984667.943875, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:47,995] INFO: Routing request fbd909f7-ef67-4207-8fce-c50cd8efe500 with session id None to http://localhost:8001 at 1750984667.9951248, process time = 0.0094 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:47,995] INFO: Routing request fbd909f7-ef67-4207-8fce-c50cd8efe500 with session id None to http://localhost:8002 at 1750984667.9953818, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,042] INFO: Routing request a2f307d1-7715-43da-b46f-a092387e95be with session id None to http://localhost:8001 at 1750984668.0424466, process time = 0.0096 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,042] INFO: Routing request a2f307d1-7715-43da-b46f-a092387e95be with session id None to http://localhost:8002 at 1750984668.0426898, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,088] INFO: Routing request 7d1fa1b6-1675-4ba9-8b7e-17f2597d1f4d with session id None to http://localhost:8001 at 1750984668.0880878, process time = 0.0085 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,088] INFO: Routing request 7d1fa1b6-1675-4ba9-8b7e-17f2597d1f4d with session id None to http://localhost:8002 at 1750984668.088294, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,134] INFO: Routing request 1e7786b0-fd7c-44e0-9c76-b334ad4ec9f4 with session id None to http://localhost:8001 at 1750984668.1339238, process time = 0.0092 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,134] INFO: Routing request 1e7786b0-fd7c-44e0-9c76-b334ad4ec9f4 with session id None to http://localhost:8002 at 1750984668.134152, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,180] INFO: Routing request 39473ab2-3d09-46e2-8d03-23045d97a3c1 with session id None to http://localhost:8001 at 1750984668.1798692, process time = 0.0088 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,180] INFO: Routing request 39473ab2-3d09-46e2-8d03-23045d97a3c1 with session id None to http://localhost:8002 at 1750984668.1800926, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,229] INFO: Routing request eb045054-acf2-4fd0-8ae1-26b9242aedb5 with session id None to http://localhost:8001 at 1750984668.2297182, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,229] INFO: Routing request eb045054-acf2-4fd0-8ae1-26b9242aedb5 with session id None to http://localhost:8002 at 1750984668.2299647, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,278] INFO: Routing request 3edc6ddc-aa45-4a73-b4af-efd4a4b6dad1 with session id None to http://localhost:8001 at 1750984668.2783382, process time = 0.0103 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,278] INFO: Routing request 3edc6ddc-aa45-4a73-b4af-efd4a4b6dad1 with session id None to http://localhost:8002 at 1750984668.2786064, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,327] INFO: Routing request 00f2cada-fc33-4e9b-aa38-3ee578f76507 with session id None to http://localhost:8001 at 1750984668.3270485, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,327] INFO: Routing request 00f2cada-fc33-4e9b-aa38-3ee578f76507 with session id None to http://localhost:8002 at 1750984668.3272886, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,375] INFO: Routing request 8e20a084-33a8-430b-b611-c31104080fdd with session id None to http://localhost:8001 at 1750984668.3751562, process time = 0.0099 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,375] INFO: Routing request 8e20a084-33a8-430b-b611-c31104080fdd with session id None to http://localhost:8002 at 1750984668.3754344, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,415] INFO: Routing request ac2179d5-488b-4b11-8a65-cd03cf5c321c with session id None to http://localhost:8001 at 1750984668.415038, process time = 0.0095 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,415] INFO: Routing request ac2179d5-488b-4b11-8a65-cd03cf5c321c with session id None to http://localhost:8002 at 1750984668.4152763, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,462] INFO: Routing request be04eb3a-93b8-4985-a280-2eb49cace340 with session id None to http://localhost:8001 at 1750984668.4622283, process time = 0.0091 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,462] INFO: Routing request be04eb3a-93b8-4985-a280-2eb49cace340 with session id None to http://localhost:8002 at 1750984668.462454, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,509] INFO: Routing request ddaf6e5f-5f8a-4195-9f6f-532e761f3595 with session id None to http://localhost:8001 at 1750984668.5089805, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,509] INFO: Routing request ddaf6e5f-5f8a-4195-9f6f-532e761f3595 with session id None to http://localhost:8002 at 1750984668.5092068, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,555] INFO: Routing request 3de0a304-7c65-48d7-8774-c9f3e6a39d20 with session id None to http://localhost:8001 at 1750984668.5551498, process time = 0.0090 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,555] INFO: Routing request 3de0a304-7c65-48d7-8774-c9f3e6a39d20 with session id None to http://localhost:8002 at 1750984668.555379, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,600] INFO: Routing request 3548b2ac-39e4-4f7d-b90f-c9466c73f8d9 with session id None to http://localhost:8001 at 1750984668.6000633, process time = 0.0087 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,600] INFO: Routing request 3548b2ac-39e4-4f7d-b90f-c9466c73f8d9 with session id None to http://localhost:8002 at 1750984668.6002853, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,646] INFO: Routing request f8a677f7-4536-4091-b29a-47024e583d26 with session id None to http://localhost:8001 at 1750984668.646061, process time = 0.0088 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,646] INFO: Routing request f8a677f7-4536-4091-b29a-47024e583d26 with session id None to http://localhost:8002 at 1750984668.6462815, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,692] INFO: Routing request 869a0887-ceaf-495f-9b87-f54ea9491ba8 with session id None to http://localhost:8001 at 1750984668.6927145, process time = 0.0093 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,692] INFO: Routing request 869a0887-ceaf-495f-9b87-f54ea9491ba8 with session id None to http://localhost:8002 at 1750984668.692946, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,737] INFO: Routing request 74817c90-8acc-412e-b275-ef2e796ea421 with session id None to http://localhost:8001 at 1750984668.737268, process time = 0.0084 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,737] INFO: Routing request 74817c90-8acc-412e-b275-ef2e796ea421 with session id None to http://localhost:8002 at 1750984668.7374759, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,782] INFO: Routing request f9281b4b-ec15-426e-b046-7e039bf669ae with session id None to http://localhost:8001 at 1750984668.782778, process time = 0.0094 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,783] INFO: Routing request f9281b4b-ec15-426e-b046-7e039bf669ae with session id None to http://localhost:8002 at 1750984668.782998, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,828] INFO: Routing request 7508d53f-289d-4e3b-858e-ec75e98cd928 with session id None to http://localhost:8001 at 1750984668.8283942, process time = 0.0086 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,828] INFO: Routing request 7508d53f-289d-4e3b-858e-ec75e98cd928 with session id None to http://localhost:8002 at 1750984668.828631, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,875] INFO: Routing request 9d618679-5490-4959-b7a7-d2660b1f9c6b with session id None to http://localhost:8001 at 1750984668.875015, process time = 0.0092 (request.py:373:vllm_router.services.request_service.request) - -[2025-06-27 00:37:48,875] INFO: Routing request 9d618679-5490-4959-b7a7-d2660b1f9c6b with session id None to http://localhost:8002 at 1750984668.8752494, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) diff --git a/tests/e2e/disaggregated_prefill/router.log b/tests/e2e/disaggregated_prefill/router.log deleted file mode 100644 index f140371c3..000000000 --- a/tests/e2e/disaggregated_prefill/router.log +++ /dev/null @@ -1,107 +0,0 @@ -/home/zrfishnoodles/production-stack/src/vllm_router/parsers/parser.py:20: RuntimeWarning: Failed to read commit hash: -No module named 'vllm_router._version' - from vllm_router.version import __version__ -[2025-06-27 00:37:47,240] INFO: Initializing disaggregated prefill routing logic (routing_logic.py:505:vllm_router.routers.routing_logic) -INFO: Started server process [2126868] -INFO: Waiting for application startup. -[2025-06-27 00:37:47,321] INFO: httpx AsyncClient instantiated. Id 140506988436000 (httpx_client.py:31:vllm_router.httpx_client) -INFO: Application startup complete. -INFO: Uvicorn running on http://0.0.0.0:30080 (Press CTRL+C to quit) -[2025-06-27 00:37:47,331] INFO: Scraping metrics from 2 serving engine(s) (engine_stats.py:146:vllm_router.stats.engine_stats) -INFO: 127.0.0.1:58408 - "GET / HTTP/1.1" 404 Not Found -INFO: 127.0.0.1:58412 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:58418 - "GET /v1/models HTTP/1.1" 200 OK -[2025-06-27 00:37:47,924] DEBUG: No cache hit, forwarding request to backend (main_router.py:69:vllm_router.routers.main_router) -[2025-06-27 00:37:47,943] INFO: aaa3f812-fa10-453b-9b1d-e1ddb2321f46 prefill time (TTFT): 0.0189 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:47,943] INFO: Routing request aaa3f812-fa10-453b-9b1d-e1ddb2321f46 with session id None to http://localhost:8001 at 1750984667.9436514, process time = 0.0190 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:47,943] INFO: Routing request aaa3f812-fa10-453b-9b1d-e1ddb2321f46 with session id None to http://localhost:8002 at 1750984667.943875, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58420 - "POST /v1/chat/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:47,995] INFO: fbd909f7-ef67-4207-8fce-c50cd8efe500 prefill time (TTFT): 0.0093 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:47,995] INFO: Routing request fbd909f7-ef67-4207-8fce-c50cd8efe500 with session id None to http://localhost:8001 at 1750984667.9951248, process time = 0.0094 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:47,995] INFO: Routing request fbd909f7-ef67-4207-8fce-c50cd8efe500 with session id None to http://localhost:8002 at 1750984667.9953818, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58430 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,042] INFO: a2f307d1-7715-43da-b46f-a092387e95be prefill time (TTFT): 0.0095 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,042] INFO: Routing request a2f307d1-7715-43da-b46f-a092387e95be with session id None to http://localhost:8001 at 1750984668.0424466, process time = 0.0096 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,042] INFO: Routing request a2f307d1-7715-43da-b46f-a092387e95be with session id None to http://localhost:8002 at 1750984668.0426898, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58436 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,088] INFO: 7d1fa1b6-1675-4ba9-8b7e-17f2597d1f4d prefill time (TTFT): 0.0084 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,088] INFO: Routing request 7d1fa1b6-1675-4ba9-8b7e-17f2597d1f4d with session id None to http://localhost:8001 at 1750984668.0880878, process time = 0.0085 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,088] INFO: Routing request 7d1fa1b6-1675-4ba9-8b7e-17f2597d1f4d with session id None to http://localhost:8002 at 1750984668.088294, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58448 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,133] INFO: 1e7786b0-fd7c-44e0-9c76-b334ad4ec9f4 prefill time (TTFT): 0.0091 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,134] INFO: Routing request 1e7786b0-fd7c-44e0-9c76-b334ad4ec9f4 with session id None to http://localhost:8001 at 1750984668.1339238, process time = 0.0092 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,134] INFO: Routing request 1e7786b0-fd7c-44e0-9c76-b334ad4ec9f4 with session id None to http://localhost:8002 at 1750984668.134152, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58450 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,179] INFO: 39473ab2-3d09-46e2-8d03-23045d97a3c1 prefill time (TTFT): 0.0088 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,180] INFO: Routing request 39473ab2-3d09-46e2-8d03-23045d97a3c1 with session id None to http://localhost:8001 at 1750984668.1798692, process time = 0.0088 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,180] INFO: Routing request 39473ab2-3d09-46e2-8d03-23045d97a3c1 with session id None to http://localhost:8002 at 1750984668.1800926, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58466 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,229] INFO: eb045054-acf2-4fd0-8ae1-26b9242aedb5 prefill time (TTFT): 0.0097 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,229] INFO: Routing request eb045054-acf2-4fd0-8ae1-26b9242aedb5 with session id None to http://localhost:8001 at 1750984668.2297182, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,229] INFO: Routing request eb045054-acf2-4fd0-8ae1-26b9242aedb5 with session id None to http://localhost:8002 at 1750984668.2299647, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58482 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,278] INFO: 3edc6ddc-aa45-4a73-b4af-efd4a4b6dad1 prefill time (TTFT): 0.0103 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,278] INFO: Routing request 3edc6ddc-aa45-4a73-b4af-efd4a4b6dad1 with session id None to http://localhost:8001 at 1750984668.2783382, process time = 0.0103 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,278] INFO: Routing request 3edc6ddc-aa45-4a73-b4af-efd4a4b6dad1 with session id None to http://localhost:8002 at 1750984668.2786064, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58494 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,327] INFO: 00f2cada-fc33-4e9b-aa38-3ee578f76507 prefill time (TTFT): 0.0096 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,327] INFO: Routing request 00f2cada-fc33-4e9b-aa38-3ee578f76507 with session id None to http://localhost:8001 at 1750984668.3270485, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,327] INFO: Routing request 00f2cada-fc33-4e9b-aa38-3ee578f76507 with session id None to http://localhost:8002 at 1750984668.3272886, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58500 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,375] INFO: 8e20a084-33a8-430b-b611-c31104080fdd prefill time (TTFT): 0.0098 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,375] INFO: Routing request 8e20a084-33a8-430b-b611-c31104080fdd with session id None to http://localhost:8001 at 1750984668.3751562, process time = 0.0099 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,375] INFO: Routing request 8e20a084-33a8-430b-b611-c31104080fdd with session id None to http://localhost:8002 at 1750984668.3754344, process time = 0.0003 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58504 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,415] INFO: ac2179d5-488b-4b11-8a65-cd03cf5c321c prefill time (TTFT): 0.0094 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,415] INFO: Routing request ac2179d5-488b-4b11-8a65-cd03cf5c321c with session id None to http://localhost:8001 at 1750984668.415038, process time = 0.0095 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,415] INFO: Routing request ac2179d5-488b-4b11-8a65-cd03cf5c321c with session id None to http://localhost:8002 at 1750984668.4152763, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58510 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,462] INFO: be04eb3a-93b8-4985-a280-2eb49cace340 prefill time (TTFT): 0.0091 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,462] INFO: Routing request be04eb3a-93b8-4985-a280-2eb49cace340 with session id None to http://localhost:8001 at 1750984668.4622283, process time = 0.0091 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,462] INFO: Routing request be04eb3a-93b8-4985-a280-2eb49cace340 with session id None to http://localhost:8002 at 1750984668.462454, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58524 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,509] INFO: ddaf6e5f-5f8a-4195-9f6f-532e761f3595 prefill time (TTFT): 0.0097 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,509] INFO: Routing request ddaf6e5f-5f8a-4195-9f6f-532e761f3595 with session id None to http://localhost:8001 at 1750984668.5089805, process time = 0.0097 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,509] INFO: Routing request ddaf6e5f-5f8a-4195-9f6f-532e761f3595 with session id None to http://localhost:8002 at 1750984668.5092068, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58540 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,555] INFO: 3de0a304-7c65-48d7-8774-c9f3e6a39d20 prefill time (TTFT): 0.0089 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,555] INFO: Routing request 3de0a304-7c65-48d7-8774-c9f3e6a39d20 with session id None to http://localhost:8001 at 1750984668.5551498, process time = 0.0090 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,555] INFO: Routing request 3de0a304-7c65-48d7-8774-c9f3e6a39d20 with session id None to http://localhost:8002 at 1750984668.555379, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58544 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,600] INFO: 3548b2ac-39e4-4f7d-b90f-c9466c73f8d9 prefill time (TTFT): 0.0087 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,600] INFO: Routing request 3548b2ac-39e4-4f7d-b90f-c9466c73f8d9 with session id None to http://localhost:8001 at 1750984668.6000633, process time = 0.0087 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,600] INFO: Routing request 3548b2ac-39e4-4f7d-b90f-c9466c73f8d9 with session id None to http://localhost:8002 at 1750984668.6002853, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58546 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,646] INFO: f8a677f7-4536-4091-b29a-47024e583d26 prefill time (TTFT): 0.0088 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,646] INFO: Routing request f8a677f7-4536-4091-b29a-47024e583d26 with session id None to http://localhost:8001 at 1750984668.646061, process time = 0.0088 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,646] INFO: Routing request f8a677f7-4536-4091-b29a-47024e583d26 with session id None to http://localhost:8002 at 1750984668.6462815, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58548 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,692] INFO: 869a0887-ceaf-495f-9b87-f54ea9491ba8 prefill time (TTFT): 0.0092 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,692] INFO: Routing request 869a0887-ceaf-495f-9b87-f54ea9491ba8 with session id None to http://localhost:8001 at 1750984668.6927145, process time = 0.0093 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,692] INFO: Routing request 869a0887-ceaf-495f-9b87-f54ea9491ba8 with session id None to http://localhost:8002 at 1750984668.692946, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58564 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,737] INFO: 74817c90-8acc-412e-b275-ef2e796ea421 prefill time (TTFT): 0.0083 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,737] INFO: Routing request 74817c90-8acc-412e-b275-ef2e796ea421 with session id None to http://localhost:8001 at 1750984668.737268, process time = 0.0084 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,737] INFO: Routing request 74817c90-8acc-412e-b275-ef2e796ea421 with session id None to http://localhost:8002 at 1750984668.7374759, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58568 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,782] INFO: f9281b4b-ec15-426e-b046-7e039bf669ae prefill time (TTFT): 0.0093 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,782] INFO: Routing request f9281b4b-ec15-426e-b046-7e039bf669ae with session id None to http://localhost:8001 at 1750984668.782778, process time = 0.0094 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,783] INFO: Routing request f9281b4b-ec15-426e-b046-7e039bf669ae with session id None to http://localhost:8002 at 1750984668.782998, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58576 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,828] INFO: 7508d53f-289d-4e3b-858e-ec75e98cd928 prefill time (TTFT): 0.0085 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,828] INFO: Routing request 7508d53f-289d-4e3b-858e-ec75e98cd928 with session id None to http://localhost:8001 at 1750984668.8283942, process time = 0.0086 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,828] INFO: Routing request 7508d53f-289d-4e3b-858e-ec75e98cd928 with session id None to http://localhost:8002 at 1750984668.828631, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58592 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:48,875] INFO: 9d618679-5490-4959-b7a7-d2660b1f9c6b prefill time (TTFT): 0.0092 (request.py:372:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,875] INFO: Routing request 9d618679-5490-4959-b7a7-d2660b1f9c6b with session id None to http://localhost:8001 at 1750984668.875015, process time = 0.0092 (request.py:373:vllm_router.services.request_service.request) -[2025-06-27 00:37:48,875] INFO: Routing request 9d618679-5490-4959-b7a7-d2660b1f9c6b with session id None to http://localhost:8002 at 1750984668.8752494, process time = 0.0002 (request.py:384:vllm_router.services.request_service.request) -INFO: 127.0.0.1:58594 - "POST /v1/completions HTTP/1.1" 200 OK -INFO: Shutting down -INFO: Waiting for application shutdown. -[2025-06-27 00:37:49,128] INFO: httpx async_client.is_closed(): False - Now close it. Id (will be unchanged): 140506988436000 (httpx_client.py:35:vllm_router.httpx_client) -[2025-06-27 00:37:49,128] INFO: httpx async_client.is_closed(): True. Id (will be unchanged): 140506988436000 (httpx_client.py:39:vllm_router.httpx_client) -[2025-06-27 00:37:49,129] INFO: httpx AsyncClient closed (httpx_client.py:43:vllm_router.httpx_client) -INFO: Closing engine stats scraper -INFO: Closing service discovery module -INFO: Application shutdown complete. -INFO: Finished server process [2126868] diff --git a/tests/e2e/kvaware/router.log b/tests/e2e/kvaware/router.log deleted file mode 100644 index 0b94fb193..000000000 --- a/tests/e2e/kvaware/router.log +++ /dev/null @@ -1,23 +0,0 @@ -/home/zrfishnoodles/production-stack/src/vllm_router/parsers/parser.py:20: RuntimeWarning: Failed to read commit hash: -No module named 'vllm_router._version' - from vllm_router.version import __version__ -[2025-06-27 00:37:38,971] INFO: Initializing kvaware routing logic (routing_logic.py:493:vllm_router.routers.routing_logic) -[2025-06-27 00:37:38,971] INFO: Initializing KvawareRouter with port: 9000 (routing_logic.py:229:vllm_router.routers.routing_logic) -INFO: Started server process [2126604] -INFO: Waiting for application startup. -[2025-06-27 00:37:39,059] INFO: httpx AsyncClient instantiated. Id 140269717546144 (httpx_client.py:31:vllm_router.httpx_client) -INFO: Application startup complete. -INFO: Uvicorn running on http://0.0.0.0:30080 (Press CTRL+C to quit) -[2025-06-27 00:37:39,070] INFO: Scraping metrics from 2 serving engine(s) (engine_stats.py:146:vllm_router.stats.engine_stats) -INFO: 127.0.0.1:39940 - "GET / HTTP/1.1" 404 Not Found -INFO: 127.0.0.1:39946 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:39948 - "GET /v1/models HTTP/1.1" 200 OK -INFO: Shutting down -INFO: Waiting for application shutdown. -[2025-06-27 00:37:39,773] INFO: httpx async_client.is_closed(): False - Now close it. Id (will be unchanged): 140269717546144 (httpx_client.py:35:vllm_router.httpx_client) -[2025-06-27 00:37:39,778] INFO: httpx async_client.is_closed(): True. Id (will be unchanged): 140269717546144 (httpx_client.py:39:vllm_router.httpx_client) -[2025-06-27 00:37:39,779] INFO: httpx AsyncClient closed (httpx_client.py:43:vllm_router.httpx_client) -INFO: Closing engine stats scraper -INFO: Closing service discovery module -INFO: Application shutdown complete. -INFO: Finished server process [2126604] diff --git a/tests/e2e/prefixaware/prefixaware/routing_lines.txt b/tests/e2e/prefixaware/prefixaware/routing_lines.txt deleted file mode 100644 index 4793a276a..000000000 --- a/tests/e2e/prefixaware/prefixaware/routing_lines.txt +++ /dev/null @@ -1,241 +0,0 @@ -[2025-06-27 00:37:18,048] INFO: Routing request de8f7549-da7e-4a04-921b-ca2a72860ac2 with session id None to http://localhost:8001 at 1750984638.0479178, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:18,161] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.1609552, process time = 0.0644 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:18,267] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.266841, process time = 0.0648 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:18,373] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.372828, process time = 0.0653 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:18,478] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.478203, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:18,582] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.5823739, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:18,688] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.6880844, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:18,794] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.7945032, process time = 0.0649 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:18,902] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.902532, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:19,008] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.0082219, process time = 0.0655 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:19,115] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.1151836, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:19,221] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.2210135, process time = 0.0650 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:19,328] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.328283, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:19,435] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.434806, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:19,541] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.5408177, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:19,641] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.6410813, process time = 0.0651 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:19,748] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.748613, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:19,860] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.860118, process time = 0.0702 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:19,971] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.9712284, process time = 0.0695 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:20,081] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.080897, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:20,191] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.1912558, process time = 0.0688 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:20,302] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.3020163, process time = 0.0682 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:20,411] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.411141, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:20,520] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.5200288, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:20,627] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.627369, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:20,736] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.7357795, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:20,845] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.845095, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:20,954] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.9543989, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:21,064] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.0637932, process time = 0.0686 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:21,172] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.1724975, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:21,280] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.280093, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:21,387] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.3867934, process time = 0.0662 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:21,495] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.495012, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:21,603] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.603595, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:21,710] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.7101386, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:21,817] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.8173587, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:21,927] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.9270127, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:22,037] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.037251, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:22,146] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.146045, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:22,255] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.25482, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:22,363] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.3632615, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:22,471] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.471359, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:22,579] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.5792356, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:22,686] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.6863146, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:22,795] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.7948356, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:22,910] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.9097764, process time = 0.0699 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:23,015] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.015756, process time = 0.0655 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:23,124] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.1244655, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:23,231] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.2315497, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:23,342] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.3425603, process time = 0.0703 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:23,450] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.4502356, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:23,558] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.5582733, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:23,666] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.6666589, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:23,774] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.7745495, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:23,882] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.8826053, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:23,991] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.991657, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:24,102] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.1023266, process time = 0.0676 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:24,215] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.2154505, process time = 0.0724 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:24,324] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.3241663, process time = 0.0678 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:24,433] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.4332342, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:24,540] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.5406127, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:24,649] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.6492922, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:24,758] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.7583642, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:24,866] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.8657682, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:24,975] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.975641, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:25,084] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.0846174, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:25,193] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.1928189, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:25,303] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.3037314, process time = 0.0696 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:25,412] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.4126892, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:25,521] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.5211408, process time = 0.0675 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:25,632] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.6324995, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:25,741] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.7407722, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:25,851] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.8508508, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:25,960] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.9604592, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:26,074] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984646.0739088, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:26,190] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984646.1897907, process time = 0.0675 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:26,304] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.3046403, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:26,413] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.4135015, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:26,522] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.521984, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:26,631] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.6308997, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:26,738] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.73849, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:26,846] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.8459172, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:26,956] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.956389, process time = 0.0699 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:27,064] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.0638897, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:27,170] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.1705747, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:27,281] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.2812662, process time = 0.0698 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:27,421] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.4211028, process time = 0.0977 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:27,541] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.5415916, process time = 0.0779 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:27,648] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.6481113, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:27,755] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.7556157, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:27,864] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.8643925, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:27,973] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984647.9731848, process time = 0.0657 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:28,084] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.0844905, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:28,196] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.196575, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:28,307] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.307035, process time = 0.0683 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:28,416] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.4163585, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:28,528] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.5286276, process time = 0.0717 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:28,638] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.6378303, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:28,751] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.7509508, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:28,858] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.8584132, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:28,967] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.9675782, process time = 0.0676 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:29,075] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.0751295, process time = 0.0664 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:29,181] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.1816075, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:29,291] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.2909973, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:29,400] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.4001827, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:29,508] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.5087323, process time = 0.0678 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:29,617] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.6168947, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:29,725] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.7254033, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:29,839] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.8394322, process time = 0.0697 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:29,954] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.954238, process time = 0.0731 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:30,062] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.062516, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:30,169] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.1692052, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:30,279] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.2792509, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:30,388] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.3883035, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:30,496] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.4963098, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:30,604] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.6043844, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:30,714] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.7143903, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:30,826] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.826696, process time = 0.0690 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:30,939] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.939221, process time = 0.0681 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:31,054] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984651.054538, process time = 0.0703 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:31,169] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984651.1689696, process time = 0.0706 (request.py:292:vllm_router.services.request_service.request) diff --git a/tests/e2e/prefixaware/router.log b/tests/e2e/prefixaware/router.log deleted file mode 100644 index 6e2b1ceb9..000000000 --- a/tests/e2e/prefixaware/router.log +++ /dev/null @@ -1,1008 +0,0 @@ -/home/zrfishnoodles/production-stack/src/vllm_router/parsers/parser.py:20: RuntimeWarning: Failed to read commit hash: -No module named 'vllm_router._version' - from vllm_router.version import __version__ -[2025-06-27 00:37:17,262] INFO: Initializing prefix-aware routing logic (routing_logic.py:502:vllm_router.routers.routing_logic) -INFO: Started server process [2126015] -INFO: Waiting for application startup. -[2025-06-27 00:37:17,352] INFO: httpx AsyncClient instantiated. Id 140498136262800 (httpx_client.py:31:vllm_router.httpx_client) -INFO: Application startup complete. -INFO: Uvicorn running on http://0.0.0.0:30080 (Press CTRL+C to quit) -[2025-06-27 00:37:17,360] INFO: Scraping metrics from 2 serving engine(s) (engine_stats.py:146:vllm_router.stats.engine_stats) -INFO: 127.0.0.1:57032 - "GET / HTTP/1.1" 404 Not Found -INFO: 127.0.0.1:57044 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:57058 - "GET /v1/models HTTP/1.1" 200 OK -[2025-06-27 00:37:17,981] DEBUG: No cache hit, forwarding request to backend (main_router.py:69:vllm_router.routers.main_router) -[2025-06-27 00:37:17,982] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,047] DEBUG: Routing request de8f7549-da7e-4a04-921b-ca2a72860ac2 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,047] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,048] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,048] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'content-length': '197'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,048] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,048] INFO: Routing request de8f7549-da7e-4a04-921b-ca2a72860ac2 with session id None to http://localhost:8001 at 1750984638.0479178, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57064 - "POST /v1/chat/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:18,096] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,160] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,160] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,161] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,161] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,161] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,161] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.1609552, process time = 0.0644 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57072 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:18,202] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,266] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,266] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,266] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,266] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,267] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,267] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.266841, process time = 0.0648 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57086 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:18,307] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,372] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,372] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,372] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,372] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,373] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,373] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.372828, process time = 0.0653 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57102 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:18,413] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,477] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,478] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,478] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,478] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,478] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,478] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.478203, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57110 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:18,517] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,581] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,582] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,582] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,582] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,582] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,582] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.5823739, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57118 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:18,622] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,687] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,688] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,688] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,688] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,688] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,688] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.6880844, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57132 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:18,729] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,793] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,794] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,794] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,794] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,794] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,794] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.7945032, process time = 0.0649 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57134 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:18,838] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,902] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,902] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,902] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,902] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,902] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:18,902] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984638.902532, process time = 0.0646 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57150 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:18,942] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,007] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,008] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,008] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,008] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,008] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,008] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.0082219, process time = 0.0655 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57164 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:19,049] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,114] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,115] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,115] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,115] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,115] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,115] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.1151836, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57180 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:19,156] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,220] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,221] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,221] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,221] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,221] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,221] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.2210135, process time = 0.0650 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57190 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:19,262] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,327] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,328] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,328] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,328] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,328] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,328] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.328283, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57202 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:19,369] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,434] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,434] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,434] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,434] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,435] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,435] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.434806, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57210 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:19,475] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,540] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,540] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,540] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,540] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,541] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,541] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.5408177, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57214 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:19,576] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,640] DEBUG: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,641] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,641] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,641] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3d31001e-4818-47bd-aee6-05ef0027f2ee', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,641] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,641] INFO: Routing request 3d31001e-4818-47bd-aee6-05ef0027f2ee with session id None to http://localhost:8001 at 1750984639.6410813, process time = 0.0651 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57216 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:19,682] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,748] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,748] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,748] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,748] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,748] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,748] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.748613, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57228 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:19,789] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,859] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,860] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,860] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,860] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,860] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,860] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.860118, process time = 0.0702 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57242 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:19,901] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,970] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,971] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,971] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,971] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,971] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:19,971] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984639.9712284, process time = 0.0695 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57248 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:20,012] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,080] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,080] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,080] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,081] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,081] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,081] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.080897, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57260 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:20,122] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,190] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,191] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,191] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,191] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,191] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,191] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.1912558, process time = 0.0688 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57266 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:20,233] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,301] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,302] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,302] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,302] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,302] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,302] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.3020163, process time = 0.0682 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57270 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:20,344] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,410] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,411] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,411] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,411] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,411] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,411] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.411141, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57282 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:20,453] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,519] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,520] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,520] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,520] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,520] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,520] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.5200288, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57290 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:20,560] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,626] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,627] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,627] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,627] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,627] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,627] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.627369, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57302 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:20,668] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,735] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,735] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,735] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,736] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,736] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,736] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.7357795, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57316 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:20,777] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,844] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,845] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,845] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,845] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,845] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,845] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.845095, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57328 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:20,888] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,953] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,954] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,954] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,954] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,954] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:20,954] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984640.9543989, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57344 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:20,995] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,063] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,063] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,063] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,063] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,064] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,064] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.0637932, process time = 0.0686 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57346 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:21,106] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,171] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,172] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,172] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,172] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,172] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,172] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.1724975, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:57358 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:21,212] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,279] DEBUG: Routing request 6352e28f-7921-4287-98c7-dee20326904a for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,280] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,280] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,280] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '6352e28f-7921-4287-98c7-dee20326904a', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,280] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,280] INFO: Routing request 6352e28f-7921-4287-98c7-dee20326904a with session id None to http://localhost:8001 at 1750984641.280093, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47866 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:21,320] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,386] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,386] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,386] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,386] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,386] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,387] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.3867934, process time = 0.0662 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47868 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:21,428] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,494] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,495] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,495] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,495] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,495] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,495] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.495012, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47884 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:21,536] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,603] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,603] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,603] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,603] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,603] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,603] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.603595, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47898 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:21,643] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,709] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,710] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,710] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,710] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,710] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,710] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.7101386, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47900 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:21,751] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,816] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,817] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,817] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,817] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,817] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,817] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.8173587, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47912 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:21,861] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,926] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,927] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,927] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,927] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,927] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:21,927] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984641.9270127, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47926 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:21,969] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,036] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,037] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,037] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,037] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,037] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,037] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.037251, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47928 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:22,078] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,145] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,146] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,146] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,146] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,146] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,146] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.146045, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47940 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:22,188] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,254] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,254] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,254] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,254] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,255] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,255] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.25482, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47950 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:22,295] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,362] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,363] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,363] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,363] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,363] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,363] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.3632615, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47966 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:22,404] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,470] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,471] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,471] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,471] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,471] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,471] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.471359, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47968 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:22,512] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,578] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,579] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,579] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,579] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,579] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,579] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.5792356, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47982 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:22,620] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,685] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,686] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,686] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,686] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,686] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,686] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.6863146, process time = 0.0663 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:47998 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:22,727] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,794] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,794] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,794] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,795] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,795] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,795] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.7948356, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48010 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:22,839] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,909] DEBUG: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,909] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,909] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,909] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '712f3189-14e1-426a-a52d-7b76ab23f390', 'content-length': '466'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,909] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:22,910] INFO: Routing request 712f3189-14e1-426a-a52d-7b76ab23f390 with session id None to http://localhost:8001 at 1750984642.9097764, process time = 0.0699 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48018 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:22,950] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,015] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,015] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,015] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,015] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,015] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,015] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.015756, process time = 0.0655 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48028 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:23,058] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,123] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,124] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,124] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,124] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,124] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,124] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.1244655, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48030 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:23,165] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,231] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,231] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,231] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,231] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,231] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,231] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.2315497, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48040 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:23,272] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,341] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,342] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,342] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,342] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,342] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,342] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.3425603, process time = 0.0703 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48052 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:23,383] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,449] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,450] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,450] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,450] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,450] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,450] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.4502356, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48054 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:23,491] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,557] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,558] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,558] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,558] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,558] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,558] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.5582733, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48060 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:23,599] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,666] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,666] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,666] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,666] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,666] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,666] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.6666589, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48074 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:23,707] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,773] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,774] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,774] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,774] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,774] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,774] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.7745495, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48084 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:23,814] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,881] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,882] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,882] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,882] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,882] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,882] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.8826053, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48098 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:23,924] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,990] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,991] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,991] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,991] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,991] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:23,991] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984643.991657, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48112 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:24,034] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,101] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,102] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,102] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,102] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,102] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,102] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.1023266, process time = 0.0676 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48116 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:24,143] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,214] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,215] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,215] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,215] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,215] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,215] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.2154505, process time = 0.0724 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48118 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:24,256] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,323] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,324] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,324] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,324] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,324] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,324] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.3241663, process time = 0.0678 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48124 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:24,366] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,432] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,433] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,433] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,433] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,433] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,433] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.4332342, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48132 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:24,473] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,539] DEBUG: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,540] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,540] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,540] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '64407ef8-7332-49bd-9a3b-a3df7afc7cf6', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,540] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,540] INFO: Routing request 64407ef8-7332-49bd-9a3b-a3df7afc7cf6 with session id None to http://localhost:8002 at 1750984644.5406127, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48144 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:24,582] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,648] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,649] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,649] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,649] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,649] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,649] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.6492922, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48152 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:24,690] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,757] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,758] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,758] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,758] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,758] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,758] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.7583642, process time = 0.0680 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48168 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:24,799] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,865] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,865] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,865] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,865] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,865] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,866] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.8657682, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48184 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:24,907] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,974] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,975] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,975] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,975] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,975] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:24,975] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984644.975641, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48200 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:25,017] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,083] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,084] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,084] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,084] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,084] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,084] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.0846174, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48214 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:25,125] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,192] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,192] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,192] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,192] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,193] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,193] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.1928189, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48228 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:25,234] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,303] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,303] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,303] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,303] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,303] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,303] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.3037314, process time = 0.0696 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48238 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:25,345] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,411] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,412] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,412] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,412] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,412] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,412] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.4126892, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48244 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:25,453] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,520] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,521] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,521] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,521] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,521] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,521] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.5211408, process time = 0.0675 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48256 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:25,564] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,631] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,632] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,632] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,632] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,632] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,632] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.6324995, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48258 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:25,673] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,739] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,740] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,740] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,740] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,740] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,741] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.7407722, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48270 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:25,783] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,850] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,850] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,850] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,851] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,851] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,851] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.8508508, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48276 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:25,893] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,959] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,960] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,960] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,960] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,960] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:25,960] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984645.9604592, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48292 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:26,007] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,073] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,073] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,074] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,074] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,074] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,074] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984646.0739088, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48296 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:26,122] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,188] DEBUG: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,189] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,189] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,189] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '00f8b7b5-c73e-4b97-a306-4e053dd59eba', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,190] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,190] INFO: Routing request 00f8b7b5-c73e-4b97-a306-4e053dd59eba with session id None to http://localhost:8001 at 1750984646.1897907, process time = 0.0675 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48312 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:26,237] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,303] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,304] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,304] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,304] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,304] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,304] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.3046403, process time = 0.0669 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48328 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:26,346] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,412] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,413] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,413] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,413] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,413] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,413] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.4135015, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48344 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:26,454] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,521] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,522] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,522] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,522] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,522] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,522] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.521984, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48354 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:26,564] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,630] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,630] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,630] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,631] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,631] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,631] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.6308997, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48364 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:26,671] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,737] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,738] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,738] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,738] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,738] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,738] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.73849, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48380 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:26,779] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,845] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,845] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,846] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,846] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,846] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,846] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.8459172, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48392 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:26,886] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,955] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,956] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,956] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,956] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,956] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:26,956] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984646.956389, process time = 0.0699 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48398 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:26,997] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,063] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,063] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,063] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,064] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,064] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,064] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.0638897, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48400 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:27,104] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,169] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,170] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,170] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,170] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,170] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,170] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.1705747, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48406 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:27,211] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,280] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,281] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,281] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,281] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '594'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,281] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,281] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.2812662, process time = 0.0698 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48418 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:27,323] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,364] INFO: -================================================== -Server: http://localhost:8001 -Models: - - facebook/opt-125m - Engine Stats: Running Requests: 0.0, Queued Requests: 0.0, GPU Cache Hit Rate: 0.00 - Request Stats: QPS: 6.10, Avg Latency: 0.04, Avg ITL: -1, Prefill Requests: 0, Decoding Requests: 0, Swapped Requests: 0, Finished: 61, Uptime: 9.32 sec --------------------------------------------------- -Server: http://localhost:8002 -Models: - - facebook/opt-125m - Engine Stats: Running Requests: 0.0, Queued Requests: 0.0, GPU Cache Hit Rate: 0.00 - Request Stats: QPS: 2.50, Avg Latency: 0.04, Avg ITL: -1, Prefill Requests: 0, Decoding Requests: 0, Swapped Requests: 0, Finished: 25, Uptime: 9.32 sec --------------------------------------------------- -================================================== - (log_stats.py:115:vllm_router.stats.log_stats) -[2025-06-27 00:37:27,420] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,421] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,421] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,421] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,421] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,421] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.4211028, process time = 0.0977 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48432 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:27,463] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,484] INFO: Scraping metrics from 2 serving engine(s) (engine_stats.py:146:vllm_router.stats.engine_stats) -[2025-06-27 00:37:27,541] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,541] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,541] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,541] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,541] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,541] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.5415916, process time = 0.0779 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48434 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:27,582] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,647] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,648] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,648] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,648] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,648] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,648] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.6481113, process time = 0.0652 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48436 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:27,689] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,754] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,755] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,755] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,755] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,755] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,755] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.7556157, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48446 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:27,797] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,863] DEBUG: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,864] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,864] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,864] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4be2a4d-725e-489c-a0e3-a30cd49bbbb4', 'content-length': '850'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,864] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,864] INFO: Routing request d4be2a4d-725e-489c-a0e3-a30cd49bbbb4 with session id None to http://localhost:8002 at 1750984647.8643925, process time = 0.0667 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48456 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:27,907] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,972] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,973] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,973] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,973] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,973] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:27,973] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984647.9731848, process time = 0.0657 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48466 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:28,016] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,084] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,084] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,084] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,084] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,084] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,084] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.0844905, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48480 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:28,130] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,196] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,196] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,196] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,196] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,196] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,196] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.196575, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48486 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:28,238] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,306] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,307] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,307] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,307] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,307] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,307] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.307035, process time = 0.0683 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48488 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:28,349] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,415] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,416] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,416] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,416] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,416] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,416] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.4163585, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48502 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:28,457] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,528] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,528] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,528] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,528] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,528] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,528] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.5286276, process time = 0.0717 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48508 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:28,571] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,637] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,637] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,637] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,637] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,638] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,638] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.6378303, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48524 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:28,682] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,750] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,750] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,751] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,751] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,751] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,751] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.7509508, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48530 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:28,791] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,857] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,858] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,858] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,858] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,858] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,858] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.8584132, process time = 0.0668 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48544 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:28,900] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,967] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,967] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,967] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,967] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,967] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:28,967] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984648.9675782, process time = 0.0676 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48560 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:29,008] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,074] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,075] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,075] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,075] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,075] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,075] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.0751295, process time = 0.0664 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48574 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:29,115] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,181] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,181] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,181] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,181] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,181] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,181] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.1816075, process time = 0.0661 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48586 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:29,222] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,290] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,291] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,291] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,291] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,291] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,291] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.2909973, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48588 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:29,333] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,399] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,400] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,400] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,400] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,400] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,400] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.4001827, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48592 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:29,440] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,508] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,508] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,508] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,508] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,508] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,508] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984649.5087323, process time = 0.0678 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48606 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:29,549] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,616] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,616] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,616] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,617] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,617] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,617] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.6168947, process time = 0.0672 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48614 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:29,657] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,725] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,725] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,725] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,725] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,725] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,725] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.7254033, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48622 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:29,769] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,838] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,839] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,839] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,839] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,839] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,839] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.8394322, process time = 0.0697 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48624 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:29,881] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,953] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,954] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,954] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,954] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,954] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:29,954] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984649.954238, process time = 0.0731 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48634 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:29,996] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,061] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,062] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,062] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,062] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '210'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,062] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,062] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.062516, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48644 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:30,102] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,168] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,169] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,169] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,169] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,169] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,169] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.1692052, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48646 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:30,212] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,278] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,279] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,279] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,279] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,279] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,279] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.2792509, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48662 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:30,320] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,387] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,388] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,388] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,388] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,388] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,388] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.3883035, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48678 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:30,429] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,495] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,496] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,496] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,496] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,496] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,496] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.4963098, process time = 0.0671 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48684 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:30,536] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,603] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,604] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,604] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,604] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,604] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,604] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8001 at 1750984650.6043844, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48696 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:30,646] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,713] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,714] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,714] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,714] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,714] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,714] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.7143903, process time = 0.0677 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48706 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:30,757] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,825] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,826] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,826] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,826] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,826] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,826] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.826696, process time = 0.0690 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48712 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:30,871] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,938] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,939] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,939] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,939] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,939] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:30,939] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984650.939221, process time = 0.0681 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48722 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:30,984] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,053] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,054] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,054] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,054] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,054] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,054] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984651.054538, process time = 0.0703 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48736 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:31,098] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,168] DEBUG: Routing request b0c69e3f-a103-451d-9014-4162f0417394 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,168] DEBUG: Debug session extraction - Router type: PrefixAwareRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,169] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,169] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'b0c69e3f-a103-451d-9014-4162f0417394', 'content-length': '338'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,169] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:31,169] INFO: Routing request b0c69e3f-a103-451d-9014-4162f0417394 with session id None to http://localhost:8002 at 1750984651.1689696, process time = 0.0706 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:48750 - "POST /v1/completions HTTP/1.1" 200 OK -INFO: Shutting down -INFO: Waiting for application shutdown. -[2025-06-27 00:37:31,370] INFO: httpx async_client.is_closed(): False - Now close it. Id (will be unchanged): 140498136262800 (httpx_client.py:35:vllm_router.httpx_client) -[2025-06-27 00:37:31,370] INFO: httpx async_client.is_closed(): True. Id (will be unchanged): 140498136262800 (httpx_client.py:39:vllm_router.httpx_client) -[2025-06-27 00:37:31,370] INFO: httpx AsyncClient closed (httpx_client.py:43:vllm_router.httpx_client) -INFO: Closing engine stats scraper -INFO: Closing service discovery module -INFO: Application shutdown complete. -INFO: Finished server process [2126015] diff --git a/tests/e2e/roundrobin/roundrobin/routing_lines.txt b/tests/e2e/roundrobin/roundrobin/routing_lines.txt deleted file mode 100644 index aaa5d6723..000000000 --- a/tests/e2e/roundrobin/roundrobin/routing_lines.txt +++ /dev/null @@ -1,41 +0,0 @@ -[2025-06-27 00:37:07,409] INFO: Routing request 67b4cb15-0604-4684-9aae-2bb756c68422 with session id None to http://localhost:8001 at 1750984627.4094923, process time = 0.0664 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:07,526] INFO: Routing request 657d70a1-6230-489a-ad5f-ac43fe0bff10 with session id None to http://localhost:8002 at 1750984627.5264473, process time = 0.0651 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:07,634] INFO: Routing request 5aadda92-f1fc-44d6-b9c6-e184c998dd2e with session id None to http://localhost:8001 at 1750984627.6342344, process time = 0.0654 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:07,746] INFO: Routing request 716ee5b3-267f-4f5f-8b25-cea3ea81b51f with session id None to http://localhost:8002 at 1750984627.7457457, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:07,854] INFO: Routing request 87517d4c-c4c1-49db-9d8b-58190d90cb56 with session id None to http://localhost:8001 at 1750984627.8541684, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:07,967] INFO: Routing request 3b2c6662-a083-4b22-87dd-48f8bcaf624d with session id None to http://localhost:8002 at 1750984627.9669526, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:08,074] INFO: Routing request ee73b7d3-68e9-45d0-82e0-7739d58679e3 with session id None to http://localhost:8001 at 1750984628.073846, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:08,180] INFO: Routing request 9ef6aa12-e06e-42e4-8444-2784413ea2ec with session id None to http://localhost:8002 at 1750984628.1804473, process time = 0.0653 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:08,286] INFO: Routing request 5e2c6e06-c4dc-44e5-8bf1-ec1e90de5493 with session id None to http://localhost:8001 at 1750984628.2863476, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:08,395] INFO: Routing request f3122044-e477-4dd1-ba4e-e544f76bfdfb with session id None to http://localhost:8002 at 1750984628.3953238, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:08,502] INFO: Routing request c735651b-3a68-4afe-bf9b-7ff20d971deb with session id None to http://localhost:8001 at 1750984628.5027213, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:08,610] INFO: Routing request 3a32598e-1a3a-4349-a600-ad1166511c27 with session id None to http://localhost:8002 at 1750984628.6098235, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:08,720] INFO: Routing request 270828df-e7a2-4da0-b5f8-319ee407deeb with session id None to http://localhost:8001 at 1750984628.7205312, process time = 0.0693 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:08,829] INFO: Routing request feac60a9-b5d3-4674-9a4a-48330609e3ee with session id None to http://localhost:8002 at 1750984628.829479, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:08,938] INFO: Routing request db4933f7-fbdf-477c-99fa-748b4de10cc0 with session id None to http://localhost:8001 at 1750984628.9385927, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:09,046] INFO: Routing request d4640044-ad26-40e4-acbc-9a782910ccb3 with session id None to http://localhost:8002 at 1750984629.0460603, process time = 0.0662 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:09,156] INFO: Routing request 0bbea844-2d4d-42a9-a872-131e888f5d93 with session id None to http://localhost:8001 at 1750984629.156086, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:09,265] INFO: Routing request 31a1f251-cc77-4593-900c-61f0038f8f95 with session id None to http://localhost:8002 at 1750984629.2656531, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:09,374] INFO: Routing request 95e0279b-8bb5-4219-8617-94ef3591b16b with session id None to http://localhost:8001 at 1750984629.3738842, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:09,482] INFO: Routing request d2571a45-72d0-48d4-ae05-199ba0f4340c with session id None to http://localhost:8002 at 1750984629.4817224, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) - -[2025-06-27 00:37:09,590] INFO: Routing request 9a777dfa-b6af-4709-b807-8cb33e1029db with session id None to http://localhost:8001 at 1750984629.5900624, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) diff --git a/tests/e2e/roundrobin/router.log b/tests/e2e/roundrobin/router.log deleted file mode 100644 index 1fe4c35d5..000000000 --- a/tests/e2e/roundrobin/router.log +++ /dev/null @@ -1,191 +0,0 @@ -/home/zrfishnoodles/production-stack/src/vllm_router/parsers/parser.py:20: RuntimeWarning: Failed to read commit hash: -No module named 'vllm_router._version' - from vllm_router.version import __version__ -[2025-06-27 00:37:06,692] INFO: Initializing round-robin routing logic (routing_logic.py:487:vllm_router.routers.routing_logic) -INFO: Started server process [2125604] -INFO: Waiting for application startup. -[2025-06-27 00:37:06,777] INFO: httpx AsyncClient instantiated. Id 139673895638912 (httpx_client.py:31:vllm_router.httpx_client) -INFO: Application startup complete. -INFO: Uvicorn running on http://0.0.0.0:30080 (Press CTRL+C to quit) -[2025-06-27 00:37:06,787] INFO: Scraping metrics from 2 serving engine(s) (engine_stats.py:146:vllm_router.stats.engine_stats) -INFO: 127.0.0.1:49218 - "GET / HTTP/1.1" 404 Not Found -INFO: 127.0.0.1:49234 - "GET /health HTTP/1.1" 200 OK -INFO: 127.0.0.1:49246 - "GET /v1/models HTTP/1.1" 200 OK -[2025-06-27 00:37:07,342] DEBUG: No cache hit, forwarding request to backend (main_router.py:69:vllm_router.routers.main_router) -[2025-06-27 00:37:07,343] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,408] DEBUG: Routing request 67b4cb15-0604-4684-9aae-2bb756c68422 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,409] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,409] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,409] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'content-length': '197'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,409] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,409] INFO: Routing request 67b4cb15-0604-4684-9aae-2bb756c68422 with session id None to http://localhost:8001 at 1750984627.4094923, process time = 0.0664 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49252 - "POST /v1/chat/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:07,461] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,525] DEBUG: Routing request 657d70a1-6230-489a-ad5f-ac43fe0bff10 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,526] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,526] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,526] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '657d70a1-6230-489a-ad5f-ac43fe0bff10', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,526] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,526] INFO: Routing request 657d70a1-6230-489a-ad5f-ac43fe0bff10 with session id None to http://localhost:8002 at 1750984627.5264473, process time = 0.0651 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49264 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:07,568] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,633] DEBUG: Routing request 5aadda92-f1fc-44d6-b9c6-e184c998dd2e for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,634] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,634] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,634] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '5aadda92-f1fc-44d6-b9c6-e184c998dd2e', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,634] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,634] INFO: Routing request 5aadda92-f1fc-44d6-b9c6-e184c998dd2e with session id None to http://localhost:8001 at 1750984627.6342344, process time = 0.0654 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49274 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:07,678] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,745] DEBUG: Routing request 716ee5b3-267f-4f5f-8b25-cea3ea81b51f for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,745] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,745] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,745] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '716ee5b3-267f-4f5f-8b25-cea3ea81b51f', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,746] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,746] INFO: Routing request 716ee5b3-267f-4f5f-8b25-cea3ea81b51f with session id None to http://localhost:8002 at 1750984627.7457457, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49290 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:07,787] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,853] DEBUG: Routing request 87517d4c-c4c1-49db-9d8b-58190d90cb56 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,854] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,854] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,854] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '87517d4c-c4c1-49db-9d8b-58190d90cb56', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,854] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,854] INFO: Routing request 87517d4c-c4c1-49db-9d8b-58190d90cb56 with session id None to http://localhost:8001 at 1750984627.8541684, process time = 0.0665 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49300 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:07,898] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,966] DEBUG: Routing request 3b2c6662-a083-4b22-87dd-48f8bcaf624d for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,966] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,967] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,967] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3b2c6662-a083-4b22-87dd-48f8bcaf624d', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,967] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:07,967] INFO: Routing request 3b2c6662-a083-4b22-87dd-48f8bcaf624d with session id None to http://localhost:8002 at 1750984627.9669526, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49312 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:08,007] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,073] DEBUG: Routing request ee73b7d3-68e9-45d0-82e0-7739d58679e3 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,073] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,073] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,074] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'ee73b7d3-68e9-45d0-82e0-7739d58679e3', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,074] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,074] INFO: Routing request ee73b7d3-68e9-45d0-82e0-7739d58679e3 with session id None to http://localhost:8001 at 1750984628.073846, process time = 0.0660 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49322 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:08,115] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,179] DEBUG: Routing request 9ef6aa12-e06e-42e4-8444-2784413ea2ec for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,180] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,180] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,180] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '9ef6aa12-e06e-42e4-8444-2784413ea2ec', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,180] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,180] INFO: Routing request 9ef6aa12-e06e-42e4-8444-2784413ea2ec with session id None to http://localhost:8002 at 1750984628.1804473, process time = 0.0653 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49330 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:08,220] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,285] DEBUG: Routing request 5e2c6e06-c4dc-44e5-8bf1-ec1e90de5493 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,286] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,286] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,286] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '5e2c6e06-c4dc-44e5-8bf1-ec1e90de5493', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,286] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,286] INFO: Routing request 5e2c6e06-c4dc-44e5-8bf1-ec1e90de5493 with session id None to http://localhost:8001 at 1750984628.2863476, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49344 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:08,329] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,394] DEBUG: Routing request f3122044-e477-4dd1-ba4e-e544f76bfdfb for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,395] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,395] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,395] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'f3122044-e477-4dd1-ba4e-e544f76bfdfb', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,395] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,395] INFO: Routing request f3122044-e477-4dd1-ba4e-e544f76bfdfb with session id None to http://localhost:8002 at 1750984628.3953238, process time = 0.0659 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49350 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:08,436] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,502] DEBUG: Routing request c735651b-3a68-4afe-bf9b-7ff20d971deb for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,502] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,502] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,502] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'c735651b-3a68-4afe-bf9b-7ff20d971deb', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,502] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,502] INFO: Routing request c735651b-3a68-4afe-bf9b-7ff20d971deb with session id None to http://localhost:8001 at 1750984628.5027213, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49360 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:08,544] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,609] DEBUG: Routing request 3a32598e-1a3a-4349-a600-ad1166511c27 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,609] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,609] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,609] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '3a32598e-1a3a-4349-a600-ad1166511c27', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,610] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,610] INFO: Routing request 3a32598e-1a3a-4349-a600-ad1166511c27 with session id None to http://localhost:8002 at 1750984628.6098235, process time = 0.0656 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49372 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:08,651] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,720] DEBUG: Routing request 270828df-e7a2-4da0-b5f8-319ee407deeb for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,720] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,720] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,720] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '270828df-e7a2-4da0-b5f8-319ee407deeb', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,720] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,720] INFO: Routing request 270828df-e7a2-4da0-b5f8-319ee407deeb with session id None to http://localhost:8001 at 1750984628.7205312, process time = 0.0693 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49388 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:08,761] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,828] DEBUG: Routing request feac60a9-b5d3-4674-9a4a-48330609e3ee for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,829] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,829] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,829] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'feac60a9-b5d3-4674-9a4a-48330609e3ee', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,829] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,829] INFO: Routing request feac60a9-b5d3-4674-9a4a-48330609e3ee with session id None to http://localhost:8002 at 1750984628.829479, process time = 0.0679 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49394 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:08,872] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,938] DEBUG: Routing request db4933f7-fbdf-477c-99fa-748b4de10cc0 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,938] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,938] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,938] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'db4933f7-fbdf-477c-99fa-748b4de10cc0', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,938] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:08,938] INFO: Routing request db4933f7-fbdf-477c-99fa-748b4de10cc0 with session id None to http://localhost:8001 at 1750984628.9385927, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49398 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:08,979] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,045] DEBUG: Routing request d4640044-ad26-40e4-acbc-9a782910ccb3 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,046] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,046] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,046] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd4640044-ad26-40e4-acbc-9a782910ccb3', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,046] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,046] INFO: Routing request d4640044-ad26-40e4-acbc-9a782910ccb3 with session id None to http://localhost:8002 at 1750984629.0460603, process time = 0.0662 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49400 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:09,087] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,155] DEBUG: Routing request 0bbea844-2d4d-42a9-a872-131e888f5d93 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,156] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,156] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,156] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '0bbea844-2d4d-42a9-a872-131e888f5d93', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,156] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,156] INFO: Routing request 0bbea844-2d4d-42a9-a872-131e888f5d93 with session id None to http://localhost:8001 at 1750984629.156086, process time = 0.0684 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49402 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:09,198] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,265] DEBUG: Routing request 31a1f251-cc77-4593-900c-61f0038f8f95 for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,265] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,265] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,265] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '31a1f251-cc77-4593-900c-61f0038f8f95', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,265] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,265] INFO: Routing request 31a1f251-cc77-4593-900c-61f0038f8f95 with session id None to http://localhost:8002 at 1750984629.2656531, process time = 0.0674 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49414 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:09,306] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,373] DEBUG: Routing request 95e0279b-8bb5-4219-8617-94ef3591b16b for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,373] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,373] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,374] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '95e0279b-8bb5-4219-8617-94ef3591b16b', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,374] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,374] INFO: Routing request 95e0279b-8bb5-4219-8617-94ef3591b16b with session id None to http://localhost:8001 at 1750984629.3738842, process time = 0.0670 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49416 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:09,415] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,481] DEBUG: Routing request d2571a45-72d0-48d4-ae05-199ba0f4340c for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,481] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,481] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,481] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': 'd2571a45-72d0-48d4-ae05-199ba0f4340c', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,481] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,482] INFO: Routing request d2571a45-72d0-48d4-ae05-199ba0f4340c with session id None to http://localhost:8002 at 1750984629.4817224, process time = 0.0666 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49418 - "POST /v1/completions HTTP/1.1" 200 OK -[2025-06-27 00:37:09,522] INFO: Request for model facebook/opt-125m was rewritten (request.py:207:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,589] DEBUG: Routing request 9a777dfa-b6af-4709-b807-8cb33e1029db for model: facebook/opt-125m (request.py:254:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,590] DEBUG: Debug session extraction - Router type: RoundRobinRouter (request.py:285:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,590] DEBUG: Debug session extraction - Session key config: None (request.py:288:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,590] DEBUG: Debug session extraction - Request headers: {'host': 'localhost:30080', 'user-agent': 'python-requests/2.32.4', 'accept-encoding': 'gzip, deflate', 'accept': '*/*', 'connection': 'keep-alive', 'content-type': 'application/json', 'authorization': 'Bearer dummy', 'x-request-id': '9a777dfa-b6af-4709-b807-8cb33e1029db', 'content-length': '159'} (request.py:289:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,590] DEBUG: Debug session extraction - Extracted session ID: None (request.py:290:vllm_router.services.request_service.request) -[2025-06-27 00:37:09,590] INFO: Routing request 9a777dfa-b6af-4709-b807-8cb33e1029db with session id None to http://localhost:8001 at 1750984629.5900624, process time = 0.0673 (request.py:292:vllm_router.services.request_service.request) -INFO: 127.0.0.1:49430 - "POST /v1/completions HTTP/1.1" 200 OK -INFO: Shutting down -INFO: Waiting for application shutdown. -[2025-06-27 00:37:09,792] INFO: httpx async_client.is_closed(): False - Now close it. Id (will be unchanged): 139673895638912 (httpx_client.py:35:vllm_router.httpx_client) -[2025-06-27 00:37:09,792] INFO: httpx async_client.is_closed(): True. Id (will be unchanged): 139673895638912 (httpx_client.py:39:vllm_router.httpx_client) -[2025-06-27 00:37:09,792] INFO: httpx AsyncClient closed (httpx_client.py:43:vllm_router.httpx_client) -INFO: Closing engine stats scraper -INFO: Closing service discovery module -INFO: Application shutdown complete. -INFO: Finished server process [2125604] From aa71a342dda9db937e35e56196f84cb4283ca8c1 Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Fri, 27 Jun 2025 03:01:31 +0000 Subject: [PATCH 07/12] [CI] Add multiple routing logic test Signed-off-by: Rui Zhang --- .github/values-10-disagg-prefill.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/values-10-disagg-prefill.yaml b/.github/values-10-disagg-prefill.yaml index 548d284f5..7a135cea5 100644 --- a/.github/values-10-disagg-prefill.yaml +++ b/.github/values-10-disagg-prefill.yaml @@ -3,7 +3,7 @@ servingEngineSpec: strategy: type: Recreate enableEngine: true - runtimeClassName: "" + runtimeClassName: "nvidia" containerPort: 8000 modelSpec: # Prefill node configuration From 6f31469875d0e7e9842138bd8675dc7397562bc2 Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Fri, 27 Jun 2025 16:50:16 +0000 Subject: [PATCH 08/12] [CI] fix bug Signed-off-by: Rui Zhang --- .github/values-10-disagg-prefill.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/values-10-disagg-prefill.yaml b/.github/values-10-disagg-prefill.yaml index 7a135cea5..548d284f5 100644 --- a/.github/values-10-disagg-prefill.yaml +++ b/.github/values-10-disagg-prefill.yaml @@ -3,7 +3,7 @@ servingEngineSpec: strategy: type: Recreate enableEngine: true - runtimeClassName: "nvidia" + runtimeClassName: "" containerPort: 8000 modelSpec: # Prefill node configuration From 8de8dbd2474dc44e9e41c41bef16640c49d0b44f Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Wed, 16 Jul 2025 17:53:11 +0000 Subject: [PATCH 09/12] hotfix/add error handle in pd routing Signed-off-by: Rui Zhang --- .../services/request_service/request.py | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/vllm_router/services/request_service/request.py b/src/vllm_router/services/request_service/request.py index 5161f7747..c248b8b18 100644 --- a/src/vllm_router/services/request_service/request.py +++ b/src/vllm_router/services/request_service/request.py @@ -336,12 +336,39 @@ async def send_request_to_decode( "X-Request-Id": request_id, } - async with client.stream( - "POST", endpoint, json=req_data, headers=headers - ) as response: - response.raise_for_status() - async for chunk in response.aiter_bytes(): - yield chunk + try: + async with client.stream( + "POST", endpoint, json=req_data, headers=headers + ) as response: + response.raise_for_status() + async for chunk in response.aiter_bytes(): + yield chunk + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error in decode request: {e}", exc_info=True) + try: + error_text = e.response.text + except Exception: + error_text = f"HTTP {e.response.status_code}" + # Yield error as JSON response + error_response = { + "error": { + "message": f"Backend error: {error_text}", + "type": "backend_error", + "code": e.response.status_code, + } + } + yield json.dumps(error_response).encode("utf-8") + except Exception as e: + logger.error(f"Unexpected error in decode request: {e}", exc_info=True) + # Yield error as JSON response + error_response = { + "error": { + "message": f"Internal server error: {str(e)}", + "type": "internal_error", + "code": 500, + } + } + yield json.dumps(error_response).encode("utf-8") async def route_disaggregated_prefill_request( From 1d3d70f8d86f0c4e1bf0894717f75424b118f2f6 Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Wed, 16 Jul 2025 21:29:39 +0000 Subject: [PATCH 10/12] modify Signed-off-by: Rui Zhang --- .../services/request_service/request.py | 39 +++---------------- 1 file changed, 6 insertions(+), 33 deletions(-) diff --git a/src/vllm_router/services/request_service/request.py b/src/vllm_router/services/request_service/request.py index c248b8b18..5161f7747 100644 --- a/src/vllm_router/services/request_service/request.py +++ b/src/vllm_router/services/request_service/request.py @@ -336,39 +336,12 @@ async def send_request_to_decode( "X-Request-Id": request_id, } - try: - async with client.stream( - "POST", endpoint, json=req_data, headers=headers - ) as response: - response.raise_for_status() - async for chunk in response.aiter_bytes(): - yield chunk - except httpx.HTTPStatusError as e: - logger.error(f"HTTP error in decode request: {e}", exc_info=True) - try: - error_text = e.response.text - except Exception: - error_text = f"HTTP {e.response.status_code}" - # Yield error as JSON response - error_response = { - "error": { - "message": f"Backend error: {error_text}", - "type": "backend_error", - "code": e.response.status_code, - } - } - yield json.dumps(error_response).encode("utf-8") - except Exception as e: - logger.error(f"Unexpected error in decode request: {e}", exc_info=True) - # Yield error as JSON response - error_response = { - "error": { - "message": f"Internal server error: {str(e)}", - "type": "internal_error", - "code": 500, - } - } - yield json.dumps(error_response).encode("utf-8") + async with client.stream( + "POST", endpoint, json=req_data, headers=headers + ) as response: + response.raise_for_status() + async for chunk in response.aiter_bytes(): + yield chunk async def route_disaggregated_prefill_request( From 3edc18f773dbe356b67652d4430a5775a593dadd Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Wed, 23 Jul 2025 17:01:55 +0000 Subject: [PATCH 11/12] Fix lora operator compatibility with other cr Signed-off-by: Rui Zhang --- operator/config/manager/deployment.yaml | 9 +- operator/config/manager/kustomization.yaml | 11 +- operator/config/manager/pvc.yaml | 33 ++++++ operator/config/rbac/role.yaml | 14 +++ ...production-stack_v1alpha1_loraadapter.yaml | 15 +-- .../production-stack_v1alpha1_vllmrouter.yaml | 2 +- ...production-stack_v1alpha1_vllmruntime.yaml | 9 +- .../controller/loraadapter_controller.go | 2 + .../controller/vllmrouter_controller.go | 5 +- .../controller/vllmruntime_controller.go | 105 +++++++++++++++++- 10 files changed, 182 insertions(+), 23 deletions(-) create mode 100644 operator/config/manager/pvc.yaml diff --git a/operator/config/manager/deployment.yaml b/operator/config/manager/deployment.yaml index 092443619..a9f1791d2 100644 --- a/operator/config/manager/deployment.yaml +++ b/operator/config/manager/deployment.yaml @@ -86,7 +86,12 @@ spec: requests: cpu: 10m memory: 64Mi - volumeMounts: [] - volumes: [] + volumeMounts: + - name: shared-pvc-storage + mountPath: /data/shared-pvc-storage + volumes: + - name: shared-pvc-storage + persistentVolumeClaim: + claimName: production-stack-shared-pvc-storage-claim serviceAccountName: production-stack-controller-manager terminationGracePeriodSeconds: 10 diff --git a/operator/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml index ac10fc9f6..ce52485b0 100644 --- a/operator/config/manager/kustomization.yaml +++ b/operator/config/manager/kustomization.yaml @@ -1,3 +1,10 @@ resources: - - namespace.yaml - - deployment.yaml +- namespace.yaml +- deployment.yaml +- pvc.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +images: +- name: controller + newName: controller + newTag: latest diff --git a/operator/config/manager/pvc.yaml b/operator/config/manager/pvc.yaml new file mode 100644 index 000000000..5c9b94cf0 --- /dev/null +++ b/operator/config/manager/pvc.yaml @@ -0,0 +1,33 @@ +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: shared-pvc-storage + labels: + type: local + app: production-stack + component: shared-pvc-storage +spec: + storageClassName: "" + capacity: + storage: 100Gi + accessModes: + - ReadWriteMany + hostPath: + path: /data/shared-pvc-storage +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: shared-pvc-storage-claim + namespace: production-stack-system + labels: + app: production-stack + component: shared-pvc-storage +spec: + storageClassName: "" + accessModes: + - ReadWriteMany + resources: + requests: + storage: 100Gi diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml index a2f2d5f46..739675fef 100644 --- a/operator/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -8,6 +8,8 @@ rules: - "" resources: - configmaps + - persistentvolumeclaims + - persistentvolumes - secrets - serviceaccounts - services @@ -39,6 +41,18 @@ rules: - patch - update - watch +- apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - production-stack.vllm.ai resources: diff --git a/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml b/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml index 960017ab8..0224d588e 100644 --- a/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml +++ b/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml @@ -6,20 +6,17 @@ metadata: app.kubernetes.io/managed-by: kustomize name: loraadapter-sample spec: - baseModel: "llama3-8b-instr" # Use the model name with your specified model name in engineSpec - # If you want to use vllm api key, uncomment the following section, you can either use secret or directly set the value - # Option 1: Secret reference + baseModel: "llama-3.1-8b-instruct" # Use the model name with your specified model label in vllmruntime # vllmApiKey: # secretName: "vllm-api-key" # secretKey: "VLLM_API_KEY" - - # Option 2: Direct value - # vllmApiKey: - # value: "abc123" adapterSource: - type: "local" # (local, huggingface, s3) for now we only support local + type: "huggingface" # (local, huggingface) adapterName: "llama-3.1-nemoguard-8b-topic-control" # This will be the adapter ID - adapterPath: "/data/lora-adapters/llama-3.1-nemoguard-8b-topic-control" # This will be the path to the adapter in the persistent volume + repository: "nvidia/llama-3.1-nemoguard-8b-topic-control" + credentialsSecretRef: + name: "huggingface-credentials" + key: "hf_token" loraAdapterDeploymentConfig: algorithm: "default" # for now we only support default algorithm replicas: 1 # if not specified, by default algorithm, the lora adapter will be applied to all llama3-8b models, if specified, the lora adapter will only be applied to the specified number of replicas diff --git a/operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml b/operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml index 80995f0c9..807853f1d 100644 --- a/operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml +++ b/operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml @@ -16,7 +16,7 @@ spec: serviceDiscovery: k8s # Label selector for vLLM runtime pods - k8sLabelSelector: "app=vllmruntime-sample" + k8sLabelSelector: "model=llama-3.1-8b-instruct" # Routing strategy (roundrobin or session) routingLogic: roundrobin diff --git a/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml index b5be819bb..6ba50345b 100644 --- a/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml +++ b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml @@ -4,12 +4,13 @@ metadata: labels: app.kubernetes.io/name: production-stack app.kubernetes.io/managed-by: kustomize - name: vllmruntime-sample + model: "llama-3.1-8b-instruct" + name: llama3 spec: # Model configuration model: - modelURL: "meta-llama/Llama-3.1-8B" - enableLoRA: false + modelURL: "meta-llama/Llama-3.1-8B-Instruct" + enableLoRA: true enableTool: false toolCallParser: "" maxModelLen: 4096 @@ -60,7 +61,7 @@ spec: pullSecretName: "" # Number of replicas - replicas: 1 + replicas: 2 # Deployment strategy deploymentStrategy: "Recreate" diff --git a/operator/internal/controller/loraadapter_controller.go b/operator/internal/controller/loraadapter_controller.go index 50a361378..5f6866c27 100644 --- a/operator/internal/controller/loraadapter_controller.go +++ b/operator/internal/controller/loraadapter_controller.go @@ -63,6 +63,8 @@ type LoraAdapterReconciler struct { // +kubebuilder:rbac:groups=production-stack.vllm.ai,resources=loraadapters/finalizers,verbs=update // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch +// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses,verbs=get;list;watch;create;update;patch;delete // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. diff --git a/operator/internal/controller/vllmrouter_controller.go b/operator/internal/controller/vllmrouter_controller.go index d9352f5ce..13c2e4b43 100644 --- a/operator/internal/controller/vllmrouter_controller.go +++ b/operator/internal/controller/vllmrouter_controller.go @@ -194,8 +194,9 @@ func (r *VLLMRouterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // deploymentForVLLMRouter returns a VLLMRouter Deployment object func (r *VLLMRouterReconciler) deploymentForVLLMRouter(router *servingv1alpha1.VLLMRouter) *appsv1.Deployment { - labels := map[string]string{ - "app": router.Name, + labels := map[string]string{"app": router.Name} + for k, v := range router.Labels { + labels[k] = v } // Add user-defined environment variables diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go index 72ca8d672..1f6fdfdd4 100644 --- a/operator/internal/controller/vllmruntime_controller.go +++ b/operator/internal/controller/vllmruntime_controller.go @@ -50,6 +50,8 @@ type VLLMRuntimeReconciler struct { // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=persistentvolumes,verbs=get;list;watch;create;update;patch;delete // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. @@ -105,6 +107,40 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{Requeue: true}, nil } + // Check if the pv already exists, if not create a new one + foundPV := &corev1.PersistentVolume{} + err = r.Get(ctx, types.NamespacedName{Name: "shared-pvc-storage", Namespace: vllmRuntime.Namespace}, foundPV) + if err != nil && errors.IsNotFound(err) { + // Define a new pv + pv := r.pvForVLLMRuntime(vllmRuntime) + log.Info("Creating a new PV", "PV.Namespace", pv.Namespace, "PV.Name", pv.Name) + err = r.Create(ctx, pv) + if err != nil { + log.Error(err, "Failed to create new PV", "PV.Namespace", pv.Namespace, "PV.Name", pv.Name) + return ctrl.Result{}, err + } + } else if err != nil { + log.Error(err, "Failed to get PV") + return ctrl.Result{}, err + } + + // Check if the pvc already exists, if not create a new one + foundPVC := &corev1.PersistentVolumeClaim{} + err = r.Get(ctx, types.NamespacedName{Name: "shared-pvc-storage-claim", Namespace: vllmRuntime.Namespace}, foundPVC) + if err != nil && errors.IsNotFound(err) { + // Define a new pvc + pvc := r.pvcForVLLMRuntime(vllmRuntime) + log.Info("Creating a new PVC", "PVC.Namespace", pvc.Namespace, "PVC.Name", pvc.Name) + err = r.Create(ctx, pvc) + if err != nil { + log.Error(err, "Failed to create new PVC", "PVC.Namespace", pvc.Namespace, "PVC.Name", pvc.Name) + return ctrl.Result{}, err + } + } else if err != nil { + log.Error(err, "Failed to get PVC") + return ctrl.Result{}, err + } + // Check if the deployment already exists, if not create a new one found := &appsv1.Deployment{} err = r.Get(ctx, types.NamespacedName{Name: vllmRuntime.Name, Namespace: vllmRuntime.Namespace}, found) @@ -148,10 +184,48 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } +func (r *VLLMRuntimeReconciler) pvForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *corev1.PersistentVolume { + return &corev1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{ + Name: "shared-pvc-storage", + Namespace: vllmRuntime.Namespace, + Labels: map[string]string{"app": vllmRuntime.Name}, + }, + Spec: corev1.PersistentVolumeSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany}, + StorageClassName: "", + Capacity: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")}, + PersistentVolumeSource: corev1.PersistentVolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/data/shared-pvc-storage", + }, + }, + }, + } +} + +func (r *VLLMRuntimeReconciler) pvcForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *corev1.PersistentVolumeClaim { + return &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "shared-pvc-storage-claim", + Namespace: vllmRuntime.Namespace, + Labels: map[string]string{"app": vllmRuntime.Name}, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany}, + StorageClassName: &[]string{""}[0], + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")}, + }, + }, + } +} + // deploymentForVLLMRuntime returns a VLLMRuntime Deployment object func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *appsv1.Deployment { - labels := map[string]string{ - "app": vllmRuntime.Name, + labels := map[string]string{"app": vllmRuntime.Name} + for k, v := range vllmRuntime.Labels { + labels[k] = v } // Define probes @@ -178,7 +252,7 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production Scheme: corev1.URISchemeHTTP, }, }, - InitialDelaySeconds: 240, + InitialDelaySeconds: 500, PeriodSeconds: 10, TimeoutSeconds: 3, SuccessThreshold: 1, @@ -260,6 +334,15 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production }) } + if vllmRuntime.Spec.Model.EnableLoRA { + env = append(env, + corev1.EnvVar{ + Name: "VLLM_ALLOW_RUNTIME_LORA_UPDATING", + Value: "True", + }, + ) + } + // LM Cache configuration if vllmRuntime.Spec.LMCacheConfig.Enabled { env = append(env, @@ -424,6 +507,22 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production Resources: resources, ReadinessProbe: readinessProbe, LivenessProbe: livenessProbe, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "shared-pvc-storage", + MountPath: "/data/shared-pvc-storage", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "shared-pvc-storage", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: "shared-pvc-storage-claim", + }, + }, }, }, }, From a3bde66330805f5ca0173e05b5ac72649843fc9c Mon Sep 17 00:00:00 2001 From: Rui Zhang Date: Wed, 23 Jul 2025 20:12:26 +0000 Subject: [PATCH 12/12] modify Signed-off-by: Rui Zhang --- operator/config/default/kustomization.yaml | 3 +- operator/config/manager/deployment.yaml | 2 +- operator/config/manager/kustomization.yaml | 6 +- ...production-stack_v1alpha1_vllmruntime.yaml | 2 +- operator/config/storage/kustomization.yaml | 4 + operator/config/{manager => storage}/pvc.yaml | 2 +- .../controller/vllmruntime_controller.go | 79 +------------------ 7 files changed, 15 insertions(+), 83 deletions(-) create mode 100644 operator/config/storage/kustomization.yaml rename operator/config/{manager => storage}/pvc.yaml (93%) diff --git a/operator/config/default/kustomization.yaml b/operator/config/default/kustomization.yaml index b092f90d4..db5a1903b 100644 --- a/operator/config/default/kustomization.yaml +++ b/operator/config/default/kustomization.yaml @@ -1,5 +1,5 @@ # Adds namespace to all resources. -namespace: production-stack-system +namespace: default # Value of this field is prepended to the # names of all resources, e.g. a deployment named @@ -18,6 +18,7 @@ resources: - ../crd - ../rbac - ../manager + - ../storage # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in # crd/kustomization.yaml #- ../webhook diff --git a/operator/config/manager/deployment.yaml b/operator/config/manager/deployment.yaml index a9f1791d2..e1507bf64 100644 --- a/operator/config/manager/deployment.yaml +++ b/operator/config/manager/deployment.yaml @@ -1,7 +1,7 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: production-stack-controller-manager + name: controller-manager namespace: production-stack-system labels: app.kubernetes.io/name: production-stack diff --git a/operator/config/manager/kustomization.yaml b/operator/config/manager/kustomization.yaml index ce52485b0..4821ee983 100644 --- a/operator/config/manager/kustomization.yaml +++ b/operator/config/manager/kustomization.yaml @@ -1,10 +1,8 @@ -resources: -- namespace.yaml -- deployment.yaml -- pvc.yaml apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller newName: controller newTag: latest +resources: +- deployment.yaml diff --git a/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml index 6ba50345b..053f79fc8 100644 --- a/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml +++ b/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml @@ -5,7 +5,7 @@ metadata: app.kubernetes.io/name: production-stack app.kubernetes.io/managed-by: kustomize model: "llama-3.1-8b-instruct" - name: llama3 + name: vllmruntime-sample spec: # Model configuration model: diff --git a/operator/config/storage/kustomization.yaml b/operator/config/storage/kustomization.yaml new file mode 100644 index 000000000..7bfd4518a --- /dev/null +++ b/operator/config/storage/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- pvc.yaml diff --git a/operator/config/manager/pvc.yaml b/operator/config/storage/pvc.yaml similarity index 93% rename from operator/config/manager/pvc.yaml rename to operator/config/storage/pvc.yaml index 5c9b94cf0..386baee16 100644 --- a/operator/config/manager/pvc.yaml +++ b/operator/config/storage/pvc.yaml @@ -20,7 +20,7 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: name: shared-pvc-storage-claim - namespace: production-stack-system + namespace: default labels: app: production-stack component: shared-pvc-storage diff --git a/operator/internal/controller/vllmruntime_controller.go b/operator/internal/controller/vllmruntime_controller.go index 1f6fdfdd4..dfc39f397 100644 --- a/operator/internal/controller/vllmruntime_controller.go +++ b/operator/internal/controller/vllmruntime_controller.go @@ -107,40 +107,6 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{Requeue: true}, nil } - // Check if the pv already exists, if not create a new one - foundPV := &corev1.PersistentVolume{} - err = r.Get(ctx, types.NamespacedName{Name: "shared-pvc-storage", Namespace: vllmRuntime.Namespace}, foundPV) - if err != nil && errors.IsNotFound(err) { - // Define a new pv - pv := r.pvForVLLMRuntime(vllmRuntime) - log.Info("Creating a new PV", "PV.Namespace", pv.Namespace, "PV.Name", pv.Name) - err = r.Create(ctx, pv) - if err != nil { - log.Error(err, "Failed to create new PV", "PV.Namespace", pv.Namespace, "PV.Name", pv.Name) - return ctrl.Result{}, err - } - } else if err != nil { - log.Error(err, "Failed to get PV") - return ctrl.Result{}, err - } - - // Check if the pvc already exists, if not create a new one - foundPVC := &corev1.PersistentVolumeClaim{} - err = r.Get(ctx, types.NamespacedName{Name: "shared-pvc-storage-claim", Namespace: vllmRuntime.Namespace}, foundPVC) - if err != nil && errors.IsNotFound(err) { - // Define a new pvc - pvc := r.pvcForVLLMRuntime(vllmRuntime) - log.Info("Creating a new PVC", "PVC.Namespace", pvc.Namespace, "PVC.Name", pvc.Name) - err = r.Create(ctx, pvc) - if err != nil { - log.Error(err, "Failed to create new PVC", "PVC.Namespace", pvc.Namespace, "PVC.Name", pvc.Name) - return ctrl.Result{}, err - } - } else if err != nil { - log.Error(err, "Failed to get PVC") - return ctrl.Result{}, err - } - // Check if the deployment already exists, if not create a new one found := &appsv1.Deployment{} err = r.Get(ctx, types.NamespacedName{Name: vllmRuntime.Name, Namespace: vllmRuntime.Namespace}, found) @@ -184,43 +150,6 @@ func (r *VLLMRuntimeReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } -func (r *VLLMRuntimeReconciler) pvForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *corev1.PersistentVolume { - return &corev1.PersistentVolume{ - ObjectMeta: metav1.ObjectMeta{ - Name: "shared-pvc-storage", - Namespace: vllmRuntime.Namespace, - Labels: map[string]string{"app": vllmRuntime.Name}, - }, - Spec: corev1.PersistentVolumeSpec{ - AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany}, - StorageClassName: "", - Capacity: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")}, - PersistentVolumeSource: corev1.PersistentVolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: "/data/shared-pvc-storage", - }, - }, - }, - } -} - -func (r *VLLMRuntimeReconciler) pvcForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *corev1.PersistentVolumeClaim { - return &corev1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: "shared-pvc-storage-claim", - Namespace: vllmRuntime.Namespace, - Labels: map[string]string{"app": vllmRuntime.Name}, - }, - Spec: corev1.PersistentVolumeClaimSpec{ - AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany}, - StorageClassName: &[]string{""}[0], - Resources: corev1.VolumeResourceRequirements{ - Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("100Gi")}, - }, - }, - } -} - // deploymentForVLLMRuntime returns a VLLMRuntime Deployment object func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *productionstackv1alpha1.VLLMRuntime) *appsv1.Deployment { labels := map[string]string{"app": vllmRuntime.Name} @@ -252,11 +181,11 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production Scheme: corev1.URISchemeHTTP, }, }, - InitialDelaySeconds: 500, - PeriodSeconds: 10, + InitialDelaySeconds: 300, + PeriodSeconds: 20, TimeoutSeconds: 3, SuccessThreshold: 1, - FailureThreshold: 3, + FailureThreshold: 10, } // Build command line arguments @@ -520,7 +449,7 @@ func (r *VLLMRuntimeReconciler) deploymentForVLLMRuntime(vllmRuntime *production Name: "shared-pvc-storage", VolumeSource: corev1.VolumeSource{ PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ - ClaimName: "shared-pvc-storage-claim", + ClaimName: "production-stack-shared-pvc-storage-claim", }, }, },