vllm-project
diff --git a/‎.github/values-06-session-routing.yaml
Lines changed: 33 additions & 0 deletions b/‎.github/values-06-session-routing.yaml
Lines changed: 33 additions & 0 deletions
diff --git a/‎.github/wait-for-pods.sh
Lines changed: 135 additions & 0 deletions b/‎.github/wait-for-pods.sh
Lines changed: 135 additions & 0 deletions
diff --git a/‎.github/workflows/functionality-helm-chart.yml
Lines changed: 60 additions & 0 deletions b/‎.github/workflows/functionality-helm-chart.yml
Lines changed: 60 additions & 0 deletions
diff --git a/‎.github/workflows/router-e2e-test.yml
Lines changed: 103 additions & 0 deletions b/‎.github/workflows/router-e2e-test.yml
Lines changed: 103 additions & 0 deletions
@@ -0,0 +1,33 @@
+# Helm values used by the sticky (session-based) routing e2e test in CI.
+servingEngineSpec:
+  strategy:
+    type: Recreate
+  runtimeClassName: ""
+  modelSpec:
+    - name: "opt125m"
+      repository: "vllm/vllm-openai"
+      tag: "latest"
+      modelURL: "facebook/opt-125m"
+
+      # Two replicas so the router has more than one backend to choose from.
+      replicaCount: 2
+
+      # Per-replica resource requests.
+      # NOTE(review): a fractional GPU request presumably relies on GPU sharing
+      # being available on the CI cluster - confirm.
+      requestCPU: 6
+      requestMemory: "16Gi"
+      requestGPU: 0.5
+
+      pvcStorage: "10Gi"
+      pvcAccessMode:
+        - ReadWriteMany
+
+      vllmConfig:
+        maxModelLen: 1024
+        extraArgs:
+          - "--disable-log-requests"
+          - "--gpu-memory-utilization"
+          - "0.4"
+
+# Router built by the workflow and pushed to the local registry.
+routerSpec:
+  repository: "localhost:5000/git-act-router"
+  imagePullPolicy: "IfNotPresent"
+  enableRouter: true
+  # Session routing, keyed on "x-user-id".
+  routingLogic: "session"
+  sessionKey: "x-user-id"
+  extraArgs:
+    - "--log-level"
+    - "info"
@@ -0,0 +1,135 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Function to print status messages
+print_status() {
+    echo -e "${GREEN}[INFO]${NC} $1"
+}
+
+print_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+print_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+# Defaults; each can be overridden by the matching command-line option.
+POD_PREFIX="vllm"
+TIMEOUT_SECONDS=300
+VERBOSE=false
+
+# Print the command-line help text to stdout, one line per argument.
+usage() {
+    printf '%s\n' \
+        "Usage: $0 [OPTIONS]" \
+        "Options:" \
+        "  --pod-prefix PREFIX    Pod name prefix to search for (default: vllm)" \
+        "  --timeout SECONDS      Timeout in seconds (default: 300)" \
+        "  --verbose              Enable verbose output" \
+        "  --help                 Show this help message" \
+        "" \
+        "Examples:" \
+        "  $0                                    # Wait for pods with 'vllm' prefix" \
+        "  $0 --pod-prefix myapp --timeout 600  # Wait for 'myapp' pods with 10min timeout" \
+        "  $0 --verbose                         # Wait with verbose output"
+}
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --pod-prefix)
+            POD_PREFIX="$2"
+            shift 2
+            ;;
+        --timeout)
+            TIMEOUT_SECONDS="$2"
+            shift 2
+            ;;
+        --verbose)
+            VERBOSE=true
+            shift
+            ;;
+        --help)
+            usage
+            exit 0
+            ;;
+        *)
+            print_error "Unknown option: $1"
+            usage
+            exit 1
+            ;;
+    esac
+done
+
+# Validate inputs
+if ! [[ "$TIMEOUT_SECONDS" =~ ^[0-9]+$ ]]; then
+    print_error "Timeout must be a positive integer"
+    exit 1
+fi
+
+print_status "⏳ Waiting for all $POD_PREFIX pods to be in Running state"
+print_status "Timeout: ${TIMEOUT_SECONDS} seconds"
+
+# Track start time for timeout
+START_TIME=$SECONDS
+
+# Use the same approach as existing functionality tests
+while true; do
+    # Get all pods containing the prefix in their name and check their status
+    if ! kubectl get pods --no-headers | grep -q "$POD_PREFIX"; then
+        print_warning "No $POD_PREFIX pods found yet. Checking again in 5 seconds..."
+        sleep 5
+
+        # Check timeout
+        if [ $((SECONDS - START_TIME)) -gt "$TIMEOUT_SECONDS" ]; then
+            print_error "❌ Timeout waiting for $POD_PREFIX pods to appear"
+            print_error "No pods with prefix '$POD_PREFIX' found after ${TIMEOUT_SECONDS} seconds"
+            kubectl get pods
+            exit 1
+        fi
+        continue
+    fi
+
+    # Get pod status and readiness
+    pod_status=$(kubectl get pods --no-headers | grep "$POD_PREFIX" | awk '{print $3}' | sort | uniq)
+    pod_ready=$(kubectl get pods --no-headers | grep "$POD_PREFIX" | awk '{print $2}' | sort | uniq)
+
+    if [ "$VERBOSE" = true ]; then
+        print_status "Current pod status: $pod_status, Ready: $pod_ready"
+        kubectl get pods | grep "$POD_PREFIX" || true
+    fi
+
+    # If all pods are Running and Ready (1/1), break the loop
+    if [[ "$pod_status" == "Running" ]] && [[ "$pod_ready" == "1/1" ]]; then
+        print_status "All $POD_PREFIX pods are now Ready and in Running state."
+        break
+    fi
+
+    print_status "Not all $POD_PREFIX pods are ready yet. Current status: $pod_status, Ready: $pod_ready"
+    print_status "Checking again in 5 seconds..."
+    sleep 5
+
+    # Check timeout
+    if [ $((SECONDS - START_TIME)) -gt "$TIMEOUT_SECONDS" ]; then
+        print_error "❌ Timeout waiting for $POD_PREFIX pods to be ready"
+        print_error "Pods were not ready after ${TIMEOUT_SECONDS} seconds"
+        print_error "Current pod status:"
+        kubectl get pods | grep "$POD_PREFIX" || true
+        print_error "Pod descriptions:"
+        kubectl describe pods | grep -A 20 -B 5 "$POD_PREFIX" || true
+        exit 1
+    fi
+done
+
+# Show final pod status
+print_status "✅ All $POD_PREFIX pods are ready:"
+kubectl get pods | grep "$POD_PREFIX"
+
+print_status "🎉 Pod readiness check completed successfully in $((SECONDS - START_TIME)) seconds"
@@ -32,6 +32,26 @@ jobs:
         uses: actions/checkout@v4
       - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
       - run: echo "🖥️ The workflow is now ready to test your code on the runner."
+      - name: Helm uninstall
+        run: |
+          # Remove every Helm release left behind by an earlier job on this runner.
+          releases=$(helm list -q)
+
+          if [ -n "$releases" ]; then
+            echo "Found releases: $releases"
+            for r in $releases; do
+              echo "Uninstalling $r..."
+              helm uninstall "$r"
+            done
+          else
+            echo "No Helm releases found; skipping uninstall."
+          fi
+
+          # Wait until no pods remain, but give up after 5 minutes so a pod
+          # stuck in Terminating cannot hang the runner indefinitely.
+          echo "Waiting for all pods to terminate..."
+          deadline=$((SECONDS + 300))
+          while kubectl get pods --no-headers 2>/dev/null | grep -q .; do
+            if [ "$SECONDS" -ge "$deadline" ]; then
+              echo "Timed out waiting for pods to terminate. Remaining pods:"
+              kubectl get pods || true
+              exit 1
+            fi
+            sleep 5
+          done
+          echo "All pods have terminated."
       - name: Deploy via helm charts
         env:
           DOCKER_BUILDKIT: 1
@@ -65,6 +85,26 @@ jobs:
     runs-on: self-hosted
     needs: Secure-Minimal-Example
     steps:
+      - name: Helm uninstall
+        run: |
+          # Remove every Helm release left behind by an earlier job on this runner.
+          releases=$(helm list -q)
+
+          if [ -n "$releases" ]; then
+            echo "Found releases: $releases"
+            for r in $releases; do
+              echo "Uninstalling $r..."
+              helm uninstall "$r"
+            done
+          else
+            echo "No Helm releases found; skipping uninstall."
+          fi
+
+          # Wait until no pods remain, but give up after 5 minutes so a pod
+          # stuck in Terminating cannot hang the runner indefinitely.
+          echo "Waiting for all pods to terminate..."
+          deadline=$((SECONDS + 300))
+          while kubectl get pods --no-headers 2>/dev/null | grep -q .; do
+            if [ "$SECONDS" -ge "$deadline" ]; then
+              echo "Timed out waiting for pods to terminate. Remaining pods:"
+              kubectl get pods || true
+              exit 1
+            fi
+            sleep 5
+          done
+          echo "All pods have terminated."
       - name: Deploy via helm charts
         run: |
           cd ${{ github.workspace }}
@@ -90,6 +130,26 @@ jobs:
     runs-on: self-hosted
     needs: Two-Pods-Minimal-Example
     steps:
+      - name: Helm uninstall
+        run: |
+          # Remove every Helm release left behind by an earlier job on this runner.
+          releases=$(helm list -q)
+
+          if [ -n "$releases" ]; then
+            echo "Found releases: $releases"
+            for r in $releases; do
+              echo "Uninstalling $r..."
+              helm uninstall "$r"
+            done
+          else
+            echo "No Helm releases found; skipping uninstall."
+          fi
+
+          # Wait until no pods remain, but give up after 5 minutes so a pod
+          # stuck in Terminating cannot hang the runner indefinitely.
+          echo "Waiting for all pods to terminate..."
+          deadline=$((SECONDS + 300))
+          while kubectl get pods --no-headers 2>/dev/null | grep -q .; do
+            if [ "$SECONDS" -ge "$deadline" ]; then
+              echo "Timed out waiting for pods to terminate. Remaining pods:"
+              kubectl get pods || true
+              exit 1
+            fi
+            sleep 5
+          done
+          echo "All pods have terminated."
       - name: Deploy via helm charts
         run: |
           helm install vllm ./helm -f .github/values-04-multiple-models.yaml
 
@@ -100,3 +100,106 @@ jobs:
           path: |
             ~/.kube/config
             src/tests/perftest/logs
+
+  # Deploys two serving-engine replicas behind a session-routing router and
+  # runs the sticky-routing e2e script against them.
+  sticky-routing-e2e-test:
+    runs-on: self-hosted
+    needs: e2e-test
+    if: github.event.pull_request.draft == false
+    steps:
+      - name: Check out repository code
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r benchmarks/multi-round-qa/requirements.txt
+          pip install -e .
+
+      - name: Setup minikube environment
+        env:
+          DOCKER_BUILDKIT: 1
+        run: |
+          echo "🔧 Setting up minikube environment"
+          sudo sysctl fs.protected_regular=0
+          # Fails the step early if the cluster is not up
+          minikube status
+          # Point kubectl at the minikube cluster
+          kubectl config use-context minikube
+
+      - name: Build and deploy router image
+        env:
+          DOCKER_BUILDKIT: 1
+        run: |
+          echo "🔨 Building router docker image"
+          cd ${{ github.workspace }}
+          sudo docker build --build-arg INSTALL_OPTIONAL_DEP=default -t localhost:5000/git-act-router -f docker/Dockerfile .
+          sudo docker push localhost:5000/git-act-router
+          minikube image load localhost:5000/git-act-router
+
+      - name: Deploy two-pods setup via helm charts
+        run: |
+          echo "🚀 Deploying two-pods setup with helm"
+          cd ${{ github.workspace }}
+          helm install vllm ./helm -f .github/values-06-session-routing.yaml
+
+      - name: Wait for pods to be ready
+        run: |
+          echo "⏳ Making wait-for-pods script executable and running it"
+          chmod +x .github/wait-for-pods.sh
+          ./.github/wait-for-pods.sh --pod-prefix vllm --timeout 300 --verbose
+
+      - name: Make test script executable
+        run: chmod +x tests/e2e/test-sticky-routing.sh
+
+      - name: Run sticky routing e2e test
+        timeout-minutes: 10
+        run: |
+          echo "🧪 Running sticky routing test"
+          cd ${{ github.workspace }}
+          # --model matches the modelURL deployed through the helm values;
+          # --debug keeps the temp files so the next step can archive them
+          ./tests/e2e/test-sticky-routing.sh --model "facebook/opt-125m" --num-rounds 3 --verbose --debug
+
+      - name: Archive sticky routing test results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: sticky-routing-test-results-pr-${{ github.event.pull_request.number || 'main' }}
+          path: /tmp/sticky-routing-results-*
+
+      - name: Get router and pod logs for debugging
+        if: always()
+        run: |
+          echo "📋 Collecting logs for debugging"
+          mkdir -p debug-logs
+          # Router logs (last 100 lines)
+          kubectl logs -l app.kubernetes.io/component=router --tail=100 > debug-logs/router.log || true
+          # Serving engine logs (last 100 lines)
+          kubectl logs -l app.kubernetes.io/component=serving-engine --tail=100 > debug-logs/serving-engines.log || true
+          # Pod overview
+          kubectl get pods -o wide > debug-logs/pod-status.txt || true
+          # Service overview
+          kubectl get svc > debug-logs/services.txt || true
+
+      - name: Upload debug logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: debug-logs-pr-${{ github.event.pull_request.number || 'main' }}
+          path: debug-logs/
+
+      - name: Helm uninstall and cleanup
+        if: always()
+        run: |
+          echo "🧹 Cleaning up resources"
+          helm uninstall vllm || true
+          sudo docker image prune -f || true
+
+      - run: echo "🍏 Sticky routing e2e test job status is ${{ job.status }}."