diff --git a/.github/workflows/integration-test-ai-gateway.yml b/.github/workflows/integration-test-ai-gateway.yml
new file mode 100644
index 000000000..99a0d4812
--- /dev/null
+++ b/.github/workflows/integration-test-ai-gateway.yml
@@ -0,0 +1,101 @@
+name: Integration Test [AI Gateway]
+
+on:
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - main
+  workflow_dispatch: # Allow manual triggering
+
+jobs:
+  test-ai-gateway:
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v4
+
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: '1.24'
+
+      - name: Set up Rust
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+        with:
+          # Quoted so YAML does not parse 1.90 as the float 1.9
+          toolchain: '1.90'
+
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y \
+            make \
+            curl \
+            build-essential \
+            pkg-config
+
+      - name: Install Kind
+        run: |
+          curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.22.0/kind-linux-amd64
+          chmod +x ./kind
+          sudo mv ./kind /usr/local/bin/kind
+
+      - name: Install kubectl
+        run: |
+          curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
+          chmod +x kubectl
+          sudo mv kubectl /usr/local/bin/kubectl
+
+      - name: Install Helm
+        run: |
+          curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+
+      - name: Download E2E test dependencies
+        run: |
+          cd e2e && go mod download
+
+      - name: Build E2E test binary
+        run: |
+          make build-e2e
+
+      - name: Run AI Gateway E2E tests
+        id: e2e-test
+        run: |
+          make e2e-test PROFILE=ai-gateway
+        env:
+          E2E_VERBOSE: "true"
+
+      - name: Show cluster logs on failure
+        if: failure()
+        run: |
+          echo "=== Kind Cluster Info ==="
+          kind get clusters || true
+          kubectl cluster-info --context kind-semantic-router-e2e || true
+
+          echo "=== All Pods ==="
+          kubectl get pods --all-namespaces -o wide || true
+
+          echo "=== Semantic Router Logs ==="
+          kubectl logs -n vllm-semantic-router-system deployment/semantic-router --tail=100 || true
+
+          echo "=== Envoy Gateway Logs ==="
+          kubectl logs -n envoy-gateway-system deployment/envoy-gateway --tail=100 || true
+
+          echo "=== AI Gateway Controller Logs ==="
+          kubectl logs -n envoy-ai-gateway-system deployment/ai-gateway-controller --tail=100 || true
+
+          echo "=== Gateway Resources ==="
+          kubectl get gateway -A || true
+          kubectl get httproute -A || true
+
+          echo "=== Events ==="
+          kubectl get events --all-namespaces --sort-by='.lastTimestamp' || true
+
+      - name: Clean up
+        if: always()
+        run: |
+          make e2e-cleanup || true
+
diff --git a/.github/workflows/quickstart-integration-test.yml b/.github/workflows/integration-test-docker.yml
similarity index 92%
rename from .github/workflows/quickstart-integration-test.yml
rename to .github/workflows/integration-test-docker.yml
index f7ce1cff7..972c17106 100644
--- a/.github/workflows/quickstart-integration-test.yml
+++ b/.github/workflows/integration-test-docker.yml
@@ -1,16 +1,9 @@
-name: Quickstart Integration Test
+name: Integration Test [Docker Compose]

 on:
   pull_request:
     branches:
       - main
-    paths:
-      - 'scripts/quickstart.sh'
-      - 'deploy/docker-compose/**'
-      - 'config/config.yaml'
-      - 'tools/make/common.mk'
-      - 'tools/make/models.mk'
-      - 'tools/make/docker.mk'
   workflow_dispatch: # Allow manual triggering

 jobs:
diff --git a/.github/workflows/helm-ci.yml b/.github/workflows/integration-test-helm.yml
similarity index 93%
rename from .github/workflows/helm-ci.yml
rename to .github/workflows/integration-test-helm.yml
index
57159ed57..cb63905e1 100644 --- a/.github/workflows/helm-ci.yml +++ b/.github/workflows/integration-test-helm.yml @@ -1,18 +1,12 @@ -name: Helm Chart CI +name: Integration Test [Helm] on: push: branches: - main - paths: - - 'deploy/helm/**' - - '.github/workflows/helm-ci.yml' pull_request: branches: - main - paths: - - 'deploy/helm/**' - - '.github/workflows/helm-ci.yml' workflow_dispatch: env: @@ -161,9 +155,6 @@ jobs: # CI environment: Download only essential model to avoid OOM # Only download all-MiniLM-L12-v2 (smallest model ~120MB) helm install semantic-router ${{ env.CHART_PATH }} \ - --set initContainer.resources.limits.memory=2Gi \ - --set initContainer.resources.requests.memory=1Gi \ - --set-json 'initContainer.models=[{"name":"all-MiniLM-L12-v2","repo":"sentence-transformers/all-MiniLM-L12-v2"}]' \ --namespace vllm-semantic-router-system \ --wait \ --timeout 10m \ @@ -272,9 +263,6 @@ jobs: echo "::group::Upgrade Chart" # Use same minimal config for upgrade test helm upgrade semantic-router ${{ env.CHART_PATH }} \ - --set initContainer.resources.limits.memory=2Gi \ - --set initContainer.resources.requests.memory=1Gi \ - --set-json 'initContainer.models=[{"name":"all-MiniLM-L12-v2","repo":"sentence-transformers/all-MiniLM-L12-v2"}]' \ --namespace vllm-semantic-router-system \ --wait \ --timeout 10m diff --git a/.github/workflows/k8s-config-test.yml b/.github/workflows/k8s-config-test.yml deleted file mode 100644 index 897cfce90..000000000 --- a/.github/workflows/k8s-config-test.yml +++ /dev/null @@ -1,116 +0,0 @@ -name: Configuration Test - -on: - workflow_call: - inputs: - kustomize_version: - description: "Kustomize version to use" - required: false - type: string - default: "v5.7.1" - -jobs: - test-with-custom-config: - name: Test with Custom Configuration - if: ${{ github.repository == 'vllm-project/semantic-router' }} - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Setup Kubernetes tools - uses: ./tools/github-action/setup-kubetools - with: - kubectl_version: v1.28.0 - kustomize_version: ${{ inputs.kustomize_version }} - - - name: Test kustomize with different overlays - run: | - echo "Testing base kustomization..." - kustomize build deploy/kubernetes/ai-gateway/semantic-router > /tmp/base-manifests.yaml - - echo "Validating generated resources..." - - # Check if all expected resources are present - if ! grep -q "kind: Namespace" /tmp/base-manifests.yaml; then - echo "Error: Namespace not found" - exit 1 - fi - - if ! grep -q "kind: Deployment" /tmp/base-manifests.yaml; then - echo "Error: Deployment not found" - exit 1 - fi - - if ! grep -q "kind: Service" /tmp/base-manifests.yaml; then - echo "Error: Service not found" - exit 1 - fi - - if ! grep -q "kind: ConfigMap" /tmp/base-manifests.yaml; then - echo "Error: ConfigMap not found" - exit 1 - fi - - echo "✓ All expected resources are present" - - - name: Verify ConfigMap generation - run: | - echo "Checking ConfigMap generation..." - kustomize build deploy/kubernetes/ai-gateway/semantic-router | grep -A 20 "kind: ConfigMap" - - # Verify config files are included - if ! kustomize build deploy/kubernetes/ai-gateway/semantic-router | grep -q "config.yaml"; then - echo "Warning: config.yaml might not be properly included in ConfigMap" - fi - - if ! 
kustomize build deploy/kubernetes/ai-gateway/semantic-router | grep -q "tools_db.json"; then - echo "Warning: tools_db.json might not be properly included in ConfigMap" - fi - - - name: Validate observability kustomization - run: | - echo "Validating observability stack kustomization..." - if [ -d "deploy/kubernetes/ai-gateway/semantic-router/observability" ]; then - kustomize build deploy/kubernetes/ai-gateway/semantic-router/observability > /tmp/observability-manifests.yaml - echo "✓ Observability kustomization is valid" - - # Verify expected resources - for resource in "Deployment" "Service" "ConfigMap" "PersistentVolumeClaim"; do - if ! grep -q "kind: $resource" /tmp/observability-manifests.yaml; then - echo "Warning: $resource not found in observability manifests" - fi - done - else - echo "Observability directory not found, skipping..." - fi - - - name: Validate AI Gateway configurations - run: | - echo "Validating AI Gateway configurations..." - - # Check if ai-gateway directory exists - if [ -d "deploy/kubernetes/ai-gateway/semantic-router/ai-gateway" ]; then - # Validate configuration yamls (without CRDs) - for yaml_file in deploy/kubernetes/ai-gateway/semantic-router/ai-gateway/configuration/*.yaml; do - if [ -f "$yaml_file" ]; then - echo "Checking $yaml_file..." - # Basic YAML syntax check - kubectl create --dry-run=client -f "$yaml_file" || echo "Warning: Issues with $yaml_file" - fi - done - - # Validate inference-pool manifests (skip CRD validation as they may not be installed) - for yaml_file in deploy/kubernetes/ai-gateway/semantic-router/ai-gateway/inference-pool/*.yaml; do - if [ -f "$yaml_file" ]; then - echo "Checking $yaml_file for YAML syntax..." - # Just check if it's valid YAML - kubectl create --dry-run=client -f "$yaml_file" 2>&1 | grep -q "no matches for kind" && echo "✓ $yaml_file syntax valid (CRD not installed)" || echo "Validated $yaml_file" - fi - done - - echo "✓ AI Gateway configuration validation completed" - else - echo "AI Gateway directory not found, skipping..." - fi diff --git a/.github/workflows/k8s-integration-test.yml b/.github/workflows/k8s-integration-test.yml deleted file mode 100644 index 2d59aa5da..000000000 --- a/.github/workflows/k8s-integration-test.yml +++ /dev/null @@ -1,141 +0,0 @@ -name: Kubernetes Integration Test - -# This workflow tests the CORE semantic-router Kubernetes deployment. 
-# -# Test Scope: -# ✅ Core deployment (namespace, pvc, deployment, service, configmap) -# ✅ Manifest validation (kubeconform) -# ✅ Service connectivity (gRPC, metrics, API ports) -# ✅ API functionality testing (14 comprehensive tests) -# ✅ Security scanning (Trivy, Checkov) -# ✅ Basic syntax validation for observability and ai-gateway configs -# ✅ kind cluster integration with CI-optimized configuration -# ✅ Error handling and edge case testing -# ✅ Performance testing with concurrent requests -# -# Out of Scope (planned for follow-up PRs): -# 🔄 Observability stack deployment (Prometheus + Grafana) -# 🔄 AI Gateway end-to-end testing (Envoy Gateway + InferencePool) -# -# CI Optimizations: -# - Uses CI-specific kind configuration (single node, reduced resources) -# - Generates kind-config.yaml dynamically (no models mount needed) -# - Optimized for GitHub Actions runner constraints -# - Modular workflow design for better maintainability - -on: - pull_request: - paths: - - "deploy/kubernetes/ai-gateway/semantic-router/**" - - ".github/workflows/k8s-integration-test*.yml" - - "Dockerfile.extproc" - - "tools/kind/**" - workflow_dispatch: # Allow manual triggering - schedule: - # Run nightly at 3:00 AM UTC - - cron: "0 3 * * *" - -env: - KIND_VERSION: v0.20.0 - KUBECTL_VERSION: v1.28.0 - KUSTOMIZE_VERSION: v5.7.1 - -jobs: - # Step 1: Validate Kubernetes manifests - validate-manifests: - if: github.repository == 'vllm-project/semantic-router' - uses: ./.github/workflows/k8s-validate-manifests.yml - with: - kustomize_version: v5.7.1 - - # Step 2: Run kind cluster integration test - kind-integration-test: - if: github.repository == 'vllm-project/semantic-router' - uses: ./.github/workflows/k8s-kind-integration-test.yml - needs: validate-manifests - with: - kind_version: v0.20.0 - kustomize_version: v5.7.1 - - # Step 3: Run comprehensive API functionality tests - # test-api-functionality: - # uses: ./.github/workflows/k8s-api-functionality-test.yml - # needs: kind-integration-test - # with: - # kind_version: v0.20.0 - # kustomize_version: v5.7.1 - - # Step 4: Test with custom configurations - test-with-custom-config: - if: github.repository == 'vllm-project/semantic-router' - uses: ./.github/workflows/k8s-config-test.yml - needs: validate-manifests - with: - kustomize_version: v5.7.1 - - # Step 5: Run security scans - security-scan: - if: github.repository == 'vllm-project/semantic-router' - uses: ./.github/workflows/k8s-security-scan.yml - needs: validate-manifests - with: - kustomize_version: v5.7.1 - - # Step 6: Generate test summary - summary: - name: Test Summary - if: github.repository == 'vllm-project/semantic-router' && always() - runs-on: ubuntu-latest - needs: - [ - validate-manifests, - kind-integration-test, - test-with-custom-config, - security-scan, - ] - - steps: - - name: Check test results - run: | - echo "=== Kubernetes Integration Test Summary ===" - echo "Manifest Validation: ${{ needs.validate-manifests.result }}" - echo "kind Integration Test: ${{ needs.kind-integration-test.result }}" - echo "Custom Config Test: ${{ needs.test-with-custom-config.result }}" - echo "Security Scan: ${{ needs.security-scan.result }}" - - # Count failures - FAILURES=0 - if [[ "${{ needs.validate-manifests.result }}" == "failure" ]]; then - echo "❌ Manifest validation failed" - FAILURES=$((FAILURES + 1)) - fi - if [[ "${{ needs.kind-integration-test.result }}" == "failure" ]]; then - echo "❌ kind integration test failed" - FAILURES=$((FAILURES + 1)) - fi - if [[ "${{ 
needs.test-with-custom-config.result }}" == "failure" ]]; then - echo "❌ Custom config test failed" - FAILURES=$((FAILURES + 1)) - fi - if [[ "${{ needs.security-scan.result }}" == "failure" ]]; then - echo "❌ Security scan failed" - FAILURES=$((FAILURES + 1)) - fi - - echo "" - echo "=== Test Coverage ===" - echo "✅ Core deployment validation" - echo "✅ Service connectivity testing" - echo "✅ Configuration validation" - echo "✅ Security scanning" - echo "✅ Error handling and edge cases" - echo "✅ Performance testing" - - if [ $FAILURES -gt 0 ]; then - echo "" - echo "❌ $FAILURES test(s) failed. Check the logs for details." - exit 1 - else - echo "" - echo "✅ All tests passed! Kubernetes deployment is fully validated." - fi diff --git a/.github/workflows/k8s-kind-integration-test.yml b/.github/workflows/k8s-kind-integration-test.yml deleted file mode 100644 index bb78024bc..000000000 --- a/.github/workflows/k8s-kind-integration-test.yml +++ /dev/null @@ -1,401 +0,0 @@ -name: Kind Cluster Integration Test - -on: - workflow_call: - inputs: - kind_version: - description: "Kind version to use" - required: false - type: string - default: "v0.20.0" - kustomize_version: - description: "Kustomize version to use" - required: false - type: string - default: "v5.7.1" - -jobs: - kind-integration-test: - name: kind Cluster Integration Test - runs-on: ubuntu-latest - if: ${{ github.repository == 'vllm-project/semantic-router' }} - timeout-minutes: 45 # Increased to account for model downloads - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Cleanup disk space (CI) - uses: ./tools/github-action/cleanup-disk - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Setup Kubernetes tools - uses: ./tools/github-action/setup-kubetools - with: - kubectl_version: v1.28.0 - kustomize_version: ${{ inputs.kustomize_version }} - - - name: Generate kind configuration for CI - run: | - echo "Creating CI-optimized kind configuration..." - # Use the existing kind configuration template and modify it for CI - mkdir -p tools/kind - - # Create a CI-specific kind config based on the template - cat > tools/kind/kind-config.yaml << 'EOF' - # kind cluster configuration for CI testing - kind: Cluster - apiVersion: kind.x-k8s.io/v1alpha4 - name: semantic-router-cluster - nodes: - - role: control-plane - # Optimized for CI environment with limited resources - extraPortMappings: - - containerPort: 30080 - hostPort: 30080 - protocol: TCP - kubeadmConfigPatches: - - | - kind: InitConfiguration - nodeRegistration: - kubeletExtraArgs: - # Reduced resource limits for CI - system-reserved: memory=512Mi,cpu=250m - kube-reserved: memory=512Mi,cpu=250m - eviction-hard: memory.available<512Mi,nodefs.available<10% - - | - kind: ClusterConfiguration - apiServer: - extraArgs: - max-requests-inflight: "200" - max-mutating-requests-inflight: "100" - etcd: - local: - extraArgs: - quota-backend-bytes: "4294967296" # 4GB (reduced from 8GB) - EOF - echo "Generated CI-optimized kind-config.yaml:" - cat tools/kind/kind-config.yaml - - - name: Create kind cluster - uses: helm/kind-action@v1.8.0 - with: - version: ${{ inputs.kind_version }} - config: tools/kind/kind-config.yaml - cluster_name: semantic-router-cluster - wait: 120s - - - name: Build semantic-router image - uses: docker/build-push-action@v5 - with: - context: . 
- file: ./Dockerfile.extproc - tags: ghcr.io/vllm-project/semantic-router/extproc:test - load: true - cache-from: type=gha - cache-to: type=gha,mode=max - - - name: Load image into kind cluster - run: | - echo "Loading image into kind cluster..." - kind load docker-image ghcr.io/vllm-project/semantic-router/extproc:test --name semantic-router-cluster - echo "Image loaded successfully!" - - - name: Clean up after image build - run: | - echo "=== Cleaning up Docker build artifacts ===" - # Remove build cache and unused images - docker builder prune -af - docker image prune -af - - # Keep only the images we need - docker images - - echo "" - echo "=== Disk usage after build cleanup ===" - df -h - - - name: Verify cluster - run: | - echo "=== Verifying kind cluster ===" - kubectl cluster-info - kubectl get nodes -o wide - kubectl version - - # Verify cluster is ready - kubectl wait --for=condition=Ready nodes --all --timeout=120s - - # Check available resources - echo "=== Node resources ===" - kubectl describe nodes - - - name: Ensure kustomize available - run: kustomize version - - - name: Prepare CI deployment - run: | - echo "Preparing CI deployment configuration..." - - # Create a temporary kustomization file for CI - cd deploy/kubernetes/ai-gateway/semantic-router - - # Backup original kustomization.yaml - cp kustomization.yaml kustomization.yaml.backup - - # Create CI-specific kustomization with patches - cat > kustomization.yaml << EOF - apiVersion: kustomize.config.k8s.io/v1beta1 - kind: Kustomization - - metadata: - name: semantic-router - - resources: - - namespace.yaml - - pv-models.yaml - - deployment.yaml - - service.yaml - - # Generate ConfigMap - configMapGenerator: - - name: semantic-router-config - files: - - config.yaml - - tools_db.json - - namespace: vllm-semantic-router-system - - # Use the test image - images: - - name: ghcr.io/vllm-project/semantic-router/extproc - newTag: test - - # Patch for CI - adjust resources for model loading and set imagePullPolicy - patches: - - patch: |- - - op: replace - path: /spec/template/spec/containers/0/resources/requests/memory - value: "2Gi" - - op: replace - path: /spec/template/spec/containers/0/resources/requests/cpu - value: "500m" - - op: replace - path: /spec/template/spec/containers/0/resources/limits/memory - value: "4Gi" - - op: replace - path: /spec/template/spec/containers/0/resources/limits/cpu - value: "1" - - op: replace - path: /spec/template/spec/containers/0/readinessProbe - value: - httpGet: - path: /health - port: classify-api - scheme: HTTP - initialDelaySeconds: 120 - periodSeconds: 15 - timeoutSeconds: 5 - failureThreshold: 20 - - op: add - path: /spec/template/spec/containers/0/imagePullPolicy - value: "IfNotPresent" - target: - kind: Deployment - name: semantic-router - EOF - - echo "=== Generated CI kustomization ===" - cat kustomization.yaml - - - name: Pre-flight check for Hugging Face connectivity - run: | - echo "Testing Hugging Face Hub connectivity..." 
- curl -I https://huggingface.co || { - echo "⚠️ Warning: Cannot reach huggingface.co" - } - - # Test one of the model repos - curl -I https://huggingface.co/LLM-Semantic-Router/category_classifier_modernbert-base_model || { - echo "⚠️ Warning: Cannot reach model repository" - } - - echo "✓ Connectivity check completed" - - - name: Final disk cleanup before deployment - run: | - echo "=== Final cleanup before deployment ===" - # Clean up any remaining build artifacts - docker system prune -f - - # Clear system caches - sudo sync - echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null || true - - echo "=== Final disk usage ===" - df -h - - echo "=== Available memory ===" - free -h - - - name: Deploy to kind cluster - run: | - echo "Deploying semantic-router to kind cluster..." - kustomize build deploy/kubernetes/ai-gateway/semantic-router | kubectl apply -f - - - echo "Waiting for namespace to be active..." - kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/vllm-semantic-router-system --timeout=60s - - echo "Deployment initiated. Checking resources..." - kubectl get all -n vllm-semantic-router-system - - - name: Wait for deployment readiness - run: | - echo "Waiting for deployment to be ready..." - - # Wait for PVC to be bound - echo "Waiting for PVC to be bound..." - kubectl wait --for=jsonpath='{.status.phase}'=Bound pvc/semantic-router-models -n vllm-semantic-router-system --timeout=300s || { - echo "PVC binding timeout. Checking PVC status..." - kubectl describe pvc -n vllm-semantic-router-system - exit 1 - } - - # Wait for pods to be created - echo "Waiting for pods to be created..." - timeout 300 bash -c 'until kubectl get pods -n vllm-semantic-router-system | grep -q semantic-router; do echo "Waiting for pod creation..."; sleep 5; done' - - # Show pod status - kubectl get pods -n vllm-semantic-router-system - - # Wait for init container to complete (model download) - echo "Waiting for init container to complete (downloading models)..." - kubectl wait --for=condition=Initialized pods -l app=semantic-router -n vllm-semantic-router-system --timeout=1200s || { - echo "❌ Init container did not complete in time. Showing logs..." - kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=200 || true - kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router - exit 1 - } - - # Wait for main container to be ready (increased timeout for model loading) - echo "Waiting for main container to be ready..." - kubectl wait --for=condition=Ready pods -l app=semantic-router -n vllm-semantic-router-system --timeout=1200s || { - echo "❌ Pod did not become ready in time. Showing status and logs..." - kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router - kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=200 || true - exit 1 - } - - echo "✅ Deployment is ready!" 
- - - name: Verify deployment - run: | - echo "=== Verifying Deployment ===" - - # Check deployment status - kubectl get deployment -n vllm-semantic-router-system semantic-router -o wide - - # Check pod status - kubectl get pods -n vllm-semantic-router-system -o wide - - # Check services - kubectl get svc -n vllm-semantic-router-system - - # Check configmaps - kubectl get configmap -n vllm-semantic-router-system - - # Verify pod is running - POD_STATUS=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].status.phase}') - if [ "$POD_STATUS" != "Running" ]; then - echo "Error: Pod is not running. Status: $POD_STATUS" - kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router - exit 1 - fi - - echo "✓ Pod is running" - - # Verify all containers are ready - READY_CONTAINERS=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].status.containerStatuses[0].ready}') - if [ "$READY_CONTAINERS" != "true" ]; then - echo "Error: Container is not ready" - kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router - exit 1 - fi - - echo "✓ All containers are ready" - - - name: Test service connectivity - run: | - echo "=== Testing Service Connectivity ===" - - # Get pod name - POD_NAME=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].metadata.name}') - echo "Pod name: $POD_NAME" - - # Test basic port connectivity - echo "Testing ports..." - kubectl exec -n vllm-semantic-router-system $POD_NAME -- timeout 5 nc -zv localhost 50051 || echo "gRPC port test failed" - kubectl exec -n vllm-semantic-router-system $POD_NAME -- timeout 5 nc -zv localhost 9190 || echo "Metrics port test failed" - kubectl exec -n vllm-semantic-router-system $POD_NAME -- timeout 5 nc -zv localhost 8080 || echo "API port test failed" - - echo "✓ Service connectivity tests completed" - - - name: Check logs - if: always() - run: | - echo "=== Deployment Logs ===" - kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=200 --all-containers=true || true - - echo "=== Events ===" - kubectl get events -n vllm-semantic-router-system --sort-by='.lastTimestamp' || true - - - name: Export cluster logs on failure - if: failure() - run: | - echo "=== Exporting cluster information for debugging ===" - mkdir -p /tmp/k8s-logs - - # Export kind cluster logs - echo "=== Kind cluster logs ===" - docker logs semantic-router-cluster-control-plane > /tmp/k8s-logs/kind-control-plane.log || true - - # Export pod descriptions - kubectl describe pods -n vllm-semantic-router-system > /tmp/k8s-logs/pod-descriptions.txt || true - - # Export deployment description - kubectl describe deployment -n vllm-semantic-router-system > /tmp/k8s-logs/deployment-description.txt || true - - # Export all logs - kubectl logs -n vllm-semantic-router-system -l app=semantic-router --all-containers=true --previous > /tmp/k8s-logs/previous-logs.txt || true - kubectl logs -n vllm-semantic-router-system -l app=semantic-router --all-containers=true > /tmp/k8s-logs/current-logs.txt || true - - # Export events - kubectl get events -n vllm-semantic-router-system --sort-by='.lastTimestamp' > /tmp/k8s-logs/events.txt || true - - # Export resource status - kubectl get all -n vllm-semantic-router-system -o yaml > /tmp/k8s-logs/all-resources.yaml || true - - # Export kind cluster info - kind get kubeconfig --name semantic-router-cluster > /tmp/k8s-logs/kind-kubeconfig.yaml || true - - - name: Upload 
cluster logs - if: failure() - uses: actions/upload-artifact@v4 - with: - name: k8s-cluster-logs - path: /tmp/k8s-logs/ - retention-days: 7 - - - name: Cleanup - if: always() - run: | - echo "Cleaning up resources..." - kubectl delete namespace vllm-semantic-router-system --timeout=60s || true - echo "Cleaning up kind cluster..." - kind delete cluster --name semantic-router-cluster || true - echo "Restoring original kustomization..." - cd deploy/kubernetes/ai-gateway/semantic-router - if [ -f kustomization.yaml.backup ]; then - mv kustomization.yaml.backup kustomization.yaml - fi diff --git a/.github/workflows/k8s-security-scan.yml b/.github/workflows/k8s-security-scan.yml deleted file mode 100644 index 4ce899517..000000000 --- a/.github/workflows/k8s-security-scan.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Security Scan - -on: - workflow_call: - inputs: - kustomize_version: - description: "Kustomize version to use" - required: false - type: string - default: "v5.7.1" - -jobs: - security-scan: - name: Security Scan for K8s Manifests - if: ${{ github.repository == 'vllm-project/semantic-router' }} - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Setup Kubernetes tools - uses: ./tools/github-action/setup-kubetools - with: - kubectl_version: v1.28.0 - kustomize_version: ${{ inputs.kustomize_version }} - - - name: Run Trivy security scan - uses: aquasecurity/trivy-action@master - with: - scan-type: "config" - scan-ref: "deploy/kubernetes/ai-gateway/semantic-router" - format: "sarif" - output: "trivy-results.sarif" - severity: "CRITICAL,HIGH" - exit-code: "0" # Don't fail on vulnerabilities, just report - - - name: Upload Trivy results to GitHub Security - uses: github/codeql-action/upload-sarif@v3 - if: always() - with: - sarif_file: "trivy-results.sarif" - - - name: Run Checkov scan - uses: bridgecrewio/checkov-action@master - with: - directory: deploy/kubernetes/ai-gateway/semantic-router - framework: kubernetes - output_format: cli - soft_fail: true # Don't fail the build diff --git a/.github/workflows/k8s-validate-manifests.yml b/.github/workflows/k8s-validate-manifests.yml deleted file mode 100644 index bf3771295..000000000 --- a/.github/workflows/k8s-validate-manifests.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Validate Kubernetes Manifests - -on: - workflow_call: - inputs: - kustomize_version: - description: "Kustomize version to use" - required: false - type: string - default: "v5.7.1" - -jobs: - validate-manifests: - name: Validate Kubernetes Manifests - if: ${{ github.repository == 'vllm-project/semantic-router' }} - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Setup Kubernetes tools - uses: ./tools/github-action/setup-kubetools - with: - kubectl_version: v1.28.0 - kustomize_version: ${{ inputs.kustomize_version }} - - - name: Validate Kustomize build - run: | - echo "Building kustomization..." - kustomize build deploy/kubernetes/ai-gateway/semantic-router > /tmp/k8s-manifests.yaml - echo "Kustomize build successful!" - echo "Generated manifests:" - cat /tmp/k8s-manifests.yaml - - - name: Setup kubeconform - run: | - wget https://github.com/yannh/kubeconform/releases/latest/download/kubeconform-linux-amd64.tar.gz - tar xf kubeconform-linux-amd64.tar.gz - sudo mv kubeconform /usr/local/bin/ - kubeconform -v - - - name: Validate manifests with kubeconform - run: | - echo "Validating Kubernetes manifests..." 
- kustomize build deploy/kubernetes/ai-gateway/semantic-router | \ - kubeconform -strict -summary \ - -kubernetes-version 1.28.0 \ - -schema-location default \ - -schema-location 'https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json' \ - -skip CustomResourceDefinition \ - -ignore-missing-schemas - - - name: Upload validated manifests - uses: actions/upload-artifact@v4 - with: - name: k8s-manifests - path: /tmp/k8s-manifests.yaml - retention-days: 5 diff --git a/Makefile b/Makefile index fa6f77c54..52aa5e506 100644 --- a/Makefile +++ b/Makefile @@ -19,6 +19,7 @@ _run: -f tools/make/helm.mk \ -f tools/make/observability.mk \ -f tools/make/openshift.mk \ + -f tools/make/e2e.mk \ $(MAKECMDGOALS) .PHONY: _run diff --git a/e2e/.gitignore b/e2e/.gitignore new file mode 100644 index 000000000..c0d6217a0 --- /dev/null +++ b/e2e/.gitignore @@ -0,0 +1,17 @@ +# Binaries +bin/ +*.exe +*.dll +*.so +*.dylib + +# Test binary +e2e.test +*.test + +# Output +*.out + +# Go workspace file +go.work + diff --git a/e2e/README.md b/e2e/README.md new file mode 100644 index 000000000..546f8e9d4 --- /dev/null +++ b/e2e/README.md @@ -0,0 +1,212 @@ +# E2E Test Framework + +A comprehensive end-to-end testing framework for Semantic Router with support for multiple deployment profiles. + +## Architecture + +The framework is designed to be extensible and supports multiple test profiles: + +- **ai-gateway**: Tests Semantic Router with Envoy AI Gateway integration +- **istio**: Tests Semantic Router with Istio Gateway (future) +- **production-stack**: Tests vLLM Production Stack configurations (future) +- **llm-d**: Tests with LLM-D (future) +- **dynamo**: Tests with Nvidia Dynamo (future) +- **aibrix**: Tests with vLLM AIBrix (future) + +## Directory Structure + +``` +e2e/ +├── cmd/ +│ └── e2e/ # Main test runner +├── pkg/ +│ ├── framework/ # Core test framework +│ ├── cluster/ # Kind cluster management +│ ├── docker/ # Docker image operations +│ ├── helm/ # Helm deployment utilities +│ └── testcases/ # Test case definitions +├── profiles/ +│ ├── ai-gateway/ # AI Gateway test profile +│ ├── istio/ # Istio test profile (future) +│ └── ... +└── README.md +``` + +## Quick Start + +### Run all tests with default profile (ai-gateway) + +```bash +make e2e-test +``` + +### Run specific profile + +```bash +make e2e-test PROFILE=ai-gateway +``` + +### Run with custom options + +```bash +# Keep cluster after test +make e2e-test KEEP_CLUSTER=true + +# Use existing cluster +make e2e-test USE_EXISTING_CLUSTER=true + +# Verbose output +make e2e-test VERBOSE=true +``` + +## Adding New Test Profiles + +1. Create a new directory under `profiles/` +2. Implement the `Profile` interface +3. Register test cases using the test case registry +4. Add profile-specific deployment configurations + +See `profiles/ai-gateway/` for a complete example. 
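+
+The exact `Profile` contract is defined in `e2e/pkg/framework`. The sketch below is illustrative only: the method set is inferred from how the test runner drives a profile, and the `Teardown` return type in particular is an assumption.
+
+```go
+// Package myprofile is a hypothetical example profile.
+package myprofile
+
+import (
+	"context"
+
+	"github.com/vllm-project/semantic-router/e2e/pkg/framework"
+)
+
+type Profile struct{}
+
+func NewProfile() *Profile { return &Profile{} }
+
+func (p *Profile) Name() string        { return "my-profile" }
+func (p *Profile) Description() string { return "What this profile deploys and verifies" }
+
+// Setup deploys everything the tests need (Helm charts, gateways, ...).
+// opts carries the kube client, kubeconfig path, cluster name, and image tag.
+func (p *Profile) Setup(ctx context.Context, opts *framework.SetupOptions) error {
+	return nil
+}
+
+// Teardown removes whatever Setup created; the runner calls it even on failure.
+func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) error {
+	return nil
+}
+
+// GetTestCases returns the names of the registered test cases this profile runs.
+func (p *Profile) GetTestCases() []string {
+	return []string{"basic-health-check"}
+}
+```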
+
+## Test Case Registration
+
+Test cases are registered using a simple function-based approach:
+
+```go
+func init() {
+	testcases.Register("my-test", testcases.TestCase{
+		Name:        "My Test",
+		Description: "Description of what this test does",
+		Fn: func(ctx context.Context, client *kubernetes.Clientset) error {
+			// Test implementation
+			return nil
+		},
+	})
+}
+```
+
+## Framework Features
+
+- **Automatic cluster lifecycle management**: Creates and cleans up Kind clusters
+- **Docker image building and loading**: Builds images and loads them into Kind
+- **Helm deployment automation**: Deploys required Helm charts
+- **Parallel test execution**: Runs independent tests in parallel
+- **Detailed logging**: Provides comprehensive test output
+- **Resource cleanup**: Ensures proper cleanup even on failures
+
+## Prerequisites
+
+Before running E2E tests, ensure you have the following tools installed:
+
+- [Go](https://golang.org/doc/install) 1.24 or later
+- [Docker](https://docs.docker.com/get-docker/)
+- [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/#installation)
+- [kubectl](https://kubernetes.io/docs/tasks/tools/)
+- [Helm](https://helm.sh/docs/intro/install/)
+
+## Getting Started
+
+### 1. Install dependencies
+
+```bash
+make e2e-deps
+```
+
+### 2. Build the E2E test binary
+
+```bash
+make build-e2e
+```
+
+### 3. Run tests
+
+```bash
+# Run all tests with default profile (ai-gateway)
+make e2e-test
+
+# Run with verbose output
+make e2e-test E2E_VERBOSE=true
+
+# Run and keep cluster for debugging
+make e2e-test-debug
+
+# Run specific test cases
+make e2e-test-specific E2E_TESTS="basic-health-check,chat-completions-request"
+```
+
+## CI Integration
+
+The E2E tests are automatically run in GitHub Actions on:
+
+- Pull requests to `main` branch
+- Pushes to `main` branch
+
+See `.github/workflows/integration-test-ai-gateway.yml` for the CI configuration.
+
+## Troubleshooting
+
+### Cluster creation fails
+
+```bash
+# Clean up any existing cluster
+make e2e-cleanup
+
+# Try again
+make e2e-test
+```
+
+### Tests fail with timeout
+
+Increase the timeout in the test case, or check whether the cluster has enough resources:
+
+```bash
+# Check cluster status
+kubectl get nodes
+kubectl get pods --all-namespaces
+```
+
+### Port forward fails
+
+Make sure no other process is using port 8080:
+
+```bash
+# Check what's using port 8080
+lsof -i :8080
+
+# Kill the process if needed
+kill -9 <PID>
+```
+
+## Development
+
+### Adding a new test case
+
+1. Create a new test function in `profiles/<profile>/testcases.go`
+2. Register it in the `init()` function
+3. Add the test case name to the profile's `GetTestCases()` method
+
+Example:
+
+```go
+func init() {
+	testcases.Register("my-new-test", testcases.TestCase{
+		Description: "My new test description",
+		Tags:        []string{"ai-gateway", "functional"},
+		Fn:          testMyNewFeature,
+	})
+}
+
+func testMyNewFeature(ctx context.Context, client *kubernetes.Clientset, opts testcases.TestCaseOptions) error {
+	// Test implementation
+	return nil
+}
+```
+
+### Adding a new profile
+
+1. Create a new directory under `profiles/`
+2. Implement the `Profile` interface
+3. Register test cases
+4. Update `cmd/e2e/main.go` to include the new profile
+
+See `profiles/ai-gateway/` for a complete example.
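+
+### Running the binary directly
+
+The Make targets wrap the `e2e` binary, whose flags are defined in `cmd/e2e/main.go`. When iterating on a single test it can be quicker to invoke it directly; the binary path below assumes `make build-e2e` places the output under `e2e/bin/`:
+
+```bash
+# From the e2e/ directory; reuse a cluster you already created
+./bin/e2e \
+  -profile ai-gateway \
+  -cluster semantic-router-e2e \
+  -image-tag e2e-test \
+  -use-existing-cluster \
+  -verbose \
+  -tests "basic-health-check,chat-completions-request"
+```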
diff --git a/e2e/cmd/e2e/main.go b/e2e/cmd/e2e/main.go new file mode 100644 index 000000000..701a312f4 --- /dev/null +++ b/e2e/cmd/e2e/main.go @@ -0,0 +1,81 @@ +package main + +import ( + "context" + "flag" + "fmt" + "os" + "strings" + + "github.com/vllm-project/semantic-router/e2e/pkg/framework" + aigateway "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway" + + // Import profiles to register test cases + _ "github.com/vllm-project/semantic-router/e2e/profiles/ai-gateway" +) + +func main() { + // Parse command line flags + var ( + profile = flag.String("profile", "ai-gateway", "Test profile to run (ai-gateway, istio, etc.)") + clusterName = flag.String("cluster", "semantic-router-e2e", "Kind cluster name") + imageTag = flag.String("image-tag", "e2e-test", "Docker image tag") + keepCluster = flag.Bool("keep-cluster", false, "Keep cluster after tests complete") + useExistingCluster = flag.Bool("use-existing-cluster", false, "Use existing cluster instead of creating a new one") + verbose = flag.Bool("verbose", false, "Enable verbose logging") + parallel = flag.Bool("parallel", false, "Run tests in parallel") + testCases = flag.String("tests", "", "Comma-separated list of test cases to run (empty means all)") + ) + + flag.Parse() + + // Parse test cases + var testCasesList []string + if *testCases != "" { + testCasesList = strings.Split(*testCases, ",") + for i := range testCasesList { + testCasesList[i] = strings.TrimSpace(testCasesList[i]) + } + } + + // Create test options + opts := &framework.TestOptions{ + Profile: *profile, + ClusterName: *clusterName, + ImageTag: *imageTag, + KeepCluster: *keepCluster, + UseExistingCluster: *useExistingCluster, + Verbose: *verbose, + Parallel: *parallel, + TestCases: testCasesList, + } + + // Get the profile implementation + profileImpl, err := getProfile(*profile) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + // Create and run the test runner + runner := framework.NewRunner(opts, profileImpl) + + ctx := context.Background() + if err := runner.Run(ctx); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func getProfile(name string) (framework.Profile, error) { + switch name { + case "ai-gateway": + return aigateway.NewProfile(), nil + // Add more profiles here as they are implemented + // case "istio": + // return istio.NewProfile(), nil + default: + return nil, fmt.Errorf("unknown profile: %s", name) + } +} + diff --git a/e2e/go.mod b/e2e/go.mod new file mode 100644 index 000000000..3780bc383 --- /dev/null +++ b/e2e/go.mod @@ -0,0 +1,50 @@ +module github.com/vllm-project/semantic-router/e2e + +go 1.24 + +require ( + k8s.io/apimachinery v0.31.0 + k8s.io/client-go v0.31.0 +) + +require ( + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.22.4 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/go-cmp v0.6.0 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/imdario/mergo v0.3.16 // indirect + github.com/josharian/intern v1.0.0 // indirect + 
github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/x448/float16 v0.8.4 // indirect + golang.org/x/net v0.26.0 // indirect + golang.org/x/oauth2 v0.21.0 // indirect + golang.org/x/sys v0.21.0 // indirect + golang.org/x/term v0.21.0 // indirect + golang.org/x/text v0.16.0 // indirect + golang.org/x/time v0.5.0 // indirect + google.golang.org/protobuf v1.34.2 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/api v0.31.0 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect +) diff --git a/e2e/go.sum b/e2e/go.sum new file mode 100644 index 000000000..25fd05367 --- /dev/null +++ b/e2e/go.sum @@ -0,0 +1,154 @@ +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= +github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= +github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/gnostic-models v0.6.8 
h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= +github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM= +github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.19.0 h1:4ieX6qQjPP/BfC3mpsAtIGGlxTWPeA3Inl/7DtXw1tw= +github.com/onsi/gomega v1.19.0/go.mod 
h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ= +golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE= +golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= +golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= +golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA= +golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= +golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.31.0 h1:b9LiSjR2ym/SzTOlfMHm1tr7/21aD7fSkqgD/CVJBCo= +k8s.io/api v0.31.0/go.mod h1:0YiFF+JfFxMM6+1hQei8FY8M7s1Mth+z/q7eF1aJkTE= +k8s.io/apimachinery v0.31.0 h1:m9jOiSr3FoSSL5WO9bjm1n6B9KROYYgNZOb4tyZ1lBc= +k8s.io/apimachinery v0.31.0/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= 
+k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8= +k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= +k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/e2e/pkg/cluster/kind.go b/e2e/pkg/cluster/kind.go new file mode 100644 index 000000000..42a392930 --- /dev/null +++ b/e2e/pkg/cluster/kind.go @@ -0,0 +1,150 @@ +package cluster + +import ( + "context" + "fmt" + "os" + "os/exec" + "strings" + "time" +) + +// KindCluster manages Kind cluster lifecycle +type KindCluster struct { + Name string + Verbose bool +} + +// NewKindCluster creates a new Kind cluster manager +func NewKindCluster(name string, verbose bool) *KindCluster { + return &KindCluster{ + Name: name, + Verbose: verbose, + } +} + +// Create creates a new Kind cluster +func (k *KindCluster) Create(ctx context.Context) error { + k.log("Creating Kind cluster: %s", k.Name) + + // Check if cluster already exists + exists, err := k.Exists(ctx) + if err != nil { + return fmt.Errorf("failed to check if cluster exists: %w", err) + } + + if exists { + k.log("Cluster %s already exists", k.Name) + return nil + } + + // Create cluster + cmd := exec.CommandContext(ctx, "kind", "create", "cluster", "--name", k.Name) + if k.Verbose { + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + } + + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to create cluster: %w", err) + } + + // Wait for cluster to be ready + k.log("Waiting for cluster to be ready...") + if err := k.WaitForReady(ctx, 5*time.Minute); err != nil { + return fmt.Errorf("cluster failed to become ready: %w", err) + } + + k.log("Cluster %s created successfully", k.Name) + return nil +} + +// Delete deletes the Kind cluster +func (k *KindCluster) Delete(ctx context.Context) error { + k.log("Deleting Kind cluster: %s", k.Name) + + cmd := exec.CommandContext(ctx, "kind", "delete", "cluster", "--name", k.Name) + if k.Verbose { + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + } + + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to delete cluster: %w", err) + } + + k.log("Cluster %s deleted successfully", k.Name) + return nil +} + +// Exists checks if the cluster exists +func (k *KindCluster) Exists(ctx context.Context) (bool, error) { + cmd := exec.CommandContext(ctx, "kind", "get", "clusters") + output, err := cmd.Output() + if err != nil { + return false, fmt.Errorf("failed to list clusters: %w", err) + } + + clusters := 
strings.Split(strings.TrimSpace(string(output)), "\n") + for _, cluster := range clusters { + if cluster == k.Name { + return true, nil + } + } + + return false, nil +} + +// WaitForReady waits for the cluster to be ready +func (k *KindCluster) WaitForReady(ctx context.Context, timeout time.Duration) error { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + cmd := exec.CommandContext(ctx, "kubectl", "wait", + "--for=condition=Ready", + "nodes", + "--all", + "--timeout=300s") + + if k.Verbose { + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + } + + if err := cmd.Run(); err != nil { + return fmt.Errorf("nodes failed to become ready: %w", err) + } + + return nil +} + +// GetKubeConfig returns the path to the kubeconfig file +func (k *KindCluster) GetKubeConfig(ctx context.Context) (string, error) { + cmd := exec.CommandContext(ctx, "kind", "get", "kubeconfig", "--name", k.Name) + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("failed to get kubeconfig: %w", err) + } + + // Write kubeconfig to temp file + tmpFile, err := os.CreateTemp("", fmt.Sprintf("kubeconfig-%s-*.yaml", k.Name)) + if err != nil { + return "", fmt.Errorf("failed to create temp file: %w", err) + } + + if _, err := tmpFile.Write(output); err != nil { + tmpFile.Close() + os.Remove(tmpFile.Name()) + return "", fmt.Errorf("failed to write kubeconfig: %w", err) + } + + tmpFile.Close() + return tmpFile.Name(), nil +} + +func (k *KindCluster) log(format string, args ...interface{}) { + if k.Verbose { + fmt.Printf("[Kind] "+format+"\n", args...) + } +} + diff --git a/e2e/pkg/docker/builder.go b/e2e/pkg/docker/builder.go new file mode 100644 index 000000000..fc6684524 --- /dev/null +++ b/e2e/pkg/docker/builder.go @@ -0,0 +1,97 @@ +package docker + +import ( + "context" + "fmt" + "os" + "os/exec" +) + +// Builder handles Docker image building and loading +type Builder struct { + Verbose bool +} + +// NewBuilder creates a new Docker builder +func NewBuilder(verbose bool) *Builder { + return &Builder{ + Verbose: verbose, + } +} + +// Build builds a Docker image +func (b *Builder) Build(ctx context.Context, opts BuildOptions) error { + b.log("Building Docker image: %s", opts.Tag) + + args := []string{"build"} + + if opts.Dockerfile != "" { + args = append(args, "-f", opts.Dockerfile) + } + + args = append(args, "-t", opts.Tag) + + if opts.BuildContext == "" { + opts.BuildContext = "." + } + args = append(args, opts.BuildContext) + + cmd := exec.CommandContext(ctx, "docker", args...) 
+ if b.Verbose { + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + } + + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to build image: %w", err) + } + + b.log("Image %s built successfully", opts.Tag) + return nil +} + +// LoadToKind loads a Docker image into a Kind cluster +func (b *Builder) LoadToKind(ctx context.Context, clusterName, imageTag string) error { + b.log("Loading image %s to Kind cluster %s", imageTag, clusterName) + + cmd := exec.CommandContext(ctx, "kind", "load", "docker-image", imageTag, "--name", clusterName) + if b.Verbose { + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + } + + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to load image to Kind: %w", err) + } + + b.log("Image %s loaded to Kind cluster %s successfully", imageTag, clusterName) + return nil +} + +// BuildAndLoad builds a Docker image and loads it into a Kind cluster +func (b *Builder) BuildAndLoad(ctx context.Context, clusterName string, opts BuildOptions) error { + if err := b.Build(ctx, opts); err != nil { + return err + } + + return b.LoadToKind(ctx, clusterName, opts.Tag) +} + +func (b *Builder) log(format string, args ...interface{}) { + if b.Verbose { + fmt.Printf("[Docker] "+format+"\n", args...) + } +} + +// BuildOptions contains options for building Docker images +type BuildOptions struct { + // Dockerfile is the path to the Dockerfile + Dockerfile string + + // Tag is the image tag + Tag string + + // BuildContext is the build context directory + BuildContext string +} + diff --git a/e2e/pkg/framework/runner.go b/e2e/pkg/framework/runner.go new file mode 100644 index 000000000..82d12ae67 --- /dev/null +++ b/e2e/pkg/framework/runner.go @@ -0,0 +1,254 @@ +package framework + +import ( + "context" + "fmt" + "strings" + "sync" + "time" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" + + "github.com/vllm-project/semantic-router/e2e/pkg/cluster" + "github.com/vllm-project/semantic-router/e2e/pkg/docker" + "github.com/vllm-project/semantic-router/e2e/pkg/testcases" +) + +// Runner orchestrates the E2E test execution +type Runner struct { + opts *TestOptions + profile Profile + cluster *cluster.KindCluster + builder *docker.Builder +} + +// NewRunner creates a new test runner +func NewRunner(opts *TestOptions, profile Profile) *Runner { + return &Runner{ + opts: opts, + profile: profile, + cluster: cluster.NewKindCluster(opts.ClusterName, opts.Verbose), + builder: docker.NewBuilder(opts.Verbose), + } +} + +// Run executes the E2E tests +func (r *Runner) Run(ctx context.Context) error { + r.log("Starting E2E tests for profile: %s", r.profile.Name()) + r.log("Description: %s", r.profile.Description()) + + // Step 1: Setup cluster + if !r.opts.UseExistingCluster { + if err := r.setupCluster(ctx); err != nil { + return fmt.Errorf("failed to setup cluster: %w", err) + } + + if !r.opts.KeepCluster { + defer r.cleanupCluster(ctx) + } + } + + // Step 2: Build and load Docker images + if err := r.buildAndLoadImages(ctx); err != nil { + return fmt.Errorf("failed to build and load images: %w", err) + } + + // Step 3: Get kubeconfig and create Kubernetes client + kubeConfig, err := r.cluster.GetKubeConfig(ctx) + if err != nil { + return fmt.Errorf("failed to get kubeconfig: %w", err) + } + + config, err := clientcmd.BuildConfigFromFlags("", kubeConfig) + if err != nil { + return fmt.Errorf("failed to build kubeconfig: %w", err) + } + + kubeClient, err := kubernetes.NewForConfig(config) + if err != nil { + return fmt.Errorf("failed to create Kubernetes 
client: %w", err)
+	}
+
+	// Step 4: Set up the profile (deploy Helm charts, etc.)
+	setupOpts := &SetupOptions{
+		KubeClient:  kubeClient,
+		KubeConfig:  kubeConfig,
+		ClusterName: r.opts.ClusterName,
+		ImageTag:    r.opts.ImageTag,
+		Verbose:     r.opts.Verbose,
+	}
+
+	if err := r.profile.Setup(ctx, setupOpts); err != nil {
+		return fmt.Errorf("failed to set up profile: %w", err)
+	}
+
+	defer func() {
+		teardownOpts := &TeardownOptions{
+			KubeClient:  kubeClient,
+			KubeConfig:  kubeConfig,
+			ClusterName: r.opts.ClusterName,
+			Verbose:     r.opts.Verbose,
+		}
+		// Use context.Background() so teardown still runs even if the test
+		// context was canceled; surface (but do not fail on) teardown errors.
+		if err := r.profile.Teardown(context.Background(), teardownOpts); err != nil {
+			r.log("Warning: profile teardown failed: %v", err)
+		}
+	}()
+
+	// Step 5: Run tests
+	results, err := r.runTests(ctx, kubeClient)
+	if err != nil {
+		return fmt.Errorf("failed to run tests: %w", err)
+	}
+
+	// Step 6: Print results
+	r.printResults(results)
+
+	// Check if any tests failed
+	for _, result := range results {
+		if !result.Passed {
+			return fmt.Errorf("some tests failed")
+		}
+	}
+
+	r.log("✅ All tests passed!")
+	return nil
+}
+
+func (r *Runner) setupCluster(ctx context.Context) error {
+	r.log("Setting up Kind cluster: %s", r.opts.ClusterName)
+	return r.cluster.Create(ctx)
+}
+
+func (r *Runner) cleanupCluster(ctx context.Context) {
+	r.log("Cleaning up Kind cluster: %s", r.opts.ClusterName)
+	if err := r.cluster.Delete(ctx); err != nil {
+		r.log("Warning: failed to delete cluster: %v", err)
+	}
+}
+
+func (r *Runner) buildAndLoadImages(ctx context.Context) error {
+	r.log("Building and loading Docker images")
+
+	buildOpts := docker.BuildOptions{
+		Dockerfile:   "Dockerfile.extproc",
+		Tag:          fmt.Sprintf("ghcr.io/vllm-project/semantic-router/extproc:%s", r.opts.ImageTag),
+		BuildContext: ".",
+	}
+
+	return r.builder.BuildAndLoad(ctx, r.opts.ClusterName, buildOpts)
+}
+
+func (r *Runner) runTests(ctx context.Context, kubeClient *kubernetes.Clientset) ([]TestResult, error) {
+	r.log("Running tests")
+
+	// Get test cases to run
+	var testCasesToRun []testcases.TestCase
+	var err error
+
+	if len(r.opts.TestCases) > 0 {
+		// Run specific test cases
+		testCasesToRun, err = testcases.ListByNames(r.opts.TestCases...)
+		if err != nil {
+			return nil, err
+		}
+	} else {
+		// Run all test cases for the profile
+		profileTestCases := r.profile.GetTestCases()
+		testCasesToRun, err = testcases.ListByNames(profileTestCases...)
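+		// For example, the ai-gateway profile's GetTestCases() resolves to
+		// {"basic-health-check", "chat-completions-request"}; ListByNames
+		// fails fast if a profile names a case that was never registered
+		// via testcases.Register.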
+ if err != nil { + return nil, err + } + } + + r.log("Running %d test cases", len(testCasesToRun)) + + results := make([]TestResult, 0, len(testCasesToRun)) + resultsMu := sync.Mutex{} + + if r.opts.Parallel { + // Run tests in parallel + var wg sync.WaitGroup + for _, tc := range testCasesToRun { + wg.Add(1) + go func(tc testcases.TestCase) { + defer wg.Done() + result := r.runSingleTest(ctx, kubeClient, tc) + resultsMu.Lock() + results = append(results, result) + resultsMu.Unlock() + }(tc) + } + wg.Wait() + } else { + // Run tests sequentially + for _, tc := range testCasesToRun { + result := r.runSingleTest(ctx, kubeClient, tc) + results = append(results, result) + } + } + + return results, nil +} + +func (r *Runner) runSingleTest(ctx context.Context, kubeClient *kubernetes.Clientset, tc testcases.TestCase) TestResult { + r.log("Running test: %s", tc.Name) + + start := time.Now() + + opts := testcases.TestCaseOptions{ + Verbose: r.opts.Verbose, + Namespace: "default", + Timeout: "5m", + } + + err := tc.Fn(ctx, kubeClient, opts) + duration := time.Since(start) + + result := TestResult{ + Name: tc.Name, + Passed: err == nil, + Error: err, + Duration: duration.String(), + } + + if err != nil { + r.log("❌ Test %s failed: %v", tc.Name, err) + } else { + r.log("✅ Test %s passed (%s)", tc.Name, duration) + } + + return result +} + +func (r *Runner) printResults(results []TestResult) { + fmt.Println("\n" + strings.Repeat("=", 80)) + fmt.Println("TEST RESULTS") + fmt.Println(strings.Repeat("=", 80)) + + passed := 0 + failed := 0 + + for _, result := range results { + status := "✅ PASSED" + if !result.Passed { + status = "❌ FAILED" + failed++ + } else { + passed++ + } + + fmt.Printf("%s - %s (%s)\n", status, result.Name, result.Duration) + if result.Error != nil { + fmt.Printf(" Error: %v\n", result.Error) + } + } + + fmt.Println(strings.Repeat("=", 80)) + fmt.Printf("Total: %d | Passed: %d | Failed: %d\n", len(results), passed, failed) + fmt.Println(strings.Repeat("=", 80)) +} + +func (r *Runner) log(format string, args ...interface{}) { + if r.opts.Verbose { + fmt.Printf("[Runner] "+format+"\n", args...) 
+ } +} diff --git a/e2e/pkg/framework/types.go b/e2e/pkg/framework/types.go new file mode 100644 index 000000000..0863f2cf6 --- /dev/null +++ b/e2e/pkg/framework/types.go @@ -0,0 +1,107 @@ +package framework + +import ( + "context" + + "k8s.io/client-go/kubernetes" +) + +// Profile defines the interface that all test profiles must implement +type Profile interface { + // Name returns the profile name + Name() string + + // Description returns a description of what this profile tests + Description() string + + // Setup prepares the environment for testing (e.g., deploy Helm charts) + Setup(ctx context.Context, opts *SetupOptions) error + + // Teardown cleans up resources created during setup + Teardown(ctx context.Context, opts *TeardownOptions) error + + // GetTestCases returns the list of test cases to run for this profile + GetTestCases() []string +} + +// SetupOptions contains options for profile setup +type SetupOptions struct { + // KubeClient is the Kubernetes client + KubeClient *kubernetes.Clientset + + // KubeConfig is the path to kubeconfig file + KubeConfig string + + // ClusterName is the name of the Kind cluster + ClusterName string + + // ImageTag is the Docker image tag to use + ImageTag string + + // Verbose enables verbose logging + Verbose bool + + // ValuesFiles contains paths to Helm values files + ValuesFiles map[string]string +} + +// TeardownOptions contains options for profile teardown +type TeardownOptions struct { + // KubeClient is the Kubernetes client + KubeClient *kubernetes.Clientset + + // KubeConfig is the path to kubeconfig file + KubeConfig string + + // ClusterName is the name of the Kind cluster + ClusterName string + + // Verbose enables verbose logging + Verbose bool +} + +// TestOptions contains options for running tests +type TestOptions struct { + // Profile is the test profile to run + Profile string + + // ClusterName is the name of the Kind cluster + ClusterName string + + // ImageTag is the Docker image tag to use + ImageTag string + + // KeepCluster keeps the cluster after tests complete + KeepCluster bool + + // UseExistingCluster uses an existing cluster instead of creating a new one + UseExistingCluster bool + + // Verbose enables verbose logging + Verbose bool + + // Parallel runs tests in parallel + Parallel bool + + // TestCases is a list of specific test cases to run (empty means all) + TestCases []string +} + +// TestResult represents the result of a test case +type TestResult struct { + // Name is the test case name + Name string + + // Passed indicates if the test passed + Passed bool + + // Error contains the error if the test failed + Error error + + // Duration is how long the test took + Duration string + + // Logs contains test output logs + Logs string +} + diff --git a/e2e/pkg/helm/deployer.go b/e2e/pkg/helm/deployer.go new file mode 100644 index 000000000..66a69fee1 --- /dev/null +++ b/e2e/pkg/helm/deployer.go @@ -0,0 +1,149 @@ +package helm + +import ( + "context" + "fmt" + "os" + "os/exec" + "time" +) + +// Deployer handles Helm chart deployments +type Deployer struct { + KubeConfig string + Verbose bool +} + +// NewDeployer creates a new Helm deployer +func NewDeployer(kubeConfig string, verbose bool) *Deployer { + return &Deployer{ + KubeConfig: kubeConfig, + Verbose: verbose, + } +} + +// Install installs a Helm chart +func (d *Deployer) Install(ctx context.Context, opts InstallOptions) error { + d.log("Installing Helm chart: %s/%s", opts.Namespace, opts.ReleaseName) + + args := []string{ + "install", opts.ReleaseName, 
opts.Chart,
+		"--namespace", opts.Namespace,
+		"--create-namespace",
+		"--kubeconfig", d.KubeConfig,
+	}
+
+	if opts.Version != "" {
+		args = append(args, "--version", opts.Version)
+	}
+
+	for _, valuesFile := range opts.ValuesFiles {
+		args = append(args, "-f", valuesFile)
+	}
+
+	for key, value := range opts.Set {
+		args = append(args, "--set", fmt.Sprintf("%s=%s", key, value))
+	}
+
+	if opts.Wait {
+		args = append(args, "--wait")
+		if opts.Timeout != "" {
+			args = append(args, "--timeout", opts.Timeout)
+		}
+	}
+
+	cmd := exec.CommandContext(ctx, "helm", args...)
+	if d.Verbose {
+		cmd.Stdout = os.Stdout
+		cmd.Stderr = os.Stderr
+	}
+
+	if err := cmd.Run(); err != nil {
+		return fmt.Errorf("failed to install chart: %w", err)
+	}
+
+	d.log("Chart %s installed successfully", opts.ReleaseName)
+	return nil
+}
+
+// Uninstall uninstalls a Helm release
+func (d *Deployer) Uninstall(ctx context.Context, releaseName, namespace string) error {
+	d.log("Uninstalling Helm release: %s/%s", namespace, releaseName)
+
+	cmd := exec.CommandContext(ctx, "helm", "uninstall", releaseName,
+		"--namespace", namespace,
+		"--kubeconfig", d.KubeConfig)
+
+	if d.Verbose {
+		cmd.Stdout = os.Stdout
+		cmd.Stderr = os.Stderr
+	}
+
+	if err := cmd.Run(); err != nil {
+		return fmt.Errorf("failed to uninstall release: %w", err)
+	}
+
+	d.log("Release %s uninstalled successfully", releaseName)
+	return nil
+}
+
+// WaitForDeployment waits for a deployment to be ready
+func (d *Deployer) WaitForDeployment(ctx context.Context, namespace, deploymentName string, timeout time.Duration) error {
+	d.log("Waiting for deployment %s/%s to be ready", namespace, deploymentName)
+
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	cmd := exec.CommandContext(ctx, "kubectl", "wait",
+		"--for=condition=Available",
+		fmt.Sprintf("deployment/%s", deploymentName),
+		"-n", namespace,
+		// Pass the caller's timeout through instead of hardcoding one, so the
+		// kubectl deadline matches the context deadline above.
+		fmt.Sprintf("--timeout=%s", timeout),
+		"--kubeconfig", d.KubeConfig)
+
+	if d.Verbose {
+		cmd.Stdout = os.Stdout
+		cmd.Stderr = os.Stderr
+	}
+
+	if err := cmd.Run(); err != nil {
+		return fmt.Errorf("deployment failed to become ready: %w", err)
+	}
+
+	d.log("Deployment %s is ready", deploymentName)
+	return nil
+}
+
+func (d *Deployer) log(format string, args ...interface{}) {
+	if d.Verbose {
+		fmt.Printf("[Helm] "+format+"\n", args...)
+ } +} + +// InstallOptions contains options for installing Helm charts +type InstallOptions struct { + // ReleaseName is the name of the Helm release + ReleaseName string + + // Chart is the chart reference (can be a path or repo/chart) + Chart string + + // Namespace is the Kubernetes namespace + Namespace string + + // Version is the chart version + Version string + + // ValuesFiles are paths to values files + ValuesFiles []string + + // Set contains key-value pairs to set + Set map[string]string + + // Wait waits for resources to be ready + Wait bool + + // Timeout is the timeout for waiting + Timeout string +} + diff --git a/e2e/pkg/testcases/registry.go b/e2e/pkg/testcases/registry.go new file mode 100644 index 000000000..94e4842f9 --- /dev/null +++ b/e2e/pkg/testcases/registry.go @@ -0,0 +1,117 @@ +package testcases + +import ( + "context" + "fmt" + "sync" + + "k8s.io/client-go/kubernetes" +) + +// TestCase represents a single test case +type TestCase struct { + // Name is the unique identifier for the test case + Name string + + // Description describes what the test does + Description string + + // Tags are optional tags for filtering tests + Tags []string + + // Fn is the test function to execute + Fn func(ctx context.Context, client *kubernetes.Clientset, opts TestCaseOptions) error +} + +// TestCaseOptions contains options passed to test cases +type TestCaseOptions struct { + // Verbose enables verbose logging + Verbose bool + + // Namespace is the Kubernetes namespace to use + Namespace string + + // ServiceURL is the URL of the service to test + ServiceURL string + + // Timeout is the test timeout duration + Timeout string +} + +var ( + registry = make(map[string]TestCase) + mu sync.RWMutex +) + +// Register registers a test case +func Register(name string, tc TestCase) { + mu.Lock() + defer mu.Unlock() + + if _, exists := registry[name]; exists { + panic(fmt.Sprintf("test case %q already registered", name)) + } + + tc.Name = name + registry[name] = tc +} + +// Get retrieves a test case by name +func Get(name string) (TestCase, bool) { + mu.RLock() + defer mu.RUnlock() + + tc, ok := registry[name] + return tc, ok +} + +// List returns all registered test cases +func List() []TestCase { + mu.RLock() + defer mu.RUnlock() + + cases := make([]TestCase, 0, len(registry)) + for _, tc := range registry { + cases = append(cases, tc) + } + return cases +} + +// ListByTags returns test cases matching any of the given tags +func ListByTags(tags ...string) []TestCase { + mu.RLock() + defer mu.RUnlock() + + tagSet := make(map[string]bool) + for _, tag := range tags { + tagSet[tag] = true + } + + cases := make([]TestCase, 0) + for _, tc := range registry { + for _, tag := range tc.Tags { + if tagSet[tag] { + cases = append(cases, tc) + break + } + } + } + return cases +} + +// ListByNames returns test cases matching the given names +func ListByNames(names ...string) ([]TestCase, error) { + mu.RLock() + defer mu.RUnlock() + + cases := make([]TestCase, 0, len(names)) + for _, name := range names { + tc, ok := registry[name] + if !ok { + return nil, fmt.Errorf("test case %q not found", name) + } + cases = append(cases, tc) + } + return cases, nil +} + diff --git a/e2e/profiles/ai-gateway/profile.go b/e2e/profiles/ai-gateway/profile.go new file mode 100644 index 000000000..2cfa45128 --- /dev/null +++ b/e2e/profiles/ai-gateway/profile.go @@ -0,0 +1,215 @@ +package aigateway + +import ( + "context" + "fmt" + "os" + "os/exec" + "time" + + 
"github.com/vllm-project/semantic-router/e2e/pkg/framework" + "github.com/vllm-project/semantic-router/e2e/pkg/helm" +) + +// Profile implements the AI Gateway test profile +type Profile struct { + verbose bool +} + +// NewProfile creates a new AI Gateway profile +func NewProfile() *Profile { + return &Profile{} +} + +// Name returns the profile name +func (p *Profile) Name() string { + return "ai-gateway" +} + +// Description returns the profile description +func (p *Profile) Description() string { + return "Tests Semantic Router with Envoy AI Gateway integration" +} + +// Setup deploys all required components for AI Gateway testing +func (p *Profile) Setup(ctx context.Context, opts *framework.SetupOptions) error { + p.verbose = opts.Verbose + p.log("Setting up AI Gateway test environment") + + deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose) + + // Step 1: Deploy Semantic Router + p.log("Step 1/4: Deploying Semantic Router") + if err := p.deploySemanticRouter(ctx, deployer, opts); err != nil { + return fmt.Errorf("failed to deploy semantic router: %w", err) + } + + // Step 2: Deploy Envoy Gateway + p.log("Step 2/4: Deploying Envoy Gateway") + if err := p.deployEnvoyGateway(ctx, deployer, opts); err != nil { + return fmt.Errorf("failed to deploy envoy gateway: %w", err) + } + + // Step 3: Deploy Envoy AI Gateway + p.log("Step 3/4: Deploying Envoy AI Gateway") + if err := p.deployEnvoyAIGateway(ctx, deployer, opts); err != nil { + return fmt.Errorf("failed to deploy envoy ai gateway: %w", err) + } + + // Step 4: Deploy Demo LLM and Gateway API Resources + p.log("Step 4/4: Deploying Demo LLM and Gateway API Resources") + if err := p.deployGatewayResources(ctx, opts); err != nil { + return fmt.Errorf("failed to deploy gateway resources: %w", err) + } + + p.log("AI Gateway test environment setup complete") + return nil +} + +// Teardown cleans up all deployed resources +func (p *Profile) Teardown(ctx context.Context, opts *framework.TeardownOptions) error { + p.verbose = opts.Verbose + p.log("Tearing down AI Gateway test environment") + + deployer := helm.NewDeployer(opts.KubeConfig, opts.Verbose) + + // Clean up in reverse order + p.log("Cleaning up Gateway API resources") + p.cleanupGatewayResources(ctx, opts) + + p.log("Uninstalling Envoy AI Gateway") + deployer.Uninstall(ctx, "aieg-crd", "envoy-ai-gateway-system") + deployer.Uninstall(ctx, "aieg", "envoy-ai-gateway-system") + + p.log("Uninstalling Envoy Gateway") + deployer.Uninstall(ctx, "eg", "envoy-gateway-system") + + p.log("Uninstalling Semantic Router") + deployer.Uninstall(ctx, "semantic-router", "vllm-semantic-router-system") + + p.log("AI Gateway test environment teardown complete") + return nil +} + +// GetTestCases returns the list of test cases for this profile +func (p *Profile) GetTestCases() []string { + return []string{ + "basic-health-check", + "chat-completions-request", + } +} + +func (p *Profile) deploySemanticRouter(ctx context.Context, deployer *helm.Deployer, opts *framework.SetupOptions) error { + valuesFile := "deploy/kubernetes/ai-gateway/semantic-router-values/values.yaml" + + installOpts := helm.InstallOptions{ + ReleaseName: "semantic-router", + Chart: "oci://ghcr.io/vllm-project/charts/semantic-router", + Namespace: "vllm-semantic-router-system", + Version: "v0.0.0-latest", + ValuesFiles: []string{valuesFile}, + Wait: true, + Timeout: "10m", + } + + if err := deployer.Install(ctx, installOpts); err != nil { + return err + } + + return deployer.WaitForDeployment(ctx, "vllm-semantic-router-system", 
"semantic-router", 10*time.Minute) +} + +func (p *Profile) deployEnvoyGateway(ctx context.Context, deployer *helm.Deployer, opts *framework.SetupOptions) error { + installOpts := helm.InstallOptions{ + ReleaseName: "eg", + Chart: "oci://docker.io/envoyproxy/gateway-helm", + Namespace: "envoy-gateway-system", + Version: "v0.0.0-latest", + ValuesFiles: []string{"https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/manifests/envoy-gateway-values.yaml"}, + Wait: true, + Timeout: "5m", + } + + if err := deployer.Install(ctx, installOpts); err != nil { + return err + } + + return deployer.WaitForDeployment(ctx, "envoy-gateway-system", "envoy-gateway", 5*time.Minute) +} + +func (p *Profile) deployEnvoyAIGateway(ctx context.Context, deployer *helm.Deployer, opts *framework.SetupOptions) error { + // Install AI Gateway CRDs + crdOpts := helm.InstallOptions{ + ReleaseName: "aieg-crd", + Chart: "oci://docker.io/envoyproxy/ai-gateway-crds-helm", + Namespace: "envoy-ai-gateway-system", + Version: "v0.0.0-latest", + Wait: true, + Timeout: "5m", + } + + if err := deployer.Install(ctx, crdOpts); err != nil { + return err + } + + // Install AI Gateway + installOpts := helm.InstallOptions{ + ReleaseName: "aieg", + Chart: "oci://docker.io/envoyproxy/ai-gateway-helm", + Namespace: "envoy-ai-gateway-system", + Version: "v0.0.0-latest", + Wait: true, + Timeout: "5m", + } + + if err := deployer.Install(ctx, installOpts); err != nil { + return err + } + + return deployer.WaitForDeployment(ctx, "envoy-ai-gateway-system", "ai-gateway-controller", 5*time.Minute) +} + +func (p *Profile) deployGatewayResources(ctx context.Context, opts *framework.SetupOptions) error { + // Apply base model + if err := p.kubectlApply(ctx, opts.KubeConfig, "https://raw.githubusercontent.com/vllm-project/semantic-router/refs/heads/main/deploy/kubernetes/ai-gateway/aigw-resources/base-model.yaml"); err != nil { + return fmt.Errorf("failed to apply base model: %w", err) + } + + // Apply gateway API resources + if err := p.kubectlApply(ctx, opts.KubeConfig, "https://raw.githubusercontent.com/vllm-project/semantic-router/refs/heads/main/deploy/kubernetes/ai-gateway/aigw-resources/gwapi-resources.yaml"); err != nil { + return fmt.Errorf("failed to apply gateway API resources: %w", err) + } + + return nil +} + +func (p *Profile) cleanupGatewayResources(ctx context.Context, opts *framework.TeardownOptions) error { + // Delete in reverse order + p.kubectlDelete(ctx, opts.KubeConfig, "https://raw.githubusercontent.com/vllm-project/semantic-router/refs/heads/main/deploy/kubernetes/ai-gateway/aigw-resources/gwapi-resources.yaml") + p.kubectlDelete(ctx, opts.KubeConfig, "https://raw.githubusercontent.com/vllm-project/semantic-router/refs/heads/main/deploy/kubernetes/ai-gateway/aigw-resources/base-model.yaml") + return nil +} + +func (p *Profile) kubectlApply(ctx context.Context, kubeConfig, manifest string) error { + return p.runKubectl(ctx, kubeConfig, "apply", "-f", manifest) +} + +func (p *Profile) kubectlDelete(ctx context.Context, kubeConfig, manifest string) error { + return p.runKubectl(ctx, kubeConfig, "delete", "-f", manifest) +} + +func (p *Profile) runKubectl(ctx context.Context, kubeConfig string, args ...string) error { + args = append(args, "--kubeconfig", kubeConfig) + cmd := exec.CommandContext(ctx, "kubectl", args...) 
+ if p.verbose { + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + } + return cmd.Run() +} + +func (p *Profile) log(format string, args ...interface{}) { + if p.verbose { + fmt.Printf("[AI-Gateway] "+format+"\n", args...) + } +} diff --git a/e2e/profiles/ai-gateway/testcases.go b/e2e/profiles/ai-gateway/testcases.go new file mode 100644 index 000000000..e67ecd4b1 --- /dev/null +++ b/e2e/profiles/ai-gateway/testcases.go @@ -0,0 +1,186 @@ +package aigateway + +import ( + "context" + "fmt" + "io" + "net/http" + "os/exec" + "strings" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + + "github.com/vllm-project/semantic-router/e2e/pkg/testcases" +) + +func init() { + // Register test cases for AI Gateway profile + testcases.Register("basic-health-check", testcases.TestCase{ + Description: "Verify that all components are deployed and healthy", + Tags: []string{"ai-gateway", "health"}, + Fn: testBasicHealthCheck, + }) + + testcases.Register("chat-completions-request", testcases.TestCase{ + Description: "Send a chat completions request and verify 200 OK response", + Tags: []string{"ai-gateway", "functional"}, + Fn: testChatCompletionsRequest, + }) +} + +func testBasicHealthCheck(ctx context.Context, client *kubernetes.Clientset, opts testcases.TestCaseOptions) error { + if opts.Verbose { + fmt.Println("[Test] Running basic health check") + } + + // Check semantic-router deployment + if err := checkDeployment(ctx, client, "vllm-semantic-router-system", "semantic-router", opts.Verbose); err != nil { + return fmt.Errorf("semantic-router deployment not healthy: %w", err) + } + + // Check envoy-gateway deployment + if err := checkDeployment(ctx, client, "envoy-gateway-system", "envoy-gateway", opts.Verbose); err != nil { + return fmt.Errorf("envoy-gateway deployment not healthy: %w", err) + } + + // Check ai-gateway-controller deployment + if err := checkDeployment(ctx, client, "envoy-ai-gateway-system", "ai-gateway-controller", opts.Verbose); err != nil { + return fmt.Errorf("ai-gateway-controller deployment not healthy: %w", err) + } + + if opts.Verbose { + fmt.Println("[Test] ✅ All deployments are healthy") + } + + return nil +} + +func testChatCompletionsRequest(ctx context.Context, client *kubernetes.Clientset, opts testcases.TestCaseOptions) error { + if opts.Verbose { + fmt.Println("[Test] Testing chat completions endpoint") + } + + // Get the Envoy service name + envoyService, err := getEnvoyServiceName(ctx, opts.Verbose) + if err != nil { + return fmt.Errorf("failed to get Envoy service name: %w", err) + } + + if opts.Verbose { + fmt.Printf("[Test] Envoy service: %s\n", envoyService) + } + + // Set up port forwarding + portForwardCtx, cancel := context.WithCancel(ctx) + defer cancel() + + if err := startPortForward(portForwardCtx, "envoy-gateway-system", envoyService, "8080:80", opts.Verbose); err != nil { + return fmt.Errorf("failed to start port forward: %w", err) + } + + // Wait for port forward to be ready + time.Sleep(3 * time.Second) + + // Send test request + if opts.Verbose { + fmt.Println("[Test] Sending chat completions request") + } + + payload := `{ + "model": "MoM", + "messages": [ + {"role": "user", "content": "What is the derivative of f(x) = x^3?"} + ] + }` + + req, err := http.NewRequestWithContext(ctx, "POST", "http://localhost:8080/v1/chat/completions", strings.NewReader(payload)) + if err != nil { + return fmt.Errorf("failed to create request: %w", err) + } + + req.Header.Set("Content-Type", "application/json") + + httpClient := 
&http.Client{Timeout: 30 * time.Second}
+	resp, err := httpClient.Do(req)
+	if err != nil {
+		return fmt.Errorf("failed to send request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	body, _ := io.ReadAll(resp.Body)
+
+	if opts.Verbose {
+		fmt.Printf("[Test] Response status: %d\n", resp.StatusCode)
+		fmt.Printf("[Test] Response body: %s\n", string(body))
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("expected status 200, got %d: %s", resp.StatusCode, string(body))
+	}
+
+	if opts.Verbose {
+		fmt.Println("[Test] ✅ Chat completions request successful")
+	}
+
+	return nil
+}
+
+func checkDeployment(ctx context.Context, client *kubernetes.Clientset, namespace, name string, verbose bool) error {
+	deployment, err := client.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{})
+	if err != nil {
+		return err
+	}
+
+	if deployment.Status.ReadyReplicas == 0 {
+		return fmt.Errorf("no ready replicas")
+	}
+
+	if verbose {
+		fmt.Printf("[Test] Deployment %s/%s: %d/%d replicas ready\n",
+			namespace, name, deployment.Status.ReadyReplicas, *deployment.Spec.Replicas)
+	}
+
+	return nil
+}
+
+func getEnvoyServiceName(ctx context.Context, verbose bool) (string, error) {
+	cmd := exec.CommandContext(ctx, "kubectl", "get", "svc",
+		"-n", "envoy-gateway-system",
+		"--selector=gateway.envoyproxy.io/owning-gateway-namespace=default,gateway.envoyproxy.io/owning-gateway-name=semantic-router",
+		"-o", "jsonpath={.items[0].metadata.name}")
+
+	output, err := cmd.Output()
+	if err != nil {
+		return "", err
+	}
+
+	serviceName := strings.TrimSpace(string(output))
+	if serviceName == "" {
+		return "", fmt.Errorf("no Envoy service found")
+	}
+
+	return serviceName, nil
+}
+
+func startPortForward(ctx context.Context, namespace, service, ports string, verbose bool) error {
+	cmd := exec.CommandContext(ctx, "kubectl", "port-forward",
+		"-n", namespace,
+		fmt.Sprintf("svc/%s", service),
+		ports)
+
+	if verbose {
+		fmt.Printf("[Test] Starting port forward: kubectl port-forward -n %s svc/%s %s\n", namespace, service, ports)
+	}
+
+	// Start the port forward in the background; CommandContext kills the
+	// process once ctx is canceled.
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+
+	// Reap the process after it exits so it does not linger as a zombie.
+	go func() { _ = cmd.Wait() }()
+
+	// Give the port forward a moment to establish
+	time.Sleep(2 * time.Second)
+
+	return nil
+}
diff --git a/tools/make/e2e.mk b/tools/make/e2e.mk
new file mode 100644
index 000000000..f978b5899
--- /dev/null
+++ b/tools/make/e2e.mk
@@ -0,0 +1,101 @@
+# ======== e2e.mk ========
+# = E2E Testing Framework =
+# ======== e2e.mk ========

+##@ E2E Testing
+
+# E2E test configuration
+E2E_PROFILE ?= ai-gateway
+# The CI workflow invokes `make e2e-test PROFILE=...`, so honor PROFILE as a
+# shorthand for E2E_PROFILE.
+ifneq ($(PROFILE),)
+E2E_PROFILE = $(PROFILE)
+endif
+E2E_CLUSTER_NAME ?= semantic-router-e2e
+E2E_IMAGE_TAG ?= e2e-test
+E2E_KEEP_CLUSTER ?= false
+E2E_USE_EXISTING_CLUSTER ?= false
+E2E_VERBOSE ?= true
+E2E_PARALLEL ?= false
+E2E_TESTS ?=
+
+# Build the E2E test binary
+build-e2e: ## Build the E2E test binary
+	@$(LOG_TARGET)
+	@echo "Building E2E test binary..."
+	@cd e2e && go build -o ../bin/e2e ./cmd/e2e
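+
+# The binary can also be invoked directly once built; illustrative example
+# using the same flags the e2e-test target passes:
+#   ./bin/e2e -profile=ai-gateway -cluster=semantic-router-e2e -verbose=true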
+
+# Run E2E tests
+e2e-test: ## Run E2E tests (PROFILE=ai-gateway by default)
+e2e-test: build-e2e
+	@$(LOG_TARGET)
+	@echo "Running E2E tests with profile: $(E2E_PROFILE)"
+	@./bin/e2e \
+		-profile=$(E2E_PROFILE) \
+		-cluster=$(E2E_CLUSTER_NAME) \
+		-image-tag=$(E2E_IMAGE_TAG) \
+		-keep-cluster=$(E2E_KEEP_CLUSTER) \
+		-use-existing-cluster=$(E2E_USE_EXISTING_CLUSTER) \
+		-verbose=$(E2E_VERBOSE) \
+		-parallel=$(E2E_PARALLEL) \
+		$(if $(E2E_TESTS),-tests=$(E2E_TESTS),)
+
+# Run E2E tests with AI Gateway profile
+e2e-test-ai-gateway: ## Run E2E tests with AI Gateway profile
+e2e-test-ai-gateway: E2E_PROFILE=ai-gateway
+e2e-test-ai-gateway: e2e-test
+
+# Run E2E tests and keep cluster for debugging
+e2e-test-debug: ## Run E2E tests and keep cluster for debugging
+e2e-test-debug: E2E_KEEP_CLUSTER=true
+e2e-test-debug: E2E_VERBOSE=true
+e2e-test-debug: e2e-test
+
+# Run specific E2E test cases
+e2e-test-specific: ## Run specific E2E test cases (E2E_TESTS="test1,test2")
+e2e-test-specific:
+	@if [ -z "$(E2E_TESTS)" ]; then \
+		echo "Error: E2E_TESTS is not set"; \
+		echo "Usage: make e2e-test-specific E2E_TESTS=\"basic-health-check,chat-completions-request\""; \
+		exit 1; \
+	fi
+	@$(MAKE) e2e-test E2E_TESTS=$(E2E_TESTS)
+
+# Clean up E2E test cluster
+e2e-cleanup: ## Clean up E2E test cluster
+	@$(LOG_TARGET)
+	@echo "Cleaning up E2E test cluster: $(E2E_CLUSTER_NAME)"
+	@kind delete cluster --name $(E2E_CLUSTER_NAME) || true
+
+# Download E2E test dependencies
+e2e-deps: ## Download E2E test dependencies
+	@$(LOG_TARGET)
+	@echo "Downloading E2E test dependencies..."
+	@cd e2e && go mod download
+
+# Tidy E2E test dependencies
+e2e-tidy: ## Tidy E2E test dependencies
+	@$(LOG_TARGET)
+	@echo "Tidying E2E test dependencies..."
+	@cd e2e && go mod tidy
+
+# Help for E2E testing
+e2e-help: ## Show help for E2E testing
+	@echo "E2E Testing Framework"
+	@echo ""
+	@echo "Available Profiles:"
+	@echo "  ai-gateway    - Test Semantic Router with Envoy AI Gateway"
+	@echo "  istio         - Test Semantic Router with Istio (coming soon)"
+	@echo ""
+	@echo "Environment Variables:"
+	@echo "  E2E_PROFILE              - Test profile to run (default: ai-gateway)"
+	@echo "  E2E_CLUSTER_NAME         - Kind cluster name (default: semantic-router-e2e)"
+	@echo "  E2E_IMAGE_TAG            - Docker image tag (default: e2e-test)"
+	@echo "  E2E_KEEP_CLUSTER         - Keep cluster after tests (default: false)"
+	@echo "  E2E_USE_EXISTING_CLUSTER - Use existing cluster (default: false)"
+	@echo "  E2E_VERBOSE              - Enable verbose logging (default: true)"
+	@echo "  E2E_PARALLEL             - Run tests in parallel (default: false)"
+	@echo "  E2E_TESTS                - Comma-separated list of test cases to run"
+	@echo ""
+	@echo "Examples:"
+	@echo "  make e2e-test                                    # Run all tests with default profile"
+	@echo "  make e2e-test PROFILE=ai-gateway                 # Run AI Gateway tests"
+	@echo "  make e2e-test-debug                              # Run tests and keep cluster"
+	@echo "  make e2e-test-specific E2E_TESTS=\"test1,test2\"   # Run specific tests"
+	@echo "  make e2e-cleanup                                 # Clean up test cluster"
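+
+# Typical local iteration loop (illustrative; assumes kind and docker are
+# available on the host):
+#   make e2e-test E2E_KEEP_CLUSTER=true          # first run, keep the cluster
+#   make e2e-test E2E_USE_EXISTING_CLUSTER=true  # re-run against the same cluster
+#   make e2e-cleanup                             # delete the cluster when done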