Kubernetes Integration Test #9
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Kubernetes Integration Test

# Exercises the CORE semantic-router Kubernetes deployment.
#
# Test Scope:
#   ✅ Core deployment (namespace, pvc, deployment, service, configmap)
#   ✅ Manifest validation (kubeconform)
#   ✅ Service connectivity (gRPC, metrics, API ports)
#   ✅ Security scanning (Trivy, Checkov)
#   ✅ Basic syntax validation for observability and ai-gateway configs
#
# Out of Scope (planned for follow-up PRs):
#   🔄 Observability stack deployment (Prometheus + Grafana)
#   🔄 AI Gateway end-to-end testing (Envoy Gateway + InferencePool)

on:
  # Nightly run at 3:00 AM UTC.
  schedule:
    - cron: '0 3 * * *'
  # Allow manual triggering.
  workflow_dispatch: {}
  # Pull requests that touch the manifests, this workflow, the extproc
  # Dockerfile, or the kind tooling.
  pull_request:
    paths:
      - 'deploy/kubernetes/**'
      - '.github/workflows/k8s-integration-test.yml'
      - 'Dockerfile.extproc'
      - 'tools/kind/**'

# Tool versions for the jobs below.
env:
  KIND_VERSION: v0.20.0
  KUBECTL_VERSION: v1.28.0
  KUSTOMIZE_VERSION: v5.2.1

jobs:
# Renders the base kustomization, validates it against Kubernetes schemas with
# kubeconform, and publishes the rendered manifests as an artifact.
validate-manifests:
  name: Validate Kubernetes Manifests
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Install the kustomize release named by the workflow-level
    # KUSTOMIZE_VERSION instead of whatever is newest. The install script
    # takes the version without the leading "v".
    - name: Setup Kustomize
      run: |
        set -euo pipefail
        curl -fsSL "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" \
          | bash -s -- "${KUSTOMIZE_VERSION#v}"
        sudo mv kustomize /usr/local/bin/
        kustomize version

    - name: Validate Kustomize build
      run: |
        echo "Building kustomization..."
        kustomize build deploy/kubernetes > /tmp/k8s-manifests.yaml
        echo "Kustomize build successful!"
        echo "Generated manifests:"
        cat /tmp/k8s-manifests.yaml

    # NOTE(review): this downloads the "latest" kubeconform release, so the
    # validator version can change between runs.
    - name: Setup kubeconform
      run: |
        wget https://github.com/yannh/kubeconform/releases/latest/download/kubeconform-linux-amd64.tar.gz
        tar xf kubeconform-linux-amd64.tar.gz
        sudo mv kubeconform /usr/local/bin/
        kubeconform -v

    - name: Validate manifests with kubeconform
      run: |
        # pipefail: a failing `kustomize build` must fail the step rather than
        # hand kubeconform an empty stream.
        set -o pipefail
        echo "Validating Kubernetes manifests..."
        kustomize build deploy/kubernetes | \
          kubeconform -strict -summary \
            -kubernetes-version 1.28.0 \
            -schema-location default \
            -schema-location 'https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json' \
            -skip CustomResourceDefinition \
            -ignore-missing-schemas

    - name: Upload validated manifests
      uses: actions/upload-artifact@v4
      with:
        name: k8s-manifests
        path: /tmp/k8s-manifests.yaml
        retention-days: 5
# Builds the extproc image, loads it into a kind cluster, deploys the test
# overlay, and checks that the pod becomes ready and its ports respond.
kind-integration-test:
  name: kind Cluster Integration Test
  runs-on: ubuntu-latest
  needs: validate-manifests
  timeout-minutes: 45  # Increased to account for model downloads
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Create kind cluster
      # NOTE(review): the action ref was restored from an e-mail-obfuscated
      # placeholder ("[email protected]"); confirm v1.8.0 is the intended tag.
      uses: helm/kind-action@v1.8.0
      with:
        version: ${{ env.KIND_VERSION }}
        config: tools/kind/kind-config.yaml
        cluster_name: semantic-router-test
        wait: 120s
# Build the extproc image into the local Docker daemon (load: true) using the
# GitHub Actions cache as the layer cache.
- name: Build semantic-router image
  uses: docker/build-push-action@v5
  with:
    context: .
    file: ./Dockerfile.extproc
    load: true
    tags: ghcr.io/vllm-project/semantic-router/extproc:test
    cache-from: type=gha
    cache-to: type=gha,mode=max

# Copy the freshly built image onto the kind cluster nodes.
- name: Load image into kind cluster
  env:
    IMAGE_REF: ghcr.io/vllm-project/semantic-router/extproc:test
    CLUSTER_NAME: semantic-router-test
  run: |
    echo "Loading image into kind cluster..."
    kind load docker-image "$IMAGE_REF" --name "$CLUSTER_NAME"
    echo "Image loaded successfully!"
# Print basic cluster facts so a broken kind bootstrap is visible early.
- name: Verify cluster
  run: |
    kubectl cluster-info
    kubectl get nodes
    kubectl version

# Install the kustomize release named by the workflow-level KUSTOMIZE_VERSION
# instead of whatever is newest. The install script takes the version without
# the leading "v".
- name: Setup Kustomize
  run: |
    set -euo pipefail
    curl -fsSL "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" \
      | bash -s -- "${KUSTOMIZE_VERSION#v}"
    sudo mv kustomize /usr/local/bin/
# Builds a throwaway overlay in deploy/kubernetes/test-overlay: copies the base
# manifests, rewrites the init container's download script in deployment.yaml
# with perl, and writes a kustomization.yaml that retags the image and patches
# resource requests/limits for CI.
- name: Create temporary kustomization for testing
  run: |
    # Create a test overlay directory
    mkdir -p deploy/kubernetes/test-overlay
    cd deploy/kubernetes/test-overlay
    # Copy all base resources to overlay directory
    cp ../namespace.yaml ./
    cp ../service.yaml ./
    cp ../config.yaml ./
    cp ../tools_db.json ./
    # Copy resources for CI testing
    cp ../deployment.yaml ./deployment.yaml
    cp ../pvc.yaml ./pvc.yaml
    # Optimize init container for CI testing
    # NOTE(review): the replacement strings in the perl commands below insert
    # literal whitespace after each "\n". As shown here every inserted line is
    # indented by a single space, which looks collapsed; confirm the
    # indentation matches deployment.yaml before relying on these edits.
    # 1. Update pip install to include hf_transfer for faster downloads
    perl -i -pe 's/pip install --no-cache-dir huggingface_hub\[cli\]/pip install --no-cache-dir "huggingface_hub[cli]" hf_transfer/g' deployment.yaml
    # 2. Enable HF_HUB_ENABLE_HF_TRANSFER for faster downloads
    #    (the pattern matches every "env:" occurrence in deployment.yaml, not
    #    only the init container's)
    perl -i -pe 's/(env:)/\1\n - name: HF_HUB_ENABLE_HF_TRANSFER\n value: "1"/g' deployment.yaml
    # 3. Simplify the download logic - remove directory checks since CI always starts fresh
    # Replace the entire args section with a simpler version
    #    (-0 slurps the whole file and /s lets ".*?" span lines, so the match
    #    runs from "args: - | set -e" to the first "ls -la /app/models/")
    perl -i -0pe 's/args:\s*\n\s*-\s*\|\s*\n\s*set -e.*?ls -la \/app\/models\//args:\n - |\n set -e\n echo "Installing Hugging Face CLI..."\n pip install --no-cache-dir "huggingface_hub[cli]" hf_transfer\n \n echo "Downloading models to persistent volume..."\n cd \/app\/models\n \n echo "Downloading category classifier model..."\n hf download LLM-Semantic-Router\/category_classifier_modernbert-base_model --local-dir category_classifier_modernbert-base_model\n \n echo "Downloading PII classifier model..."\n hf download LLM-Semantic-Router\/pii_classifier_modernbert-base_model --local-dir pii_classifier_modernbert-base_model\n \n echo "Downloading jailbreak classifier model..."\n hf download LLM-Semantic-Router\/jailbreak_classifier_modernbert-base_model --local-dir jailbreak_classifier_modernbert-base_model\n \n echo "Downloading PII token classifier model..."\n hf download LLM-Semantic-Router\/pii_classifier_modernbert-base_presidio_token_model --local-dir pii_classifier_modernbert-base_presidio_token_model\n \n echo "All models downloaded successfully!"\n ls -la \/app\/models\//gs' deployment.yaml
    echo "✓ Updated init container with optimized model download for CI"
    # Create kustomization with local resources
    # (the heredoc body contains no shell expansions, so the unquoted EOF
    # delimiter writes it out verbatim)
    cat > kustomization.yaml << EOF
    apiVersion: kustomize.config.k8s.io/v1beta1
    kind: Kustomization
    resources:
      - namespace.yaml
      - pvc.yaml
      - deployment.yaml
      - service.yaml
    configMapGenerator:
      - name: semantic-router-config
        files:
          - config.yaml
          - tools_db.json
    namespace: vllm-semantic-router-system
    # Use the same image that was loaded into kind cluster
    images:
      - name: ghcr.io/vllm-project/semantic-router/extproc
        newTag: test
    # Reduce resource requirements for CI testing and set imagePullPolicy
    patches:
      # Patch for main container
      - patch: |-
          - op: replace
            path: /spec/template/spec/containers/0/resources/requests/memory
            value: "2Gi"
          - op: replace
            path: /spec/template/spec/containers/0/resources/requests/cpu
            value: "1"
          - op: replace
            path: /spec/template/spec/containers/0/resources/limits/memory
            value: "4Gi"
          - op: replace
            path: /spec/template/spec/containers/0/resources/limits/cpu
            value: "2"
          - op: add
            path: /spec/template/spec/containers/0/imagePullPolicy
            value: "IfNotPresent"
        target:
          kind: Deployment
          name: semantic-router
      # Patch for init container - increase resources for faster downloads
      - patch: |-
          - op: replace
            path: /spec/template/spec/initContainers/0/resources/requests/memory
            value: "1Gi"
          - op: replace
            path: /spec/template/spec/initContainers/0/resources/requests/cpu
            value: "500m"
          - op: replace
            path: /spec/template/spec/initContainers/0/resources/limits/memory
            value: "2Gi"
          - op: replace
            path: /spec/template/spec/initContainers/0/resources/limits/cpu
            value: "1"
        target:
          kind: Deployment
          name: semantic-router
    EOF
    # Echo the result into the job log for debugging.
    echo "=== Generated kustomization.yaml ==="
    cat kustomization.yaml
    echo "=== Files in overlay directory ==="
    ls -la
# Best-effort reachability probe: failures only print a warning and never fail
# the step.
- name: Pre-flight check for Hugging Face connectivity
  run: |
    echo "Testing Hugging Face Hub connectivity..."
    if ! curl -I https://huggingface.co; then
      echo "⚠️ Warning: Cannot reach huggingface.co"
    fi
    # Test one of the model repos
    if ! curl -I https://huggingface.co/LLM-Semantic-Router/category_classifier_modernbert-base_model; then
      echo "⚠️ Warning: Cannot reach model repository"
    fi
    echo "✓ Connectivity check completed"

# Apply the rendered test overlay, then block until the namespace is Active.
- name: Deploy to kind cluster
  env:
    NS: vllm-semantic-router-system
  run: |
    echo "Deploying semantic-router to kind cluster..."
    kustomize build deploy/kubernetes/test-overlay | kubectl apply -f -
    echo "Waiting for namespace to be active..."
    kubectl wait --for=jsonpath='{.status.phase}'=Active "namespace/${NS}" --timeout=60s
    echo "Deployment initiated. Checking resources..."
    kubectl get all -n "$NS"
# Walks the rollout in order: PVC bound -> pod created -> init container done
# (model download) -> model directories present -> main container Ready.
# Each wait dumps diagnostics and fails the step on timeout.
- name: Wait for deployment readiness
  run: |
    echo "Waiting for deployment to be ready (this may take a few minutes)..."
    echo "Note: Using PVC for model storage, init container will download models"
    # Wait for PVC to be bound
    echo "Waiting for PVC to be bound..."
    kubectl wait --for=jsonpath='{.status.phase}'=Bound pvc/semantic-router-models -n vllm-semantic-router-system --timeout=120s || {
      echo "PVC binding timeout. Checking PVC status..."
      kubectl describe pvc -n vllm-semantic-router-system
      exit 1
    }
    # Wait for pods to be created
    echo "Waiting for pods to be created..."
    timeout 120 bash -c 'until kubectl get pods -n vllm-semantic-router-system | grep -q semantic-router; do echo "Waiting for pod creation..."; sleep 5; done'
    # Show pod status
    kubectl get pods -n vllm-semantic-router-system
    # Wait for init container to complete (model download)
    # Increased timeout to 15 minutes for model downloads
    echo "Waiting for init container to complete (downloading models, this may take 10-15 minutes)..."
    kubectl wait --for=condition=Initialized pods -l app=semantic-router -n vllm-semantic-router-system --timeout=900s || {
      echo "❌ Init container did not complete in time. Showing logs..."
      kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=200 || true
      echo ""
      echo "Checking pod status..."
      kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
      exit 1
    }
    # Show init container logs and verify models were downloaded
    echo "=== Init Container Logs ==="
    kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=100 || true
    # Verify models were actually downloaded
    echo ""
    echo "=== Verifying Model Downloads ==="
    POD_NAME=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].metadata.name}')
    # Check if models directory has content
    echo "Checking models directory content..."
    kubectl exec -n vllm-semantic-router-system "$POD_NAME" -- ls -la /app/models/ || {
      echo "⚠️ Warning: Could not list models directory"
    }
    # Count model directories (should be 4).
    # `grep -c` already prints 0 when nothing matches (and exits 1), so the
    # fallback must only swallow the exit status; adding `echo 0` would yield
    # "0<newline>0", which is not an integer.
    MODEL_COUNT=$(kubectl exec -n vllm-semantic-router-system "$POD_NAME" -- sh -c 'ls -1 /app/models/ | grep -c "model" || true')
    MODEL_COUNT=${MODEL_COUNT:-0}
    # Refuse non-numeric output so the -lt comparison below cannot error out
    # and be silently treated as "enough models".
    case "$MODEL_COUNT" in
      *[!0-9]*)
        echo "❌ Error: Could not determine model directory count (got: $MODEL_COUNT)"
        exit 1
        ;;
    esac
    echo "Found $MODEL_COUNT model directories"
    if [ "$MODEL_COUNT" -lt 4 ]; then
      echo "❌ Error: Expected 4 model directories, found $MODEL_COUNT"
      echo "Init container may have failed to download all models"
      exit 1
    fi
    echo "✓ All models verified successfully"
    # Wait for main container to be ready
    echo ""
    echo "Waiting for main container to be ready..."
    kubectl wait --for=condition=Ready pods -l app=semantic-router -n vllm-semantic-router-system --timeout=300s || {
      echo "❌ Pod did not become ready in time. Showing status and logs..."
      kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
      kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=200 || true
      exit 1
    }
    echo "✅ Deployment is ready!"
# Lists the deployed objects, then asserts that the first matching pod is in
# phase Running and that its first container reports ready.
- name: Verify deployment
  env:
    NS: vllm-semantic-router-system
    SELECTOR: app=semantic-router
  run: |
    echo "=== Verifying Deployment ==="
    # Inventory: deployment, pods, services, configmaps
    kubectl get deployment -n "$NS" semantic-router -o wide
    kubectl get pods -n "$NS" -o wide
    kubectl get svc -n "$NS"
    kubectl get configmap -n "$NS"

    # Print the pod description and fail the step.
    describe_and_fail() {
      kubectl describe pods -n "$NS" -l "$SELECTOR"
      exit 1
    }

    # Verify pod is running
    POD_STATUS=$(kubectl get pods -n "$NS" -l "$SELECTOR" -o jsonpath='{.items[0].status.phase}')
    if [ "$POD_STATUS" != "Running" ]; then
      echo "Error: Pod is not running. Status: $POD_STATUS"
      describe_and_fail
    fi
    echo "✓ Pod is running"

    # Verify all containers are ready
    READY_CONTAINERS=$(kubectl get pods -n "$NS" -l "$SELECTOR" -o jsonpath='{.items[0].status.containerStatuses[0].ready}')
    if [ "$READY_CONTAINERS" != "true" ]; then
      echo "Error: Container is not ready"
      describe_and_fail
    fi
    echo "✓ All containers are ready"
# Probes the three service ports from inside the pod, then tries /health via a
# temporary port-forward. Every probe is best-effort: a failure prints a
# message and the step still succeeds.
- name: Test service connectivity
  env:
    NS: vllm-semantic-router-system
  run: |
    echo "=== Testing Service Connectivity ==="
    # Get pod name
    POD_NAME=$(kubectl get pods -n "$NS" -l app=semantic-router -o jsonpath='{.items[0].metadata.name}')
    echo "Pod name: $POD_NAME"

    # probe_port <label in "Testing" line> <label in warning> <port>
    # Runs `nc -zv` against localhost inside the pod with a 5 second timeout.
    probe_port() {
      echo "Testing $1 port ($3)..."
      kubectl exec -n "$NS" "$POD_NAME" -- timeout 5 nc -zv localhost "$3" || {
        echo "Warning: $2 port test failed"
      }
    }

    probe_port "gRPC" "gRPC" 50051
    probe_port "metrics" "Metrics" 9190
    probe_port "classify API" "Classify API" 8080

    # Port forward for external testing
    echo "Setting up port-forward for testing..."
    kubectl port-forward -n "$NS" svc/semantic-router 8080:8080 &
    PF_PID=$!
    sleep 5
    # Test HTTP endpoint (if available)
    echo "Testing HTTP endpoint..."
    curl -v http://localhost:8080/health || echo "Health endpoint not available or not implemented"
    # Cleanup port-forward
    kill $PF_PID || true
    echo "✓ Service connectivity tests completed"
# Always print recent logs and events; every command is allowed to fail.
- name: Check logs
  if: always()
  env:
    NS: vllm-semantic-router-system
  run: |
    echo "=== Deployment Logs ==="
    kubectl logs -n "$NS" -l app=semantic-router --tail=200 --all-containers=true || true
    echo "=== Events ==="
    kubectl get events -n "$NS" --sort-by='.lastTimestamp' || true

# On failure, write descriptions, logs, events and resource YAML to
# /tmp/k8s-logs for the upload step that follows.
- name: Export cluster logs on failure
  if: failure()
  env:
    NS: vllm-semantic-router-system
    OUT_DIR: /tmp/k8s-logs
  run: |
    echo "=== Exporting cluster information for debugging ==="
    mkdir -p "$OUT_DIR"
    # Pod and deployment descriptions
    kubectl describe pods -n "$NS" > "$OUT_DIR/pod-descriptions.txt" || true
    kubectl describe deployment -n "$NS" > "$OUT_DIR/deployment-description.txt" || true
    # Logs from the previous and the current container instances
    kubectl logs -n "$NS" -l app=semantic-router --all-containers=true --previous > "$OUT_DIR/previous-logs.txt" || true
    kubectl logs -n "$NS" -l app=semantic-router --all-containers=true > "$OUT_DIR/current-logs.txt" || true
    # Events, oldest first
    kubectl get events -n "$NS" --sort-by='.lastTimestamp' > "$OUT_DIR/events.txt" || true
    # Full resource status
    kubectl get all -n "$NS" -o yaml > "$OUT_DIR/all-resources.yaml" || true

- name: Upload cluster logs
  if: failure()
  uses: actions/upload-artifact@v4
  with:
    name: k8s-cluster-logs
    path: /tmp/k8s-logs/
    retention-days: 7

# Remove the test namespace whether or not earlier steps passed.
- name: Cleanup
  if: always()
  run: |
    echo "Cleaning up resources..."
    kubectl delete namespace vllm-semantic-router-system --timeout=60s || true
# Static checks on rendered kustomize output; no cluster is created in this job.
test-with-custom-config:
  name: Test with Custom Configuration
  runs-on: ubuntu-latest
  needs: validate-manifests
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Install the kustomize release named by the workflow-level
    # KUSTOMIZE_VERSION instead of whatever is newest. The install script
    # takes the version without the leading "v".
    - name: Setup Kustomize
      run: |
        set -euo pipefail
        curl -fsSL "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" \
          | bash -s -- "${KUSTOMIZE_VERSION#v}"
        sudo mv kustomize /usr/local/bin/
# Render the base kustomization and fail if any expected resource kind is
# absent from the output.
- name: Test kustomize with different overlays
  run: |
    echo "Testing base kustomization..."
    kustomize build deploy/kubernetes > /tmp/base-manifests.yaml
    echo "Validating generated resources..."
    # Check if all expected resources are present
    for expected_kind in Namespace Deployment Service ConfigMap; do
      grep -q "kind: $expected_kind" /tmp/base-manifests.yaml || {
        echo "Error: $expected_kind not found"
        exit 1
      }
    done
    echo "✓ All expected resources are present"

# Show the generated ConfigMap and warn (without failing) when a config file
# name does not appear in the rendered output.
- name: Verify ConfigMap generation
  run: |
    echo "Checking ConfigMap generation..."
    kustomize build deploy/kubernetes | grep -A 20 "kind: ConfigMap"
    # Verify config files are included
    for config_file in config.yaml tools_db.json; do
      if ! kustomize build deploy/kubernetes | grep -q "$config_file"; then
        echo "Warning: $config_file might not be properly included in ConfigMap"
      fi
    done
# Render the observability kustomization when the directory exists; a missing
# resource kind only produces a warning.
- name: Validate observability kustomization
  run: |
    echo "Validating observability stack kustomization..."
    if [ ! -d "deploy/kubernetes/observability" ]; then
      echo "Observability directory not found, skipping..."
      exit 0
    fi
    kustomize build deploy/kubernetes/observability > /tmp/observability-manifests.yaml
    echo "✓ Observability kustomization is valid"
    # Verify expected resources
    for resource in "Deployment" "Service" "ConfigMap" "PersistentVolumeClaim"; do
      grep -q "kind: $resource" /tmp/observability-manifests.yaml \
        || echo "Warning: $resource not found in observability manifests"
    done

# Client-side dry-run of the AI Gateway manifests. Problems are reported as
# messages only; this step does not fail on them.
- name: Validate AI Gateway configurations
  run: |
    echo "Validating AI Gateway configurations..."
    # Check if ai-gateway directory exists
    if [ ! -d "deploy/kubernetes/ai-gateway" ]; then
      echo "AI Gateway directory not found, skipping..."
      exit 0
    fi
    # Validate configuration yamls (without CRDs)
    for yaml_file in deploy/kubernetes/ai-gateway/configuration/*.yaml; do
      # An unmatched glob stays literal; skip anything that is not a file.
      [ -f "$yaml_file" ] || continue
      echo "Checking $yaml_file..."
      # Basic YAML syntax check
      kubectl create --dry-run=client -f "$yaml_file" || echo "Warning: Issues with $yaml_file"
    done
    # Validate inference-pool manifests (skip CRD validation as they may not be installed)
    for yaml_file in deploy/kubernetes/ai-gateway/inference-pool/*.yaml; do
      [ -f "$yaml_file" ] || continue
      echo "Checking $yaml_file for YAML syntax..."
      # Just check if it's valid YAML
      if kubectl create --dry-run=client -f "$yaml_file" 2>&1 | grep -q "no matches for kind"; then
        echo "✓ $yaml_file syntax valid (CRD not installed)"
      else
        echo "Validated $yaml_file"
      fi
    done
    echo "✓ AI Gateway configuration validation completed"
# Report-only security scanning of the manifests: Trivy results go to GitHub
# code scanning as SARIF, Checkov prints to the job log. Neither scanner is
# configured to fail the build.
security-scan:
  name: Security Scan for K8s Manifests
  runs-on: ubuntu-latest
  needs: validate-manifests
  # upload-sarif needs security-events: write; declare the token scopes
  # explicitly so the upload does not depend on repository defaults.
  permissions:
    contents: read
    security-events: write
    actions: read
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Install the kustomize release named by the workflow-level
    # KUSTOMIZE_VERSION instead of whatever is newest. The install script
    # takes the version without the leading "v".
    - name: Setup Kustomize
      run: |
        set -euo pipefail
        curl -fsSL "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" \
          | bash -s -- "${KUSTOMIZE_VERSION#v}"
        sudo mv kustomize /usr/local/bin/

    # NOTE(review): this action and checkov-action below track @master;
    # consider pinning both to a release tag or commit SHA.
    - name: Run Trivy security scan
      uses: aquasecurity/trivy-action@master
      with:
        scan-type: "config"
        scan-ref: "deploy/kubernetes"
        format: "sarif"
        output: "trivy-results.sarif"
        severity: "CRITICAL,HIGH"
        exit-code: "0"  # Don't fail on vulnerabilities, just report

    - name: Upload Trivy results to GitHub Security
      uses: github/codeql-action/upload-sarif@v3
      if: always()
      with:
        sarif_file: "trivy-results.sarif"

    - name: Run Checkov scan
      uses: bridgecrewio/checkov-action@master
      with:
        directory: deploy/kubernetes
        framework: kubernetes
        output_format: cli
        soft_fail: true  # Don't fail the build
# Aggregates the results of the other jobs. Runs even when they fail
# (if: always()) and fails when a required job did not finish cleanly.
# The security scan is reported but does not gate the result.
summary:
  name: Test Summary
  runs-on: ubuntu-latest
  needs:
    - validate-manifests
    - kind-integration-test
    - test-with-custom-config
    - security-scan
  if: always()
  steps:
    - name: Check test results
      # Pass job results through the environment instead of expanding
      # ${{ }} expressions inside the script text.
      env:
        VALIDATE_RESULT: ${{ needs.validate-manifests.result }}
        KIND_RESULT: ${{ needs.kind-integration-test.result }}
        CUSTOM_CONFIG_RESULT: ${{ needs.test-with-custom-config.result }}
        SECURITY_RESULT: ${{ needs.security-scan.result }}
      run: |
        echo "=== Kubernetes Integration Test Summary ==="
        echo "Manifest Validation: $VALIDATE_RESULT"
        echo "kind Integration Test: $KIND_RESULT"
        echo "Custom Config Test: $CUSTOM_CONFIG_RESULT"
        echo "Security Scan: $SECURITY_RESULT"
        # A cancelled required job has not passed either, so treat it like a
        # failure. "skipped" stays non-fatal: a job is only skipped when one
        # it depends on has already failed or been cancelled.
        failed=0
        for result in "$VALIDATE_RESULT" "$KIND_RESULT" "$CUSTOM_CONFIG_RESULT"; do
          case "$result" in
            failure|cancelled) failed=1 ;;
          esac
        done
        if [ "$failed" -ne 0 ]; then
          echo "❌ Some tests failed"
          exit 1
        else
          echo "✅ All tests passed"
        fi