# feat: add CI test for k8s core deployment #4
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Kubernetes Integration Test

# This workflow tests the CORE semantic-router Kubernetes deployment.
#
# Test Scope:
# ✅ Core deployment (namespace, pvc, deployment, service, configmap)
# ✅ Manifest validation (kubeconform)
# ✅ Service connectivity (gRPC, metrics, API ports)
# ✅ Security scanning (Trivy, Checkov)
# ✅ Basic syntax validation for observability and ai-gateway configs
#
# Out of Scope (planned for follow-up PRs):
# 🔄 Observability stack deployment (Prometheus + Grafana)
# 🔄 AI Gateway end-to-end testing (Envoy Gateway + InferencePool)
# Triggers: pull requests that touch the deployment manifests, this workflow,
# the extproc Dockerfile or the kind tooling; manual dispatch; nightly cron.
on:
  pull_request:
    paths:
      - "deploy/kubernetes/**"
      - ".github/workflows/k8s-integration-test.yml"
      - "Dockerfile.extproc"
      - "tools/kind/**"
  workflow_dispatch:  # Allow manual triggering
  schedule:
    # Run nightly at 3:00 AM UTC
    - cron: "0 3 * * *"
# Pinned tool versions for this workflow; workflow-level `env` entries are
# exposed to every job as environment variables.
env:
  KIND_VERSION: v0.20.0
  KUBECTL_VERSION: v1.28.0
  KUSTOMIZE_VERSION: v5.2.1
jobs:
  # ---------------------------------------------------------------------------
  # Builds the base kustomization, validates the rendered manifests against the
  # Kubernetes schemas with kubeconform, and uploads them as an artifact.
  # Every other job depends on this one.
  # ---------------------------------------------------------------------------
  validate-manifests:
    name: Validate Kubernetes Manifests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Kustomize
        run: |
          # pipefail: a failed download must fail the step instead of piping
          # nothing into bash.
          set -euo pipefail
          # Install the release named by KUSTOMIZE_VERSION. The install script
          # expects the version without its leading "v".
          curl -fsSL "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" \
            | bash -s -- "${KUSTOMIZE_VERSION#v}"
          sudo mv kustomize /usr/local/bin/
          kustomize version

      - name: Validate Kustomize build
        run: |
          echo "Building kustomization..."
          kustomize build deploy/kubernetes > /tmp/k8s-manifests.yaml
          echo "Kustomize build successful!"
          echo "Generated manifests:"
          cat /tmp/k8s-manifests.yaml

      - name: Setup kubeconform
        run: |
          wget https://github.com/yannh/kubeconform/releases/latest/download/kubeconform-linux-amd64.tar.gz
          tar xf kubeconform-linux-amd64.tar.gz
          sudo mv kubeconform /usr/local/bin/
          kubeconform -v

      - name: Validate manifests with kubeconform
        run: |
          echo "Validating Kubernetes manifests..."
          # Validate the exact file produced by the build step (and uploaded
          # below) rather than re-rendering through a pipe, where a failed
          # build could hand kubeconform an empty stream.
          # The schema version follows KUBECTL_VERSION (leading "v" stripped).
          kubeconform -strict -summary \
            -kubernetes-version "${KUBECTL_VERSION#v}" \
            -schema-location default \
            -schema-location 'https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json' \
            -skip CustomResourceDefinition \
            -ignore-missing-schemas \
            /tmp/k8s-manifests.yaml

      - name: Upload validated manifests
        uses: actions/upload-artifact@v4
        with:
          name: k8s-manifests
          path: /tmp/k8s-manifests.yaml
          retention-days: 5

  # ---------------------------------------------------------------------------
  # Builds the extproc image, starts a kind cluster plus a local registry,
  # deploys a CI-sized overlay of the core manifests and checks that the pod
  # becomes ready and its ports answer.
  # ---------------------------------------------------------------------------
  kind-integration-test:
    name: kind Cluster Integration Test
    runs-on: ubuntu-latest
    needs: validate-manifests
    timeout-minutes: 30
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build semantic-router image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile.extproc
          tags: ghcr.io/vllm-project/semantic-router/extproc:test
          load: true
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Create kind cluster with local registry
        # NOTE(review): the action tag was garbled in this copy of the file
        # ("helm/[email protected]"); v1.8.0 is assumed - confirm against
        # the repository before merging.
        uses: helm/kind-action@v1.8.0
        with:
          version: ${{ env.KIND_VERSION }}
          config: tools/kind/kind-config.yaml
          cluster_name: semantic-router-test
          wait: 120s

      - name: Setup local Docker registry
        run: |
          # Create a local registry container on kind network
          docker run -d --restart=always -p 5001:5000 \
            --network kind --name kind-registry \
            registry:2

          # Document the local registry for kind cluster
          kubectl apply -f - <<EOF
          apiVersion: v1
          kind: ConfigMap
          metadata:
            name: local-registry-hosting
            namespace: kube-public
          data:
            localRegistryHosting.v1: |
              host: "kind-registry:5000"
              help: "https://kind.sigs.k8s.io/docs/user/local-registry/"
          EOF

          # Poll until the registry answers instead of sleeping a fixed time;
          # the push in the next step cannot succeed before this does.
          echo "Waiting for registry to be ready..."
          timeout 60 bash -c 'until curl -fsS http://localhost:5001/v2/ >/dev/null; do echo "Registry not ready yet"; sleep 1; done'
          docker exec kind-registry registry --version

      - name: Tag and push image to local registry
        run: |
          # Generate a unique tag from the run ID and run attempt
          IMAGE_TAG="test-${{ github.run_id }}-${{ github.run_attempt }}"
          # Export for later steps (the overlay step reads IMAGE_TAG)
          echo "IMAGE_TAG=${IMAGE_TAG}" >> "$GITHUB_ENV"

          # Tag for pushing to localhost:5001 (from host)
          docker tag ghcr.io/vllm-project/semantic-router/extproc:test "localhost:5001/semantic-router/extproc:${IMAGE_TAG}"
          docker push "localhost:5001/semantic-router/extproc:${IMAGE_TAG}"

          echo "Image pushed to local registry with tag: ${IMAGE_TAG}"
          echo "Registry will be accessible from kind cluster as: kind-registry:5000/semantic-router/extproc:${IMAGE_TAG}"

      - name: Verify cluster
        run: |
          kubectl cluster-info
          kubectl get nodes
          kubectl version

      - name: Setup Kustomize
        run: |
          set -euo pipefail
          # Same pinned install as in validate-manifests.
          curl -fsSL "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" \
            | bash -s -- "${KUSTOMIZE_VERSION#v}"
          sudo mv kustomize /usr/local/bin/

      - name: Create temporary kustomization for testing
        run: |
          # Create a test overlay directory
          mkdir -p deploy/kubernetes/test-overlay
          cd deploy/kubernetes/test-overlay

          # Copy all base resources to overlay directory
          cp ../namespace.yaml ./
          cp ../pvc.yaml ./
          cp ../deployment.yaml ./
          cp ../service.yaml ./
          cp ../config.yaml ./
          cp ../tools_db.json ./

          # Create kustomization with local resources.
          # The heredoc is unquoted on purpose so ${IMAGE_TAG} (exported by the
          # push step) is expanded by the shell.
          cat > kustomization.yaml << EOF
          apiVersion: kustomize.config.k8s.io/v1beta1
          kind: Kustomization

          resources:
            - namespace.yaml
            - pvc.yaml
            - deployment.yaml
            - service.yaml

          configMapGenerator:
            - name: semantic-router-config
              files:
                - config.yaml
                - tools_db.json

          namespace: vllm-semantic-router-system

          images:
            - name: ghcr.io/vllm-project/semantic-router/extproc
              newName: kind-registry:5000/semantic-router/extproc
              newTag: "${IMAGE_TAG}"

          # Reduce resource requirements for CI testing
          patches:
            - patch: |-
                - op: replace
                  path: /spec/template/spec/containers/0/resources/requests/memory
                  value: "1Gi"
                - op: replace
                  path: /spec/template/spec/containers/0/resources/requests/cpu
                  value: "500m"
                - op: replace
                  path: /spec/template/spec/containers/0/resources/limits/memory
                  value: "2Gi"
                - op: replace
                  path: /spec/template/spec/containers/0/resources/limits/cpu
                  value: "1"
                - op: replace
                  path: /spec/template/spec/initContainers/0/resources/requests/memory
                  value: "256Mi"
                - op: replace
                  path: /spec/template/spec/initContainers/0/resources/requests/cpu
                  value: "100m"
                - op: replace
                  path: /spec/template/spec/initContainers/0/resources/limits/memory
                  value: "512Mi"
                - op: replace
                  path: /spec/template/spec/initContainers/0/resources/limits/cpu
                  value: "250m"
              target:
                kind: Deployment
                name: semantic-router
          EOF

          echo "=== Generated kustomization.yaml ==="
          cat kustomization.yaml
          echo "=== Files in overlay directory ==="
          ls -la

      - name: Deploy to kind cluster
        run: |
          # pipefail: a failed build must not be masked by the apply side.
          set -euo pipefail
          echo "Deploying semantic-router to kind cluster..."
          kustomize build deploy/kubernetes/test-overlay | kubectl apply -f -

          echo "Waiting for namespace to be active..."
          kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/vllm-semantic-router-system --timeout=60s

          echo "Deployment initiated. Checking resources..."
          kubectl get all -n vllm-semantic-router-system

      - name: Wait for deployment readiness
        run: |
          echo "Waiting for deployment to be ready (this may take a few minutes)..."

          # Wait for PVC to be bound
          echo "Checking PVC status..."
          kubectl get pvc -n vllm-semantic-router-system

          # Note: In kind, we might need to wait for local-path-provisioner.
          # Best effort only (|| true): an unbound PVC surfaces later as a pod
          # that never initializes.
          timeout 300 bash -c 'until kubectl get pvc -n vllm-semantic-router-system semantic-router-models -o jsonpath="{.status.phase}" | grep -q "Bound"; do echo "Waiting for PVC to be bound..."; sleep 5; done' || true

          # Wait for pods to be created
          echo "Waiting for pods to be created..."
          timeout 120 bash -c 'until kubectl get pods -n vllm-semantic-router-system | grep -q semantic-router; do echo "Waiting for pod creation..."; sleep 5; done'

          # Show pod status
          kubectl get pods -n vllm-semantic-router-system

          # Wait for init container to complete (model download)
          echo "Waiting for init container to complete (downloading models)..."
          kubectl wait --for=condition=Initialized pods -l app=semantic-router -n vllm-semantic-router-system --timeout=600s || {
            echo "Init container did not complete in time. Showing logs..."
            kubectl logs -n vllm-semantic-router-system -l app=semantic-router -c model-downloader --tail=100 || true
            exit 1
          }

          # Wait for main container to be ready
          echo "Waiting for main container to be ready..."
          kubectl wait --for=condition=Ready pods -l app=semantic-router -n vllm-semantic-router-system --timeout=300s || {
            echo "Pod did not become ready in time. Showing status and logs..."
            kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
            kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=100 || true
            exit 1
          }

          echo "Deployment is ready!"

      - name: Verify deployment
        run: |
          echo "=== Verifying Deployment ==="

          # Check deployment status
          kubectl get deployment -n vllm-semantic-router-system semantic-router -o wide

          # Check pod status
          kubectl get pods -n vllm-semantic-router-system -o wide

          # Check services
          kubectl get svc -n vllm-semantic-router-system

          # Check configmaps
          kubectl get configmap -n vllm-semantic-router-system

          # Verify pod is running
          POD_STATUS=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].status.phase}')
          if [ "$POD_STATUS" != "Running" ]; then
            echo "Error: Pod is not running. Status: $POD_STATUS"
            kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
            exit 1
          fi
          echo "✓ Pod is running"

          # Verify all containers are ready: [*] yields one true/false per
          # container, so a single "false" (or no status at all) is a failure.
          READY_CONTAINERS=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].status.containerStatuses[*].ready}')
          if [ -z "$READY_CONTAINERS" ] || echo "$READY_CONTAINERS" | grep -qw "false"; then
            echo "Error: Container is not ready"
            kubectl describe pods -n vllm-semantic-router-system -l app=semantic-router
            exit 1
          fi
          echo "✓ All containers are ready"

      - name: Test service connectivity
        run: |
          echo "=== Testing Service Connectivity ==="

          # Get pod name
          POD_NAME=$(kubectl get pods -n vllm-semantic-router-system -l app=semantic-router -o jsonpath='{.items[0].metadata.name}')
          echo "Pod name: $POD_NAME"

          # The port probes below only warn on failure; they never fail the step.

          # Test gRPC port
          echo "Testing gRPC port (50051)..."
          kubectl exec -n vllm-semantic-router-system "$POD_NAME" -- timeout 5 nc -zv localhost 50051 || {
            echo "Warning: gRPC port test failed"
          }

          # Test metrics port
          echo "Testing metrics port (9190)..."
          kubectl exec -n vllm-semantic-router-system "$POD_NAME" -- timeout 5 nc -zv localhost 9190 || {
            echo "Warning: Metrics port test failed"
          }

          # Test classify API port
          echo "Testing classify API port (8080)..."
          kubectl exec -n vllm-semantic-router-system "$POD_NAME" -- timeout 5 nc -zv localhost 8080 || {
            echo "Warning: Classify API port test failed"
          }

          # Port forward for external testing
          echo "Setting up port-forward for testing..."
          kubectl port-forward -n vllm-semantic-router-system svc/semantic-router 8080:8080 &
          PF_PID=$!
          sleep 5

          # Test HTTP endpoint (if available)
          echo "Testing HTTP endpoint..."
          curl -v http://localhost:8080/health || echo "Health endpoint not available or not implemented"

          # Cleanup port-forward
          kill "$PF_PID" || true

          echo "✓ Service connectivity tests completed"

      - name: Check logs
        if: always()
        run: |
          echo "=== Deployment Logs ==="
          kubectl logs -n vllm-semantic-router-system -l app=semantic-router --tail=200 --all-containers=true || true

          echo "=== Events ==="
          kubectl get events -n vllm-semantic-router-system --sort-by='.lastTimestamp' || true

      - name: Export cluster logs on failure
        if: failure()
        run: |
          echo "=== Exporting cluster information for debugging ==="
          mkdir -p /tmp/k8s-logs

          # Export pod descriptions
          kubectl describe pods -n vllm-semantic-router-system > /tmp/k8s-logs/pod-descriptions.txt || true

          # Export deployment description
          kubectl describe deployment -n vllm-semantic-router-system > /tmp/k8s-logs/deployment-description.txt || true

          # Export all logs
          kubectl logs -n vllm-semantic-router-system -l app=semantic-router --all-containers=true --previous > /tmp/k8s-logs/previous-logs.txt || true
          kubectl logs -n vllm-semantic-router-system -l app=semantic-router --all-containers=true > /tmp/k8s-logs/current-logs.txt || true

          # Export events
          kubectl get events -n vllm-semantic-router-system --sort-by='.lastTimestamp' > /tmp/k8s-logs/events.txt || true

          # Export resource status
          kubectl get all -n vllm-semantic-router-system -o yaml > /tmp/k8s-logs/all-resources.yaml || true

      - name: Upload cluster logs
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: k8s-cluster-logs
          path: /tmp/k8s-logs/
          retention-days: 7

      - name: Cleanup
        if: always()
        run: |
          echo "Cleaning up resources..."
          kubectl delete namespace vllm-semantic-router-system --timeout=60s || true

          # Stop and remove local registry
          docker stop kind-registry || true
          docker rm kind-registry || true

  # ---------------------------------------------------------------------------
  # Static checks on rendered manifests: expected kinds are present, the
  # ConfigMap carries its files, and the optional observability / ai-gateway
  # directories render. No cluster is started in this job.
  # ---------------------------------------------------------------------------
  test-with-custom-config:
    name: Test with Custom Configuration
    runs-on: ubuntu-latest
    needs: validate-manifests
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Kustomize
        run: |
          set -euo pipefail
          # Same pinned install as in validate-manifests.
          curl -fsSL "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" \
            | bash -s -- "${KUSTOMIZE_VERSION#v}"
          sudo mv kustomize /usr/local/bin/

      - name: Test kustomize with different overlays
        run: |
          echo "Testing base kustomization..."
          kustomize build deploy/kubernetes > /tmp/base-manifests.yaml

          echo "Validating generated resources..."

          # Check if all expected resources are present
          if ! grep -q "kind: Namespace" /tmp/base-manifests.yaml; then
            echo "Error: Namespace not found"
            exit 1
          fi

          if ! grep -q "kind: Deployment" /tmp/base-manifests.yaml; then
            echo "Error: Deployment not found"
            exit 1
          fi

          if ! grep -q "kind: Service" /tmp/base-manifests.yaml; then
            echo "Error: Service not found"
            exit 1
          fi

          if ! grep -q "kind: ConfigMap" /tmp/base-manifests.yaml; then
            echo "Error: ConfigMap not found"
            exit 1
          fi

          echo "✓ All expected resources are present"

      - name: Verify ConfigMap generation
        run: |
          echo "Checking ConfigMap generation..."
          # Render once and inspect the file, rather than rebuilding for
          # every grep.
          kustomize build deploy/kubernetes > /tmp/configmap-check.yaml
          grep -A 20 "kind: ConfigMap" /tmp/configmap-check.yaml

          # Verify config files are included (-F: match the names literally)
          if ! grep -qF "config.yaml" /tmp/configmap-check.yaml; then
            echo "Warning: config.yaml might not be properly included in ConfigMap"
          fi

          if ! grep -qF "tools_db.json" /tmp/configmap-check.yaml; then
            echo "Warning: tools_db.json might not be properly included in ConfigMap"
          fi

      - name: Validate observability kustomization
        run: |
          echo "Validating observability stack kustomization..."
          if [ -d "deploy/kubernetes/observability" ]; then
            kustomize build deploy/kubernetes/observability > /tmp/observability-manifests.yaml
            echo "✓ Observability kustomization is valid"

            # Verify expected resources (missing kinds only warn)
            for resource in "Deployment" "Service" "ConfigMap" "PersistentVolumeClaim"; do
              if ! grep -q "kind: $resource" /tmp/observability-manifests.yaml; then
                echo "Warning: $resource not found in observability manifests"
              fi
            done
          else
            echo "Observability directory not found, skipping..."
          fi

      - name: Validate AI Gateway configurations
        run: |
          echo "Validating AI Gateway configurations..."

          # NOTE(review): this job never creates a cluster, and
          # `kubectl create --dry-run=client` may still try to reach an API
          # server. Results here are advisory only - confirm this is intended.

          # Check if ai-gateway directory exists
          if [ -d "deploy/kubernetes/ai-gateway" ]; then
            # Validate configuration yamls (without CRDs)
            for yaml_file in deploy/kubernetes/ai-gateway/configuration/*.yaml; do
              if [ -f "$yaml_file" ]; then
                echo "Checking $yaml_file..."
                # Basic YAML syntax check
                kubectl create --dry-run=client -f "$yaml_file" || echo "Warning: Issues with $yaml_file"
              fi
            done

            # Validate inference-pool manifests (skip CRD validation as they may not be installed)
            for yaml_file in deploy/kubernetes/ai-gateway/inference-pool/*.yaml; do
              if [ -f "$yaml_file" ]; then
                echo "Checking $yaml_file for YAML syntax..."
                # Report "Validated" only on real success. A missing CRD is
                # tolerated; any other failure is shown as a warning instead
                # of being reported as validated.
                if dry_run_output=$(kubectl create --dry-run=client -f "$yaml_file" 2>&1); then
                  echo "Validated $yaml_file"
                elif grep -q "no matches for kind" <<<"$dry_run_output"; then
                  echo "✓ $yaml_file syntax valid (CRD not installed)"
                else
                  echo "Warning: Issues with $yaml_file"
                  echo "$dry_run_output"
                fi
              fi
            done

            echo "✓ AI Gateway configuration validation completed"
          else
            echo "AI Gateway directory not found, skipping..."
          fi

  # ---------------------------------------------------------------------------
  # Report-only security scanning of deploy/kubernetes with Trivy and Checkov.
  # Neither scanner fails the build. Both read the directory directly, so no
  # Kustomize install is needed in this job.
  # ---------------------------------------------------------------------------
  security-scan:
    name: Security Scan for K8s Manifests
    runs-on: ubuntu-latest
    needs: validate-manifests
    # upload-sarif needs write access to code-scanning results.
    permissions:
      contents: read
      security-events: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run Trivy security scan
        # NOTE(review): @master is a mutable ref; consider pinning to a
        # release tag or commit SHA.
        uses: aquasecurity/trivy-action@master
        with:
          scan-type: "config"
          scan-ref: "deploy/kubernetes"
          format: "sarif"
          output: "trivy-results.sarif"
          severity: "CRITICAL,HIGH"
          exit-code: "0"  # Don't fail on vulnerabilities, just report

      - name: Upload Trivy results to GitHub Security
        uses: github/codeql-action/upload-sarif@v3
        # Run even if the scan step failed, but only when a report exists.
        if: ${{ always() && hashFiles('trivy-results.sarif') != '' }}
        with:
          sarif_file: "trivy-results.sarif"

      - name: Run Checkov scan
        # NOTE(review): @master is a mutable ref; consider pinning to a
        # release tag or commit SHA.
        uses: bridgecrewio/checkov-action@master
        with:
          directory: deploy/kubernetes
          framework: kubernetes
          output_format: cli
          soft_fail: true  # Don't fail the build

  # ---------------------------------------------------------------------------
  # Aggregates the results of the other jobs. The security scan is reported but
  # does not gate; the three test jobs must each finish with "success".
  # ---------------------------------------------------------------------------
  summary:
    name: Test Summary
    runs-on: ubuntu-latest
    needs:
      - validate-manifests
      - kind-integration-test
      - test-with-custom-config
      - security-scan
    if: always()
    steps:
      - name: Check test results
        env:
          VALIDATE_RESULT: ${{ needs.validate-manifests.result }}
          KIND_RESULT: ${{ needs.kind-integration-test.result }}
          CUSTOM_CONFIG_RESULT: ${{ needs.test-with-custom-config.result }}
          SECURITY_RESULT: ${{ needs.security-scan.result }}
        run: |
          echo "=== Kubernetes Integration Test Summary ==="
          echo "Manifest Validation: ${VALIDATE_RESULT}"
          echo "kind Integration Test: ${KIND_RESULT}"
          echo "Custom Config Test: ${CUSTOM_CONFIG_RESULT}"
          echo "Security Scan: ${SECURITY_RESULT}"

          # A job result can also be "cancelled" or "skipped". Anything other
          # than "success" means that job's tests did not pass.
          if [[ "${VALIDATE_RESULT}" != "success" ]] || \
             [[ "${KIND_RESULT}" != "success" ]] || \
             [[ "${CUSTOM_CONFIG_RESULT}" != "success" ]]; then
            echo "❌ Some tests failed"
            exit 1
          else
            echo "✅ All tests passed"
          fi