vllm-project
diff --git a/.github/workflows/k8s-integration-test.yml b/.github/workflows/k8s-integration-test.yml
Lines changed: 65 additions & 3 deletions
diff --git a/deploy/kubernetes/README.md b/deploy/kubernetes/README.md
Lines changed: 11 additions & 6 deletions
@@ -8,10 +8,16 @@ name: Kubernetes Integration Test
 #   ✅ Service connectivity (gRPC, metrics, API ports)
 #   ✅ Security scanning (Trivy, Checkov)
 #   ✅ Basic syntax validation for observability and ai-gateway configs
+#   ✅ kind cluster integration with CI-optimized configuration
 #
 # Out of Scope (planned for follow-up PRs):
 #   🔄 Observability stack deployment (Prometheus + Grafana)
 #   🔄 AI Gateway end-to-end testing (Envoy Gateway + InferencePool)
+#
+# CI Optimizations:
+#   - Uses CI-specific kind configuration (single node, reduced resources)
+#   - Generates kind-config.yaml dynamically (no models mount needed)
+#   - Optimized for GitHub Actions runner constraints
 
 on:
   pull_request:
@@ -117,12 +123,51 @@ jobs:
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
+      - name: Generate kind configuration for CI
+        run: |
+          echo "Creating CI-optimized kind configuration..."
+          # Create a simplified kind config for CI that doesn't require models mount
+          cat > tools/kind/kind-config.yaml << 'EOF'
+          # kind cluster configuration for CI testing
+          kind: Cluster
+          apiVersion: kind.x-k8s.io/v1alpha4
+          name: semantic-router-cluster
+          nodes:
+            - role: control-plane
+              # Optimized for CI environment with limited resources
+              extraPortMappings:
+                - containerPort: 30080
+                  hostPort: 30080
+                  protocol: TCP
+              kubeadmConfigPatches:
+                - |
+                  kind: InitConfiguration
+                  nodeRegistration:
+                    kubeletExtraArgs:
+                      # Reserve node resources and set eviction thresholds sized for CI
+                      system-reserved: memory=512Mi,cpu=250m
+                      kube-reserved: memory=512Mi,cpu=250m
+                      eviction-hard: memory.available<512Mi,nodefs.available<10%
+                - |
+                  kind: ClusterConfiguration
+                  apiServer:
+                    extraArgs:
+                      max-requests-inflight: "200"
+                      max-mutating-requests-inflight: "100"
+                  etcd:
+                    local:
+                      extraArgs:
+                        quota-backend-bytes: "4294967296"  # 4 GiB (reduced from 8 GiB)
+          EOF
+          echo "Generated CI-optimized kind-config.yaml:"
+          cat tools/kind/kind-config.yaml
+
       - name: Create kind cluster
         uses: helm/[email protected]
         with:
           version: ${{ env.KIND_VERSION }}
           config: tools/kind/kind-config.yaml
-          cluster_name: semantic-router-test
+          cluster_name: semantic-router-cluster
           wait: 120s
 
       - name: Build semantic-router image
@@ -138,7 +183,7 @@ jobs:
       - name: Load image into kind cluster
         run: |
           echo "Loading image into kind cluster..."
-          kind load docker-image ghcr.io/vllm-project/semantic-router/extproc:test --name semantic-router-test
+          kind load docker-image ghcr.io/vllm-project/semantic-router/extproc:test --name semantic-router-cluster
           echo "Image loaded successfully!"
 
       - name: Clean up after image build
@@ -157,10 +202,18 @@ jobs:
 
       - name: Verify cluster
         run: |
+          echo "=== Verifying kind cluster ==="
           kubectl cluster-info
-          kubectl get nodes
+          kubectl get nodes -o wide
           kubectl version
 
+          # Verify cluster is ready
+          kubectl wait --for=condition=Ready nodes --all --timeout=120s
+
+          # Check available resources
+          echo "=== Node resources ==="
+          kubectl describe nodes
+
       - name: Setup Kustomize
         run: |
           curl -s "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" | bash
@@ -469,6 +522,10 @@ jobs:
           echo "=== Exporting cluster information for debugging ==="
           mkdir -p /tmp/k8s-logs
 
+          # Export the kind control-plane node's container logs
+          echo "=== Kind cluster logs ==="
+          docker logs semantic-router-cluster-control-plane > /tmp/k8s-logs/kind-control-plane.log || true
+
           # Export pod descriptions
           kubectl describe pods -n vllm-semantic-router-system > /tmp/k8s-logs/pod-descriptions.txt || true
 
@@ -485,6 +542,9 @@ jobs:
           # Export resource status
           kubectl get all -n vllm-semantic-router-system -o yaml > /tmp/k8s-logs/all-resources.yaml || true
 
+          # Export the kind cluster's kubeconfig
+          kind get kubeconfig --name semantic-router-cluster > /tmp/k8s-logs/kind-kubeconfig.yaml || true
+
       - name: Upload cluster logs
         if: failure()
         uses: actions/upload-artifact@v4
@@ -498,6 +558,8 @@ jobs:
         run: |
           echo "Cleaning up resources..."
           kubectl delete namespace vllm-semantic-router-system --timeout=60s || true
+          echo "Cleaning up kind cluster..."
+          kind delete cluster --name semantic-router-cluster || true
 
   test-with-custom-config:
     name: Test with Custom Configuration
 
@@ -1,6 +1,6 @@
 # Semantic Router Kubernetes Deployment
 
-Kustomize manifests for deploying the Semantic Router and its observability stack (Prometheus, Grafana, Dashboard, optional Open WebUI + Pipelines) on Kubernetes.
+Kustomize manifests for deploying the Semantic Router and its observability stack (Prometheus, Grafana, Dashboard, plus optional Open WebUI + Pipelines and optional Chat UI) on Kubernetes.
 
 ## Architecture
 
@@ -14,7 +14,7 @@ The deployment consists of:
 - **Services**:
   - Main service exposing gRPC (50051), Classification API (8080), and metrics (9190)
   - Separate metrics service for monitoring (`semantic-router-metrics`)
-  - Observability services (Grafana, Prometheus, Dashboard, optional Open WebUI)
+  - Observability services (Grafana, Prometheus, Dashboard, optional Open WebUI, Chat UI)
 
 ## Ports
 
@@ -26,7 +26,7 @@ The deployment consists of:
 
 ### Deploy Core (Router)
 
 ```bash
 kubectl apply -k deploy/kubernetes/
 
 # Check deployment status
@@ -40,7 +40,7 @@ kubectl logs -l app=semantic-router -n vllm-semantic-router-system -f
 
 ```bash
 kubectl apply -k deploy/kubernetes/observability/
 ```
 
 Port-forward to UIs (local dev):
 
@@ -49,6 +49,7 @@ kubectl port-forward -n vllm-semantic-router-system svc/prometheus 9090:9090
 kubectl port-forward -n vllm-semantic-router-system svc/grafana 3000:3000
 kubectl port-forward -n vllm-semantic-router-system svc/semantic-router-dashboard 8700:80
 kubectl port-forward -n vllm-semantic-router-system svc/openwebui 3001:8080
+kubectl port-forward -n vllm-semantic-router-system svc/chat-ui 3002:3000
 ```
 
 Then open:
@@ -57,8 +58,9 @@ Then open:
 - Grafana → http://localhost:3000
 - Dashboard → http://localhost:8700
 - Open WebUI (Playground) → http://localhost:3001
+- Chat UI (HuggingChat) → http://localhost:3002
 
-```
 
 ### Kind (Kubernetes in Docker) Deployment
 
@@ -81,7 +83,7 @@ make setup
 # Or step by step:
 make create-cluster
 make deploy
 ```
 
 The setup process will:
 
@@ -160,6 +162,7 @@ make port-forward-metrics
 kubectl port-forward -n vllm-semantic-router-system svc/semantic-router-dashboard 8700:80
 kubectl port-forward -n vllm-semantic-router-system svc/grafana 3000:3000
 kubectl port-forward -n vllm-semantic-router-system svc/openwebui 3001:8080
+kubectl port-forward -n vllm-semantic-router-system svc/chat-ui 3002:3000
 ```
 
 Or using kubectl directly:
@@ -179,6 +182,7 @@ kubectl port-forward -n vllm-semantic-router-system svc/prometheus 9090:9090
 kubectl port-forward -n vllm-semantic-router-system svc/grafana 3000:3000
 kubectl port-forward -n vllm-semantic-router-system svc/semantic-router-dashboard 8700:80
 kubectl port-forward -n vllm-semantic-router-system svc/openwebui 3001:8080
+kubectl port-forward -n vllm-semantic-router-system svc/chat-ui 3002:3000
 ```
 
 #### Testing the Deployment
@@ -350,6 +354,7 @@ Edit the `resources` section in `deployment.yaml` accordingly.
 - `tools_db.json` - Tools database for semantic routing
 - `kustomization.yaml` - Kustomize configuration for core deployment
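-- `observability/` - Prometheus, Grafana, Dashboard, optional Open WebUI + Pipelines (with its own `kustomization.yaml`)
+- `observability/` - Prometheus, Grafana, Dashboard, optional Open WebUI + Pipelines, and optional Chat UI (with its own `kustomization.yaml`)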
 
 For detailed observability setup and screenshots, see `deploy/kubernetes/observability/README.md`.