diff --git a/.github/workflows/helm-ci.yml b/.github/workflows/helm-ci.yml
new file mode 100644
index 00000000..17301de0
--- /dev/null
+++ b/.github/workflows/helm-ci.yml
@@ -0,0 +1,384 @@
+# Helm Chart CI
+#
+# Lints, renders and installs the Helm chart in deploy/helm/semantic-router:
+#   lint-chart        - helm lint with the default, dev and prod values
+#   template-chart    - helm template, yamllint and a required-resource check
+#   install-chart     - install, upgrade and rollback in Kind (K8s version matrix)
+#   validation-script - runs deploy/helm/validate-chart.sh
+#   ci-success        - prints a summary once all of the jobs above succeeded
+name: Helm Chart CI
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'deploy/helm/**'
+      - '.github/workflows/helm-ci.yml'
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'deploy/helm/**'
+      - '.github/workflows/helm-ci.yml'
+  workflow_dispatch:
+
+# No job writes to the repository, so keep the GITHUB_TOKEN read-only.
+permissions:
+  contents: read
+
+env:
+  HELM_VERSION: v3.14.0
+  KIND_VERSION: v0.22.0
+  KUBECTL_VERSION: v1.29.0
+  CHART_PATH: deploy/helm/semantic-router
+
+jobs:
+
+  # Lint and validate Helm chart
+  lint-chart:
+    name: Lint Helm Chart
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v4
+        with:
+          version: ${{ env.HELM_VERSION }}
+
+      - name: Run Helm lint
+        run: |
+          echo "::group::Helm Lint"
+          helm lint ${{ env.CHART_PATH }}
+          echo "::endgroup::"
+
+      - name: Run Helm lint with dev values
+        run: |
+          echo "::group::Helm Lint (Dev Values)"
+          helm lint ${{ env.CHART_PATH }} -f ${{ env.CHART_PATH }}/values-dev.yaml
+          echo "::endgroup::"
+
+      - name: Run Helm lint with prod values
+        run: |
+          echo "::group::Helm Lint (Prod Values)"
+          helm lint ${{ env.CHART_PATH }} -f ${{ env.CHART_PATH }}/values-prod.yaml
+          echo "::endgroup::"
+
+  # Template validation
+  template-chart:
+    name: Validate Helm Templates
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v4
+        with:
+          version: ${{ env.HELM_VERSION }}
+
+      - name: Template with default values
+        run: |
+          echo "::group::Template with Default Values"
+          helm template test-release ${{ env.CHART_PATH }} \
+            --namespace test-namespace > /tmp/default-template.yaml
+          echo "Templates generated successfully"
+          echo "::endgroup::"
+
+      - name: Template with dev values
+        run: |
+          echo "::group::Template with Dev Values"
+          helm template test-release ${{ env.CHART_PATH }} \
+            -f ${{ env.CHART_PATH }}/values-dev.yaml \
+            --namespace test-namespace > /tmp/dev-template.yaml
+          echo "Dev templates generated successfully"
+          echo "::endgroup::"
+
+      - name: Template with prod values
+        run: |
+          echo "::group::Template with Prod Values"
+          helm template test-release ${{ env.CHART_PATH }} \
+            -f ${{ env.CHART_PATH }}/values-prod.yaml \
+            --namespace test-namespace > /tmp/prod-template.yaml
+          echo "Prod templates generated successfully"
+          echo "::endgroup::"
+
+      - name: Validate generated YAML
+        run: |
+          echo "::group::Validate YAML Syntax"
+          # Check if yamllint is available, install if needed
+          if ! command -v yamllint &> /dev/null; then
+            echo "Installing yamllint..."
+            pip install yamllint
+          fi
+
+          # Validate generated templates (ignore some Helm template warnings)
+          yamllint -d "{extends: default, rules: {line-length: {max: 120}, indentation: {spaces: 2}}}" \
+            /tmp/default-template.yaml || echo "Some yamllint warnings are expected for Helm templates"
+          echo "::endgroup::"
+
+      - name: Verify required resources
+        run: |
+          echo "::group::Verify Required Resources"
+          required_resources=(
+            "Namespace"
+            "ServiceAccount"
+            "PersistentVolumeClaim"
+            "ConfigMap"
+            "Deployment"
+            "Service"
+          )
+
+          for resource in "${required_resources[@]}"; do
+            # Anchor the match to a whole top-level line so that "Service" is
+            # not satisfied by "ServiceAccount" or by a nested "kind:" field.
+            if grep -qE "^kind: ${resource}[[:space:]]*$" /tmp/default-template.yaml; then
+              echo "✓ Found resource: $resource"
+            else
+              echo "✗ Missing resource: $resource"
+              exit 1
+            fi
+          done
+          echo "All required resources found"
+          echo "::endgroup::"
+
+      - name: Upload templates as artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: helm-templates
+          path: /tmp/*-template.yaml
+          retention-days: 7
+
+  # CI test: Install chart in Kind cluster
+  install-chart:
+    name: Install Chart in Kind
+    runs-on: ubuntu-latest
+    needs: [lint-chart, template-chart]
+    strategy:
+      matrix:
+        k8s-version:
+          - v1.27.11
+          - v1.28.7
+          - v1.29.2
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v4
+        with:
+          version: ${{ env.HELM_VERSION }}
+
+      - name: Set up kubectl
+        uses: azure/setup-kubectl@v4
+        with:
+          version: ${{ env.KUBECTL_VERSION }}
+
+      - name: Create Kind cluster
+        uses: helm/kind-action@v1.10.0
+        with:
+          version: ${{ env.KIND_VERSION }}
+          node_image: kindest/node:${{ matrix.k8s-version }}
+          cluster_name: helm-test-cluster
+          wait: 120s
+
+      - name: Verify Kind cluster
+        run: |
+          echo "::group::Cluster Info"
+          kubectl cluster-info
+          kubectl get nodes
+          kubectl version
+          echo "::endgroup::"
+
+      - name: Create namespace
+        run: |
+          echo "::group::Create Namespace"
+          kubectl create namespace vllm-semantic-router-system || echo "Namespace already exists"
+          kubectl get namespace vllm-semantic-router-system
+          echo "::endgroup::"
+
+      - name: Install Helm chart with dev values
+        run: |
+          echo "::group::Install Chart"
+          helm install semantic-router ${{ env.CHART_PATH }} \
+            -f ${{ env.CHART_PATH }}/values-dev.yaml \
+            --namespace vllm-semantic-router-system \
+            --wait \
+            --timeout 15m \
+            --debug
+          echo "::endgroup::"
+
+      # For ci debug, check init container logs
+      - name: Check init container logs
+        if: always()
+        run: |
+          echo "::group::Init Container Logs"
+          # Wait a bit for init container to start
+          sleep 5
+          # Get the first matching pod. "-o name" prints nothing (and still exits 0)
+          # when no pod exists, unlike jsonpath '{.items[0]...}', which errors out
+          # and would fail this diagnostic-only step under "bash -e".
+          POD_NAME=$(kubectl get pods -n vllm-semantic-router-system \
+            -l app.kubernetes.io/name=semantic-router -o name | head -n 1) || true
+          if [ -n "$POD_NAME" ]; then
+            echo "Checking init container logs for pod: $POD_NAME"
+            kubectl logs -n vllm-semantic-router-system "$POD_NAME" -c model-downloader --tail=100 || echo "Init container may have already completed or not started yet"
+          else
+            echo "No pod found yet"
+          fi
+          echo "::endgroup::"
+
+      - name: Verify installation
+        run: |
+          echo "::group::Helm Status"
+          helm status semantic-router -n vllm-semantic-router-system
+          echo "::endgroup::"
+
+          echo "::group::Check Resources"
+          kubectl get all -n vllm-semantic-router-system
+          echo "::endgroup::"
+
+          echo "::group::Check PVC"
+          kubectl get pvc -n vllm-semantic-router-system
+          echo "::endgroup::"
+
+          echo "::group::Check ConfigMap"
+          kubectl get configmap -n vllm-semantic-router-system
+          echo "::endgroup::"
+
+      - name: Wait for deployment to be ready
+        run: |
+          echo "::group::Wait for Deployment"
+          if ! kubectl wait --for=condition=Available deployment/semantic-router \
+            -n vllm-semantic-router-system \
+            --timeout=300s; then
+            # GitHub Actions does not render nested log groups; close the outer one first.
+            echo "::endgroup::"
+            echo "Deployment failed to become ready"
+            echo "::group::Pod Status"
+            kubectl get pods -n vllm-semantic-router-system
+            echo "::endgroup::"
+            echo "::group::Pod Describe"
+            kubectl describe pods -n vllm-semantic-router-system
+            echo "::endgroup::"
+            echo "::group::Pod Logs"
+            kubectl logs -n vllm-semantic-router-system -l app.kubernetes.io/name=semantic-router --all-containers=true --tail=100 || true
+            echo "::endgroup::"
+            exit 1
+          fi
+          echo "::endgroup::"
+
+      - name: Check pod status
+        run: |
+          echo "::group::Pod Details"
+          kubectl get pods -n vllm-semantic-router-system -o wide
+          echo "::endgroup::"
+
+          echo "::group::Pod Events"
+          kubectl get events -n vllm-semantic-router-system --sort-by='.lastTimestamp'
+          echo "::endgroup::"
+
+      - name: Test service endpoints
+        run: |
+          echo "::group::Service Endpoints"
+          kubectl get svc -n vllm-semantic-router-system
+          kubectl get endpoints -n vllm-semantic-router-system
+          echo "::endgroup::"
+
+      - name: Test upgrade
+        run: |
+          echo "::group::Upgrade Chart"
+          # need some time, download models may take a while
+          helm upgrade semantic-router ${{ env.CHART_PATH }} \
+            -f ${{ env.CHART_PATH }}/values-dev.yaml \
+            --namespace vllm-semantic-router-system \
+            --wait \
+            --timeout 15m
+          echo "::endgroup::"
+
+          echo "::group::Verify Upgrade"
+          helm status semantic-router -n vllm-semantic-router-system
+          kubectl get pods -n vllm-semantic-router-system
+          echo "::endgroup::"
+
+      - name: Test rollback
+        run: |
+          echo "::group::Rollback Chart"
+          helm rollback semantic-router -n vllm-semantic-router-system --wait
+          echo "::endgroup::"
+
+          echo "::group::Verify Rollback"
+          helm history semantic-router -n vllm-semantic-router-system
+          echo "::endgroup::"
+
+      # For CI debugging. Kept after the upgrade/rollback tests so that a failure in
+      # any earlier step is covered, and before the uninstall so the release and its
+      # pods still exist. Every command is best-effort ("|| true").
+      - name: Collect logs on failure
+        if: failure()
+        run: |
+          echo "::group::Helm Release Info"
+          helm list -n vllm-semantic-router-system || true
+          helm get values semantic-router -n vllm-semantic-router-system --all || true
+          echo "::endgroup::"
+
+          echo "::group::All Resources"
+          kubectl get all -n vllm-semantic-router-system -o wide || true
+          echo "::endgroup::"
+
+          echo "::group::Pod Logs"
+          for pod in $(kubectl get pods -n vllm-semantic-router-system -o name); do
+            echo "Logs for $pod:"
+            kubectl logs -n vllm-semantic-router-system "$pod" --all-containers=true --tail=200 || true
+            echo "---"
+          done
+          echo "::endgroup::"
+
+          echo "::group::Events"
+          kubectl get events -n vllm-semantic-router-system --sort-by='.lastTimestamp' || true
+          echo "::endgroup::"
+
+      - name: Uninstall chart
+        if: always()
+        run: |
+          echo "::group::Uninstall Chart"
+          helm uninstall semantic-router -n vllm-semantic-router-system || true
+          kubectl delete namespace vllm-semantic-router-system --timeout=60s || true
+          echo "::endgroup::"
+
+  # Job 4: Validation script test
+  validation-script:
+    name: Run Validation Script
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v4
+        with:
+          version: ${{ env.HELM_VERSION }}
+
+      - name: Install yamllint
+        run: pip install yamllint
+
+      - name: Run validation script
+        run: |
+          chmod +x deploy/helm/validate-chart.sh
+          ./deploy/helm/validate-chart.sh
+
+  # all GHA Job success, print it.
+  ci-success:
+    name: CI Success
+    runs-on: ubuntu-latest
+    needs: [lint-chart, template-chart, install-chart, validation-script]
+    if: success()
+    steps:
+      - name: Success summary
+        run: |
+          echo "✓ Lint checks passed"
+          echo "✓ Template validation passed"
+          echo "✓ Chart installation tests passed"
+          echo "✓ Validation script passed"
diff --git a/Makefile b/Makefile index 7002da4b..fa6f77c5 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,7 @@ _run: -f tools/make/pre-commit.mk \ -f tools/make/docker.mk \ -f tools/make/kube.mk \ + -f tools/make/helm.mk \ -f tools/make/observability.mk \ -f tools/make/openshift.mk \ $(MAKECMDGOALS) diff --git a/deploy/helm/README.md b/deploy/helm/README.md new file mode 100644 index 00000000..79fa026f --- /dev/null +++ b/deploy/helm/README.md @@ -0,0 +1,441 @@ +# Helm Chart Deployment Guide + +This directory contains the Helm chart for deploying Semantic Router on Kubernetes.
+ +## Directory Structure + +``` +deploy/helm/ +├── MIGRATION.md # Migration guide from Kustomize to Helm +├── validate-chart.sh # Chart validation script +└── semantic-router/ # Helm chart + ├── Chart.yaml # Chart metadata + ├── values.yaml # Default configuration values + ├── values-dev.yaml # Development environment values + ├── values-prod.yaml # Production environment values + ├── README.md # Comprehensive chart documentation + ├── .helmignore # Helm ignore patterns + └── templates/ # Kubernetes resource templates + ├── _helpers.tpl # Template helpers + ├── namespace.yaml # Namespace resource + ├── serviceaccount.yaml # Service account + ├── configmap.yaml # Configuration + ├── pvc.yaml # Persistent volume claim + ├── deployment.yaml # Main deployment + ├── service.yaml # Services (gRPC, API, metrics) + ├── ingress.yaml # Ingress (optional) + ├── hpa.yaml # Horizontal Pod Autoscaler (optional) + └── NOTES.txt # Post-installation notes +``` + +## Quick Start + +### Prerequisites + +- Kubernetes 1.19+ +- Helm 3.2.0+ +- kubectl configured to access your cluster + +### Install + +```bash +# Using Make (recommended) +make helm-install + +# Or with Helm directly +helm install semantic-router ./deploy/helm/semantic-router \ + --namespace vllm-semantic-router-system \ + --create-namespace +``` + +### Verify Installation + +```bash +# Check Helm release status +make helm-status + +# Check pods +kubectl get pods -n vllm-semantic-router-system + +# View logs +make helm-logs +``` + +### Access the Application + +```bash +# Port forward API +make helm-port-forward-api + +# Test the API +curl http://localhost:8080/health +``` + +## Deployment Scenarios + +### Development Environment + +For local development with reduced resources: + +```bash +make helm-dev + +# Or manually: +helm install semantic-router ./deploy/helm/semantic-router \ + -f ./deploy/helm/semantic-router/values-dev.yaml \ + --namespace vllm-semantic-router-system \ + --create-namespace +``` + +**Features:** + 
+- Reduced resource requests (2Gi RAM, 500m CPU) +- Smaller storage (5Gi) +- Faster probes +- Debug-friendly configuration + +### Production Environment + +For production deployment with high availability: + +```bash +make helm-prod + +# Or manually: +helm install semantic-router ./deploy/helm/semantic-router \ + -f ./deploy/helm/semantic-router/values-prod.yaml \ + --namespace production \ + --create-namespace +``` + +**Features:** + +- Multiple replicas (3) +- High resource allocation (8Gi RAM, 4 CPU) +- Auto-scaling enabled +- Security hardening +- Ingress with TLS +- Production-grade storage + +### Custom Configuration + +Create your own values file: + +```yaml +# my-values.yaml +replicaCount: 2 + +resources: + limits: + memory: "8Gi" + cpu: "2" + +config: + vllm_endpoints: + - name: "my-endpoint" + address: "10.0.1.100" + port: 8000 + weight: 1 + +ingress: + enabled: true + hosts: + - host: semantic-router.mydomain.com + paths: + - path: / + pathType: Prefix + servicePort: 8080 +``` + +Then install: + +```bash +helm install semantic-router ./deploy/helm/semantic-router \ + -f my-values.yaml \ + --namespace my-namespace \ + --create-namespace +``` + +## Make Targets + +The project includes convenient Make targets for Helm operations: + +### Installation & Management + +```bash +make helm-install # Install the chart +make helm-upgrade # Upgrade the release +make helm-uninstall # Uninstall the release +make helm-status # Show release status +make helm-list # List all releases +``` + +### Development + +```bash +make helm-lint # Lint the chart +make helm-template # Template the chart +make helm-dev # Deploy with dev config +make helm-prod # Deploy with prod config +make helm-package # Package the chart +``` + +### Testing & Debugging + +```bash +make helm-test # Test the deployment +make helm-logs # Show logs +make helm-values # Show computed values +make helm-manifest # Show deployed manifest +``` + +### Port Forwarding + +```bash +make helm-port-forward-api # 
Port forward API (8080) +make helm-port-forward-grpc # Port forward gRPC (50051) +make helm-port-forward-metrics # Port forward metrics (9190) +``` + +### Rollback & Cleanup + +```bash +make helm-rollback # Rollback to previous version +make helm-history # Show release history +make helm-clean # Complete cleanup +``` + +### Help + +```bash +make help-helm # Show Helm help +``` + +## Validation + +Before deploying, validate the Helm chart: + +```bash +# Run validation script +./deploy/helm/validate-chart.sh + +# Or manually: +make helm-lint +make helm-template +``` + +## Upgrading + +### In-Place Upgrade + +```bash +# Upgrade with new values +helm upgrade semantic-router ./deploy/helm/semantic-router \ + -f my-updated-values.yaml \ + --namespace vllm-semantic-router-system + +# Or using Make: +make helm-upgrade HELM_VALUES_FILE=my-updated-values.yaml +``` + +### Rollback + +If an upgrade fails: + +```bash +# Rollback to previous version +make helm-rollback + +# Or rollback to specific revision +helm rollback semantic-router 1 --namespace vllm-semantic-router-system +``` + +## Configuration Examples + +### Example 1: Custom Endpoints + +```yaml +config: + vllm_endpoints: + - name: "endpoint-1" + address: "10.0.1.10" + port: 8000 + weight: 2 + - name: "endpoint-2" + address: "10.0.1.11" + port: 8000 + weight: 1 +``` + +### Example 2: Enable Ingress + +```yaml +ingress: + enabled: true + className: "nginx" + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-prod" + hosts: + - host: semantic-router.example.com + paths: + - path: / + pathType: Prefix + servicePort: 8080 + tls: + - secretName: semantic-router-tls + hosts: + - semantic-router.example.com +``` + +### Example 3: Enable Auto-scaling + +```yaml +autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 +``` + +### Example 4: Custom Security Context + +```yaml +podSecurityContext: + runAsNonRoot: true + runAsUser: 1000 
+ fsGroup: 1000 + +securityContext: + runAsNonRoot: true + runAsUser: 1000 + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL +``` + +## Migrating from Kustomize + +If you're currently using the Kustomize deployment, see [MIGRATION.md](MIGRATION.md) for detailed migration instructions. + +## Troubleshooting + +### Pods Stuck in Pending + +```bash +# Check events +kubectl describe pod -n vllm-semantic-router-system + +# Common causes: +# - Insufficient resources +# - PVC not binding +# - Image pull errors + +# Solution: Reduce resources +helm upgrade semantic-router ./deploy/helm/semantic-router \ + -f ./deploy/helm/semantic-router/values-dev.yaml \ + --namespace vllm-semantic-router-system +``` + +### Init Container Fails + +```bash +# Check init container logs +kubectl logs -n vllm-semantic-router-system deployment/semantic-router -c model-downloader + +# Common causes: +# - Network issues +# - HuggingFace rate limits +# - Insufficient storage + +# Solution: Check PVC and network +kubectl get pvc -n vllm-semantic-router-system +``` + +### Service Not Accessible + +```bash +# Check service +kubectl get svc -n vllm-semantic-router-system + +# Check endpoints +kubectl get endpoints -n vllm-semantic-router-system + +# Test internally +kubectl run -it --rm debug --image=curlimages/curl --restart=Never -- \ + curl http://semantic-router.vllm-semantic-router-system:8080/health +``` + +## Best Practices + +1. **Use Version Control**: Keep your `values.yaml` files in version control +2. **Environment Separation**: Use different namespaces and values files for different environments +3. **Resource Limits**: Always set appropriate resource limits based on your workload +4. **Monitoring**: Enable metrics and set up monitoring +5. **Security**: Use security contexts and network policies +6. **Backups**: Regularly back up your PVC data +7.
**Testing**: Test upgrades in dev/staging before production + +## CI/CD Integration + +### GitHub Actions Example + +```yaml +- name: Deploy with Helm + run: | + helm upgrade --install semantic-router ./deploy/helm/semantic-router \ + -f values-prod.yaml \ + --namespace production \ + --create-namespace \ + --wait \ + --timeout 10m +``` + +### GitLab CI Example + +```yaml +deploy: + script: + - helm upgrade --install semantic-router ./deploy/helm/semantic-router + -f values-prod.yaml + --namespace production + --create-namespace + --wait +``` + +### ArgoCD Example + +```yaml +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: semantic-router +spec: + project: default + source: + repoURL: https://github.com/vllm-project/semantic-router + targetRevision: main + path: deploy/helm/semantic-router + helm: + valueFiles: + - values-prod.yaml + destination: + server: https://kubernetes.default.svc + namespace: production +``` + +## Additional Resources + +- [Chart README](semantic-router/README.md) - Detailed chart documentation +- [Migration Guide](MIGRATION.md) - Kustomize to Helm migration +- [Project Documentation](../../README.md) - Main project documentation +- [Helm Documentation](https://helm.sh/docs/) - Official Helm docs + +## Support + +For issues and questions: + +- GitHub Issues: https://github.com/vllm-project/semantic-router/issues +- Documentation: https://semantic-router.io +- Chart Issues: Tag with `helm` label diff --git a/deploy/helm/semantic-router/.helmignore b/deploy/helm/semantic-router/.helmignore new file mode 100644 index 00000000..a595db1a --- /dev/null +++ b/deploy/helm/semantic-router/.helmignore @@ -0,0 +1,30 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ +# CI/CD +.github/ +.gitlab-ci.yml +.travis.yml +.circleci/ +# Documentation +README.md.gotmpl diff --git a/deploy/helm/semantic-router/Chart.yaml b/deploy/helm/semantic-router/Chart.yaml new file mode 100644 index 00000000..662bb55e --- /dev/null +++ b/deploy/helm/semantic-router/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +name: semantic-router +description: A Helm chart for deploying Semantic Router - an intelligent routing system for LLM applications +type: application +version: 0.1.0 +appVersion: "latest" +keywords: + - semantic-router + - llm + - vllm + - ai + - routing + - classification +home: https://github.com/vllm-project/semantic-router +sources: + - https://github.com/vllm-project/semantic-router +maintainers: + - name: Semantic Router Team + url: https://github.com/vllm-project/semantic-router +icon: https://raw.githubusercontent.com/vllm-project/semantic-router/main/website/static/img/logo.png diff --git a/deploy/helm/semantic-router/README.md b/deploy/helm/semantic-router/README.md new file mode 100644 index 00000000..98384963 --- /dev/null +++ b/deploy/helm/semantic-router/README.md @@ -0,0 +1,395 @@ +# Semantic Router Helm Chart + +A Helm chart for deploying Semantic Router - an intelligent routing system for LLM applications with built-in classification, caching, and security features. + +## TL;DR + +```bash +# Install with default values +helm install semantic-router ./deploy/helm/semantic-router + +# Install with custom values +helm install semantic-router ./deploy/helm/semantic-router -f ./deploy/helm/semantic-router/values-dev.yaml +``` + +## Introduction + +This chart bootstraps a Semantic Router deployment on a Kubernetes cluster using the Helm package manager. 
It includes: + +- Intelligent routing and classification for LLM requests +- Built-in semantic caching (memory or Milvus) +- PII detection and jailbreak protection +- Tools database for function calling +- Multi-model support with automatic selection +- Prometheus metrics and observability +- Persistent storage for ML models + +## Prerequisites + +- Kubernetes 1.19+ +- Helm 3.2.0+ +- PV provisioner support in the underlying infrastructure (for persistent storage) +- (Optional) Ingress controller for external access +- (Optional) cert-manager for TLS certificates + +## Installing the Chart + +### Basic Installation + +To install the chart with the release name `semantic-router`: + +```bash +helm install semantic-router ./deploy/helm/semantic-router +``` + +### Install with Development Configuration + +For local development with reduced resource requirements: + +```bash +helm install semantic-router ./deploy/helm/semantic-router \ + -f ./deploy/helm/semantic-router/values-dev.yaml \ + --namespace vllm-semantic-router-system \ + --create-namespace +``` + +### Install with Production Configuration + +For production deployment with high availability: + +```bash +helm install semantic-router ./deploy/helm/semantic-router \ + -f ./deploy/helm/semantic-router/values-prod.yaml \ + --namespace vllm-semantic-router-system \ + --create-namespace +``` + +### Install with Custom Values + +Create your own values file and install: + +```bash +helm install semantic-router ./deploy/helm/semantic-router \ + -f my-values.yaml \ + --namespace my-namespace \ + --create-namespace +``` + +## Uninstalling the Chart + +To uninstall/delete the `semantic-router` deployment: + +```bash +helm uninstall semantic-router --namespace vllm-semantic-router-system +``` + +This command removes all the Kubernetes components associated with the chart and deletes the release. 
+ +## Configuration + +### Key Configuration Parameters + +The following table lists the key configurable parameters of the Semantic Router chart and their default values. + +#### Global Settings + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `global.namespace` | Override namespace for all resources | `""` (uses Release.Namespace) | +| `replicaCount` | Number of replicas | `1` | +| `nameOverride` | Override the name of the chart | `""` | +| `fullnameOverride` | Override the full name of the chart | `""` | + +#### Image Configuration + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `image.repository` | Image repository | `ghcr.io/vllm-project/semantic-router/extproc` | +| `image.tag` | Image tag | `latest` | +| `image.pullPolicy` | Image pull policy | `IfNotPresent` | +| `imagePullSecrets` | Image pull secrets | `[]` | + +#### Service Configuration + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `service.type` | Service type | `ClusterIP` | +| `service.grpc.port` | gRPC service port | `50051` | +| `service.api.port` | HTTP API service port | `8080` | +| `service.metrics.enabled` | Enable metrics service | `true` | +| `service.metrics.port` | Metrics service port | `9190` | + +#### Resources + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `resources.limits.memory` | Memory limit | `6Gi` | +| `resources.limits.cpu` | CPU limit | `2` | +| `resources.requests.memory` | Memory request | `3Gi` | +| `resources.requests.cpu` | CPU request | `1` | + +#### Persistence + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `persistence.enabled` | Enable persistent volume | `true` | +| `persistence.storageClassName` | Storage class name | `standard` | +| `persistence.size` | Storage size | `10Gi` | +| `persistence.accessMode` | Access mode | `ReadWriteOnce` | +| `persistence.existingClaim` | Use existing PVC | 
`""` | + +#### Init Container + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `initContainer.enabled` | Enable init container for model downloading | `true` | +| `initContainer.image` | Init container image | `python:3.11-slim` | +| `initContainer.resources.limits.memory` | Init container memory limit | `1Gi` | +| `initContainer.resources.limits.cpu` | Init container CPU limit | `500m` | + +#### Autoscaling + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `autoscaling.enabled` | Enable horizontal pod autoscaling | `false` | +| `autoscaling.minReplicas` | Minimum number of replicas | `1` | +| `autoscaling.maxReplicas` | Maximum number of replicas | `10` | +| `autoscaling.targetCPUUtilizationPercentage` | Target CPU utilization | `80` | + +#### Ingress + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `ingress.enabled` | Enable ingress | `false` | +| `ingress.className` | Ingress class name | `""` | +| `ingress.annotations` | Ingress annotations | `{}` | +| `ingress.hosts` | Ingress hosts configuration | See values.yaml | +| `ingress.tls` | Ingress TLS configuration | `[]` | + +#### Application Configuration + +The `config` section contains the application-specific configuration: + +- `bert_model`: BERT model configuration for embeddings +- `semantic_cache`: Semantic cache settings (memory/Milvus) +- `tools`: Tools database configuration +- `prompt_guard`: Jailbreak detection settings +- `vllm_endpoints`: vLLM endpoint configuration +- `classifier`: Category and PII classifier settings +- `categories`: Category-specific model scores and prompts +- `api`: API configuration including batch processing + +See [values.yaml](values.yaml) for complete configuration options. 
+ +## Usage Examples + +### Basic Deployment + +```bash +# Install semantic router +helm install semantic-router ./deploy/helm/semantic-router + +# Wait for deployment to be ready +kubectl wait --for=condition=Available deployment/semantic-router \ + -n vllm-semantic-router-system --timeout=600s + +# Port forward to access the API +kubectl port-forward -n vllm-semantic-router-system \ + svc/semantic-router 8080:8080 +``` + +### Test the API + +```bash +# Health check +curl http://localhost:8080/health + +# Intent classification +curl -X POST http://localhost:8080/api/v1/classify/intent \ + -H "Content-Type: application/json" \ + -d '{"text": "What is machine learning?"}' + +# Category classification +curl -X POST http://localhost:8080/api/v1/classify/category \ + -H "Content-Type: application/json" \ + -d '{"text": "Explain photosynthesis"}' +``` + +### Access Metrics + +```bash +kubectl port-forward -n vllm-semantic-router-system \ + svc/semantic-router-metrics 9190:9190 + +curl http://localhost:9190/metrics +``` + +### Upgrade Deployment + +```bash +# Upgrade with new values +helm upgrade semantic-router ./deploy/helm/semantic-router \ + -f my-updated-values.yaml + +# Upgrade to a new version +helm upgrade semantic-router ./deploy/helm/semantic-router \ + --set image.tag=v0.2.0 + +# Rollback to previous version +helm rollback semantic-router +``` + +### Custom Configuration Example + +Create a `custom-values.yaml`: + +```yaml +replicaCount: 2 + +resources: + limits: + memory: "8Gi" + cpu: "2" + requests: + memory: "4Gi" + cpu: "1" + +config: + vllm_endpoints: + - name: "my-endpoint" + address: "10.0.1.100" + port: 8000 + weight: 1 + + semantic_cache: + enabled: true + backend_type: "milvus" + max_entries: 5000 + +ingress: + enabled: true + className: "nginx" + hosts: + - host: semantic-router.mydomain.com + paths: + - path: / + pathType: Prefix + servicePort: 8080 +``` + +Then install: + +```bash +helm install semantic-router ./deploy/helm/semantic-router \ + -f 
custom-values.yaml \ + --namespace production \ + --create-namespace +``` + +## Migration from Kustomize + +If you're currently using the Kustomize deployment, here's how to migrate: + +1. **Export your current configuration:** + + ```bash + kubectl get configmap semantic-router-config \ + -n vllm-semantic-router-system \ + -o yaml > current-config.yaml + ``` + +2. **Create a values file with your configuration:** + + ```bash + # Extract config.yaml and tools_db.json from the configmap + # and merge into your values file + ``` + +3. **Uninstall Kustomize deployment:** + + ```bash + kubectl delete -k deploy/kubernetes/ + ``` + +4. **Install with Helm:** + + ```bash + helm install semantic-router ./deploy/helm/semantic-router \ + -f your-values.yaml \ + --namespace vllm-semantic-router-system \ + --create-namespace + ``` + +## Development + +### Validating the Chart + +```bash +# Lint the chart +helm lint ./deploy/helm/semantic-router + +# Dry-run installation +helm install semantic-router ./deploy/helm/semantic-router \ + --dry-run --debug + +# Template rendering +helm template semantic-router ./deploy/helm/semantic-router \ + -f ./deploy/helm/semantic-router/values-dev.yaml +``` + +### Package the Chart + +```bash +helm package ./deploy/helm/semantic-router + +# Output: semantic-router-0.1.0.tgz +``` + +## Troubleshooting + +### Pods not starting + +Check pod status: + +```bash +kubectl get pods -n vllm-semantic-router-system +kubectl describe pod -n vllm-semantic-router-system +kubectl logs -n vllm-semantic-router-system deployment/semantic-router +``` + +### Model download failures + +Check init container logs: + +```bash +kubectl logs -n vllm-semantic-router-system deployment/semantic-router -c model-downloader +``` + +### Insufficient resources + +If pods are pending due to insufficient resources, reduce resource requests: + +```bash +helm upgrade semantic-router ./deploy/helm/semantic-router \ + -f ./deploy/helm/semantic-router/values-dev.yaml +``` + +### PVC issues + +Check PVC status: + +```bash +kubectl get
pvc -n vllm-semantic-router-system +kubectl describe pvc semantic-router-models -n vllm-semantic-router-system +``` + +## Support + +For issues and feature requests, please visit: + +- GitHub: https://github.com/vllm-project/semantic-router +- Documentation: https://semantic-router.io + +## License + +This Helm chart is licensed under the same license as the Semantic Router project. diff --git a/deploy/helm/semantic-router/templates/NOTES.txt b/deploy/helm/semantic-router/templates/NOTES.txt new file mode 100644 index 00000000..f2e811b8 --- /dev/null +++ b/deploy/helm/semantic-router/templates/NOTES.txt @@ -0,0 +1,38 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ include "semantic-router.namespace" . }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "semantic-router.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ include "semantic-router.namespace" . }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch its status by running 'kubectl get --namespace {{ include "semantic-router.namespace" . }} svc -w {{ include "semantic-router.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ include "semantic-router.namespace" . }} {{ include "semantic-router.fullname" .
}} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.api.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ include "semantic-router.namespace" . }} -l "app.kubernetes.io/name={{ include "semantic-router.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ include "semantic-router.namespace" . }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ include "semantic-router.namespace" . }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} + +2. Test the Classification API: + # Health check + curl http://localhost:8080/health + + # Intent classification + curl -X POST http://localhost:8080/api/v1/classify/intent \ + -H "Content-Type: application/json" \ + -d '{"text": "What is machine learning?"}' + +3. Access metrics: + kubectl --namespace {{ include "semantic-router.namespace" . }} port-forward svc/{{ include "semantic-router.fullname" . }}-metrics 9190:9190 + curl http://localhost:9190/metrics + +4. Access gRPC API: + kubectl --namespace {{ include "semantic-router.namespace" . }} port-forward svc/{{ include "semantic-router.fullname" . }} 50051:50051 diff --git a/deploy/helm/semantic-router/templates/_helpers.tpl b/deploy/helm/semantic-router/templates/_helpers.tpl new file mode 100644 index 00000000..d9a2993d --- /dev/null +++ b/deploy/helm/semantic-router/templates/_helpers.tpl @@ -0,0 +1,83 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "semantic-router.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. 
+*/}} +{{- define "semantic-router.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "semantic-router.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "semantic-router.labels" -}} +helm.sh/chart: {{ include "semantic-router.chart" . }} +{{ include "semantic-router.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "semantic-router.selectorLabels" -}} +app.kubernetes.io/name: {{ include "semantic-router.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +app: semantic-router +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "semantic-router.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "semantic-router.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Get the namespace +*/}} +{{- define "semantic-router.namespace" -}} +{{- if .Values.global.namespace }} +{{- .Values.global.namespace }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} + +{{/* +Get the PVC name +*/}} +{{- define "semantic-router.pvcName" -}} +{{- if .Values.persistence.existingClaim }} +{{- .Values.persistence.existingClaim }} +{{- else }} +{{- printf "%s-models" (include "semantic-router.fullname" .) 
}} +{{- end }} +{{- end }} diff --git a/deploy/helm/semantic-router/templates/configmap.yaml b/deploy/helm/semantic-router/templates/configmap.yaml new file mode 100644 index 00000000..2e21cb3a --- /dev/null +++ b/deploy/helm/semantic-router/templates/configmap.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "semantic-router.fullname" . }}-config + namespace: {{ include "semantic-router.namespace" . }} + labels: + {{- include "semantic-router.labels" . | nindent 4 }} +data: + config.yaml: | + {{- toYaml .Values.config | nindent 4 }} + tools_db.json: | + {{- toJson .Values.toolsDb | nindent 4 }} diff --git a/deploy/helm/semantic-router/templates/deployment.yaml b/deploy/helm/semantic-router/templates/deployment.yaml new file mode 100644 index 00000000..5b43f015 --- /dev/null +++ b/deploy/helm/semantic-router/templates/deployment.yaml @@ -0,0 +1,147 @@ +{{- /* Deployment for the semantic router: an optional model-downloader init container fills /app/models, then the router container serves the gRPC, metrics and classify-api ports. The pod template carries a checksum of the rendered ConfigMap so configuration changes roll the pods. */ -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "semantic-router.fullname" . }} + namespace: {{ include "semantic-router.namespace" . }} + labels: + {{- include "semantic-router.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "semantic-router.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "semantic-router.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "semantic-router.serviceAccountName" . 
}} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- if .Values.initContainer.enabled }} + initContainers: + - name: model-downloader + image: {{ .Values.initContainer.image }} + securityContext: + {{- toYaml .Values.securityContext | nindent 10 }} + command: ["/bin/bash", "-c"] + args: + - | + set -e + echo "Installing Hugging Face CLI..." + pip install --no-cache-dir huggingface_hub[cli] + + echo "Downloading models to persistent volume..." + cd /app/models + + {{- range .Values.initContainer.models }} + # Download {{ .name }} + if [ ! -d "{{ .name }}" ]; then + echo "Downloading {{ .name }}..." + python -m huggingface_hub.commands.huggingface_cli download {{ .repo }} --local-dir {{ .name }} + else + echo "{{ .name }} already exists, skipping..." + fi + + {{- end }} + echo "All models downloaded successfully!" + ls -la /app/models/ + env: + - name: HF_HUB_CACHE + value: /tmp/hf_cache + resources: + {{- toYaml .Values.initContainer.resources | nindent 10 }} + volumeMounts: + - name: models-volume + mountPath: /app/models + {{- end }} + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.args }} + args: + {{- toYaml . | nindent 10 }} + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 10 }} + ports: + - containerPort: {{ .Values.service.grpc.targetPort }} + name: grpc + protocol: TCP + - containerPort: {{ .Values.service.metrics.targetPort }} + name: metrics + protocol: TCP + - containerPort: {{ .Values.service.api.targetPort }} + name: classify-api + protocol: TCP + {{- with .Values.env }} + env: + {{- toYaml . 
| nindent 10 }} + {{- end }} + volumeMounts: + - name: config-volume + mountPath: /app/config + readOnly: true + {{- /* Mount the models volume whenever something provides it: the PVC, or the init container that downloads into it. With both disabled nothing is mounted over /app/models. */}} + {{- if or .Values.persistence.enabled .Values.initContainer.enabled }} + - name: models-volume + mountPath: /app/models + {{- end }} + {{- if .Values.livenessProbe.enabled }} + livenessProbe: + tcpSocket: + port: {{ .Values.service.grpc.targetPort }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + {{- end }} + {{- if .Values.readinessProbe.enabled }} + readinessProbe: + tcpSocket: + port: {{ .Values.service.grpc.targetPort }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 10 }} + volumes: + - name: config-volume + configMap: + name: {{ include "semantic-router.fullname" . }}-config + {{- if .Values.persistence.enabled }} + - name: models-volume + persistentVolumeClaim: + claimName: {{ include "semantic-router.pvcName" . }} + {{- else if .Values.initContainer.enabled }} + {{- /* Persistence is off but the init container still mounts models-volume: back it with an emptyDir so the pod spec stays valid and the router sees the downloaded models (they are re-downloaded on every pod start). */}} + - name: models-volume + emptyDir: {} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/deploy/helm/semantic-router/templates/hpa.yaml b/deploy/helm/semantic-router/templates/hpa.yaml new file mode 100644 index 00000000..ce5f4ddb --- /dev/null +++ b/deploy/helm/semantic-router/templates/hpa.yaml @@ -0,0 +1,33 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "semantic-router.fullname" . 
}} + namespace: {{ include "semantic-router.namespace" . }} + labels: + {{- include "semantic-router.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "semantic-router.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/deploy/helm/semantic-router/templates/ingress.yaml b/deploy/helm/semantic-router/templates/ingress.yaml new file mode 100644 index 00000000..bdd4d9d8 --- /dev/null +++ b/deploy/helm/semantic-router/templates/ingress.yaml @@ -0,0 +1,42 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "semantic-router.fullname" . }} + namespace: {{ include "semantic-router.namespace" . }} + labels: + {{- include "semantic-router.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if .Values.ingress.className }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . 
| quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + pathType: {{ .pathType }} + backend: + service: + name: {{ include "semantic-router.fullname" $ }} + port: + number: {{ .servicePort }} + {{- end }} + {{- end }} +{{- end }} diff --git a/deploy/helm/semantic-router/templates/namespace.yaml b/deploy/helm/semantic-router/templates/namespace.yaml new file mode 100644 index 00000000..2d03b93c --- /dev/null +++ b/deploy/helm/semantic-router/templates/namespace.yaml @@ -0,0 +1,11 @@ +{{- /* Create the target namespace only when it does not exist yet. Once it exists the lookup guard stops rendering this object, so the next helm upgrade would treat it as removed from the chart and delete it together with everything inside; the resource-policy annotation tells Helm to keep the namespace instead (it also survives helm uninstall). */}} +{{- if not (lookup "v1" "Namespace" "" (include "semantic-router.namespace" .)) }} +apiVersion: v1 +kind: Namespace +metadata: + name: {{ include "semantic-router.namespace" . }} + labels: + {{- include "semantic-router.labels" . | nindent 4 }} + annotations: + helm.sh/resource-policy: keep +{{- end }} diff --git a/deploy/helm/semantic-router/templates/pvc.yaml b/deploy/helm/semantic-router/templates/pvc.yaml new file mode 100644 index 00000000..4d780e2c --- /dev/null +++ b/deploy/helm/semantic-router/templates/pvc.yaml @@ -0,0 +1,26 @@ +{{- if and .Values.persistence.enabled (not .Values.persistence.existingClaim) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "semantic-router.pvcName" . }} + namespace: {{ include "semantic-router.namespace" . }} + labels: + {{- include "semantic-router.labels" . | nindent 4 }} + {{- with .Values.persistence.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + accessModes: + - {{ .Values.persistence.accessMode }} + {{- if .Values.persistence.storageClassName }} + {{- if (eq "-" .Values.persistence.storageClassName) }} + storageClassName: "" + {{- else }} + storageClassName: {{ .Values.persistence.storageClassName }} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ .Values.persistence.size }} +{{- end }} diff --git a/deploy/helm/semantic-router/templates/service.yaml b/deploy/helm/semantic-router/templates/service.yaml new file mode 100644 index 00000000..8dd7efbe --- /dev/null +++ b/deploy/helm/semantic-router/templates/service.yaml @@ -0,0 +1,40 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "semantic-router.fullname" . }} + namespace: {{ include "semantic-router.namespace" . }} + labels: + {{- include "semantic-router.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.grpc.port }} + targetPort: {{ .Values.service.grpc.targetPort }} + protocol: {{ .Values.service.grpc.protocol }} + name: grpc + - port: {{ .Values.service.api.port }} + targetPort: {{ .Values.service.api.targetPort }} + protocol: {{ .Values.service.api.protocol }} + name: classify-api + selector: + {{- include "semantic-router.selectorLabels" . | nindent 4 }} +--- +{{- if .Values.service.metrics.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "semantic-router.fullname" . }}-metrics + namespace: {{ include "semantic-router.namespace" . }} + labels: + {{- include "semantic-router.labels" . | nindent 4 }} + service: metrics +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.metrics.port }} + targetPort: {{ .Values.service.metrics.targetPort }} + protocol: {{ .Values.service.metrics.protocol }} + name: metrics + selector: + {{- include "semantic-router.selectorLabels" . 
| nindent 4 }} +{{- end }} diff --git a/deploy/helm/semantic-router/templates/serviceaccount.yaml b/deploy/helm/semantic-router/templates/serviceaccount.yaml new file mode 100644 index 00000000..ff2d4736 --- /dev/null +++ b/deploy/helm/semantic-router/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "semantic-router.serviceAccountName" . }} + namespace: {{ include "semantic-router.namespace" . }} + labels: + {{- include "semantic-router.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/deploy/helm/semantic-router/values-dev.yaml b/deploy/helm/semantic-router/values-dev.yaml new file mode 100644 index 00000000..6371bc11 --- /dev/null +++ b/deploy/helm/semantic-router/values-dev.yaml @@ -0,0 +1,88 @@ +# Development environment values +# Optimized for local development with reduced resources + +replicaCount: 1 + +image: + pullPolicy: Always + tag: "latest" + +# Reduced resources for development +resources: + limits: + memory: "4Gi" + cpu: "1" + requests: + memory: "2Gi" + cpu: "500m" + +initContainer: + resources: + limits: + memory: "512Mi" + cpu: "250m" + requests: + memory: "256Mi" + cpu: "100m" + +# Smaller storage for development +persistence: + size: 5Gi + storageClassName: "standard" + +# Faster probes for development +livenessProbe: + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + +readinessProbe: + initialDelaySeconds: 45 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + +# Development configuration +config: + bert_model: + use_cpu: true + threshold: 0.5 # Lower threshold for testing + + semantic_cache: + enabled: true + max_entries: 500 # Smaller cache for dev + + tools: + enabled: true + + prompt_guard: + enabled: true + use_cpu: true + + vllm_endpoints: + - name: "dev-endpoint" + address: 
"127.0.0.1" + port: 8000 + weight: 1 + + # Enable detailed metrics for debugging + api: + batch_classification: + metrics: + enabled: true + detailed_goroutine_tracking: true + high_resolution_timing: true + sample_rate: 1.0 + +# Disable autoscaling in development +autoscaling: + enabled: false + +# Ingress disabled for local development +ingress: + enabled: false + +# Service type for local development +service: + type: ClusterIP diff --git a/deploy/helm/semantic-router/values-example.yaml b/deploy/helm/semantic-router/values-example.yaml new file mode 100644 index 00000000..11ac21b0 --- /dev/null +++ b/deploy/helm/semantic-router/values-example.yaml @@ -0,0 +1,180 @@ +# Example: Custom Configuration Values +# This file demonstrates how to customize the Semantic Router Helm deployment + +# Basic settings +replicaCount: 2 +nameOverride: "" +fullnameOverride: "" + +# Image configuration +image: + repository: ghcr.io/vllm-project/semantic-router/extproc + pullPolicy: IfNotPresent + tag: "v0.1.0" # Use specific version for production + +# Service configuration +service: + type: ClusterIP # Or LoadBalancer for external access + grpc: + port: 50051 + api: + port: 8080 + metrics: + enabled: true + port: 9190 + +# Resources - adjust based on your workload +resources: + limits: + memory: "8Gi" + cpu: "3" + requests: + memory: "4Gi" + cpu: "1.5" + +# Storage configuration +persistence: + enabled: true + storageClassName: "fast-ssd" # Use your storage class + size: 20Gi + accessMode: ReadWriteOnce + +# Init container configuration +initContainer: + enabled: true + resources: + limits: + memory: "2Gi" + cpu: "1" + requests: + memory: "1Gi" + cpu: "500m" + +# Application configuration +config: + # BERT model settings + bert_model: + model_id: sentence-transformers/all-MiniLM-L12-v2 + threshold: 0.65 + use_cpu: false # Set to true if no GPU + + # Semantic cache + semantic_cache: + enabled: true + backend_type: "memory" # or "milvus" for production + similarity_threshold: 0.85 + 
max_entries: 2000 + ttl_seconds: 7200 + eviction_policy: "lru" + + # Tools configuration + tools: + enabled: true + top_k: 5 + similarity_threshold: 0.25 + + # Security - Prompt guard + prompt_guard: + enabled: true + use_modernbert: true + model_id: "models/jailbreak_classifier_modernbert-base_model" + threshold: 0.75 + use_cpu: true + + # vLLM endpoints - Configure your actual endpoints + vllm_endpoints: + - name: "primary-endpoint" + address: "10.0.1.10" # Your vLLM server IP + port: 8000 + weight: 2 + - name: "secondary-endpoint" + address: "10.0.1.11" # Your vLLM server IP + port: 8000 + weight: 1 + + # Model configuration + model_config: + "your-model-name": + reasoning_family: "qwen3" # or "gpt-oss", "deepseek" + preferred_endpoints: ["primary-endpoint"] + pii_policy: + allow_by_default: true + + # Default model + default_model: "your-model-name" + + # API configuration + api: + batch_classification: + max_batch_size: 150 + concurrency_threshold: 8 + max_concurrency: 12 + metrics: + enabled: true + sample_rate: 0.5 # Sample 50% for performance + +# Auto-scaling (optional) +autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 8 + targetCPUUtilizationPercentage: 75 + targetMemoryUtilizationPercentage: 80 + +# Ingress configuration (optional) +ingress: + enabled: true + className: "nginx" + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-prod" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/proxy-body-size: "10m" + hosts: + - host: semantic-router.yourdomain.com + paths: + - path: / + pathType: Prefix + servicePort: 8080 + tls: + - secretName: semantic-router-tls + hosts: + - semantic-router.yourdomain.com + +# Security contexts +podSecurityContext: + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 1000 + +securityContext: + runAsNonRoot: true + runAsUser: 1000 + allowPrivilegeEscalation: false + readOnlyRootFilesystem: false + capabilities: + drop: + - ALL + +# Node selection (optional) +nodeSelector: + 
workload-type: ml-inference + +# Tolerations (optional) +tolerations: + - key: "nvidia.com/gpu" + operator: "Exists" + effect: "NoSchedule" + +# Affinity rules for high availability +affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - semantic-router + topologyKey: kubernetes.io/hostname diff --git a/deploy/helm/semantic-router/values-prod.yaml b/deploy/helm/semantic-router/values-prod.yaml new file mode 100644 index 00000000..3bd03cb9 --- /dev/null +++ b/deploy/helm/semantic-router/values-prod.yaml @@ -0,0 +1,165 @@ +# Production environment values +# Optimized for production with high availability and security + +replicaCount: 3 + +image: + pullPolicy: IfNotPresent + tag: "v0.1.0" # Use specific version in production + +imagePullSecrets: + - name: ghcr-secret + +# Production-grade resources +resources: + limits: + memory: "8Gi" + cpu: "4" + requests: + memory: "4Gi" + cpu: "2" + +initContainer: + resources: + limits: + memory: "2Gi" + cpu: "1" + requests: + memory: "1Gi" + cpu: "500m" + +# Larger storage for production +persistence: + size: 50Gi + storageClassName: "fast-ssd" # Use high-performance storage class + +# Conservative probes for production +livenessProbe: + initialDelaySeconds: 90 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 3 + +readinessProbe: + initialDelaySeconds: 120 + periodSeconds: 30 + timeoutSeconds: 10 + failureThreshold: 3 + +# Production configuration +config: + bert_model: + use_cpu: false # Use GPU in production if available + threshold: 0.7 # Higher threshold for production + + semantic_cache: + enabled: true + backend_type: "milvus" # Use Milvus in production + max_entries: 10000 + + tools: + enabled: true + top_k: 5 + + prompt_guard: + enabled: true + threshold: 0.8 # Stricter security in production + + vllm_endpoints: + - name: "prod-endpoint-1" + address: 
"10.0.1.10" + port: 8000 + weight: 1 + - name: "prod-endpoint-2" + address: "10.0.1.11" + port: 8000 + weight: 1 + + # Production API configuration + api: + batch_classification: + max_batch_size: 200 + max_concurrency: 16 + metrics: + enabled: true + detailed_goroutine_tracking: false + high_resolution_timing: false + sample_rate: 0.1 # Sample 10% in production + +# Enable autoscaling in production +autoscaling: + enabled: true + minReplicas: 3 + maxReplicas: 10 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 + +# Ingress configuration for production +ingress: + enabled: true + className: "nginx" + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-prod" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + hosts: + - host: semantic-router.example.com + paths: + - path: / + pathType: Prefix + servicePort: 8080 + tls: + - secretName: semantic-router-tls + hosts: + - semantic-router.example.com + +# Service configuration +service: + type: ClusterIP + +# Pod security context for production +podSecurityContext: + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 1000 + seccompProfile: + type: RuntimeDefault + +securityContext: + runAsNonRoot: true + runAsUser: 1000 + allowPrivilegeEscalation: false + readOnlyRootFilesystem: false + capabilities: + drop: + - ALL + +# Pod anti-affinity for high availability +affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - semantic-router + topologyKey: kubernetes.io/hostname + +# Node selector for production nodes +nodeSelector: + node-role.kubernetes.io/worker: "true" + +# Tolerations for production workloads +tolerations: + - key: "workload" + operator: "Equal" + value: "production" + effect: "NoSchedule" + +# Pod disruption budget (requires separate resource) +# 
podDisruptionBudget: +# enabled: true +# minAvailable: 2 diff --git a/deploy/helm/semantic-router/values.yaml b/deploy/helm/semantic-router/values.yaml new file mode 100644 index 00000000..554e80aa --- /dev/null +++ b/deploy/helm/semantic-router/values.yaml @@ -0,0 +1,531 @@ +# Default values for semantic-router. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Global settings +global: + # -- Namespace for all resources (if not specified, uses Release.Namespace) + namespace: "" + +# -- Number of replicas for the deployment +replicaCount: 1 + +# Image configuration +image: + # -- Image repository + repository: ghcr.io/vllm-project/semantic-router/extproc + # -- Image pull policy + pullPolicy: IfNotPresent + # -- Image tag (overrides the image tag whose default is the chart appVersion) + tag: "latest" + +# -- Image pull secrets for private registries +imagePullSecrets: [] + +# -- Override the name of the chart +nameOverride: "" + +# -- Override the full name of the chart +fullnameOverride: "" + +# Service account configuration +serviceAccount: + # -- Specifies whether a service account should be created + create: true + # -- Annotations to add to the service account + annotations: {} + # -- The name of the service account to use + name: "" + +# Pod annotations +podAnnotations: {} + +# Pod security context +podSecurityContext: {} + # fsGroup: 2000 + +# Container security context +securityContext: + # -- Run as non-root user + runAsNonRoot: false + # -- Allow privilege escalation + allowPrivilegeEscalation: false + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsUser: 1000 + +# Service configuration +service: + # -- Service type + type: ClusterIP + # gRPC service port + grpc: + # -- gRPC port number + port: 50051 + # -- gRPC target port + targetPort: 50051 + # -- gRPC protocol + protocol: TCP + # HTTP API service port + api: + # -- HTTP API port number + port: 8080 + # -- HTTP API target port + 
targetPort: 8080 + # -- HTTP API protocol + protocol: TCP + # Metrics service + metrics: + # -- Enable metrics service + enabled: true + # -- Metrics port number + port: 9190 + # -- Metrics target port + targetPort: 9190 + # -- Metrics protocol + protocol: TCP + +# Ingress configuration +ingress: + # -- Enable ingress + enabled: false + # -- Ingress class name + className: "" + # -- Ingress annotations + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + # -- Ingress hosts configuration + hosts: + - host: semantic-router.local + paths: + - path: / + pathType: Prefix + servicePort: 8080 + # -- Ingress TLS configuration + tls: [] + # - secretName: semantic-router-tls + # hosts: + # - semantic-router.local + +# Resource limits and requests +resources: + # -- Resource limits + limits: + memory: "6Gi" + cpu: "2" + # -- Resource requests + requests: + memory: "3Gi" + cpu: "1" + +# Init container for model downloading +initContainer: + # -- Enable init container + enabled: true + # -- Init container image + image: python:3.11-slim + # -- Resource limits for init container + resources: + limits: + memory: "1Gi" + cpu: "500m" + requests: + memory: "512Mi" + cpu: "250m" + # -- Models to download + models: + - name: all-MiniLM-L12-v2 + repo: sentence-transformers/all-MiniLM-L12-v2 + - name: category_classifier_modernbert-base_model + repo: LLM-Semantic-Router/category_classifier_modernbert-base_model + - name: pii_classifier_modernbert-base_model + repo: LLM-Semantic-Router/pii_classifier_modernbert-base_model + - name: jailbreak_classifier_modernbert-base_model + repo: LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model + - name: pii_classifier_modernbert-base_presidio_token_model + repo: LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model + +# Autoscaling configuration +autoscaling: + # -- Enable horizontal pod autoscaling + enabled: false + # -- Minimum number of replicas + minReplicas: 1 + # -- 
Maximum number of replicas + maxReplicas: 10 + # -- Target CPU utilization percentage + targetCPUUtilizationPercentage: 80 + # -- Target memory utilization percentage + # targetMemoryUtilizationPercentage: 80 + +# Node selector +nodeSelector: {} + +# Tolerations +tolerations: [] + +# Affinity rules +affinity: {} + +# Liveness probe configuration +livenessProbe: + # -- Enable liveness probe + enabled: true + # -- Initial delay seconds + initialDelaySeconds: 60 + # -- Period seconds + periodSeconds: 30 + # -- Timeout seconds + timeoutSeconds: 10 + # -- Failure threshold + failureThreshold: 3 + +# Readiness probe configuration +readinessProbe: + # -- Enable readiness probe + enabled: true + # -- Initial delay seconds + initialDelaySeconds: 90 + # -- Period seconds + periodSeconds: 30 + # -- Timeout seconds + timeoutSeconds: 10 + # -- Failure threshold + failureThreshold: 3 + +# Persistent Volume Claim for models +persistence: + # -- Enable persistent volume + enabled: true + # -- Storage class name (leave empty to use the cluster's default storage class; use "-" to render storageClassName: "" and disable dynamic provisioning) + storageClassName: "standard" + # -- Access mode + accessMode: ReadWriteOnce + # -- Storage size + size: 10Gi + # -- Annotations for PVC + annotations: {} + # -- Existing claim name (if provided, will use existing PVC instead of creating new one) + existingClaim: "" + +# Application configuration +config: + # BERT model configuration + bert_model: + model_id: models/all-MiniLM-L12-v2 + threshold: 0.6 + use_cpu: true + + # Semantic cache configuration + semantic_cache: + enabled: true + backend_type: "memory" + similarity_threshold: 0.8 + max_entries: 1000 + ttl_seconds: 3600 + eviction_policy: "fifo" + + # Tools configuration + tools: + enabled: true + top_k: 3 + similarity_threshold: 0.2 + tools_db_path: "config/tools_db.json" + fallback_to_empty: true + + # Prompt guard configuration + prompt_guard: + enabled: true + use_modernbert: true + model_id: "models/jailbreak_classifier_modernbert-base_model" + threshold: 0.7 + use_cpu: true + 
jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json" + + # vLLM endpoints configuration + vllm_endpoints: + - name: "endpoint1" + address: "172.28.0.20" + port: 8002 + weight: 1 + + # Model configuration + model_config: + "qwen3": + reasoning_family: "qwen3" + preferred_endpoints: ["endpoint1"] + pii_policy: + allow_by_default: true + + # Classifier configuration + classifier: + category_model: + model_id: "models/category_classifier_modernbert-base_model" + use_modernbert: true + threshold: 0.6 + use_cpu: true + category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json" + pii_model: + model_id: "models/pii_classifier_modernbert-base_presidio_token_model" + use_modernbert: true + threshold: 0.7 + use_cpu: true + pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json" + + # Categories configuration + categories: + - name: business + system_prompt: "You are a senior business consultant and strategic advisor with expertise in corporate strategy, operations management, financial analysis, marketing, and organizational development. Provide practical, actionable business advice backed by proven methodologies and industry best practices. Consider market dynamics, competitive landscape, and stakeholder interests in your recommendations." + model_scores: + - model: qwen3 + score: 0.7 + use_reasoning: false + - name: law + system_prompt: "You are a knowledgeable legal expert with comprehensive understanding of legal principles, case law, statutory interpretation, and legal procedures across multiple jurisdictions. Provide accurate legal information and analysis while clearly stating that your responses are for informational purposes only and do not constitute legal advice. Always recommend consulting with qualified legal professionals for specific legal matters." 
+ model_scores: + - model: qwen3 + score: 0.4 + use_reasoning: false + - name: psychology + system_prompt: "You are a psychology expert with deep knowledge of cognitive processes, behavioral patterns, mental health, developmental psychology, social psychology, and therapeutic approaches. Provide evidence-based insights grounded in psychological research and theory. When discussing mental health topics, emphasize the importance of professional consultation and avoid providing diagnostic or therapeutic advice." + semantic_cache_enabled: true + semantic_cache_similarity_threshold: 0.92 + model_scores: + - model: qwen3 + score: 0.6 + use_reasoning: false + - name: biology + system_prompt: "You are a biology expert with comprehensive knowledge spanning molecular biology, genetics, cell biology, ecology, evolution, anatomy, physiology, and biotechnology. Explain biological concepts with scientific accuracy, use appropriate terminology, and provide examples from current research. Connect biological principles to real-world applications and emphasize the interconnectedness of biological systems." + model_scores: + - model: qwen3 + score: 0.9 + use_reasoning: false + - name: chemistry + system_prompt: "You are a chemistry expert specializing in chemical reactions, molecular structures, and laboratory techniques. Provide detailed, step-by-step explanations." + model_scores: + - model: qwen3 + score: 0.6 + use_reasoning: true + - name: history + system_prompt: "You are a historian with expertise across different time periods and cultures. Provide accurate historical context and analysis." + model_scores: + - model: qwen3 + score: 0.7 + use_reasoning: false + - name: other + system_prompt: "You are a helpful and knowledgeable assistant. Provide accurate, helpful responses across a wide range of topics." 
+ semantic_cache_enabled: true + semantic_cache_similarity_threshold: 0.75 + model_scores: + - model: qwen3 + score: 0.7 + use_reasoning: false + - name: health + system_prompt: "You are a health and medical information expert with knowledge of anatomy, physiology, diseases, treatments, preventive care, nutrition, and wellness. Provide accurate, evidence-based health information while emphasizing that your responses are for educational purposes only and should never replace professional medical advice, diagnosis, or treatment. Always encourage users to consult healthcare professionals for medical concerns and emergencies." + semantic_cache_enabled: true + semantic_cache_similarity_threshold: 0.95 + model_scores: + - model: qwen3 + score: 0.5 + use_reasoning: false + - name: economics + system_prompt: "You are an economics expert with deep understanding of microeconomics, macroeconomics, econometrics, financial markets, monetary policy, fiscal policy, international trade, and economic theory. Analyze economic phenomena using established economic principles, provide data-driven insights, and explain complex economic concepts in accessible terms. Consider both theoretical frameworks and real-world applications in your responses." + model_scores: + - model: qwen3 + score: 1.0 + use_reasoning: false + - name: math + system_prompt: "You are a mathematics expert. Provide step-by-step solutions, show your work clearly, and explain mathematical concepts in an understandable way." + model_scores: + - model: qwen3 + score: 1.0 + use_reasoning: true + - name: physics + system_prompt: "You are a physics expert with deep understanding of physical laws and phenomena. Provide clear explanations with mathematical derivations when appropriate." + model_scores: + - model: qwen3 + score: 0.7 + use_reasoning: true + - name: computer science + system_prompt: "You are a computer science expert with knowledge of algorithms, data structures, programming languages, and software engineering. 
Provide clear, practical solutions with code examples when helpful." + model_scores: + - model: qwen3 + score: 0.6 + use_reasoning: false + - name: philosophy + system_prompt: "You are a philosophy expert with comprehensive knowledge of philosophical traditions, ethical theories, logic, metaphysics, epistemology, political philosophy, and the history of philosophical thought. Engage with complex philosophical questions by presenting multiple perspectives, analyzing arguments rigorously, and encouraging critical thinking. Draw connections between philosophical concepts and contemporary issues while maintaining intellectual honesty about the complexity and ongoing nature of philosophical debates." + model_scores: + - model: qwen3 + score: 0.5 + use_reasoning: false + - name: engineering + system_prompt: "You are an engineering expert with knowledge across multiple engineering disciplines including mechanical, electrical, civil, chemical, software, and systems engineering. Apply engineering principles, design methodologies, and problem-solving approaches to provide practical solutions. Consider safety, efficiency, sustainability, and cost-effectiveness in your recommendations. Use technical precision while explaining concepts clearly, and emphasize the importance of proper engineering practices and standards." 
+ model_scores: + - model: qwen3 + score: 0.7 + use_reasoning: false + + # Default model + default_model: "qwen3" + + # Reasoning families + reasoning_families: + deepseek: + type: "chat_template_kwargs" + parameter: "thinking" + qwen3: + type: "chat_template_kwargs" + parameter: "enable_thinking" + gpt-oss: + type: "reasoning_effort" + parameter: "reasoning_effort" + gpt: + type: "reasoning_effort" + parameter: "reasoning_effort" + + # Default reasoning effort + default_reasoning_effort: high + + # API configuration + api: + batch_classification: + max_batch_size: 100 + concurrency_threshold: 5 + max_concurrency: 8 + metrics: + enabled: true + detailed_goroutine_tracking: true + high_resolution_timing: false + sample_rate: 1.0 + duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30] + size_buckets: [1, 2, 5, 10, 20, 50, 100, 200] + + # Observability configuration + observability: + tracing: + enabled: true + provider: "opentelemetry" + exporter: + type: "otlp" + endpoint: "jaeger:4317" + insecure: true + sampling: + type: "always_on" + rate: 1.0 + resource: + service_name: "vllm-semantic-router" + service_version: "v0.1.0" + deployment_environment: "development" + +# Tools database configuration +toolsDb: + - tool: + type: "function" + function: + name: "get_weather" + description: "Get current weather information for a location" + parameters: + type: "object" + properties: + location: + type: "string" + description: "The city and state, e.g. San Francisco, CA" + unit: + type: "string" + enum: ["celsius", "fahrenheit"] + description: "Temperature unit" + required: ["location"] + description: "Get current weather information, temperature, conditions, forecast for any location, city, or place. 
Check weather today, now, current conditions, temperature, rain, sun, cloudy, hot, cold, storm, snow" + category: "weather" + tags: ["weather", "temperature", "forecast", "climate"] + - tool: + type: "function" + function: + name: "search_web" + description: "Search the web for information" + parameters: + type: "object" + properties: + query: + type: "string" + description: "The search query" + num_results: + type: "integer" + description: "Number of results to return" + default: 5 + required: ["query"] + description: "Search the internet, web search, find information online, browse web content, lookup, research, google, find answers, discover, investigate" + category: "search" + tags: ["search", "web", "internet", "information", "browse"] + - tool: + type: "function" + function: + name: "calculate" + description: "Perform mathematical calculations" + parameters: + type: "object" + properties: + expression: + type: "string" + description: "Mathematical expression to evaluate" + required: ["expression"] + description: "Calculate mathematical expressions, solve math problems, arithmetic operations, compute numbers, addition, subtraction, multiplication, division, equations, formula" + category: "math" + tags: ["math", "calculation", "arithmetic", "compute", "numbers"] + - tool: + type: "function" + function: + name: "send_email" + description: "Send an email message" + parameters: + type: "object" + properties: + to: + type: "string" + description: "Recipient email address" + subject: + type: "string" + description: "Email subject" + body: + type: "string" + description: "Email body content" + required: ["to", "subject", "body"] + description: "Send email messages, email communication, contact people via email, mail, message, correspondence, notify, inform" + category: "communication" + tags: ["email", "send", "communication", "message", "contact"] + - tool: + type: "function" + function: + name: "create_calendar_event" + description: "Create a new calendar event or 
appointment" + parameters: + type: "object" + properties: + title: + type: "string" + description: "Event title" + date: + type: "string" + description: "Event date in YYYY-MM-DD format" + time: + type: "string" + description: "Event time in HH:MM format" + duration: + type: "integer" + description: "Duration in minutes" + required: ["title", "date", "time"] + description: "Schedule meetings, create calendar events, set appointments, manage calendar, book time, plan meeting, organize schedule, reminder, agenda" + category: "productivity" + tags: ["calendar", "event", "meeting", "appointment", "schedule"] + +# Container arguments +args: + - "--secure=true" + +# Environment variables +env: + - name: LD_LIBRARY_PATH + value: "/app/lib" diff --git a/deploy/helm/validate-chart.sh b/deploy/helm/validate-chart.sh new file mode 100755 index 00000000..ade91ed2 --- /dev/null +++ b/deploy/helm/validate-chart.sh @@ -0,0 +1,256 @@ +#!/bin/bash + +# Helm Chart Validation Script +# This script validates the Helm chart for semantic-router + +set -e + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Configuration +CHART_PATH="deploy/helm/semantic-router" +TEMP_DIR="/tmp/helm-test-$$" + +# Functions +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +cleanup() { + log_info "Cleaning up..." + rm -rf "$TEMP_DIR" +} + +trap cleanup EXIT + +# Create temp directory +mkdir -p "$TEMP_DIR" + +echo "==================================================" +echo "Semantic Router Helm Chart Validation" +echo "==================================================" +echo "" + +# Test 1: Helm lint +log_info "Running Helm lint..." 
if ! helm lint "$CHART_PATH"; then
    log_error "Helm lint failed"
    exit 1
fi
log_success "Helm lint passed"
echo ""

# Render the chart with `helm template` and save the output under $TEMP_DIR.
# Exits the script with status 1 when rendering fails.
#   $1    label used in the log messages ("default", "dev", "prod")
#   $2    output file name, created inside $TEMP_DIR
#   $3..  extra arguments forwarded to `helm template` (e.g. -f <values file>)
render_chart() {
    local label="$1"
    local outfile="$TEMP_DIR/$2"
    shift 2

    log_info "Testing Helm template with ${label} values..."
    if helm template test-release "$CHART_PATH" "$@" > "$outfile"; then
        log_success "Helm template with ${label} values succeeded"
        log_info "Output saved to $outfile"
    else
        log_error "Helm template with ${label} values failed"
        exit 1
    fi
    echo ""
}

# Test 2: Helm template with default values
render_chart "default" "default-template.yaml"

# Test 3: Helm template with dev values
render_chart "dev" "dev-template.yaml" -f "$CHART_PATH/values-dev.yaml"

# Test 4: Helm template with prod values
render_chart "prod" "prod-template.yaml" -f "$CHART_PATH/values-prod.yaml"

# Test 5: Validate YAML syntax
log_info "Validating YAML syntax..."
if command -v yamllint > /dev/null 2>&1; then
    # `-f parsable` prints exactly one line per finding (no per-file header and
    # no blank separator lines). After dropping the expected brace-spacing
    # findings, grep succeeds only if a real finding is left; with the default
    # output format the header lines alone made grep succeed every time.
    if yamllint -f parsable \
        "$CHART_PATH/values.yaml" \
        "$CHART_PATH/values-dev.yaml" \
        "$CHART_PATH/values-prod.yaml" 2>&1 \
        | grep -v "too many spaces inside braces"; then
        log_warning "YAML lint found some issues (Helm templates cause expected warnings)"
    else
        log_success "YAML validation passed"
    fi
else
    log_warning "yamllint not installed, skipping YAML validation"
fi
echo ""

# Test 6: Check required files exist
log_info "Checking required files..."
# Files that must be present in the chart directory (paths relative to
# $CHART_PATH).
required_files=(
    "Chart.yaml"
    "values.yaml"
    "values-dev.yaml"
    "values-prod.yaml"
    "README.md"
    ".helmignore"
    "templates/_helpers.tpl"
    "templates/deployment.yaml"
    "templates/service.yaml"
    "templates/configmap.yaml"
    "templates/namespace.yaml"
    "templates/pvc.yaml"
    "templates/serviceaccount.yaml"
    "templates/ingress.yaml"
    "templates/hpa.yaml"
    "templates/NOTES.txt"
)

# Report every missing file before failing, so one run lists all of them.
all_files_exist=true
for file in "${required_files[@]}"; do
    if [ -f "$CHART_PATH/$file" ]; then
        log_success "Found: $file"
    else
        log_error "Missing: $file"
        all_files_exist=false
    fi
done

if [ "$all_files_exist" = false ]; then
    log_error "Some required files are missing"
    exit 1
fi
echo ""

# Test 7: Validate generated resources
log_info "Validating generated Kubernetes resources..."
resource_types=(
    "Namespace"
    "ServiceAccount"
    "PersistentVolumeClaim"
    "ConfigMap"
    "Deployment"
    "Service"
)

for resource in "${resource_types[@]}"; do
    # Match the whole top-level line. A plain substring search would accept
    # "kind: ServiceAccount" as proof of a Service, and would also accept
    # indented nested "kind:" fields inside other manifests.
    if grep -qE "^kind: ${resource}[[:space:]]*\$" "$TEMP_DIR/default-template.yaml"; then
        log_success "Found resource: $resource"
    else
        log_error "Missing resource: $resource"
        exit 1
    fi
done
echo ""

# Test 8: Validate Chart.yaml
log_info "Validating Chart.yaml..."
if [ -f "$CHART_PATH/Chart.yaml" ]; then
    # Take the second whitespace-separated field of the first matching
    # top-level key.
    chart_name=$(awk '/^name:/ {print $2; exit}' "$CHART_PATH/Chart.yaml")
    chart_version=$(awk '/^version:/ {print $2; exit}' "$CHART_PATH/Chart.yaml")
    app_version=$(awk '/^appVersion:/ {print $2; exit}' "$CHART_PATH/Chart.yaml")

    # name and version are mandatory Chart.yaml fields; do not report success
    # for a chart that lacks them.
    if [ -z "$chart_name" ] || [ -z "$chart_version" ]; then
        log_error "Chart.yaml must define both name and version"
        exit 1
    fi

    log_success "Chart name: $chart_name"
    log_success "Chart version: $chart_version"
    log_success "App version: $app_version"
else
    log_error "Chart.yaml not found"
    exit 1
fi
echo ""

# Test 9: Check for common Helm best practices
log_info "Checking Helm best practices..."
+best_practices_passed=true + +# Check if labels helper exists +if grep -q "semantic-router.labels" "$CHART_PATH/templates/_helpers.tpl"; then + log_success "Labels helper template exists" +else + log_error "Labels helper template missing" + best_practices_passed=false +fi + +# Check if selector labels helper exists +if grep -q "semantic-router.selectorLabels" "$CHART_PATH/templates/_helpers.tpl"; then + log_success "Selector labels helper template exists" +else + log_error "Selector labels helper template missing" + best_practices_passed=false +fi + +# Check if NOTES.txt exists +if [ -f "$CHART_PATH/templates/NOTES.txt" ]; then + log_success "NOTES.txt exists" +else + log_error "NOTES.txt missing" + best_practices_passed=false +fi + +if [ "$best_practices_passed" = false ]; then + log_error "Some best practices checks failed" + exit 1 +fi +echo "" + +# Test 10: Dry-run install (requires cluster) +if kubectl cluster-info &> /dev/null; then + log_info "Testing dry-run install..." + if helm install test-release "$CHART_PATH" --dry-run --debug > "$TEMP_DIR/dry-run.log" 2>&1; then + log_success "Dry-run install succeeded" + else + log_error "Dry-run install failed" + cat "$TEMP_DIR/dry-run.log" + exit 1 + fi +else + log_warning "No Kubernetes cluster available, skipping dry-run install test" +fi +echo "" + +# Test 11: Package the chart +log_info "Testing chart packaging..." +if helm package "$CHART_PATH" --destination "$TEMP_DIR" > /dev/null 2>&1; then + log_success "Chart packaged successfully" + ls -lh "$TEMP_DIR"/*.tgz +else + log_error "Chart packaging failed" + exit 1 +fi +echo "" + +# Summary +echo "==================================================" +echo "Validation Summary" +echo "==================================================" +log_success "All validation tests passed!" +echo "" +echo "Generated files are available in: $TEMP_DIR" +echo "" +echo "Next steps:" +echo "1. Review the generated templates in $TEMP_DIR" +echo "2. 
Test installation: make helm-install" +echo "3. Test with dev config: make helm-dev" +echo "4. Test with prod config: make helm-prod" +echo "" diff --git a/tools/kind/generate-kind-config.sh b/tools/kind/generate-kind-config.sh index 0c0b672d..b3777e91 100755 --- a/tools/kind/generate-kind-config.sh +++ b/tools/kind/generate-kind-config.sh @@ -48,4 +48,3 @@ echo -e "${GREEN}✓ Generated ${OUTPUT_FILE}${NC}" echo "" echo "You can now create the kind cluster with:" echo " kind create cluster --config ${OUTPUT_FILE}" - diff --git a/tools/make/helm.mk b/tools/make/helm.mk new file mode 100644 index 00000000..ca0b698a --- /dev/null +++ b/tools/make/helm.mk @@ -0,0 +1,255 @@ +# ======== helm.mk ============ +# = Helm deployment targets = +# ======== helm.mk ============ + +##@ Helm + +# Configuration +HELM_RELEASE_NAME ?= semantic-router +HELM_NAMESPACE ?= vllm-semantic-router-system +HELM_CHART_PATH ?= deploy/helm/semantic-router +HELM_VALUES_FILE ?= +HELM_SET_VALUES ?= +HELM_TIMEOUT ?= 10m + +# Colors for output (reuse from common.mk if available, otherwise define) +BLUE ?= \033[0;34m +GREEN ?= \033[0;32m +YELLOW ?= \033[1;33m +RED ?= \033[0;31m +NC ?= \033[0m + +.PHONY: helm-lint helm-template helm-install helm-upgrade helm-install-or-upgrade \ + helm-uninstall helm-status helm-list helm-history helm-rollback helm-test \ + helm-package helm-dev helm-prod helm-values helm-manifest \ + helm-port-forward-api helm-port-forward-grpc helm-port-forward-metrics \ + helm-logs helm-clean helm-setup helm-cleanup helm-reinstall help-helm _check-k8s + +helm-lint: ## Lint the Helm chart +helm-lint: + @$(LOG_TARGET) + @helm lint $(HELM_CHART_PATH) + @echo "$(GREEN)[SUCCESS]$(NC) Helm chart linted successfully" + +helm-template: ## Template the Helm chart (dry-run) +helm-template: + @$(LOG_TARGET) + @helm template $(HELM_RELEASE_NAME) $(HELM_CHART_PATH) \ + $(if $(HELM_VALUES_FILE),-f $(HELM_VALUES_FILE)) \ + $(if $(HELM_SET_VALUES),--set $(HELM_SET_VALUES)) \ + --namespace 
$(HELM_NAMESPACE)

# Shell condition that is true only when a release named exactly
# $(HELM_RELEASE_NAME) is listed in $(HELM_NAMESPACE). `helm list -q` prints one
# release name per line and `grep -Fqx` demands a literal whole-line match, so a
# release whose name merely starts with $(HELM_RELEASE_NAME) is not mistaken
# for it.
_helm_release_exists = helm list -n $(HELM_NAMESPACE) -q 2>/dev/null | grep -Fqx "$(HELM_RELEASE_NAME)"

# NOTE: recipes below use `>/dev/null 2>&1` rather than the bash-only
# `&>/dev/null`. make runs recipes with /bin/sh unless SHELL is overridden; in
# a POSIX sh such as dash, `cmd &>/dev/null` backgrounds cmd and the status
# tested is that of the empty redirection.

# Refuses to run when the release already exists, creates the namespace if it
# is missing, then installs with --wait and prints the release status.
helm-install: ## Install the Helm chart
helm-install: _check-k8s
	@$(LOG_TARGET)
	@echo "Installing Helm release: $(HELM_RELEASE_NAME)"
	@if $(_helm_release_exists); then \
		echo "$(YELLOW)[WARNING]$(NC) Release $(HELM_RELEASE_NAME) already exists in namespace $(HELM_NAMESPACE)"; \
		echo "$(BLUE)[INFO]$(NC) Use 'make helm-upgrade' to upgrade or 'make helm-uninstall' to remove it first"; \
		exit 1; \
	fi
	@echo "$(BLUE)[INFO]$(NC) Ensuring namespace $(HELM_NAMESPACE) exists..."
	@kubectl get namespace $(HELM_NAMESPACE) >/dev/null 2>&1 || kubectl create namespace $(HELM_NAMESPACE)
	@helm install $(HELM_RELEASE_NAME) $(HELM_CHART_PATH) \
		$(if $(HELM_VALUES_FILE),-f $(HELM_VALUES_FILE)) \
		$(if $(HELM_SET_VALUES),--set $(HELM_SET_VALUES)) \
		--namespace $(HELM_NAMESPACE) \
		--wait \
		--timeout $(HELM_TIMEOUT)
	@echo "$(GREEN)[SUCCESS]$(NC) Helm chart installed successfully"
	@$(MAKE) helm-status

# Upgrades an existing release in place with the same optional values file and
# --set overrides as helm-install.
helm-upgrade: ## Upgrade the Helm release
helm-upgrade: _check-k8s
	@$(LOG_TARGET)
	@echo "Upgrading Helm release: $(HELM_RELEASE_NAME)"
	@helm upgrade $(HELM_RELEASE_NAME) $(HELM_CHART_PATH) \
		$(if $(HELM_VALUES_FILE),-f $(HELM_VALUES_FILE)) \
		$(if $(HELM_SET_VALUES),--set $(HELM_SET_VALUES)) \
		--namespace $(HELM_NAMESPACE) \
		--wait \
		--timeout $(HELM_TIMEOUT)
	@echo "$(GREEN)[SUCCESS]$(NC) Helm release upgraded successfully"
	@$(MAKE) helm-status

# Dispatches to helm-upgrade or helm-install depending on whether the release
# is already listed in the namespace.
helm-install-or-upgrade: ## Install or upgrade the Helm release (idempotent)
helm-install-or-upgrade: _check-k8s
	@if $(_helm_release_exists); then \
		echo "$(BLUE)[INFO]$(NC) Release exists, upgrading..."; \
		$(MAKE) helm-upgrade; \
	else \
		echo "$(BLUE)[INFO]$(NC) Release does not exist, installing..."; \
		$(MAKE) helm-install; \
	fi

helm-uninstall: ## Uninstall the Helm release
helm-uninstall:
	@$(LOG_TARGET)
	@helm uninstall $(HELM_RELEASE_NAME) --namespace $(HELM_NAMESPACE)
	@echo "$(GREEN)[SUCCESS]$(NC) Helm release uninstalled successfully"

# Informational only: both commands fall back to a message instead of failing
# the target.
helm-status: ## Show Helm release status
helm-status:
	@$(LOG_TARGET)
	@helm status $(HELM_RELEASE_NAME) --namespace $(HELM_NAMESPACE) || echo "$(RED)[ERROR]$(NC) Release not found"
	@echo ""
	@echo "$(BLUE)[INFO]$(NC) Deployed resources:"
	@kubectl get all -n $(HELM_NAMESPACE) -l app.kubernetes.io/instance=$(HELM_RELEASE_NAME) || echo "$(YELLOW)[WARNING]$(NC) No resources found"

helm-list: ## List all Helm releases
helm-list:
	@$(LOG_TARGET)
	@helm list --all-namespaces

helm-history: ## Show Helm release history
helm-history:
	@$(LOG_TARGET)
	@helm history $(HELM_RELEASE_NAME) --namespace $(HELM_NAMESPACE)

helm-rollback: ## Rollback to previous Helm release
helm-rollback:
	@$(LOG_TARGET)
	@helm rollback $(HELM_RELEASE_NAME) --namespace $(HELM_NAMESPACE) --wait
	@echo "$(GREEN)[SUCCESS]$(NC) Helm release rolled back successfully"
	@$(MAKE) helm-status

# Smoke test of a deployed release. A Deployment that never becomes Available
# fails the target; previously the error was echoed and the recipe still ended
# with [SUCCESS]. The pod/service/PVC listings remain best-effort diagnostics.
helm-test: ## Test the Helm release
helm-test: _check-k8s
	@$(LOG_TARGET)
	@if ! $(_helm_release_exists); then \
		echo "$(RED)[ERROR]$(NC) Release $(HELM_RELEASE_NAME) not found in namespace $(HELM_NAMESPACE)"; \
		echo "$(BLUE)[INFO]$(NC) Please run 'make helm-install' or 'make helm-setup' first"; \
		exit 1; \
	fi
	@echo "$(BLUE)[INFO]$(NC) Checking deployment status..."
	@kubectl wait --for=condition=Available deployment/$(HELM_RELEASE_NAME) \
		-n $(HELM_NAMESPACE) --timeout=300s || { echo "$(RED)[ERROR]$(NC) Deployment not ready"; exit 1; }
	@echo "$(BLUE)[INFO]$(NC) Checking pod status..."
	@kubectl get pods -n $(HELM_NAMESPACE) -l app.kubernetes.io/instance=$(HELM_RELEASE_NAME) || echo "$(RED)[ERROR]$(NC) Cannot get pods"
	@echo "$(BLUE)[INFO]$(NC) Checking services..."
	@kubectl get svc -n $(HELM_NAMESPACE) -l app.kubernetes.io/instance=$(HELM_RELEASE_NAME) || echo "$(RED)[ERROR]$(NC) Cannot get services"
	@echo "$(BLUE)[INFO]$(NC) Checking PVC..."
	@kubectl get pvc -n $(HELM_NAMESPACE) || echo "$(YELLOW)[WARNING]$(NC) Cannot get PVC"
	@echo "$(GREEN)[SUCCESS]$(NC) Helm release test completed"

# Writes the packaged chart archive into ./dist.
helm-package: ## Package the Helm chart
helm-package:
	@$(LOG_TARGET)
	@mkdir -p ./dist
	@helm package $(HELM_CHART_PATH) --destination ./dist
	@echo "$(GREEN)[SUCCESS]$(NC) Helm chart packaged successfully"
	@ls -lh ./dist/semantic-router-*.tgz

# Runs helm-install with the chart's values-dev.yaml.
helm-dev: ## Deploy with development configuration
helm-dev:
	@$(LOG_TARGET)
	@$(MAKE) helm-install HELM_VALUES_FILE=$(HELM_CHART_PATH)/values-dev.yaml
	@echo ""
	@echo "$(GREEN)[SUCCESS]$(NC) Development deployment completed!"
	@echo "$(BLUE)[INFO]$(NC) Next steps:"
	@echo " - Test deployment: make helm-test"
	@echo " - Port forward API: make helm-port-forward-api"
	@echo " - View logs: make helm-logs"

# Runs helm-install with the chart's values-prod.yaml.
helm-prod: ## Deploy with production configuration
helm-prod:
	@$(LOG_TARGET)
	@$(MAKE) helm-install HELM_VALUES_FILE=$(HELM_CHART_PATH)/values-prod.yaml

helm-values: ## Show computed Helm values
helm-values:
	@$(LOG_TARGET)
	@helm get values $(HELM_RELEASE_NAME) --namespace $(HELM_NAMESPACE) --all

helm-manifest: ## Show deployed Helm manifest
helm-manifest:
	@$(LOG_TARGET)
	@helm get manifest $(HELM_RELEASE_NAME) --namespace $(HELM_NAMESPACE)

# The port-forward targets run in the foreground until interrupted.
helm-port-forward-api: ## Port forward Classification API (8080)
helm-port-forward-api:
	@$(LOG_TARGET)
	@echo "$(YELLOW)[INFO]$(NC) Access API at: http://localhost:8080"
	@echo "$(YELLOW)[INFO]$(NC) Health check: curl http://localhost:8080/health"
	@echo "$(YELLOW)[INFO]$(NC) Press Ctrl+C to stop port forwarding"
	@kubectl port-forward -n $(HELM_NAMESPACE) svc/$(HELM_RELEASE_NAME) 8080:8080

helm-port-forward-grpc: ## Port forward gRPC API (50051)
helm-port-forward-grpc:
	@$(LOG_TARGET)
	@echo "$(YELLOW)[INFO]$(NC) Access gRPC API at: localhost:50051"
	@echo "$(YELLOW)[INFO]$(NC) Press Ctrl+C to stop port forwarding"
	@kubectl port-forward -n $(HELM_NAMESPACE) svc/$(HELM_RELEASE_NAME) 50051:50051

helm-port-forward-metrics: ## Port forward Prometheus metrics (9190)
helm-port-forward-metrics:
	@$(LOG_TARGET)
	@echo "$(YELLOW)[INFO]$(NC) Access metrics at: http://localhost:9190/metrics"
	@echo "$(YELLOW)[INFO]$(NC) Press Ctrl+C to stop port forwarding"
	@kubectl port-forward -n $(HELM_NAMESPACE) svc/$(HELM_RELEASE_NAME)-metrics 9190:9190

# Follows (-f) the logs of pods labelled with this release's instance name.
helm-logs: ## Show semantic-router logs
helm-logs:
	@$(LOG_TARGET)
	@kubectl logs -n $(HELM_NAMESPACE) -l app.kubernetes.io/instance=$(HELM_RELEASE_NAME) -f

helm-setup: helm-dev ## Complete setup: install with dev configuration
helm-setup:
	@echo "$(GREEN)[SUCCESS]$(NC) Helm setup completed!"
	@echo "$(BLUE)[INFO]$(NC) Next steps:"
	@echo " - Test deployment: make helm-test"
	@echo " - Port forward API: make helm-port-forward-api"
	@echo " - View logs: make helm-logs"

# Runs helm-uninstall first (as a prerequisite), then deletes the namespace.
helm-cleanup: helm-uninstall ## Complete cleanup: uninstall and delete namespace
helm-cleanup:
	@$(LOG_TARGET)
	@echo "Cleaning up namespace..."
	@kubectl delete namespace $(HELM_NAMESPACE) --ignore-not-found=true
	@echo "$(GREEN)[SUCCESS]$(NC) Complete cleanup finished!"

helm-clean: ## Alias for helm-cleanup
helm-clean: helm-cleanup

# Best-effort teardown followed by a fresh install. The namespace is deleted
# without waiting, polled for up to 30 seconds, and as a last resort its
# finalizers are cleared through the /finalize endpoint (requires jq). The
# outcome message now reflects whether the namespace is actually gone instead
# of always claiming success.
helm-reinstall: ## Force reinstall: cleanup and install fresh
helm-reinstall:
	@$(LOG_TARGET)
	@echo "$(YELLOW)[INFO]$(NC) Force reinstalling Helm release..."
	@echo "$(BLUE)[STEP 1/5]$(NC) Uninstalling existing release (if any)..."
	@helm uninstall $(HELM_RELEASE_NAME) --namespace $(HELM_NAMESPACE) 2>/dev/null || echo "No existing release found"
	@echo "$(BLUE)[STEP 2/5]$(NC) Deleting namespace..."
	@kubectl delete namespace $(HELM_NAMESPACE) --ignore-not-found=true --wait=false 2>/dev/null || true
	@echo "$(BLUE)[STEP 3/5]$(NC) Waiting for namespace to be fully deleted..."
	@timeout=30; \
	elapsed=0; \
	while kubectl get namespace $(HELM_NAMESPACE) >/dev/null 2>&1 && [ $$elapsed -lt $$timeout ]; do \
		echo " Waiting for namespace $(HELM_NAMESPACE) to terminate... ($$elapsed/$$timeout seconds)"; \
		sleep 2; \
		elapsed=$$((elapsed + 2)); \
	done; \
	if kubectl get namespace $(HELM_NAMESPACE) >/dev/null 2>&1; then \
		echo "$(YELLOW)[WARNING]$(NC) Namespace still exists after $$timeout seconds, forcing cleanup..."; \
		kubectl get namespace $(HELM_NAMESPACE) -o json 2>/dev/null | jq '.spec.finalizers = []' | kubectl replace --raw /api/v1/namespaces/$(HELM_NAMESPACE)/finalize -f - 2>/dev/null || true; \
		sleep 2; \
	fi; \
	if kubectl get namespace $(HELM_NAMESPACE) >/dev/null 2>&1; then \
		echo "$(YELLOW)[WARNING]$(NC) Namespace $(HELM_NAMESPACE) still exists; the install below may fail"; \
	else \
		echo "$(GREEN)[✓]$(NC) Namespace deleted successfully"; \
	fi
	@echo "$(BLUE)[STEP 4/5]$(NC) Ensuring namespace is gone..."
	@sleep 3
	@echo "$(BLUE)[STEP 5/5]$(NC) Installing fresh release..."
	@$(MAKE) helm-install
	@echo "$(GREEN)[SUCCESS]$(NC) Helm release reinstalled successfully!"

# Internal helper target to check if Kubernetes is available.
# Fails with setup hints when `kubectl cluster-info` does not succeed.
_check-k8s:
	@if ! kubectl cluster-info >/dev/null 2>&1; then \
		echo "$(RED)[ERROR]$(NC) Kubernetes cluster is not accessible"; \
		echo "$(BLUE)[INFO]$(NC) Please ensure your Kubernetes cluster is running:"; \
		echo " - For local development: minikube start / kind create cluster / docker desktop"; \
		echo " - For remote clusters: check your kubeconfig and cluster connection"; \
		echo ""; \
		echo "$(YELLOW)[TIP]$(NC) You can use the following commands to start a local cluster:"; \
		echo " - minikube: make kube-up"; \
		echo " - kind: make kind-cluster-create"; \
		exit 1; \
	fi
	@echo "$(GREEN)[✓]$(NC) Kubernetes cluster is accessible"