From 0665cbf773a64b2a70df88673da4eb2b2dc0a37b Mon Sep 17 00:00:00 2001 From: JaredforReal Date: Tue, 30 Sep 2025 12:26:02 +0800 Subject: [PATCH 1/6] feat: add Grafava+Prometheus in k8s Signed-off-by: JaredforReal --- deploy/kubernetes/observability/README.md | 203 ++++++ .../grafana/configmap-dashboard.yaml | 652 ++++++++++++++++++ .../grafana/configmap-provisioning.yaml | 30 + .../observability/grafana/deployment.yaml | 85 +++ .../kubernetes/observability/grafana/pvc.yaml | 12 + .../observability/grafana/secret.yaml | 10 + .../observability/grafana/service.yaml | 14 + deploy/kubernetes/observability/ingress.yaml | 53 ++ .../observability/kustomization.yaml | 22 + .../observability/prometheus/configmap.yaml | 35 + .../observability/prometheus/deployment.yaml | 54 ++ .../observability/prometheus/pvc.yaml | 12 + .../observability/prometheus/rbac.yaml | 43 ++ .../observability/prometheus/service.yaml | 14 + 14 files changed, 1239 insertions(+) create mode 100644 deploy/kubernetes/observability/README.md create mode 100644 deploy/kubernetes/observability/grafana/configmap-dashboard.yaml create mode 100644 deploy/kubernetes/observability/grafana/configmap-provisioning.yaml create mode 100644 deploy/kubernetes/observability/grafana/deployment.yaml create mode 100644 deploy/kubernetes/observability/grafana/pvc.yaml create mode 100644 deploy/kubernetes/observability/grafana/secret.yaml create mode 100644 deploy/kubernetes/observability/grafana/service.yaml create mode 100644 deploy/kubernetes/observability/ingress.yaml create mode 100644 deploy/kubernetes/observability/kustomization.yaml create mode 100644 deploy/kubernetes/observability/prometheus/configmap.yaml create mode 100644 deploy/kubernetes/observability/prometheus/deployment.yaml create mode 100644 deploy/kubernetes/observability/prometheus/pvc.yaml create mode 100644 deploy/kubernetes/observability/prometheus/rbac.yaml create mode 100644 deploy/kubernetes/observability/prometheus/service.yaml diff --git a/deploy/kubernetes/observability/README.md b/deploy/kubernetes/observability/README.md new file mode 100644 index 00000000..640621ce --- /dev/null +++ b/deploy/kubernetes/observability/README.md @@ -0,0 +1,203 @@ +# Semantic Router Observability on Kubernetes + +This guide adds a production-ready Prometheus + Grafana stack to the existing Semantic Router Kubernetes deployment. It includes manifests for collectors, dashboards, data sources, RBAC, and ingress so you can monitor routing performance in any cluster. + +> **Namespace** – All manifests default to the `vllm-semantic-router-system` namespace to match the core deployment. Override it with Kustomize if you use a different namespace. + +## What Gets Installed + +| Component | Purpose | Key Files | +|--------------|---------|-----------| +| Prometheus | Scrapes Semantic Router metrics and stores them with persistent retention | `prometheus/` (`rbac.yaml`, `configmap.yaml`, `deployment.yaml`, `pvc.yaml`, `service.yaml`)| +| Grafana | Visualizes metrics using the bundled LLM Router dashboard and a pre-configured Prometheus datasource | `grafana/` (`secret.yaml`, `configmap-*.yaml`, `deployment.yaml`, `pvc.yaml`, `service.yaml`)| +| Ingress (optional) | Exposes the UIs outside the cluster | `ingress.yaml`| +| Dashboard provisioning | Automatically loads `deploy/llm-router-dashboard.json` into Grafana | `grafana/configmap-dashboard.yaml`| + +Prometheus is configured to discover the `semantic-router-metrics` service (port `9190`) automatically. 
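+
+A quick way to confirm that this Service exists in your cluster before installing the stack (the names below are the defaults used throughout this guide):
+
+```bash
+kubectl get service semantic-router-metrics -n vllm-semantic-router-system
+kubectl get endpoints semantic-router-metrics -n vllm-semantic-router-system
+```
+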
Grafana provisions the same LLM Router dashboard that ships with the Docker Compose stack. + +## 1. Prerequisites + +- Deployed Semantic Router workload via `deploy/kubernetes/` +- A Kubernetes cluster (managed, on-prem, or kind) +- `kubectl` v1.23+ +- Optional: an ingress controller (NGINX, ALB, etc.) if you want external access + +## 2. Directory Layout + +``` +deploy/kubernetes/observability/ +├── README.md +├── kustomization.yaml # (created in the next step) +├── ingress.yaml # optional HTTPS ingress examples +├── prometheus/ +│ ├── configmap.yaml # Scrape config (Kubernetes SD) +│ ├── deployment.yaml +│ ├── pvc.yaml +│ ├── rbac.yaml # SA + ClusterRole + binding +│ └── service.yaml +└── grafana/ + ├── configmap-dashboard.yaml # Bundled LLM router dashboard + ├── configmap-provisioning.yaml # Datasource + provider config + ├── deployment.yaml + ├── pvc.yaml + ├── secret.yaml # Admin credentials (override in prod) + └── service.yaml +``` + +## 3. Prometheus Configuration Highlights + +- Uses `kubernetes_sd_configs` to enumerate endpoints in `vllm-semantic-router-system` +- Keeps 15 days of metrics by default (`--storage.tsdb.retention.time=15d`) +- Stores metrics in a `PersistentVolumeClaim` named `prometheus-data` +- RBAC rules grant read-only access to Services, Endpoints, Pods, Nodes, and EndpointSlices + +### Scrape configuration snippet + +```yaml +scrape_configs: + - job_name: semantic-router + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - vllm-semantic-router-system + relabel_configs: + - source_labels: [__meta_kubernetes_service_name] + regex: semantic-router-metrics + action: keep + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: metrics + action: keep +``` + +Modify the namespace or service name if you changed them in your primary deployment. + +## 4. Grafana Configuration Highlights + +- Stateful deployment backed by the `grafana-storage` PVC +- Datasource provisioned automatically pointing to `http://prometheus:9090` +- Dashboard provider watches `/var/lib/grafana-dashboards` +- Bundled `llm-router-dashboard.json` is identical to `deploy/llm-router-dashboard.json` +- Admin credentials pulled from the `grafana-admin` secret (default `admin/admin` – **change this!)** + +### Updating credentials + +```bash +kubectl create secret generic grafana-admin \ + --namespace vllm-semantic-router-system \ + --from-literal=admin-user=monitor \ + --from-literal=admin-password='pick-a-strong-password' \ + --dry-run=client -o yaml | kubectl apply -f - +``` + +Remove or overwrite the committed `secret.yaml` when you adopt a different secret management approach. + +## 5. Deployment Steps + +### 5.1. Create the Kustomization + +Create `deploy/kubernetes/observability/kustomization.yaml` (see below) to assemble all manifests. This guide assumes you keep Prometheus & Grafana in the same namespace as the router. + +### 5.2. Apply manifests + +```bash +kubectl apply -k deploy/kubernetes/observability/ +``` + +Verify pods: + +```bash +kubectl get pods -n vllm-semantic-router-system +``` + +You should see `prometheus-...` and `grafana-...` pods in `Running` state. + +### 5.3. Integration with the core deployment + +1. Deploy or update Semantic Router (`kubectl apply -k deploy/kubernetes/`). +2. Deploy observability stack (`kubectl apply -k deploy/kubernetes/observability/`). +3. Confirm the metrics service (`semantic-router-metrics`) has endpoints: + + ```bash + kubectl get endpoints semantic-router-metrics -n vllm-semantic-router-system + ``` + +4. 
Prometheus target should transition to **UP** within ~15 seconds. + +### 5.4. Accessing the UIs + +> **Optional Ingress** – If you prefer to keep the stack private, delete `ingress.yaml` from `kustomization.yaml` before applying. + +- **Port-forward (quick check)** + + ```bash + kubectl port-forward svc/prometheus 9090:9090 -n vllm-semantic-router-system + kubectl port-forward svc/grafana 3000:3000 -n vllm-semantic-router-system + ``` + + Prometheus → http://localhost:9090, Grafana → http://localhost:3000 + +- **Ingress (production)** – Customize `ingress.yaml` with real domains, TLS secrets, and your ingress class before applying. Replace `*.example.com` and configure HTTPS certificates via cert-manager or your provider. + +## 6. Verifying Metrics Collection + +1. Open Prometheus (port-forward or ingress) → **Status ▸ Targets** → ensure `semantic-router` job is green. +2. Query `rate(llm_model_completion_tokens_total[5m])` – should return data after traffic. +3. Open Grafana, log in with the admin credentials, and confirm the **LLM Router Metrics** dashboard exists under the *Semantic Router* folder. +4. Generate traffic to Semantic Router (classification or routing requests). Key panels should start populating: + - Prompt Category counts + - Token usage rate per model + - Routing modifications between models + - Latency histograms (TTFT, completion p95) + +## 7. Dashboard Customization + +- Duplicate the provisioned dashboard inside Grafana to make changes while keeping the original as a template. +- Update Grafana provisioning (`grafana/configmap-provisioning.yaml`) to point to alternate folders or add new providers. +- Add additional dashboards by extending `grafana/configmap-dashboard.yaml` or mounting a different ConfigMap. +- Incorporate Kubernetes cluster metrics (CPU/memory) by adding another datasource or deploying kube-state-metrics + node exporters. + +## 8. Best Practices + +### Resource Sizing + +- Prometheus: increase CPU/memory with higher scrape cardinality or retention > 15 days. +- Grafana: start with `500m` CPU / `1Gi` RAM; scale replicas horizontally when concurrent viewers exceed a few dozen. + +### Storage + +- Use SSD-backed storage classes for Prometheus when retention/window is large. +- Increase `prometheus/pvc.yaml` (default 20Gi) and `grafana/pvc.yaml` (default 10Gi) to match retention requirements. +- Enable volume snapshots or backups for dashboards and alert history. + +### Security + +- Replace the demo `grafana-admin` secret with credentials stored in your preferred secret manager. +- Restrict ingress access with network policies, OAuth proxies, or SSO integrations. +- Enable Grafana role-based access control and API keys for automation. +- Scope Prometheus RBAC to only the namespaces you need. If metrics run in multiple namespaces, list them in the scrape config. + +### Maintenance + +- Monitor Prometheus disk usage; prune retention or scale PVC before it fills up. +- Back up Grafana dashboards or store them in Git (already done through this ConfigMap). +- Roll upgrades separately: update Prometheus and Grafana images via `kustomization.yaml` patches. +- Consider adopting the Prometheus Operator (`ServiceMonitor` + `PodMonitor`) if you already run kube-prometheus-stack. A sample `ServiceMonitor` is in `website/docs/tutorials/observability/observability.md`. + +## 9. 
Troubleshooting + +| Symptom | Checks | Fix | +|---------|--------|-----| +| Prometheus target **DOWN** | `kubectl get endpoints semantic-router-metrics -n vllm-semantic-router-system` | Ensure the Semantic Router deployment is running and the service labels match `app=semantic-router`, `service=metrics` | +| Grafana dashboard empty | **Configuration → Data Sources** | Confirm Prometheus datasource URL resolves and the Prometheus service is reachable | +| Login fails | `kubectl get secret grafana-admin -o yaml` | Update the secret to match the credentials you expect | +| PVC Pending | `kubectl describe pvc prometheus-data` | Provide a storage class via `storageClassName`, or provision storage manually | +| Ingress 404 | `kubectl describe ingress grafana` | Update hostnames, TLS secrets, and ensure ingress controller is installed | + +## 10. Next Steps + +- Configure alerts for critical metrics (Prometheus alerting rules + Alertmanager) +- Add log aggregation (Loki, Elasticsearch, or Cloud-native logging) +- Automate stack deployment through CI/CD pipelines using `kubectl apply -k` + +With this observability stack in place, you can track Semantic Router health, routing accuracy, latency distributions, and usage trends across any Kubernetes environment. diff --git a/deploy/kubernetes/observability/grafana/configmap-dashboard.yaml b/deploy/kubernetes/observability/grafana/configmap-dashboard.yaml new file mode 100644 index 00000000..eeccafb4 --- /dev/null +++ b/deploy/kubernetes/observability/grafana/configmap-dashboard.yaml @@ -0,0 +1,652 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboards + labels: + app: grafana + grafana_dashboard: "1" +data: + llm-router-dashboard.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 18, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 4, + "options": { + "displayMode": "gradient", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(category) (llm_category_classifications_count)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Prompt Category", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Tokens/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "tps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(llm_model_completion_tokens_total[5m])) by (model)", + "legendFormat": "Completion Tokens {{model}}", + "range": true, + "refId": "A" + } + ], + "title": "Token Usage Rate by Model", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Routes/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(rate(llm_model_routing_modifications_total[5m])) by (source_model, target_model)", + "format": "time_series", + "legendFormat": "{{source_model}} -> {{target_model}}", + "range": true, + "refId": "A" + } + ], + "title": "Model Routing Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": 
"Seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))", + "legendFormat": "p95 {{model}}", + "range": true, + "refId": "A" + } + ], + "title": "Model Completion Latency (p95)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(llm_model_ttft_seconds_bucket[5m])) by (le, model))", + "legendFormat": "TTFT p95 {{model}}", + "range": true, + "refId": "A" + } + ], + "title": "TTFT (p95) by Model", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Seconds per token", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": 
"none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(llm_model_tpot_seconds_bucket[5m])) by (le, model))", + "legendFormat": "TPOT p95 {{model}}", + "range": true, + "refId": "A" + } + ], + "title": "TPOT (p95) by Model (sec/token)", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "10s", + "schemaVersion": 40, + "tags": [ + "llm-router" + ], + "templating": { + "list": [ + { + "current": { + "text": "prometheus", + "value": "prometheus" + }, + "includeAll": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "LLM Router Metrics", + "uid": "llm-router-metrics", + "version": 14, + "weekStart": "" + } diff --git a/deploy/kubernetes/observability/grafana/configmap-provisioning.yaml b/deploy/kubernetes/observability/grafana/configmap-provisioning.yaml new file mode 100644 index 00000000..32086fe3 --- /dev/null +++ b/deploy/kubernetes/observability/grafana/configmap-provisioning.yaml @@ -0,0 +1,30 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-provisioning + labels: + app: grafana +data: + datasources.yaml: | + apiVersion: 1 + datasources: + - name: Prometheus + uid: prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false + jsonData: + timeInterval: 15s + dashboards.yaml: | + apiVersion: 1 + providers: + - name: semantic-router-dashboards + orgId: 1 + folder: Semantic Router + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana-dashboards diff --git a/deploy/kubernetes/observability/grafana/deployment.yaml b/deploy/kubernetes/observability/grafana/deployment.yaml new file mode 100644 index 00000000..0b3bfdcc --- /dev/null +++ b/deploy/kubernetes/observability/grafana/deployment.yaml @@ -0,0 +1,85 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana + labels: + app: grafana +spec: + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + spec: + securityContext: + runAsNonRoot: true + runAsUser: 472 + fsGroup: 472 + containers: + - name: grafana + image: grafana/grafana:11.5.1 + imagePullPolicy: IfNotPresent + env: + - name: GF_SECURITY_ADMIN_USER + valueFrom: + secretKeyRef: + name: grafana-admin + key: admin-user + - name: GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: grafana-admin + key: admin-password + - name: 
GF_AUTH_ANONYMOUS_ENABLED + value: "false" + - name: GF_PATHS_PROVISIONING + value: /etc/grafana/provisioning + ports: + - name: http + containerPort: 3000 + readinessProbe: + httpGet: + path: /api/health + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /api/health + port: http + initialDelaySeconds: 60 + periodSeconds: 30 + resources: + requests: + cpu: "100m" + memory: "256Mi" + limits: + cpu: "500m" + memory: "1Gi" + volumeMounts: + - name: provisioning + mountPath: /etc/grafana/provisioning + readOnly: true + - name: dashboards + mountPath: /var/lib/grafana-dashboards + readOnly: true + - name: storage + mountPath: /var/lib/grafana + volumes: + - name: provisioning + configMap: + name: grafana-provisioning + items: + - key: datasources.yaml + path: datasources/datasource.yaml + - key: dashboards.yaml + path: dashboards/provider.yaml + - name: dashboards + configMap: + name: grafana-dashboards + - name: storage + persistentVolumeClaim: + claimName: grafana-storage diff --git a/deploy/kubernetes/observability/grafana/pvc.yaml b/deploy/kubernetes/observability/grafana/pvc.yaml new file mode 100644 index 00000000..e11b2d94 --- /dev/null +++ b/deploy/kubernetes/observability/grafana/pvc.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: grafana-storage + labels: + app: grafana +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi diff --git a/deploy/kubernetes/observability/grafana/secret.yaml b/deploy/kubernetes/observability/grafana/secret.yaml new file mode 100644 index 00000000..f831a4a8 --- /dev/null +++ b/deploy/kubernetes/observability/grafana/secret.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Secret +metadata: + name: grafana-admin + labels: + app: grafana +type: Opaque +stringData: + admin-user: admin + admin-password: admin diff --git a/deploy/kubernetes/observability/grafana/service.yaml b/deploy/kubernetes/observability/grafana/service.yaml new file mode 100644 index 00000000..c394a31c --- /dev/null +++ b/deploy/kubernetes/observability/grafana/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: grafana + labels: + app: grafana +spec: + type: ClusterIP + selector: + app: grafana + ports: + - name: http + port: 3000 + targetPort: http diff --git a/deploy/kubernetes/observability/ingress.yaml b/deploy/kubernetes/observability/ingress.yaml new file mode 100644 index 00000000..7ef2cdf4 --- /dev/null +++ b/deploy/kubernetes/observability/ingress.yaml @@ -0,0 +1,53 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: grafana + labels: + app: grafana + annotations: + kubernetes.io/ingress.class: nginx + nginx.ingress.kubernetes.io/backend-protocol: HTTP + nginx.ingress.kubernetes.io/ssl-redirect: "true" +spec: + tls: + - hosts: + - grafana.example.com + secretName: grafana-tls + rules: + - host: grafana.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: grafana + port: + name: http +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: prometheus + labels: + app: prometheus + annotations: + kubernetes.io/ingress.class: nginx + nginx.ingress.kubernetes.io/backend-protocol: HTTP + nginx.ingress.kubernetes.io/ssl-redirect: "true" +spec: + tls: + - hosts: + - prometheus.example.com + secretName: prometheus-tls + rules: + - host: prometheus.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: prometheus + port: + name: http diff --git 
a/deploy/kubernetes/observability/kustomization.yaml b/deploy/kubernetes/observability/kustomization.yaml new file mode 100644 index 00000000..d3ec5569 --- /dev/null +++ b/deploy/kubernetes/observability/kustomization.yaml @@ -0,0 +1,22 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: vllm-semantic-router-system + +commonLabels: + app.kubernetes.io/part-of: semantic-router + app.kubernetes.io/component: observability + +resources: + - prometheus/rbac.yaml + - prometheus/pvc.yaml + - prometheus/configmap.yaml + - prometheus/deployment.yaml + - prometheus/service.yaml + - grafana/secret.yaml + - grafana/pvc.yaml + - grafana/configmap-provisioning.yaml + - grafana/configmap-dashboard.yaml + - grafana/deployment.yaml + - grafana/service.yaml + - ingress.yaml diff --git a/deploy/kubernetes/observability/prometheus/configmap.yaml b/deploy/kubernetes/observability/prometheus/configmap.yaml new file mode 100644 index 00000000..8c600621 --- /dev/null +++ b/deploy/kubernetes/observability/prometheus/configmap.yaml @@ -0,0 +1,35 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: prometheus-config + labels: + app: prometheus +data: + prometheus.yml: | + global: + scrape_interval: 15s + evaluation_interval: 15s + + scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + + - job_name: semantic-router + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - vllm-semantic-router-system + relabel_configs: + - source_labels: [__meta_kubernetes_service_name] + regex: semantic-router-metrics + action: keep + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: metrics + action: keep + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + - source_labels: [__address__] + target_label: instance diff --git a/deploy/kubernetes/observability/prometheus/deployment.yaml b/deploy/kubernetes/observability/prometheus/deployment.yaml new file mode 100644 index 00000000..a9815d01 --- /dev/null +++ b/deploy/kubernetes/observability/prometheus/deployment.yaml @@ -0,0 +1,54 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus + labels: + app: prometheus +spec: + replicas: 1 + selector: + matchLabels: + app: prometheus + template: + metadata: + labels: + app: prometheus + spec: + serviceAccountName: prometheus + securityContext: + runAsNonRoot: true + runAsUser: 65534 + fsGroup: 65534 + containers: + - name: prometheus + image: prom/prometheus:v2.53.0 + imagePullPolicy: IfNotPresent + args: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--web.enable-lifecycle" + - "--storage.tsdb.retention.time=15d" + - "--storage.tsdb.max-block-duration=2h" + - "--storage.tsdb.no-lockfile" + ports: + - name: http + containerPort: 9090 + resources: + requests: + cpu: "250m" + memory: "1Gi" + limits: + cpu: "500m" + memory: "2Gi" + volumeMounts: + - name: config + mountPath: /etc/prometheus + - name: data + mountPath: /prometheus + volumes: + - name: config + configMap: + name: prometheus-config + - name: data + persistentVolumeClaim: + claimName: prometheus-data diff --git a/deploy/kubernetes/observability/prometheus/pvc.yaml b/deploy/kubernetes/observability/prometheus/pvc.yaml new file mode 100644 index 00000000..d2dd216e --- /dev/null +++ b/deploy/kubernetes/observability/prometheus/pvc.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: prometheus-data + labels: + app: prometheus +spec: + accessModes: + - ReadWriteOnce 
+ resources: + requests: + storage: 20Gi diff --git a/deploy/kubernetes/observability/prometheus/rbac.yaml b/deploy/kubernetes/observability/prometheus/rbac.yaml new file mode 100644 index 00000000..c0954750 --- /dev/null +++ b/deploy/kubernetes/observability/prometheus/rbac.yaml @@ -0,0 +1,43 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: prometheus + labels: + app: prometheus +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus + labels: + app: prometheus +rules: + - apiGroups: [""] + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + verbs: ["get", "list", "watch"] + - apiGroups: ["discovery.k8s.io"] + resources: + - endpointslices + verbs: ["get", "list", "watch"] + - nonResourceURLs: ["/metrics"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: prometheus + labels: + app: prometheus +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: prometheus +subjects: + - kind: ServiceAccount + name: prometheus + namespace: vllm-semantic-router-system diff --git a/deploy/kubernetes/observability/prometheus/service.yaml b/deploy/kubernetes/observability/prometheus/service.yaml new file mode 100644 index 00000000..1d86bde7 --- /dev/null +++ b/deploy/kubernetes/observability/prometheus/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: prometheus + labels: + app: prometheus +spec: + selector: + app: prometheus + ports: + - name: http + port: 9090 + targetPort: http + type: ClusterIP From cedd13e642ccbf168e1b28e6bc8d49b97d779bcb Mon Sep 17 00:00:00 2001 From: JaredforReal Date: Tue, 30 Sep 2025 19:28:41 +0800 Subject: [PATCH 2/6] Update docs of observability k8s part Signed-off-by: JaredforReal --- .../tutorials/observability/observability.md | 215 ++++++++++++++---- 1 file changed, 174 insertions(+), 41 deletions(-) diff --git a/website/docs/tutorials/observability/observability.md b/website/docs/tutorials/observability/observability.md index 66411319..82756cca 100644 --- a/website/docs/tutorials/observability/observability.md +++ b/website/docs/tutorials/observability/observability.md @@ -49,74 +49,207 @@ Expected Prometheus targets: ## 3. Kubernetes Observability -After applying `deploy/kubernetes/`, you get services: +This guide adds a production-ready Prometheus + Grafana stack to the existing Semantic Router Kubernetes deployment. It includes manifests for collectors, dashboards, data sources, RBAC, and ingress so you can monitor routing performance in any cluster. -- `semantic-router` (gRPC) -- `semantic-router-metrics` (metrics 9190) +> **Namespace** – All manifests default to the `vllm-semantic-router-system` namespace to match the core deployment. Override it with Kustomize if you use a different namespace. 
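+
+For example, a minimal overlay such as the sketch below re-homes every manifest into another namespace (the overlay location and the `my-observability` namespace are placeholders; adjust the relative path to match where you keep your overlays):
+
+```yaml
+# kustomization.yaml in your own overlay directory (illustrative)
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: my-observability
+resources:
+  - ../../deploy/kubernetes/observability
+```
+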
-### 3.1 Prometheus Operator (ServiceMonitor) +## What Gets Installed -```yaml -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - name: semantic-router - namespace: semantic-router -spec: - selector: - matchLabels: - app: semantic-router - service: metrics - namespaceSelector: - matchNames: ["semantic-router"] - endpoints: - - port: metrics - interval: 15s - path: /metrics +| Component | Purpose | Key Files | +|--------------|---------|-----------| +| Prometheus | Scrapes Semantic Router metrics and stores them with persistent retention | `prometheus/` (`rbac.yaml`, `configmap.yaml`, `deployment.yaml`, `pvc.yaml`, `service.yaml`)| +| Grafana | Visualizes metrics using the bundled LLM Router dashboard and a pre-configured Prometheus datasource | `grafana/` (`secret.yaml`, `configmap-*.yaml`, `deployment.yaml`, `pvc.yaml`, `service.yaml`)| +| Ingress (optional) | Exposes the UIs outside the cluster | `ingress.yaml`| +| Dashboard provisioning | Automatically loads `deploy/llm-router-dashboard.json` into Grafana | `grafana/configmap-dashboard.yaml`| + +Prometheus is configured to discover the `semantic-router-metrics` service (port `9190`) automatically. Grafana provisions the same LLM Router dashboard that ships with the Docker Compose stack. + +### 1. Prerequisites + +- Deployed Semantic Router workload via `deploy/kubernetes/` +- A Kubernetes cluster (managed, on-prem, or kind) +- `kubectl` v1.23+ +- Optional: an ingress controller (NGINX, ALB, etc.) if you want external access + +### 2. Directory Layout + +``` +deploy/kubernetes/observability/ +├── README.md +├── kustomization.yaml # (created in the next step) +├── ingress.yaml # optional HTTPS ingress examples +├── prometheus/ +│ ├── configmap.yaml # Scrape config (Kubernetes SD) +│ ├── deployment.yaml +│ ├── pvc.yaml +│ ├── rbac.yaml # SA + ClusterRole + binding +│ └── service.yaml +└── grafana/ + ├── configmap-dashboard.yaml # Bundled LLM router dashboard + ├── configmap-provisioning.yaml # Datasource + provider config + ├── deployment.yaml + ├── pvc.yaml + ├── secret.yaml # Admin credentials (override in prod) + └── service.yaml ``` -Ensure the metrics Service carries a label like `service: metrics`. (It does in the provided manifests.) +### 3. Prometheus Configuration Highlights -### 3.2 Plain Prometheus Static Scrape +- Uses `kubernetes_sd_configs` to enumerate endpoints in `vllm-semantic-router-system` +- Keeps 15 days of metrics by default (`--storage.tsdb.retention.time=15d`) +- Stores metrics in a `PersistentVolumeClaim` named `prometheus-data` +- RBAC rules grant read-only access to Services, Endpoints, Pods, Nodes, and EndpointSlices + +#### Scrape configuration snippet ```yaml scrape_configs: - job_name: semantic-router kubernetes_sd_configs: - role: endpoints + namespaces: + names: + - vllm-semantic-router-system relabel_configs: - source_labels: [__meta_kubernetes_service_name] regex: semantic-router-metrics action: keep + - source_labels: [__meta_kubernetes_endpoint_port_name] + regex: metrics + action: keep ``` -### 3.3 Port Forward for Spot Checks +Modify the namespace or service name if you changed them in your primary deployment. + +### 4. 
Grafana Configuration Highlights + +- Stateful deployment backed by the `grafana-storage` PVC +- Datasource provisioned automatically pointing to `http://prometheus:9090` +- Dashboard provider watches `/var/lib/grafana-dashboards` +- Bundled `llm-router-dashboard.json` is identical to `deploy/llm-router-dashboard.json` +- Admin credentials pulled from the `grafana-admin` secret (default `admin/admin` – **change this!)** + +#### Updating credentials ```bash -kubectl -n semantic-router port-forward svc/semantic-router-metrics 9190:9190 -curl -s localhost:9190/metrics | head +kubectl create secret generic grafana-admin \ + --namespace vllm-semantic-router-system \ + --from-literal=admin-user=monitor \ + --from-literal=admin-password='pick-a-strong-password' \ + --dry-run=client -o yaml | kubectl apply -f - ``` -### 3.4 Grafana Dashboard Provision +Remove or overwrite the committed `secret.yaml` when you adopt a different secret management approach. -If using kube-prometheus-stack or a Grafana sidecar: +### 5. Deployment Steps -```yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: semantic-router-dashboard - namespace: semantic-router - labels: - grafana_dashboard: "1" -data: - llm-router-dashboard.json: | - # Paste JSON from deploy/llm-router-dashboard.json +#### 5.1. Create the Kustomization + +Create `deploy/kubernetes/observability/kustomization.yaml` (see below) to assemble all manifests. This guide assumes you keep Prometheus & Grafana in the same namespace as the router. + +#### 5.2. Apply manifests + +```bash +kubectl apply -k deploy/kubernetes/observability/ ``` -Otherwise import the JSON manually in Grafana UI. +Verify pods: ---- +```bash +kubectl get pods -n vllm-semantic-router-system +``` + +You should see `prometheus-...` and `grafana-...` pods in `Running` state. + +#### 5.3. Integration with the core deployment + +1. Deploy or update Semantic Router (`kubectl apply -k deploy/kubernetes/`). +2. Deploy observability stack (`kubectl apply -k deploy/kubernetes/observability/`). +3. Confirm the metrics service (`semantic-router-metrics`) has endpoints: + + ```bash + kubectl get endpoints semantic-router-metrics -n vllm-semantic-router-system + ``` + +4. Prometheus target should transition to **UP** within ~15 seconds. + +#### 5.4. Accessing the UIs + +> **Optional Ingress** – If you prefer to keep the stack private, delete `ingress.yaml` from `kustomization.yaml` before applying. + +- **Port-forward (quick check)** + + ```bash + kubectl port-forward svc/prometheus 9090:9090 -n vllm-semantic-router-system + kubectl port-forward svc/grafana 3000:3000 -n vllm-semantic-router-system + ``` + + Prometheus → http://localhost:9090, Grafana → http://localhost:3000 + +- **Ingress (production)** – Customize `ingress.yaml` with real domains, TLS secrets, and your ingress class before applying. Replace `*.example.com` and configure HTTPS certificates via cert-manager or your provider. + +### 6. Verifying Metrics Collection + +1. Open Prometheus (port-forward or ingress) → **Status ▸ Targets** → ensure `semantic-router` job is green. +2. Query `rate(llm_model_completion_tokens_total[5m])` – should return data after traffic. +3. Open Grafana, log in with the admin credentials, and confirm the **LLM Router Metrics** dashboard exists under the *Semantic Router* folder. +4. Generate traffic to Semantic Router (classification or routing requests). 
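+
+   For example, assuming an OpenAI-compatible entry point for the router has been port-forwarded to `localhost:8801` (the port and the `auto` model name are illustrative placeholders — substitute your own gateway address and model names):
+
+   ```bash
+   curl -s http://localhost:8801/v1/chat/completions \
+     -H "Content-Type: application/json" \
+     -d '{"model": "auto", "messages": [{"role": "user", "content": "What is the derivative of x^2?"}]}'
+   ```
+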
Key panels should start populating: + - Prompt Category counts + - Token usage rate per model + - Routing modifications between models + - Latency histograms (TTFT, completion p95) + +### 7. Dashboard Customization + +- Duplicate the provisioned dashboard inside Grafana to make changes while keeping the original as a template. +- Update Grafana provisioning (`grafana/configmap-provisioning.yaml`) to point to alternate folders or add new providers. +- Add additional dashboards by extending `grafana/configmap-dashboard.yaml` or mounting a different ConfigMap. +- Incorporate Kubernetes cluster metrics (CPU/memory) by adding another datasource or deploying kube-state-metrics + node exporters. + +### 8. Best Practices + +#### Resource Sizing + +- Prometheus: increase CPU/memory with higher scrape cardinality or retention > 15 days. +- Grafana: start with `500m` CPU / `1Gi` RAM; scale replicas horizontally when concurrent viewers exceed a few dozen. + +#### Storage + +- Use SSD-backed storage classes for Prometheus when retention/window is large. +- Increase `prometheus/pvc.yaml` (default 20Gi) and `grafana/pvc.yaml` (default 10Gi) to match retention requirements. +- Enable volume snapshots or backups for dashboards and alert history. + +#### Security + +- Replace the demo `grafana-admin` secret with credentials stored in your preferred secret manager. +- Restrict ingress access with network policies, OAuth proxies, or SSO integrations. +- Enable Grafana role-based access control and API keys for automation. +- Scope Prometheus RBAC to only the namespaces you need. If metrics run in multiple namespaces, list them in the scrape config. + +#### Maintenance + +- Monitor Prometheus disk usage; prune retention or scale PVC before it fills up. +- Back up Grafana dashboards or store them in Git (already done through this ConfigMap). +- Roll upgrades separately: update Prometheus and Grafana images via `kustomization.yaml` patches. +- Consider adopting the Prometheus Operator (`ServiceMonitor` + `PodMonitor`) if you already run kube-prometheus-stack. A sample `ServiceMonitor` is in `website/docs/tutorials/observability/observability.md`. + +### 9. Troubleshooting + +| Symptom | Checks | Fix | +|---------|--------|-----| +| Prometheus target **DOWN** | `kubectl get endpoints semantic-router-metrics -n vllm-semantic-router-system` | Ensure the Semantic Router deployment is running and the service labels match `app=semantic-router`, `service=metrics` | +| Grafana dashboard empty | **Configuration → Data Sources** | Confirm Prometheus datasource URL resolves and the Prometheus service is reachable | +| Login fails | `kubectl get secret grafana-admin -o yaml` | Update the secret to match the credentials you expect | +| PVC Pending | `kubectl describe pvc prometheus-data` | Provide a storage class via `storageClassName`, or provision storage manually | +| Ingress 404 | `kubectl describe ingress grafana` | Update hostnames, TLS secrets, and ensure ingress controller is installed | + +### 10. Next Steps + +- Configure alerts for critical metrics (Prometheus alerting rules + Alertmanager) +- Add log aggregation (Loki, Elasticsearch, or Cloud-native logging) +- Automate stack deployment through CI/CD pipelines using `kubectl apply -k` + +With this observability stack in place, you can track Semantic Router health, routing accuracy, latency distributions, and usage trends across any Kubernetes environment. ## 4. 
Key Metrics (Sample) From 9405f476851406bc2d0a52bb5999f01a8e951447 Mon Sep 17 00:00:00 2001 From: JaredforReal Date: Tue, 30 Sep 2025 19:35:50 +0800 Subject: [PATCH 3/6] get rig of redudent part in doc Signed-off-by: JaredforReal --- .../tutorials/observability/observability.md | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/website/docs/tutorials/observability/observability.md b/website/docs/tutorials/observability/observability.md index 82756cca..e8b5168c 100644 --- a/website/docs/tutorials/observability/observability.md +++ b/website/docs/tutorials/observability/observability.md @@ -233,24 +233,6 @@ You should see `prometheus-...` and `grafana-...` pods in `Running` state. - Roll upgrades separately: update Prometheus and Grafana images via `kustomization.yaml` patches. - Consider adopting the Prometheus Operator (`ServiceMonitor` + `PodMonitor`) if you already run kube-prometheus-stack. A sample `ServiceMonitor` is in `website/docs/tutorials/observability/observability.md`. -### 9. Troubleshooting - -| Symptom | Checks | Fix | -|---------|--------|-----| -| Prometheus target **DOWN** | `kubectl get endpoints semantic-router-metrics -n vllm-semantic-router-system` | Ensure the Semantic Router deployment is running and the service labels match `app=semantic-router`, `service=metrics` | -| Grafana dashboard empty | **Configuration → Data Sources** | Confirm Prometheus datasource URL resolves and the Prometheus service is reachable | -| Login fails | `kubectl get secret grafana-admin -o yaml` | Update the secret to match the credentials you expect | -| PVC Pending | `kubectl describe pvc prometheus-data` | Provide a storage class via `storageClassName`, or provision storage manually | -| Ingress 404 | `kubectl describe ingress grafana` | Update hostnames, TLS secrets, and ensure ingress controller is installed | - -### 10. Next Steps - -- Configure alerts for critical metrics (Prometheus alerting rules + Alertmanager) -- Add log aggregation (Loki, Elasticsearch, or Cloud-native logging) -- Automate stack deployment through CI/CD pipelines using `kubectl apply -k` - -With this observability stack in place, you can track Semantic Router health, routing accuracy, latency distributions, and usage trends across any Kubernetes environment. - ## 4. Key Metrics (Sample) | Metric | Type | Description | From 5249c10c9e701c40d697a012b2104468dd987e36 Mon Sep 17 00:00:00 2001 From: JaredforReal Date: Wed, 1 Oct 2025 11:41:52 +0800 Subject: [PATCH 4/6] add comments of 472 and 65534 Signed-off-by: JaredforReal --- deploy/kubernetes/observability/grafana/deployment.yaml | 2 +- deploy/kubernetes/observability/prometheus/deployment.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/deploy/kubernetes/observability/grafana/deployment.yaml b/deploy/kubernetes/observability/grafana/deployment.yaml index 0b3bfdcc..e69f111c 100644 --- a/deploy/kubernetes/observability/grafana/deployment.yaml +++ b/deploy/kubernetes/observability/grafana/deployment.yaml @@ -15,7 +15,7 @@ spec: app: grafana spec: securityContext: - runAsNonRoot: true + # Run as non-root user 472 (grafana) and set fsGroup for volume permissions. 
runAsUser: 472 fsGroup: 472 containers: diff --git a/deploy/kubernetes/observability/prometheus/deployment.yaml b/deploy/kubernetes/observability/prometheus/deployment.yaml index a9815d01..ef5e1653 100644 --- a/deploy/kubernetes/observability/prometheus/deployment.yaml +++ b/deploy/kubernetes/observability/prometheus/deployment.yaml @@ -17,6 +17,7 @@ spec: serviceAccountName: prometheus securityContext: runAsNonRoot: true + # Run as user 'nobody' and group 'nobody' for enhanced security runAsUser: 65534 fsGroup: 65534 containers: From 496a8bd2dc818170f691dfc792892dda9ca38ef6 Mon Sep 17 00:00:00 2001 From: JaredforReal Date: Wed, 1 Oct 2025 11:49:17 +0800 Subject: [PATCH 5/6] add network tips of k8s Signed-off-by: JaredforReal --- website/docs/troubleshooting/network-tips.md | 31 +++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/website/docs/troubleshooting/network-tips.md b/website/docs/troubleshooting/network-tips.md index 88610311..4820cc31 100644 --- a/website/docs/troubleshooting/network-tips.md +++ b/website/docs/troubleshooting/network-tips.md @@ -174,7 +174,36 @@ docker compose -f docker-compose.yml -f docker-compose.override.yml up -d docker compose -f docker-compose.yml -f docker-compose.override.yml --profile testing up -d ``` -## 5. Troubleshooting +## 5. Kubernetes clusters with limited egress + +Container runtimes on Kubernetes nodes do not automatically reuse the host Docker daemon settings. When registries are slow or blocked, pods can sit in `ImagePullBackOff`. Pick one or combine several of these mitigations: + +### 5.1 Configure containerd or CRI mirrors + +- For clusters backed by containerd (Kind, k3s, kubeadm), edit `/etc/containerd/config.toml` or use Kind’s `containerdConfigPatches` to add regional mirror endpoints for registries such as `docker.io`, `ghcr.io`, or `quay.io`. +- Restart containerd and kubelet after changes so the new mirrors take effect. +- Avoid pointing mirrors to loopback proxies unless every node can reach that proxy address. + +### 5.2 Preload or sideload images + +- Build required images locally, then push them into the cluster runtime. For Kind, run `kind load docker-image --name `; for other clusters, use `crictl pull` or `ctr -n k8s.io images import` on each node. +- Patch deployments to set `imagePullPolicy: IfNotPresent` when you know the image already exists on the node. + +### 5.3 Publish to an accessible registry + +- Tag and push images to a registry that is reachable from the cluster (cloud provider registry, privately hosted Harbor, etc.). +- Update your `kustomization.yaml` or Helm values with the new image name, and configure `imagePullSecrets` if the registry requires authentication. + +### 5.4 Run a local pull-through cache + +- Start a registry proxy (`registry:2` or vendor-specific cache) inside the same network, configure it as a mirror in containerd, and regularly warm it with the images you need. + +### 5.5 Verify after adjustments + +- Use `kubectl describe pod ` or `kubectl get events` to confirm pull errors disappear. +- Check that services such as `semantic-router-metrics` now expose endpoints and respond via port-forward (`kubectl port-forward svc/ :`). + +## 6. Troubleshooting - Go modules still time out: - Verify `GOPROXY` and `GOSUMDB` are present in the go-builder stage logs. 
From 790c919897b900b1695af33ed0588ff436bb3056 Mon Sep 17 00:00:00 2001 From: JaredforReal Date: Wed, 1 Oct 2025 12:22:04 +0800 Subject: [PATCH 6/6] update uid in dashboard Signed-off-by: JaredforReal --- deploy/llm-router-dashboard.json | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/deploy/llm-router-dashboard.json b/deploy/llm-router-dashboard.json index 350ebf84..4abc5e51 100644 --- a/deploy/llm-router-dashboard.json +++ b/deploy/llm-router-dashboard.json @@ -30,7 +30,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -90,7 +90,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", @@ -110,7 +110,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -193,7 +193,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "editorMode": "code", "expr": "sum(rate(llm_model_completion_tokens_total[5m])) by (model)", @@ -208,7 +208,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -291,7 +291,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "editorMode": "code", "expr": "sum(rate(llm_model_routing_modifications_total[5m])) by (source_model, target_model)", @@ -307,7 +307,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -394,7 +394,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "editorMode": "code", "expr": "histogram_quantile(0.95, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))", @@ -409,7 +409,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -496,7 +496,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "editorMode": "code", "expr": "histogram_quantile(0.95, sum(rate(llm_model_ttft_seconds_bucket[5m])) by (le, model))", @@ -511,7 +511,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "fieldConfig": { "defaults": { @@ -594,7 +594,7 @@ { "datasource": { "type": "prometheus", - "uid": "febzoy4cplt6oe" + "uid": "prometheus" }, "editorMode": "code", "expr": "histogram_quantile(0.95, sum(rate(llm_model_tpot_seconds_bucket[5m])) by (le, model))", @@ -618,7 +618,7 @@ { "current": { "text": "prometheus", - "value": "febzoy4cplt6oe" + "value": "prometheus" }, "includeAll": false, "name": "DS_PROMETHEUS", @@ -640,4 +640,4 @@ "uid": "llm-router-metrics", "version": 14, "weekStart": "" -} +} \ No newline at end of file