diff --git a/deploy/docker-compose/README.md b/deploy/docker-compose/README.md index c226bdaf..09c9ff5f 100644 --- a/deploy/docker-compose/README.md +++ b/deploy/docker-compose/README.md @@ -35,7 +35,20 @@ Example mappings: ## Profiles - `testing` : enables `mock-vllm` and `llm-katan` -- `llm-katan` : enables only `llm-katan` +- `llm-katan` : only `llm-katan` + +## Services and Ports + +These host ports are exposed when you bring the stack up: + +- Dashboard: http://localhost:8700 (Semantic Router Dashboard) +- Envoy proxy: http://localhost:8801 +- Envoy admin: http://localhost:19000 +- Grafana: http://localhost:3000 (admin/admin) +- Prometheus: http://localhost:9090 +- Open WebUI: http://localhost:3001 +- Mock vLLM (testing profile): http://localhost:8000 +- LLM Katan (testing/llm-katan profiles): http://localhost:8002 ## Quick Start @@ -71,6 +84,8 @@ docker compose -f deploy/docker-compose/docker-compose.yml --profile testing up docker compose -f deploy/docker-compose/docker-compose.yml down ``` +After the stack is healthy, open the Dashboard at http://localhost:8700. + ## Overrides You can place a `docker-compose.override.yml` at repo root and combine: @@ -130,18 +145,3 @@ All services join the `semantic-network` bridge network with a fixed subnet to m - Local observability only: `tools/observability/docker-compose.obs.yml` - Tracing stack: `tools/tracing/docker-compose.tracing.yaml` - -## Related Stacks - -- Local observability only: `tools/observability/docker-compose.obs.yml` -- Tracing stack (standalone, dev): `tools/tracing/docker-compose.tracing.yaml` - -## Tracing & Grafana - -- Jaeger UI: http://localhost:16686 -- Grafana: http://localhost:3000 (admin/admin) - - Prometheus datasource (default) for metrics - - Jaeger datasource for exploring traces (search service `vllm-semantic-router`) - -By default, the router container uses `config/config.tracing.yaml` (enabled tracing, exporter to Jaeger). -Override with `CONFIG_FILE=/app/config/config.yaml` if you don’t want tracing. diff --git a/deploy/kubernetes/README.md b/deploy/kubernetes/README.md index 175763cd..d45dd022 100644 --- a/deploy/kubernetes/README.md +++ b/deploy/kubernetes/README.md @@ -1,6 +1,6 @@ # Semantic Router Kubernetes Deployment -This directory contains Kubernetes manifests for deploying the Semantic Router using Kustomize. +Kustomize manifests for deploying the Semantic Router and its observability stack (Prometheus, Grafana, Dashboard, optional Open WebUI + Pipelines) on Kubernetes. 
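+
+To sanity-check what Kustomize will render before applying anything, you can build the manifests locally. This is a quick preview sketch (it assumes a recent `kubectl` with built-in Kustomize support; a standalone `kustomize build` works the same way):
+
+```bash
+# Render the core manifests without applying them
+kubectl kustomize deploy/kubernetes/
+
+# Render the observability stack manifests (see below)
+kubectl kustomize deploy/kubernetes/observability/
+```
+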
 ## Architecture
 
@@ -12,8 +12,9 @@ The deployment consists of:
 - **Init Container**: Downloads/copies model files to persistent volume
 - **Main Container**: Runs the semantic router service
 - **Services**:
-  - Main service exposing gRPC port (50051), Classification API (8080), and metrics port (9190)
-  - Separate metrics service for monitoring
+  - Main service exposing gRPC (50051), Classification API (8080), and metrics (9190)
+  - Separate metrics service for monitoring (`semantic-router-metrics`)
+  - Observability services (Grafana, Prometheus, Dashboard, optional Open WebUI)
 
 ## Ports
 
@@ -23,17 +24,40 @@ The deployment consists of:
 
 ## Quick Start
 
-### Standard Kubernetes Deployment
+### Deploy Core (Router)
 
 ```bash
 kubectl apply -k deploy/kubernetes/
 
 # Check deployment status
-kubectl get pods -l app=semantic-router -n semantic-router
-kubectl get services -l app=semantic-router -n semantic-router
+kubectl get pods -l app=semantic-router -n vllm-semantic-router-system
+kubectl get services -l app=semantic-router -n vllm-semantic-router-system
 
 # View logs
-kubectl logs -l app=semantic-router -n semantic-router -f
+kubectl logs -l app=semantic-router -n vllm-semantic-router-system -f
+```
+
+### Add Observability (Prometheus + Grafana + Dashboard + Playground)
+
+```bash
+kubectl apply -k deploy/kubernetes/observability/
+```
+
+Port-forward to UIs (local dev):
+
+```bash
+kubectl port-forward -n vllm-semantic-router-system svc/prometheus 9090:9090
+kubectl port-forward -n vllm-semantic-router-system svc/grafana 3000:3000
+kubectl port-forward -n vllm-semantic-router-system svc/semantic-router-dashboard 8700:80
+kubectl port-forward -n vllm-semantic-router-system svc/openwebui 3001:8080
+```
+
+Then open:
+
+- Prometheus → http://localhost:9090
+- Grafana → http://localhost:3000
+- Dashboard → http://localhost:8700
+- Open WebUI (Playground) → http://localhost:3001
+
-```
 
 ### Kind (Kubernetes in Docker) Deployment
 
@@ -86,20 +110,20 @@ kubectl wait --for=condition=Ready nodes --all --timeout=300s
 kubectl apply -k deploy/kubernetes/
 
 # Wait for deployment to be ready
-kubectl wait --for=condition=Available deployment/semantic-router -n semantic-router --timeout=600s
+kubectl wait --for=condition=Available deployment/semantic-router -n vllm-semantic-router-system --timeout=600s
 ```
 
 **Step 3: Check deployment status**
 
 ```bash
 # Check pods
-kubectl get pods -n semantic-router -o wide
+kubectl get pods -n vllm-semantic-router-system -o wide
 
 # Check services
-kubectl get services -n semantic-router
+kubectl get services -n vllm-semantic-router-system
 
 # View logs
-kubectl logs -l app=semantic-router -n semantic-router -f
+kubectl logs -l app=semantic-router -n vllm-semantic-router-system -f
 ```
 
 #### Resource Requirements for Kind
 
@@ -131,19 +155,30 @@ make port-forward-grpc
 
 # Access metrics
 make port-forward-metrics
+
+# Access Dashboard / Grafana / Open WebUI
+kubectl port-forward -n vllm-semantic-router-system svc/semantic-router-dashboard 8700:80
+kubectl port-forward -n vllm-semantic-router-system svc/grafana 3000:3000
+kubectl port-forward -n vllm-semantic-router-system svc/openwebui 3001:8080
 ```
 
 Or using kubectl directly:
 
 ```bash
 # Access Classification API (HTTP REST)
-kubectl port-forward -n semantic-router svc/semantic-router 8080:8080
+kubectl port-forward -n vllm-semantic-router-system svc/semantic-router 8080:8080
 
 # Access gRPC API
-kubectl port-forward -n semantic-router svc/semantic-router 50051:50051
+kubectl port-forward -n vllm-semantic-router-system svc/semantic-router 50051:50051
 
 # 
Access metrics -kubectl port-forward -n semantic-router svc/semantic-router-metrics 9190:9190 +kubectl port-forward -n vllm-semantic-router-system svc/semantic-router-metrics 9190:9190 + +# Access Prometheus/Grafana/Dashboard/Open WebUI +kubectl port-forward -n vllm-semantic-router-system svc/prometheus 9090:9090 +kubectl port-forward -n vllm-semantic-router-system svc/grafana 3000:3000 +kubectl port-forward -n vllm-semantic-router-system svc/semantic-router-dashboard 8700:80 +kubectl port-forward -n vllm-semantic-router-system svc/openwebui 3001:8080 ``` #### Testing the Deployment @@ -313,7 +348,10 @@ Edit the `resources` section in `deployment.yaml` accordingly. - `namespace.yaml` - Dedicated namespace for the application - `config.yaml` - Application configuration - `tools_db.json` - Tools database for semantic routing -- `kustomization.yaml` - Kustomize configuration for easy deployment +- `kustomization.yaml` - Kustomize configuration for core deployment +- `observability/` - Prometheus, Grafana, Dashboard, optional Open WebUI + Pipelines (with its own `kustomization.yaml`) + +For detailed observability setup and screenshots, see `deploy/kubernetes/observability/README.md`. ### Development Tools diff --git a/deploy/kubernetes/observability/README.md b/deploy/kubernetes/observability/README.md index 640621ce..e5b7e41a 100644 --- a/deploy/kubernetes/observability/README.md +++ b/deploy/kubernetes/observability/README.md @@ -10,6 +10,9 @@ This guide adds a production-ready Prometheus + Grafana stack to the existing Se |--------------|---------|-----------| | Prometheus | Scrapes Semantic Router metrics and stores them with persistent retention | `prometheus/` (`rbac.yaml`, `configmap.yaml`, `deployment.yaml`, `pvc.yaml`, `service.yaml`)| | Grafana | Visualizes metrics using the bundled LLM Router dashboard and a pre-configured Prometheus datasource | `grafana/` (`secret.yaml`, `configmap-*.yaml`, `deployment.yaml`, `pvc.yaml`, `service.yaml`)| +| Dashboard | Unified UI that links Router, Prometheus, and embeds Grafana; reads Router config | `dashboard/` (`configmap.yaml`, `deployment.yaml`, `service.yaml`)| +| Open WebUI | Playground UI for interacting with the router via a Manifold Pipeline | `openwebui/` (`deployment.yaml`, `service.yaml`)| +| Pipelines | Executes the `vllm_semantic_router_pipe.py` manifold for Open WebUI | `pipelines/deployment.yaml` (includes a ConfigMap with the pipeline code) | | Ingress (optional) | Exposes the UIs outside the cluster | `ingress.yaml`| | Dashboard provisioning | Automatically loads `deploy/llm-router-dashboard.json` into Grafana | `grafana/configmap-dashboard.yaml`| @@ -110,7 +113,7 @@ Verify pods: kubectl get pods -n vllm-semantic-router-system ``` -You should see `prometheus-...` and `grafana-...` pods in `Running` state. +You should see `prometheus-...`, `grafana-...`, and `semantic-router-dashboard-...` pods in `Running` state. ### 5.3. Integration with the core deployment @@ -133,9 +136,11 @@ You should see `prometheus-...` and `grafana-...` pods in `Running` state. 
```bash kubectl port-forward svc/prometheus 9090:9090 -n vllm-semantic-router-system kubectl port-forward svc/grafana 3000:3000 -n vllm-semantic-router-system + kubectl port-forward svc/semantic-router-dashboard 8700:80 -n vllm-semantic-router-system + kubectl port-forward svc/openwebui 3001:8080 -n vllm-semantic-router-system ``` - Prometheus → http://localhost:9090, Grafana → http://localhost:3000 + Prometheus → http://localhost:9090, Grafana → http://localhost:3000, Dashboard → http://localhost:8700, Open WebUI → http://localhost:3001 - **Ingress (production)** – Customize `ingress.yaml` with real domains, TLS secrets, and your ingress class before applying. Replace `*.example.com` and configure HTTPS certificates via cert-manager or your provider. @@ -145,6 +150,7 @@ You should see `prometheus-...` and `grafana-...` pods in `Running` state. 2. Query `rate(llm_model_completion_tokens_total[5m])` – should return data after traffic. 3. Open Grafana, log in with the admin credentials, and confirm the **LLM Router Metrics** dashboard exists under the *Semantic Router* folder. 4. Generate traffic to Semantic Router (classification or routing requests). Key panels should start populating: +5.Playground: open Open WebUI (port-forward or ingress), select the `vllm-semantic-router/auto` model (from the Manifold pipeline), and send prompts. The Dashboard Monitoring page should reflect traffic, and the pipeline will display VSR decision headers inline. - Prompt Category counts - Token usage rate per model - Routing modifications between models diff --git a/deploy/kubernetes/observability/dashboard/configmap.yaml b/deploy/kubernetes/observability/dashboard/configmap.yaml new file mode 100644 index 00000000..5c31dc86 --- /dev/null +++ b/deploy/kubernetes/observability/dashboard/configmap.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: semantic-router-dashboard-config + labels: + app: semantic-router-dashboard + app.kubernetes.io/part-of: semantic-router + app.kubernetes.io/component: observability +data: + TARGET_GRAFANA_URL: http://grafana.vllm-semantic-router-system.svc.cluster.local:3000 + TARGET_PROMETHEUS_URL: http://prometheus.vllm-semantic-router-system.svc.cluster.local:9090 + TARGET_ROUTER_API_URL: http://semantic-router.vllm-semantic-router-system.svc.cluster.local:8080 + TARGET_ROUTER_METRICS_URL: http://semantic-router-metrics.vllm-semantic-router-system.svc.cluster.local:9190/metrics + TARGET_OPENWEBUI_URL: http://openwebui.vllm-semantic-router-system.svc.cluster.local:8080 diff --git a/deploy/kubernetes/observability/dashboard/deployment.yaml b/deploy/kubernetes/observability/dashboard/deployment.yaml new file mode 100644 index 00000000..ac021aa4 --- /dev/null +++ b/deploy/kubernetes/observability/dashboard/deployment.yaml @@ -0,0 +1,60 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: semantic-router-dashboard + labels: + app: semantic-router-dashboard +spec: + replicas: 1 + selector: + matchLabels: + app: semantic-router-dashboard + template: + metadata: + labels: + app: semantic-router-dashboard + spec: + containers: + - name: dashboard + image: ghcr.io/vllm-project/semantic-router/dashboard:latest + imagePullPolicy: IfNotPresent + args: ["-port=8700", "-static=/app/frontend", "-config=/app/config/config.yaml"] + env: + - name: TARGET_GRAFANA_URL + valueFrom: + configMapKeyRef: + name: semantic-router-dashboard-config + key: TARGET_GRAFANA_URL + - name: TARGET_PROMETHEUS_URL + valueFrom: + configMapKeyRef: + name: semantic-router-dashboard-config 
+ key: TARGET_PROMETHEUS_URL + - name: TARGET_ROUTER_API_URL + valueFrom: + configMapKeyRef: + name: semantic-router-dashboard-config + key: TARGET_ROUTER_API_URL + - name: TARGET_ROUTER_METRICS_URL + valueFrom: + configMapKeyRef: + name: semantic-router-dashboard-config + key: TARGET_ROUTER_METRICS_URL + - name: TARGET_OPENWEBUI_URL + valueFrom: + configMapKeyRef: + name: semantic-router-dashboard-config + key: TARGET_OPENWEBUI_URL + - name: ROUTER_CONFIG_PATH + value: /app/config/config.yaml + ports: + - name: http + containerPort: 8700 + volumeMounts: + - name: router-config + mountPath: /app/config + readOnly: true + volumes: + - name: router-config + configMap: + name: semantic-router-config diff --git a/deploy/kubernetes/observability/dashboard/service.yaml b/deploy/kubernetes/observability/dashboard/service.yaml new file mode 100644 index 00000000..1f94ee37 --- /dev/null +++ b/deploy/kubernetes/observability/dashboard/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: semantic-router-dashboard + labels: + app: semantic-router-dashboard +spec: + type: ClusterIP + selector: + app: semantic-router-dashboard + ports: + - name: http + port: 80 + targetPort: http diff --git a/deploy/kubernetes/observability/ingress.yaml b/deploy/kubernetes/observability/ingress.yaml index 7ef2cdf4..16c2a34d 100644 --- a/deploy/kubernetes/observability/ingress.yaml +++ b/deploy/kubernetes/observability/ingress.yaml @@ -51,3 +51,59 @@ spec: name: prometheus port: name: http + +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: dashboard + labels: + app: semantic-router-dashboard + annotations: + kubernetes.io/ingress.class: nginx + nginx.ingress.kubernetes.io/backend-protocol: HTTP + nginx.ingress.kubernetes.io/ssl-redirect: "true" +spec: + tls: + - hosts: + - dashboard.example.com + secretName: dashboard-tls + rules: + - host: dashboard.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: semantic-router-dashboard + port: + name: http + +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: openwebui + labels: + app: openwebui + annotations: + kubernetes.io/ingress.class: nginx + nginx.ingress.kubernetes.io/backend-protocol: HTTP + nginx.ingress.kubernetes.io/ssl-redirect: "true" +spec: + tls: + - hosts: + - openwebui.example.com + secretName: openwebui-tls + rules: + - host: openwebui.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: openwebui + port: + name: http diff --git a/deploy/kubernetes/observability/kustomization.yaml b/deploy/kubernetes/observability/kustomization.yaml index d3ec5569..adb77505 100644 --- a/deploy/kubernetes/observability/kustomization.yaml +++ b/deploy/kubernetes/observability/kustomization.yaml @@ -19,4 +19,18 @@ resources: - grafana/configmap-dashboard.yaml - grafana/deployment.yaml - grafana/service.yaml + - dashboard/configmap.yaml + - dashboard/deployment.yaml + - dashboard/service.yaml + - pipelines/deployment.yaml + - openwebui/deployment.yaml - ingress.yaml + +# Generate ConfigMaps from source files +generatorOptions: + disableNameSuffixHash: true + +configMapGenerator: + - name: openwebui-pipelines-config + files: + - vllm_semantic_router_pipe.py=pipelines/vllm_semantic_router_pipe.py diff --git a/deploy/kubernetes/observability/openwebui/deployment.yaml b/deploy/kubernetes/observability/openwebui/deployment.yaml new file mode 100644 index 00000000..dadf955c --- /dev/null +++ 
b/deploy/kubernetes/observability/openwebui/deployment.yaml @@ -0,0 +1,58 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: openwebui + labels: + app: openwebui +spec: + replicas: 1 + selector: + matchLabels: + app: openwebui + template: + metadata: + labels: + app: openwebui + spec: + containers: + - name: openwebui + image: ghcr.io/open-webui/open-webui:main + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 8080 + env: + - name: WEBUI_NAME + value: "Open WebUI" + - name: OPENAI_API_BASE_URL + value: "http://openwebui-pipelines:9099" + - name: OPENAI_API_KEY + value: "0p3n-w3bu!" + volumeMounts: + - name: data + mountPath: /app/backend/data + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 1Gi + volumes: + - name: data + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: openwebui + labels: + app: openwebui +spec: + selector: + app: openwebui + ports: + - name: http + port: 8080 + targetPort: http + type: ClusterIP diff --git a/deploy/kubernetes/observability/pipelines/deployment.yaml b/deploy/kubernetes/observability/pipelines/deployment.yaml new file mode 100644 index 00000000..8a39c3a6 --- /dev/null +++ b/deploy/kubernetes/observability/pipelines/deployment.yaml @@ -0,0 +1,40 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: openwebui-pipelines + labels: + app: openwebui-pipelines +spec: + replicas: 1 + selector: + matchLabels: + app: openwebui-pipelines + template: + metadata: + labels: + app: openwebui-pipelines + spec: + containers: + - name: pipelines + image: ghcr.io/open-webui/pipelines:main + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 9099 + env: + - name: PYTHONUNBUFFERED + value: "1" + volumeMounts: + - name: pipelines-volume + mountPath: /app/pipelines + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi + volumes: + - name: pipelines-volume + configMap: + name: openwebui-pipelines-config diff --git a/deploy/kubernetes/observability/pipelines/vllm_semantic_router_pipe.py b/deploy/kubernetes/observability/pipelines/vllm_semantic_router_pipe.py new file mode 100644 index 00000000..217c5c39 --- /dev/null +++ b/deploy/kubernetes/observability/pipelines/vllm_semantic_router_pipe.py @@ -0,0 +1,648 @@ +""" +title: vLLM Semantic Router Pipe +author: open-webui +date: 2025-10-01 +version: 1.1 +license: Apache-2.0 +description: A pipe for proxying requests to vLLM Semantic Router and displaying decision headers (category, reasoning, model, injection) and security alerts (PII violations, jailbreak detection). 
+requirements: requests, pydantic +""" + +import json +from typing import Generator, Iterator, List, Union + +import requests +from pydantic import BaseModel + + +class Pipeline: + class Valves(BaseModel): + # vLLM Semantic Router endpoint URL + vsr_base_url: str = "http://localhost:8000" + + # API key for authentication (if required) + api_key: str = "" + + # Enable/disable displaying VSR headers in the UI + show_vsr_info: bool = True + + # Enable/disable logging VSR headers to console + log_vsr_info: bool = True + + # Enable/disable debug logging + debug: bool = True + + # Request timeout in seconds + timeout: int = 300 + + def __init__(self): + # Important: type should be "manifold" instead of "pipe" + # manifold type Pipeline will be displayed in the model list + self.type = "manifold" + self.id = "vllm_semantic_router" + self.name = "vllm-semantic-router/" + + # Initialize valves + self.valves = self.Valves( + **{ + "vsr_base_url": "http://localhost:8000", + "api_key": "", + "show_vsr_info": True, + "log_vsr_info": True, + "debug": True, + "timeout": 300, + } + ) + + # Store VSR headers from the last request + self.last_vsr_headers = {} + + print("=" * 80) + print("🚀 vLLM Semantic Router Pipe - Initialization") + print("=" * 80) + print(f" Type: {self.type}") + print(f" ID: {self.id}") + print(f" Name: {self.name}") + print(f" VSR Base URL: {self.valves.vsr_base_url}") + print(f" Debug Mode: {self.valves.debug}") + print("=" * 80) + + async def on_startup(self): + print("\n" + "=" * 80) + print("🔥 on_startup: vLLM Semantic Router Pipe initialized") + print("=" * 80) + print(f" VSR Base URL: {self.valves.vsr_base_url}") + print(f" API Key: {'***' if self.valves.api_key else '(not set)'}") + print(f" Show VSR Info: {self.valves.show_vsr_info}") + print(f" Log VSR Info: {self.valves.log_vsr_info}") + print(f" Debug: {self.valves.debug}") + print(f" Timeout: {self.valves.timeout}s") + + # Test if pipelines() is being called + pipes_list = self.pipelines() + print(f"\n📋 Available Pipes/Models:") + for pipe in pipes_list: + print(f" - ID: {pipe['id']}") + print(f" Name: {pipe['name']}") + print("=" * 80 + "\n") + + async def on_shutdown(self): + print("\n" + "=" * 80) + print("🛑 on_shutdown: vLLM Semantic Router Pipe") + print("=" * 80 + "\n") + + async def on_valves_updated(self): + print("\n" + "=" * 80) + print("⚙️ on_valves_updated: vLLM Semantic Router Pipe valves updated") + print("=" * 80) + print(f" VSR Base URL: {self.valves.vsr_base_url}") + print(f" API Key: {'***' if self.valves.api_key else '(not set)'}") + print(f" Show VSR Info: {self.valves.show_vsr_info}") + print(f" Log VSR Info: {self.valves.log_vsr_info}") + print(f" Debug: {self.valves.debug}") + print(f" Timeout: {self.valves.timeout}s") + print("=" * 80 + "\n") + + def pipes(self) -> List[dict]: + """ + Deprecated: manifold type uses pipelines() method instead of pipes() + The returned model list will be displayed in Open WebUI's model selector + """ + return self.pipelines() + + def pipelines(self) -> List[dict]: + """ + Important: manifold type uses pipelines() method instead of pipes() + The returned model list will be displayed in Open WebUI's model selector + """ + pipelines_list = [ + { + "id": "vllm-semantic-router-auto", + "name": "vllm-semantic-router/auto", + } + ] + + if self.valves.debug: + print("\n" + "=" * 80) + print("📞 pipelines() method called - Returning available models") + print("=" * 80) + for pipeline in pipelines_list: + print(f" - ID: {pipeline['id']}") + print(f" Name: {pipeline['name']}") + 
print("=" * 80 + "\n") + + return pipelines_list + + def _extract_vsr_headers(self, headers: dict) -> dict: + """ + Extract VSR-specific headers from response headers. + """ + vsr_headers = {} + + # List of VSR headers to extract + vsr_header_keys = [ + # Decision headers + "x-vsr-selected-category", + "x-vsr-selected-reasoning", + "x-vsr-selected-model", + "x-vsr-injected-system-prompt", + "x-vsr-cache-hit", + # Security headers + "x-vsr-pii-violation", + "x-vsr-jailbreak-blocked", + "x-vsr-jailbreak-type", + "x-vsr-jailbreak-confidence", + ] + + # Extract headers (case-insensitive) + for key in vsr_header_keys: + # Try lowercase + value = headers.get(key) + if not value: + # Try uppercase + value = headers.get(key.upper()) + if not value: + # Try title case + value = headers.get(key.title()) + + if value: + vsr_headers[key] = value + + return vsr_headers + + def _format_vsr_info(self, vsr_headers: dict, position: str = "prefix") -> str: + """ + Format VSR headers into a readable message for display. + Shows the semantic router's decision chain in 3 stages (multi-line format): + Stage 1: Security Validation + Stage 2: Cache Check + Stage 3: Intelligent Routing + + Args: + vsr_headers: VSR decision headers + position: "prefix" (before response) or "suffix" (after response) + """ + if not vsr_headers: + return "" + + # Build decision chain in stages (multi-line format) + lines = ["**🔀 vLLM Semantic Router - Chain-Of-Thought 🔀**"] + + # ============================================================ + # Stage 1: Security Validation (🛡️) + # ============================================================ + security_parts = [] + + has_jailbreak = vsr_headers.get("x-vsr-jailbreak-blocked") == "true" + has_pii = vsr_headers.get("x-vsr-pii-violation") == "true" + is_blocked = has_jailbreak or has_pii + + # Jailbreak check + if has_jailbreak: + jailbreak_type = vsr_headers.get("x-vsr-jailbreak-type", "unknown") + jailbreak_confidence = vsr_headers.get("x-vsr-jailbreak-confidence", "N/A") + security_parts.append( + f"🚨 *Jailbreak Detected, Confidence: {jailbreak_confidence}*" + ) + else: + security_parts.append("✅ *No Jailbreak*") + + # PII check + if has_pii: + security_parts.append("🚨 *PII Detected*") + else: + security_parts.append("✅ *No PII*") + + # Result + if is_blocked: + security_parts.append("❌ ***BLOCKED***") + else: + security_parts.append("💯 ***Continue***") + + lines.append( + " → 🛡️ ***Stage 1 - Prompt Guard***: " + " → ".join(security_parts) + ) + + # If blocked, stop here + if is_blocked: + result = "\n".join(lines) + if position == "prefix": + return result + "\n\n---\n\n" + else: + return "\n\n---\n\n" + result + + # ============================================================ + # Stage 2: Cache Check (🔥) + # ============================================================ + cache_parts = [] + has_cache_hit = vsr_headers.get("x-vsr-cache-hit") == "true" + + if has_cache_hit: + cache_parts.append("🔥 *HIT*") + cache_parts.append("⚡️ *Retrieve Memory*") + cache_parts.append("💯 ***Fast Response***") + else: + cache_parts.append("🌊 *MISS*") + cache_parts.append("🧠 *Update Memory*") + cache_parts.append("💯 ***Continue***") + + lines.append(" → 🔥 ***Stage 2 - Router Memory***: " + " → ".join(cache_parts)) + + # If cache hit, stop here + if has_cache_hit: + result = "\n".join(lines) + if position == "prefix": + return result + "\n\n---\n\n" + else: + return "\n\n---\n\n" + result + + # ============================================================ + # Stage 3: Intelligent Routing (🧠) + # 
============================================================ + routing_parts = [] + + # Domain + category = vsr_headers.get("x-vsr-selected-category", "").strip() + if not category: + category = "other" + routing_parts.append(f"📂 *{category}*") + + # Reasoning mode + if vsr_headers.get("x-vsr-selected-reasoning"): + reasoning = vsr_headers["x-vsr-selected-reasoning"] + if reasoning == "on": + routing_parts.append("🧠 *Reasoning On*") + else: + routing_parts.append("⚡ *Reasoning Off*") + + # Model + if vsr_headers.get("x-vsr-selected-model"): + model = vsr_headers["x-vsr-selected-model"] + routing_parts.append(f"🥷 *{model}*") + + # Prompt optimization + if vsr_headers.get("x-vsr-injected-system-prompt") == "true": + routing_parts.append("🎯 *Prompt Optimized*") + + routing_parts.append(f"💯 ***Continue***") + + if routing_parts: + lines.append( + " → 🧠 ***Stage 3 - Smart Routing***: " + " → ".join(routing_parts) + ) + + # Combine all lines + result = "\n".join(lines) + + if position == "prefix": + return result + "\n\n---\n\n" + else: + return "\n\n---\n\n" + result + + def _log_vsr_info(self, vsr_headers: dict): + """ + Log VSR information to console. + """ + if not vsr_headers or not self.valves.log_vsr_info: + return + + # Check if there are security violations + has_security_violation = ( + vsr_headers.get("x-vsr-pii-violation") == "true" + or vsr_headers.get("x-vsr-jailbreak-blocked") == "true" + ) + + print("=" * 60) + if has_security_violation: + print("🛡️ SECURITY ALERT & Routing Decision:") + else: + print("vLLM Semantic Router Decision:") + print("=" * 60) + + # Log security violations first + if vsr_headers.get("x-vsr-pii-violation") == "true": + print(" 🚨 PII VIOLATION: Request blocked") + + if vsr_headers.get("x-vsr-jailbreak-blocked") == "true": + print(" 🚨 JAILBREAK BLOCKED: Potential attack detected") + if vsr_headers.get("x-vsr-jailbreak-type"): + print(f" Type: {vsr_headers['x-vsr-jailbreak-type']}") + if vsr_headers.get("x-vsr-jailbreak-confidence"): + print(f" Confidence: {vsr_headers['x-vsr-jailbreak-confidence']}") + + # Log routing decision information + if vsr_headers.get("x-vsr-selected-category"): + print(f" Category: {vsr_headers['x-vsr-selected-category']}") + + if vsr_headers.get("x-vsr-selected-reasoning"): + print(f" Reasoning Mode: {vsr_headers['x-vsr-selected-reasoning']}") + + if vsr_headers.get("x-vsr-selected-model"): + print(f" Selected Model: {vsr_headers['x-vsr-selected-model']}") + + if vsr_headers.get("x-vsr-injected-system-prompt"): + print( + f" System Prompt Injected: {vsr_headers['x-vsr-injected-system-prompt']}" + ) + + if vsr_headers.get("x-vsr-cache-hit"): + cache_hit = vsr_headers["x-vsr-cache-hit"].lower() + print(f" Cache Hit: {cache_hit}") + + print("=" * 60) + + def pipe( + self, user_message: str, model_id: str, messages: List[dict], body: dict + ) -> Union[str, Generator, Iterator]: + """ + Main pipe function that handles the request/response flow. + + Manifold type pipe() method signature: + - user_message: User's last message + - model_id: Selected model ID + - messages: Complete message history + - body: Complete request body + """ + + if self.valves.debug: + print("\n" + "=" * 80) + print("🔄 pipe() method called - Processing request") + print("=" * 80) + print( + f" User message: {user_message[:100]}..." 
+ if len(user_message) > 100 + else f" User message: {user_message}" + ) + print(f" Model ID: {model_id}") + print(f" Model requested: {body.get('model', 'N/A')}") + print(f" Stream mode: {body.get('stream', False)}") + print(f" Messages count: {len(messages)}") + print("=" * 80) + + # Prepare the request to vLLM Semantic Router + url = f"{self.valves.vsr_base_url}/v1/chat/completions" + + if self.valves.debug: + print(f"\n📡 Sending request to: {url}") + + headers = { + "Content-Type": "application/json", + } + + if self.valves.api_key: + headers["Authorization"] = f"Bearer {self.valves.api_key}" + if self.valves.debug: + print(f" Authorization: Bearer ***") + + # Important: Change model in body to "auto" + # VSR backend only accepts model="auto", then automatically selects model based on request content + request_body = body.copy() + original_model = request_body.get("model", "N/A") + request_body["model"] = "auto" + + if self.valves.debug: + print(f"\n🔄 Model mapping:") + print(f" Original model: {original_model}") + print(f" Sending to VSR: auto") + + # Check if streaming is requested + is_streaming = request_body.get("stream", False) + + if self.valves.debug: + print(f" Streaming: {is_streaming}") + print(f" Timeout: {self.valves.timeout}s") + + try: + if self.valves.debug: + print(f"\n🔌 Connecting to vLLM Semantic Router...") + + response = requests.post( + url, + json=request_body, # Use modified request_body + headers=headers, + timeout=self.valves.timeout, + stream=request_body.get("stream", False), + ) + + if self.valves.debug: + print(f"✅ Response received - Status: {response.status_code}") + print(f" Response headers count: {len(response.headers)}") + + # Check for HTTP errors + if response.status_code != 200: + error_msg = f"Error: vLLM Semantic Router returned status {response.status_code}" + if self.valves.debug: + print(f"\n❌ {error_msg}") + print(f" Response text: {response.text[:500]}") + print("=" * 80 + "\n") + return f"{error_msg}: {response.text}" + + # Extract VSR headers from response + vsr_headers = self._extract_vsr_headers(dict(response.headers)) + self.last_vsr_headers = vsr_headers + + if self.valves.debug: + print(f" VSR headers found: {len(vsr_headers)}") + for key, value in vsr_headers.items(): + print(f" {key}: {value}") + + # Print all response headers for debugging + print(f"\n All response headers:") + for key, value in response.headers.items(): + if key.lower().startswith("x-vsr"): + print(f" {key}: {value}") + + # Log VSR information + self._log_vsr_info(vsr_headers) + + if is_streaming: + if self.valves.debug: + print(f"\n📺 Handling streaming response...") + # Handle streaming response + return self._handle_streaming_response(response, vsr_headers) + else: + if self.valves.debug: + print(f"\n📄 Handling non-streaming response...") + print(f" Response status: {response.status_code}") + print(f" Response content length: {len(response.content)}") + print( + f" Response content type: {response.headers.get('content-type', 'unknown')}" + ) + + # Check if response is empty + if not response.content: + error_msg = "Error: Empty response from vLLM Semantic Router" + if self.valves.debug: + print(f"\n❌ {error_msg}") + print("=" * 80 + "\n") + return error_msg + + # Try to parse JSON response + try: + response_data = response.json() + except json.JSONDecodeError as e: + error_msg = ( + f"Error: Invalid JSON response from vLLM Semantic Router" + ) + if self.valves.debug: + print(f"\n❌ {error_msg}") + print(f" JSON error: {str(e)}") + print( + f" Response text 
(first 500 chars): {response.text[:500]}" + ) + print("=" * 80 + "\n") + return f"{error_msg}: {str(e)}" + + if self.valves.debug: + print(f" Response data keys: {list(response_data.keys())}") + if "choices" in response_data: + print(f" Choices count: {len(response_data['choices'])}") + + # Add VSR info to the response if enabled + if self.valves.show_vsr_info and vsr_headers: + vsr_info = self._format_vsr_info(vsr_headers, position="prefix") + + if self.valves.debug: + print( + f" Adding VSR info to response (length: {len(vsr_info)})" + ) + + # Prepend to the assistant's message + if "choices" in response_data and len(response_data["choices"]) > 0: + for choice in response_data["choices"]: + if "message" in choice and "content" in choice["message"]: + choice["message"]["content"] = ( + vsr_info + choice["message"]["content"] + ) + if self.valves.debug: + print(f" ✅ VSR info prepended to response") + + if self.valves.debug: + print(f"\n✅ Request completed successfully") + print("=" * 80 + "\n") + + return response_data + + except requests.exceptions.Timeout: + error_msg = f"Error: Request to vLLM Semantic Router timed out after {self.valves.timeout} seconds" + if self.valves.debug: + print(f"\n❌ {error_msg}") + print("=" * 80 + "\n") + return error_msg + except Exception as e: + error_msg = ( + f"Error: Failed to communicate with vLLM Semantic Router: {str(e)}" + ) + if self.valves.debug: + print(f"\n❌ {error_msg}") + print(f" Exception type: {type(e).__name__}") + print(f" Exception details: {str(e)}") + print("=" * 80 + "\n") + return error_msg + + def _handle_streaming_response( + self, response: requests.Response, vsr_headers: dict + ) -> Generator: + """ + Handle streaming SSE response from vLLM Semantic Router. + Manually parse SSE stream, no need for sseclient-py dependency. + + Strategy: + 1. Add VSR info before the first content chunk (if enabled) + 2. Detect VSR header updates during streaming (via SSE events) + 3. 
Ensure it's only added once + """ + vsr_info_added = False + first_content_chunk = True # Mark whether it's the first content chunk + # Use initial vsr_headers, but may be updated during streaming + current_vsr_headers = vsr_headers.copy() + + if self.valves.debug: + print(f"\n📝 Initial VSR headers:") + for key, value in current_vsr_headers.items(): + print(f" {key}: {value}") + + # Read streaming response line by line + for line in response.iter_lines(decode_unicode=True): + if not line: + continue + + # SSE format: data: {...} + if line.startswith("data: "): + data_str = line[6:].strip() # Remove "data: " prefix + + if data_str == "[DONE]": + yield f"data: [DONE]\n\n" + + if self.valves.debug: + print( + f"✅ Streaming completed, VSR info added: {vsr_info_added}" + ) + else: + try: + chunk_data = json.loads(data_str) + + # Check if chunk contains updated VSR header information + # Some SSE implementations may include updated headers in chunk metadata + if "vsr_headers" in chunk_data: + if self.valves.debug: + print(f"🔄 VSR headers updated in stream:") + for key, value in chunk_data["vsr_headers"].items(): + full_key = ( + f"x-vsr-{key}" + if not key.startswith("x-vsr-") + else key + ) + if current_vsr_headers.get(full_key) != value: + if self.valves.debug: + print( + f" {full_key}: {current_vsr_headers.get(full_key)} → {value}" + ) + current_vsr_headers[full_key] = value + + # Add VSR info before the first content chunk + if ( + first_content_chunk + and self.valves.show_vsr_info + and not vsr_info_added + ): + if ( + "choices" in chunk_data + and len(chunk_data["choices"]) > 0 + ): + choice = chunk_data["choices"][0] + delta = choice.get("delta", {}) + + # Check if there is content (role or content) + if "role" in delta or "content" in delta: + if self.valves.debug: + print( + f"✅ Adding VSR info at first content chunk" + ) + print(f" VSR headers:") + for key, value in current_vsr_headers.items(): + print(f" {key}: {value}") + + # Format VSR info (using prefix mode) + vsr_info = self._format_vsr_info( + current_vsr_headers, position="prefix" + ) + + # Add VSR info before the first content + current_content = delta.get("content", "") + delta["content"] = vsr_info + current_content + chunk_data["choices"][0]["delta"] = delta + vsr_info_added = True + first_content_chunk = False + + # If not the first chunk, mark as False + if "choices" in chunk_data and len(chunk_data["choices"]) > 0: + choice = chunk_data["choices"][0] + delta = choice.get("delta", {}) + if "role" in delta or "content" in delta: + first_content_chunk = False + + yield f"data: {json.dumps(chunk_data)}\n\n" + except json.JSONDecodeError: + # If not valid JSON, pass through as-is + yield f"data: {data_str}\n\n"
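+
+
+# ----------------------------------------------------------------------------
+# Illustrative local smoke test (not used by Open WebUI). It assumes a router
+# endpoint is reachable; point vsr_base_url at your environment, e.g. the
+# docker-compose Envoy listener on http://localhost:8801, then run:
+#   python vllm_semantic_router_pipe.py
+# ----------------------------------------------------------------------------
+if __name__ == "__main__":
+    pipeline = Pipeline()
+    # Assumed endpoint for a local stack; adjust as needed.
+    pipeline.valves.vsr_base_url = "http://localhost:8801"
+
+    prompt = "What is the capital of France?"
+    result = pipeline.pipe(
+        user_message=prompt,
+        model_id="vllm-semantic-router-auto",
+        messages=[{"role": "user", "content": prompt}],
+        body={
+            "model": "vllm-semantic-router-auto",
+            "messages": [{"role": "user", "content": prompt}],
+            "stream": False,
+        },
+    )
+
+    # Non-streaming calls return either the upstream response dict (with the
+    # VSR decision banner prepended when headers are present) or an error string.
+    print(result)
+    print("Last VSR headers:", pipeline.last_vsr_headers)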