diff --git a/Makefile b/Makefile index 4498b285..2b1efa57 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,7 @@ _run: -f tools/make/pre-commit.mk \ -f tools/make/docker.mk \ -f tools/make/kube.mk \ + -f tools/make/observability.mk \ $(MAKECMDGOALS) .PHONY: _run diff --git a/config/grafana/dashboards.yaml b/config/grafana/dashboards.yaml deleted file mode 100644 index f34ddeef..00000000 --- a/config/grafana/dashboards.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: 1 -providers: - - name: LLM Router Dashboards - orgId: 1 - folder: "LLM Router" - type: file - disableDeletion: false - allowUiUpdates: true - options: - path: /etc/grafana/provisioning/dashboards \ No newline at end of file diff --git a/config/grafana/datasource.yaml b/config/grafana/datasource.yaml deleted file mode 100644 index 8d9f9d8f..00000000 --- a/config/grafana/datasource.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: 1 -datasources: - - name: Prometheus - type: prometheus - access: proxy - url: http://prometheus:9090 - isDefault: true \ No newline at end of file diff --git a/config/prometheus.yaml b/config/prometheus.yaml deleted file mode 100644 index f9a7ac37..00000000 --- a/config/prometheus.yaml +++ /dev/null @@ -1,22 +0,0 @@ -global: - scrape_interval: 10s - evaluation_interval: 10s - -scrape_configs: - # Semantic Router - - job_name: semantic-router - metrics_path: /metrics - static_configs: - - targets: ["semantic-router:9190"] - labels: - service: semantic-router - env: dev - - # Optional: Envoy - - job_name: envoy - metrics_path: /stats/prometheus - static_configs: - - targets: ["envoy-proxy:19000"] - labels: - service: envoy - env: dev \ No newline at end of file diff --git a/docker-compose.obs.yml b/docker-compose.obs.yml new file mode 100644 index 00000000..35bde379 --- /dev/null +++ b/docker-compose.obs.yml @@ -0,0 +1,42 @@ +# Local observability stack for monitoring semantic-router running on host +# +# Usage: make obs-local +# Or: docker compose -f docker-compose.obs.yml up +# +# This provides Prometheus and Grafana in Docker with network_mode: host +# to scrape metrics from router running natively on localhost:9190 + +version: '3.8' + +services: + prometheus: + image: prom/prometheus:v2.53.0 + container_name: prometheus-local + network_mode: host + volumes: + - ./tools/observability/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro + - prometheus-local-data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yaml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention.time=15d' + environment: + - ROUTER_TARGET=localhost:9190 + + grafana: + image: grafana/grafana:11.5.1 + container_name: grafana-local + network_mode: host + volumes: + - ./tools/observability/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro + - ./tools/observability/grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro + - ./tools/observability/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro + - grafana-local-data:/var/lib/grafana + environment: + - PROMETHEUS_URL=localhost:9090 + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + +volumes: + prometheus-local-data: + grafana-local-data: diff --git a/docker-compose.yml b/docker-compose.yml index 21f193ed..8b6990cc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -69,10 +69,13 @@ services: image: prom/prometheus:v2.53.0 container_name: prometheus volumes: - - ./config/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro + - 
./tools/observability/prometheus.yaml:/etc/prometheus/prometheus.yaml:ro + - prometheus-data:/prometheus command: - --config.file=/etc/prometheus/prometheus.yaml - --storage.tsdb.retention.time=15d + environment: + - ROUTER_TARGET=semantic-router:9190 ports: - "9090:9090" networks: @@ -84,14 +87,18 @@ services: environment: - GF_SECURITY_ADMIN_USER=admin - GF_SECURITY_ADMIN_PASSWORD=admin + - PROMETHEUS_URL=prometheus:9090 ports: - "3000:3000" volumes: - - ./config/grafana/datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro - - ./config/grafana/dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:ro - - ./deploy/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro + - ./tools/observability/grafana-datasource.yaml:/etc/grafana/provisioning/datasources/datasource.yaml:ro + - ./tools/observability/grafana-dashboard.yaml:/etc/grafana/provisioning/dashboards/dashboard.yaml:ro + - ./tools/observability/llm-router-dashboard.json:/etc/grafana/provisioning/dashboards/llm-router-dashboard.json:ro + - grafana-data:/var/lib/grafana networks: - semantic-network + depends_on: + - prometheus # LLM Katan service for testing llm-katan: @@ -118,3 +125,5 @@ networks: volumes: models-cache: driver: local + prometheus-data: + grafana-data: diff --git a/tools/make/common.mk b/tools/make/common.mk index d34f2dbc..08cf2340 100644 --- a/tools/make/common.mk +++ b/tools/make/common.mk @@ -85,6 +85,16 @@ help: @echo " docs-serve - Serve built documentation" @echo " docs-clean - Clean documentation artifacts" @echo "" + @echo " Observability targets:" + @echo " run-observability - Start observability (alias for obs-local)" + @echo " obs-local - Start observability in LOCAL mode" + @echo " obs-compose - Start observability in COMPOSE mode" + @echo " stop-observability - Stop observability stack" + @echo " open-observability - Open Prometheus and Grafana in browser" + @echo " obs-status - Check observability stack status" + @echo " obs-logs - Show observability logs" + @echo " obs-clean - Remove observability data volumes" + @echo "" @echo " Environment variables:" @echo " CONTAINER_RUNTIME - Container runtime (docker|podman, default: docker)" @echo " CONFIG_FILE - Config file path (default: config/config.yaml)" diff --git a/tools/make/observability.mk b/tools/make/observability.mk new file mode 100644 index 00000000..46ee90eb --- /dev/null +++ b/tools/make/observability.mk @@ -0,0 +1,51 @@ +# ====================== observability.mk ====================== +# = Observability targets for semantic-router monitoring = +# ====================== observability.mk ====================== + +# Observability directories +OBS_CONFIG_DIR = tools/observability +OBS_SCRIPTS_DIR = tools/observability/scripts + +.PHONY: run-observability stop-observability obs-local obs-compose open-observability obs-logs obs-status obs-clean + +## run-observability: Start observability stack (alias for obs-local) +run-observability: obs-local + +## obs-local: Start observability in LOCAL mode (router on host, obs in Docker) +obs-local: + @$(call log, Starting observability in LOCAL mode...) + @$(OBS_SCRIPTS_DIR)/start-observability.sh local + +## obs-compose: Start observability in COMPOSE mode (all services in Docker) +obs-compose: + @$(call log, Starting observability in COMPOSE mode...) + @$(OBS_SCRIPTS_DIR)/start-observability.sh compose + +## stop-observability: Stop and remove observability containers +stop-observability: + @$(call log, Stopping observability stack...) 
+ @$(OBS_SCRIPTS_DIR)/stop-observability.sh + +## open-observability: Open Prometheus and Grafana in browser +open-observability: + @echo "Opening Prometheus and Grafana..." + @open http://localhost:9090 2>/dev/null || xdg-open http://localhost:9090 2>/dev/null || echo "Please open http://localhost:9090" + @open http://localhost:3000 2>/dev/null || xdg-open http://localhost:3000 2>/dev/null || echo "Please open http://localhost:3000" + +## obs-logs: Show logs from observability containers +obs-logs: + @docker compose -f docker-compose.obs.yml logs -f 2>/dev/null || docker compose logs prometheus grafana -f + +## obs-status: Check status of observability containers +obs-status: + @echo "==> Local mode:" + @docker compose -f docker-compose.obs.yml ps 2>/dev/null || echo " Not running" + @echo "" + @echo "==> Compose mode:" + @docker compose ps prometheus grafana 2>/dev/null || echo " Not running" + +## obs-clean: Remove observability data volumes +obs-clean: + @echo "⚠️ Removing all observability data volumes..." + @docker volume rm prometheus-local-data grafana-local-data prometheus-data grafana-data 2>/dev/null || true + @echo "✓ Done" diff --git a/tools/observability/README.md b/tools/observability/README.md new file mode 100644 index 00000000..528f7803 --- /dev/null +++ b/tools/observability/README.md @@ -0,0 +1,30 @@ +# Observability Configuration + +Prometheus and Grafana configuration files for monitoring semantic-router. + +## Files + +- `prometheus.yaml` - Prometheus scrape config (uses `$ROUTER_TARGET` env var) +- `grafana-datasource.yaml` - Grafana datasource (uses `$PROMETHEUS_URL` env var) +- `grafana-dashboard.yaml` - Dashboard provisioning config +- `llm-router-dashboard.json` - LLM Router dashboard + +## Usage + +**Local mode** (router on host, observability in Docker): + +```bash +make obs-local +``` + +**Compose mode** (all services in Docker): + +```bash +make obs-compose +# or: docker compose up +``` + +**Access:** + +- Prometheus: http://localhost:9090 +- Grafana: http://localhost:3000 (admin/admin) diff --git a/tools/observability/grafana-dashboard.yaml b/tools/observability/grafana-dashboard.yaml new file mode 100644 index 00000000..9e162ee1 --- /dev/null +++ b/tools/observability/grafana-dashboard.yaml @@ -0,0 +1,15 @@ +# Grafana dashboard provisioning configuration +# This file tells Grafana where to find dashboard JSON files + +apiVersion: 1 + +providers: + - name: "Semantic Router" + orgId: 1 + folder: "" + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /etc/grafana/provisioning/dashboards diff --git a/tools/observability/grafana-datasource.yaml b/tools/observability/grafana-datasource.yaml new file mode 100644 index 00000000..a96c705e --- /dev/null +++ b/tools/observability/grafana-datasource.yaml @@ -0,0 +1,14 @@ +# Grafana datasource configuration for Prometheus +# This file is provisioned automatically when Grafana starts + +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://${PROMETHEUS_URL:-localhost:9090} + isDefault: true + editable: true + jsonData: + timeInterval: 15s diff --git a/tools/observability/llm-router-dashboard.json b/tools/observability/llm-router-dashboard.json new file mode 100644 index 00000000..ff136b6e --- /dev/null +++ b/tools/observability/llm-router-dashboard.json @@ -0,0 +1,1238 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": 
true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 18, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 4, + "options": { + "displayMode": "gradient", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color", + "text": { + "valueSize": 24 + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(category) (llm_category_classifications_count)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "{{category}}", + "range": false, + "refId": "A", + "useBackend": false + } + ], + "title": "Prompt Category", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Tokens/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "tps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(llm_model_completion_tokens_total[5m])) by (model)", + "legendFormat": "Completion Tokens {{model}}", + "range": true, + "refId": "A" + } + ], + "title": "Token Usage Rate by Model", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Routes/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(llm_model_routing_modifications_total[5m])) by (source_model, target_model)", + "format": "time_series", + "legendFormat": "{{source_model}} -> {{target_model}}", + "range": true, + "refId": "A" + } + ], + "title": "Model Routing Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))", + "legendFormat": "p95 {{model}}", + "range": true, + "refId": "A" + } + ], + "title": "Model Completion Latency (p95)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Seconds", + "axisPlacement": "auto", + "barAlignment": 0, 
+ "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(llm_model_ttft_seconds_bucket[5m])) by (le, model))", + "legendFormat": "TTFT p95 {{model}}", + "range": true, + "refId": "A" + } + ], + "title": "TTFT (p95) by Model", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Seconds per token", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(llm_model_tpot_seconds_bucket[5m])) by (le, model))", + "legendFormat": "TPOT p95 {{model}}", + "range": true, + "refId": "A" + } + ], + "title": "TPOT (p95) by Model (sec/token)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Requests/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + 
}, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(llm_reasoning_decisions_total{enabled=\"true\"}[5m])) by (model, effort)", + "legendFormat": "Reasoning Enabled: {{model}} ({{effort}})", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(llm_reasoning_decisions_total{enabled=\"false\"}[5m])) by (model)", + "legendFormat": "Reasoning Disabled: {{model}}", + "range": true, + "refId": "B" + } + ], + "title": "Reasoning Rate by Model", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Cost", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(llm_model_cost_total{currency=\"USD\"}[5m])) by (model)", + "legendFormat": "Cost/sec: {{model}}", + "range": true, + "refId": "A" + } + ], + "title": "Model Cost Rate (USD/sec)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Errors/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + 
"hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(llm_request_errors_total{reason=\"pii_policy_denied\"}[5m])) by (model)", + "legendFormat": "PII Policy Denied: {{model}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(llm_request_errors_total{reason=\"jailbreak_block\"}[5m])) by (model)", + "legendFormat": "Jailbreak Block: {{model}}", + "range": true, + "refId": "B" + } + ], + "title": "Refusal Rates by Model", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 0.01 + }, + { + "color": "red", + "value": 0.05 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 10, + "options": { + "displayMode": "gradient", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(llm_request_errors_total{reason=~\"pii_policy_denied|jailbreak_block\"}[5m])) by (model) / sum(rate(llm_model_requests_total[5m])) by (model)", + "legendFormat": "{{model}}", + "range": true, + "refId": "A" + } + ], + "title": "Refusal Rate Percentage by Model", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "currencyUSD" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 11, + "options": { + "displayMode": "gradient", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 
8, + "namePlacement": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(llm_model_cost_total{currency=\"USD\"}) by (model)", + "legendFormat": "{{model}}", + "range": true, + "refId": "A" + } + ], + "title": "Total Cost by Model (USD)", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Seconds", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))", + "legendFormat": "p50 {{model}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))", + "legendFormat": "p90 {{model}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(llm_model_completion_latency_seconds_bucket[5m])) by (le, model))", + "legendFormat": "p99 {{model}}", + "range": true, + "refId": "C" + } + ], + "title": "Model Completion Latency (p50/p90/p99)", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "10s", + "schemaVersion": 40, + "tags": [ + "llm-router" + ], + "templating": { + "list": [ + { + "current": { + "text": "prometheus", + "value": "prometheus" + }, + "includeAll": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "LLM Router Metrics", + "uid": "llm-router-metrics", + "version": 14, + "weekStart": "" +} \ No newline at end of file diff --git a/tools/observability/prometheus.yaml b/tools/observability/prometheus.yaml new file mode 100644 index 00000000..74172124 --- 
/dev/null +++ b/tools/observability/prometheus.yaml @@ -0,0 +1,33 @@ +# Prometheus configuration for semantic-router observability +# +# This configuration works for both: +# - Local development (router running natively, observability in Docker) +# - Docker Compose (all services in containers) +# +# The target address is controlled by environment variable: +# - Local mode: ROUTER_TARGET=localhost:9190 +# - Compose mode: ROUTER_TARGET=semantic-router:9190 + +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + # Semantic Router metrics + - job_name: semantic-router + metrics_path: /metrics + static_configs: + - targets: ["${ROUTER_TARGET:-localhost:9190}"] + labels: + service: semantic-router + environment: docker + + # Optional: Envoy proxy metrics + # Uncomment if Envoy is running + # - job_name: envoy + # metrics_path: /stats/prometheus + # static_configs: + # - targets: ["${ENVOY_TARGET:-envoy-proxy:19000}"] + # labels: + # service: envoy + # environment: docker diff --git a/tools/observability/scripts/start-observability.sh b/tools/observability/scripts/start-observability.sh new file mode 100755 index 00000000..b017507b --- /dev/null +++ b/tools/observability/scripts/start-observability.sh @@ -0,0 +1,136 @@ +#!/usr/bin/env bash +# start-observability.sh +# +# Starts Prometheus and Grafana using Docker Compose +# +# This script starts observability stack to monitor semantic-router. +# It supports two modes: +# - Local mode: Router running natively, observability in Docker (network_mode: host) +# - Compose mode: All services in Docker (uses semantic-network) +# +# Prerequisites: +# - Docker and Docker Compose installed and running +# +# Usage: +# ./scripts/start-observability.sh [local|compose] +# +# To stop: +# ./scripts/stop-observability.sh + +set -euo pipefail + +# Color output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Directories +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +log_info() { echo -e "${GREEN}[INFO]${NC} $1"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } +log_error() { echo -e "${RED}[ERROR]${NC} $1"; } +log_debug() { echo -e "${BLUE}[DEBUG]${NC} $1"; } + +# Parse mode argument +MODE="${1:-local}" + +case "${MODE}" in + local) + log_info "Starting observability in LOCAL mode (router on host, observability in Docker)" + COMPOSE_CMD="docker compose -f ${PROJECT_ROOT}/docker-compose.obs.yml" + ;; + compose) + log_info "Starting observability in COMPOSE mode (all services in Docker)" + COMPOSE_CMD="docker compose -f ${PROJECT_ROOT}/docker-compose.yml" + ;; + *) + log_error "Invalid mode: ${MODE}" + log_info "Usage: $0 [local|compose]" + log_info " local - Router on host, observability in Docker (default)" + log_info " compose - All services in Docker (uses main docker-compose.yml)" + exit 1 + ;; +esac + +# Check if Docker is available +if ! command -v docker &> /dev/null; then + log_error "Docker is not installed or not in PATH" + log_info "Please install Docker Desktop: https://www.docker.com/products/docker-desktop" + exit 1 +fi + +# Check if Docker daemon is running +if ! docker info &> /dev/null; then + log_error "Docker daemon is not running" + log_info "Please start Docker Desktop" + exit 1 +fi + +log_info "Starting services..." +log_debug "Command: ${COMPOSE_CMD} up -d" + +${COMPOSE_CMD} up -d + +# Wait for services to become healthy +log_info "Waiting for services to become healthy..." 
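+# A fixed sleep keeps this script simple. If startup timing ever becomes flaky,
+# polling Prometheus's readiness endpoint is a stricter alternative, e.g.:
+#   until curl -sf http://localhost:9090/-/ready >/dev/null; do sleep 1; done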
+sleep 10 + +# Check service status +if [[ "${MODE}" == "local" ]]; then + PROM_CONTAINER="prometheus-local" + GRAF_CONTAINER="grafana-local" +else + PROM_CONTAINER="prometheus" + GRAF_CONTAINER="grafana" +fi + +if docker ps --format '{{.Names}}' | grep -q "^${PROM_CONTAINER}$"; then + log_info "✓ Prometheus is running at http://localhost:9090" +else + log_warn "⚠ Prometheus not running" + log_info " Check logs: docker logs ${PROM_CONTAINER}" +fi + +if docker ps --format '{{.Names}}' | grep -q "^${GRAF_CONTAINER}$"; then + log_info "✓ Grafana is running at http://localhost:3000" + log_info " Default credentials: admin / admin" +else + log_warn "⚠ Grafana not running" + log_info " Check logs: docker logs ${GRAF_CONTAINER}" +fi + +echo "" +log_info "===================================================================" +log_info "Observability stack started successfully in ${MODE^^} mode!" +log_info "===================================================================" +echo "" + +if [[ "${MODE}" == "local" ]]; then + log_info "Next steps:" + log_info " 1. Start semantic-router on localhost:9190" + log_info " 2. Open Prometheus: http://localhost:9090/targets" + log_info " 3. Open Grafana: http://localhost:3000" + log_info " 4. View dashboard: LLM Router Metrics" +else + log_info "Next steps:" + log_info " 1. Ensure semantic-router is running in Docker" + log_info " 2. Open Prometheus: http://localhost:9090/targets" + log_info " 3. Open Grafana: http://localhost:3000" + log_info " 4. View dashboard: LLM Router Metrics" +fi + +echo "" +log_info "Useful commands:" +if [[ "${MODE}" == "local" ]]; then + log_info " - Check status: docker compose -f docker-compose.obs.yml ps" + log_info " - View logs: docker compose -f docker-compose.obs.yml logs -f" +else + log_info " - Check status: docker compose ps" + log_info " - View logs: docker compose logs prometheus grafana -f" +fi +log_info " - Stop services: make stop-observability" +echo "" diff --git a/tools/observability/scripts/stop-observability.sh b/tools/observability/scripts/stop-observability.sh new file mode 100755 index 00000000..60d42fad --- /dev/null +++ b/tools/observability/scripts/stop-observability.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# stop-observability.sh +# +# Stops and removes observability Docker containers using Docker Compose. +# +# Usage: +# ./scripts/stop-observability.sh + +set -euo pipefail + +# Color output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Directories +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +log_info() { echo -e "${GREEN}[INFO]${NC} $1"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } + +echo -e "${BLUE}==================================================================${NC}" +echo -e "${BLUE} Stopping Observability Stack${NC}" +echo -e "${BLUE}==================================================================${NC}" +echo "" + +# Stop services +log_info "Stopping observability services..." + +# Try stopping local mode containers first +if docker ps -a --format '{{.Names}}' | grep -qE '^(prometheus-local|grafana-local)$'; then + log_info "Stopping local mode containers..." + docker compose -f "${PROJECT_ROOT}/docker-compose.obs.yml" down +fi + +# Also stop compose mode if running as part of main stack +if docker ps -a --format '{{.Names}}' | grep -qE '^(prometheus|grafana)$' && ! 
docker ps -a --format '{{.Names}}' | grep -q 'semantic-router'; then + log_warn "Observability containers from main stack are running" + log_info "Use 'docker compose down' to stop the full stack" +fi + +echo "" +log_info "Observability stopped" +echo "" diff --git a/website/docs/tutorials/observability/observability.md b/website/docs/tutorials/observability/observability.md index e8b5168c..bba9757a 100644 --- a/website/docs/tutorials/observability/observability.md +++ b/website/docs/tutorials/observability/observability.md @@ -1,265 +1,190 @@ # Observability -This page focuses solely on collecting and visualizing metrics for Semantic Router using Prometheus and Grafana—deployment method (Docker Compose vs Kubernetes) is covered in `docker-quickstart.md`. +Metrics collection and visualization for Semantic Router using Prometheus and Grafana. --- -## 1. Metrics & Endpoints Summary +## 1. Metrics & Endpoints -| Component | Endpoint | Notes | -| ---------------------------- | ------------------------- | ------------------------------------------ | -| Router metrics | `:9190/metrics` | Prometheus format (flag: `--metrics-port`) | -| Router health (future probe) | `:8080/health` | HTTP readiness/liveness candidate | -| Envoy metrics (optional) | `:19000/stats/prometheus` | If you enable Envoy | +| Component | Endpoint | Notes | +| ------------------------ | ------------------------- | ------------------------------------------ | +| Router metrics | `:9190/metrics` | Prometheus format (flag: `--metrics-port`) | +| Router health | `:8080/health` | HTTP readiness/liveness | +| Envoy metrics (optional) | `:19000/stats/prometheus` | If Envoy is enabled | -Dashboard JSON: `deploy/llm-router-dashboard.json`. - -Primary source file exposing metrics: `src/semantic-router/cmd/main.go` (uses `promhttp`). +**Configuration location**: `tools/observability/` +**Dashboard**: `tools/observability/llm-router-dashboard.json` --- -## 2. Docker Compose Observability - -Compose bundles: `prometheus`, `grafana`, `semantic-router`, (optional) `envoy`, `mock-vllm`. +## 2. Local Mode (Router on Host) -Key files: +Run router natively on host, observability in Docker. -- `config/prometheus.yaml` -- `config/grafana/datasource.yaml` -- `config/grafana/dashboards.yaml` -- `deploy/llm-router-dashboard.json` - -Start (with testing profile example): +### Quick Start ```bash -CONFIG_FILE=/app/config/config.testing.yaml docker compose --profile testing up --build +# Start router +make run-router + +# Start observability +make obs-local ``` -Access: +**Access:** - Prometheus: http://localhost:9090 - Grafana: http://localhost:3000 (admin/admin) -Expected Prometheus targets: - -- `semantic-router:9190` -- `envoy-proxy:19000` (optional) - ---- - -## 3. Kubernetes Observability - -This guide adds a production-ready Prometheus + Grafana stack to the existing Semantic Router Kubernetes deployment. It includes manifests for collectors, dashboards, data sources, RBAC, and ingress so you can monitor routing performance in any cluster. - -> **Namespace** – All manifests default to the `vllm-semantic-router-system` namespace to match the core deployment. Override it with Kustomize if you use a different namespace. 
- -## What Gets Installed - -| Component | Purpose | Key Files | -|--------------|---------|-----------| -| Prometheus | Scrapes Semantic Router metrics and stores them with persistent retention | `prometheus/` (`rbac.yaml`, `configmap.yaml`, `deployment.yaml`, `pvc.yaml`, `service.yaml`)| -| Grafana | Visualizes metrics using the bundled LLM Router dashboard and a pre-configured Prometheus datasource | `grafana/` (`secret.yaml`, `configmap-*.yaml`, `deployment.yaml`, `pvc.yaml`, `service.yaml`)| -| Ingress (optional) | Exposes the UIs outside the cluster | `ingress.yaml`| -| Dashboard provisioning | Automatically loads `deploy/llm-router-dashboard.json` into Grafana | `grafana/configmap-dashboard.yaml`| - -Prometheus is configured to discover the `semantic-router-metrics` service (port `9190`) automatically. Grafana provisions the same LLM Router dashboard that ships with the Docker Compose stack. - -### 1. Prerequisites +**Verify targets:** -- Deployed Semantic Router workload via `deploy/kubernetes/` -- A Kubernetes cluster (managed, on-prem, or kind) -- `kubectl` v1.23+ -- Optional: an ingress controller (NGINX, ALB, etc.) if you want external access - -### 2. Directory Layout - -``` -deploy/kubernetes/observability/ -├── README.md -├── kustomization.yaml # (created in the next step) -├── ingress.yaml # optional HTTPS ingress examples -├── prometheus/ -│ ├── configmap.yaml # Scrape config (Kubernetes SD) -│ ├── deployment.yaml -│ ├── pvc.yaml -│ ├── rbac.yaml # SA + ClusterRole + binding -│ └── service.yaml -└── grafana/ - ├── configmap-dashboard.yaml # Bundled LLM router dashboard - ├── configmap-provisioning.yaml # Datasource + provider config - ├── deployment.yaml - ├── pvc.yaml - ├── secret.yaml # Admin credentials (override in prod) - └── service.yaml +```bash +# Check Prometheus scrapes localhost:9190 +open http://localhost:9090/targets ``` -### 3. Prometheus Configuration Highlights +**Stop:** -- Uses `kubernetes_sd_configs` to enumerate endpoints in `vllm-semantic-router-system` -- Keeps 15 days of metrics by default (`--storage.tsdb.retention.time=15d`) -- Stores metrics in a `PersistentVolumeClaim` named `prometheus-data` -- RBAC rules grant read-only access to Services, Endpoints, Pods, Nodes, and EndpointSlices - -#### Scrape configuration snippet - -```yaml -scrape_configs: - - job_name: semantic-router - kubernetes_sd_configs: - - role: endpoints - namespaces: - names: - - vllm-semantic-router-system - relabel_configs: - - source_labels: [__meta_kubernetes_service_name] - regex: semantic-router-metrics - action: keep - - source_labels: [__meta_kubernetes_endpoint_port_name] - regex: metrics - action: keep +```bash +make stop-observability ``` -Modify the namespace or service name if you changed them in your primary deployment. +### Configuration -### 4. 
Grafana Configuration Highlights +All configs in `tools/observability/`: -- Stateful deployment backed by the `grafana-storage` PVC -- Datasource provisioned automatically pointing to `http://prometheus:9090` -- Dashboard provider watches `/var/lib/grafana-dashboards` -- Bundled `llm-router-dashboard.json` is identical to `deploy/llm-router-dashboard.json` -- Admin credentials pulled from the `grafana-admin` secret (default `admin/admin` – **change this!)** +- `prometheus.yaml` - Scrapes `localhost:9190` when `ROUTER_TARGET=localhost:9190` +- `grafana-datasource.yaml` - Points to `localhost:9090` +- `grafana-dashboard.yaml` - Dashboard provisioning +- `llm-router-dashboard.json` - Dashboard definition -#### Updating credentials +### Troubleshooting -```bash -kubectl create secret generic grafana-admin \ - --namespace vllm-semantic-router-system \ - --from-literal=admin-user=monitor \ - --from-literal=admin-password='pick-a-strong-password' \ - --dry-run=client -o yaml | kubectl apply -f - -``` - -Remove or overwrite the committed `secret.yaml` when you adopt a different secret management approach. +| Issue | Fix | +| ------------- | --------------------------------------- | +| Target DOWN | Start router: `make run-router` | +| No metrics | Generate traffic, check `:9190/metrics` | +| Port conflict | Change port or stop conflicting service | -### 5. Deployment Steps +--- -#### 5.1. Create the Kustomization +## 3. Docker Compose Mode -Create `deploy/kubernetes/observability/kustomization.yaml` (see below) to assemble all manifests. This guide assumes you keep Prometheus & Grafana in the same namespace as the router. +All services in Docker containers. -#### 5.2. Apply manifests +### Quick Start ```bash -kubectl apply -k deploy/kubernetes/observability/ +# Start full stack (includes observability) +docker compose up --build + +# Or with testing profile +docker compose --profile testing up --build ``` -Verify pods: +**Access:** -```bash -kubectl get pods -n vllm-semantic-router-system -``` +- Prometheus: http://localhost:9090 +- Grafana: http://localhost:3000 (admin/admin) -You should see `prometheus-...` and `grafana-...` pods in `Running` state. +**Expected targets:** -#### 5.3. Integration with the core deployment +- `semantic-router:9190` +- `envoy-proxy:19000` (optional) -1. Deploy or update Semantic Router (`kubectl apply -k deploy/kubernetes/`). -2. Deploy observability stack (`kubectl apply -k deploy/kubernetes/observability/`). -3. Confirm the metrics service (`semantic-router-metrics`) has endpoints: +### Configuration - ```bash - kubectl get endpoints semantic-router-metrics -n vllm-semantic-router-system - ``` +Same configs as local mode (`tools/observability/`), but: -4. Prometheus target should transition to **UP** within ~15 seconds. +- `ROUTER_TARGET=semantic-router:9190` +- `PROMETHEUS_URL=prometheus:9090` +- Uses `semantic-network` bridge network -#### 5.4. Accessing the UIs +--- -> **Optional Ingress** – If you prefer to keep the stack private, delete `ingress.yaml` from `kustomization.yaml` before applying. +## 4. Kubernetes Mode -- **Port-forward (quick check)** +Production-ready Prometheus + Grafana for K8s clusters. 
- ```bash - kubectl port-forward svc/prometheus 9090:9090 -n vllm-semantic-router-system - kubectl port-forward svc/grafana 3000:3000 -n vllm-semantic-router-system - ``` +> **Namespace**: `vllm-semantic-router-system` - Prometheus → http://localhost:9090, Grafana → http://localhost:3000 +### Components -- **Ingress (production)** – Customize `ingress.yaml` with real domains, TLS secrets, and your ingress class before applying. Replace `*.example.com` and configure HTTPS certificates via cert-manager or your provider. +| Component | Purpose | Location | +| ---------- | ------------------------------------- | ---------------------------------------------- | +| Prometheus | Scrapes router metrics, 15d retention | `deploy/kubernetes/observability/prometheus/` | +| Grafana | Dashboard visualization | `deploy/kubernetes/observability/grafana/` | +| Ingress | Optional external access | `deploy/kubernetes/observability/ingress.yaml` | -### 6. Verifying Metrics Collection +### Deploy -1. Open Prometheus (port-forward or ingress) → **Status ▸ Targets** → ensure `semantic-router` job is green. -2. Query `rate(llm_model_completion_tokens_total[5m])` – should return data after traffic. -3. Open Grafana, log in with the admin credentials, and confirm the **LLM Router Metrics** dashboard exists under the *Semantic Router* folder. -4. Generate traffic to Semantic Router (classification or routing requests). Key panels should start populating: - - Prompt Category counts - - Token usage rate per model - - Routing modifications between models - - Latency histograms (TTFT, completion p95) +```bash +# Apply manifests +kubectl apply -k deploy/kubernetes/observability/ -### 7. Dashboard Customization +# Verify +kubectl get pods -n vllm-semantic-router-system +``` -- Duplicate the provisioned dashboard inside Grafana to make changes while keeping the original as a template. -- Update Grafana provisioning (`grafana/configmap-provisioning.yaml`) to point to alternate folders or add new providers. -- Add additional dashboards by extending `grafana/configmap-dashboard.yaml` or mounting a different ConfigMap. -- Incorporate Kubernetes cluster metrics (CPU/memory) by adding another datasource or deploying kube-state-metrics + node exporters. +### Access -### 8. Best Practices +**Port-forward:** -#### Resource Sizing +```bash +kubectl port-forward svc/prometheus 9090:9090 -n vllm-semantic-router-system +kubectl port-forward svc/grafana 3000:3000 -n vllm-semantic-router-system +``` -- Prometheus: increase CPU/memory with higher scrape cardinality or retention > 15 days. -- Grafana: start with `500m` CPU / `1Gi` RAM; scale replicas horizontally when concurrent viewers exceed a few dozen. +**Ingress:** Customize `ingress.yaml` with your domain and TLS -#### Storage +### Key Configuration -- Use SSD-backed storage classes for Prometheus when retention/window is large. -- Increase `prometheus/pvc.yaml` (default 20Gi) and `grafana/pvc.yaml` (default 10Gi) to match retention requirements. -- Enable volume snapshots or backups for dashboards and alert history. +**Prometheus** uses Kubernetes service discovery: -#### Security +```yaml +scrape_configs: + - job_name: semantic-router + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: [vllm-semantic-router-system] +``` -- Replace the demo `grafana-admin` secret with credentials stored in your preferred secret manager. -- Restrict ingress access with network policies, OAuth proxies, or SSO integrations. 
-- Enable Grafana role-based access control and API keys for automation. -- Scope Prometheus RBAC to only the namespaces you need. If metrics run in multiple namespaces, list them in the scrape config. +**Grafana** credentials (change in production): -#### Maintenance +```bash +kubectl create secret generic grafana-admin \ + --namespace vllm-semantic-router-system \ + --from-literal=admin-user=admin \ + --from-literal=admin-password='your-password' +``` -- Monitor Prometheus disk usage; prune retention or scale PVC before it fills up. -- Back up Grafana dashboards or store them in Git (already done through this ConfigMap). -- Roll upgrades separately: update Prometheus and Grafana images via `kustomization.yaml` patches. -- Consider adopting the Prometheus Operator (`ServiceMonitor` + `PodMonitor`) if you already run kube-prometheus-stack. A sample `ServiceMonitor` is in `website/docs/tutorials/observability/observability.md`. +--- -## 4. Key Metrics (Sample) +## 5. Key Metrics -| Metric | Type | Description | -| ------------------------------------------------------------- | --------- | -------------------------------------------- | -| `llm_category_classifications_count` | counter | Number of category classification operations | -| `llm_model_completion_tokens_total` | counter | Tokens emitted per model | -| `llm_model_routing_modifications_total` | counter | Model switch / routing adjustments | -| `llm_model_completion_latency_seconds` | histogram | Completion latency distribution | -| `process_cpu_seconds_total` / `process_resident_memory_bytes` | standard | Runtime resource usage | +| Metric | Type | Description | +| --------------------------------------- | --------- | ------------------------ | +| `llm_category_classifications_count` | counter | Category classifications | +| `llm_model_completion_tokens_total` | counter | Tokens per model | +| `llm_model_routing_modifications_total` | counter | Model routing changes | +| `llm_model_completion_latency_seconds` | histogram | Completion latency | -Use typical PromQL patterns: +**Example queries:** ```promql rate(llm_model_completion_tokens_total[5m]) -histogram_quantile(0.95, sum by (le) (rate(llm_model_completion_latency_seconds_bucket[5m]))) +histogram_quantile(0.95, rate(llm_model_completion_latency_seconds_bucket[5m])) ``` --- -## 5. Troubleshooting +## 6. Troubleshooting -| Symptom | Likely Cause | Check | Fix | -| --------------------- | ------------------------- | ---------------------------------------- | ---------------------------------------------------------------- | -| Target DOWN (Docker) | Service name mismatch | Prometheus /targets | Ensure `semantic-router` container running | -| Target DOWN (K8s) | Label/selectors mismatch | `kubectl get ep semantic-router-metrics` | Align labels or ServiceMonitor selector | -| No new tokens metrics | No traffic | Generate chat/completions via Envoy | Send test requests | -| Dashboard empty | Datasource URL wrong | Grafana datasource settings | Point to `http://prometheus:9090` (Docker) or cluster Prometheus | -| Large 5xx spikes | Backend model unreachable | Router logs | Verify vLLM endpoints configuration | +| Issue | Check | Fix | +| --------------- | ------------------- | ----------------------------------------------------- | +| Target DOWN | Prometheus /targets | Verify router is running and exposing `:9190/metrics` | +| No metrics | Generate traffic | Send requests through router | +| Dashboard empty | Grafana datasource | Check Prometheus URL configuration | ---
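+
+A quick end-to-end check of the scrape path (a sketch, assuming the default ports listed above):
+
+```bash
+# Router side: the metrics endpoint should return Prometheus-format text
+curl -s http://localhost:9190/metrics | grep llm_category_classifications_count
+
+# Prometheus side: the semantic-router target should report "up"
+curl -s http://localhost:9090/api/v1/targets | grep -o '"health":"[^"]*"'
+```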