diff --git a/.gitignore b/.gitignore index a706c5b098..3a59f21e4f 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,4 @@ profiling_results* # Direnv .envrc +rebuild.sh \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 138677bc65..3fcc631249 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -226,6 +226,18 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" +[[package]] +name = "async-broadcast" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435a87a52755b8f27fcf321ac4f04b2802e337c8c4872923137471ec39c37532" +dependencies = [ + "event-listener", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + [[package]] name = "async-channel" version = "2.5.0" @@ -578,6 +590,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "backon" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" +dependencies = [ + "fastrand", + "gloo-timers", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.75" @@ -2366,6 +2389,8 @@ dependencies = [ "futures", "humantime", "jsonschema", + "k8s-openapi", + "kube", "local-ip-address", "log", "nid", @@ -2382,6 +2407,7 @@ dependencies = [ "regex", "reqwest 0.12.23", "rstest 0.23.0", + "schemars 1.0.4", "serde", "serde_json", "socket2 0.5.10", @@ -3336,6 +3362,18 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "h2" version = "0.3.27" @@ -3504,6 +3542,26 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.0", +] + +[[package]] +name = "hostname" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56f203cd1c76362b69e3863fd987520ac36cf70a8c92627449b2f64a8cf7d65" +dependencies = [ + "cfg-if 1.0.3", + "libc", + "windows-link 0.1.3", +] + [[package]] name = "hound" version = "3.5.1" @@ -3676,6 +3734,7 @@ dependencies = [ "http 1.3.1", "hyper 1.7.0", "hyper-util", + "log", "rustls", "rustls-native-certs 0.8.1", "rustls-pki-types", @@ -4221,6 +4280,18 @@ dependencies = [ "unicode-general-category", ] +[[package]] +name = "json-patch" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f300e415e2134745ef75f04562dd0145405c2f7fd92065db029ac4b16b57fe90" +dependencies = [ + "jsonptr", + "serde", + "serde_json", + "thiserror 1.0.69", +] + [[package]] name = "json5" version = "0.4.1" @@ -4232,6 +4303,29 @@ dependencies = [ "serde", ] +[[package]] +name = "jsonpath-rust" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c00ae348f9f8fd2d09f82a98ca381c60df9e0820d8d79fce43e649b4dc3128b" +dependencies = [ + "pest", + "pest_derive", + "regex", + "serde_json", + "thiserror 2.0.16", +] + +[[package]] +name = "jsonptr" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a5a3cc660ba5d72bce0b3bb295bf20847ccbb40fd423f3f05b61273672e561fe" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "jsonschema" version = "0.17.1" @@ -4272,6 +4366,19 @@ dependencies = [ "rayon", ] +[[package]] +name = "k8s-openapi" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d13f06d5326a915becaffabdfab75051b8cdc260c2a5c06c0e90226ede89a692" +dependencies = [ + "base64 0.22.1", + "chrono", + "schemars 1.0.4", + "serde", + "serde_json", +] + [[package]] name = "kernel32-sys" version = "0.2.2" @@ -4282,6 +4389,115 @@ dependencies = [ "winapi-build", ] +[[package]] +name = "kube" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e7bb0b6a46502cc20e4575b6ff401af45cfea150b34ba272a3410b78aa014e" +dependencies = [ + "k8s-openapi", + "kube-client", + "kube-core", + "kube-derive", + "kube-runtime", +] + +[[package]] +name = "kube-client" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4987d57a184d2b5294fdad3d7fc7f278899469d21a4da39a8f6ca16426567a36" +dependencies = [ + "base64 0.22.1", + "bytes", + "chrono", + "either", + "futures", + "home", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "hyper 1.7.0", + "hyper-rustls", + "hyper-timeout", + "hyper-util", + "jsonpath-rust", + "k8s-openapi", + "kube-core", + "pem", + "rustls", + "secrecy", + "serde", + "serde_json", + "serde_yaml", + "thiserror 2.0.16", + "tokio", + "tokio-util", + "tower 0.5.2", + "tower-http", + "tracing", +] + +[[package]] +name = "kube-core" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914bbb770e7bb721a06e3538c0edd2babed46447d128f7c21caa68747060ee73" +dependencies = [ + "chrono", + "derive_more 2.0.1", + "form_urlencoded", + "http 1.3.1", + "json-patch", + "k8s-openapi", + "schemars 1.0.4", + "serde", + "serde-value", + "serde_json", + "thiserror 2.0.16", +] + +[[package]] +name = "kube-derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03dee8252be137772a6ab3508b81cd797dee62ee771112a2453bc85cbbe150d2" +dependencies = [ + "darling 0.21.3", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn 2.0.106", +] + +[[package]] +name = "kube-runtime" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aea4de4b562c5cc89ab10300bb63474ae1fa57ff5a19275f2e26401a323e3fd" +dependencies = [ + "ahash", + "async-broadcast", + "async-stream", + "backon", + "educe", + "futures", + "hashbrown 0.15.5", + "hostname", + "json-patch", + "k8s-openapi", + "kube-client", + "parking_lot", + "pin-project", + "serde", + "serde_json", + "thiserror 2.0.16", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "kvbm-py3" version = "0.1.0" @@ -4918,7 +5134,7 @@ dependencies = [ "num-traits", "objc", "once_cell", - "ordered-float", + "ordered-float 5.1.0", "parking_lot", "radix_trie", "rand 0.9.2", @@ -5755,6 +5971,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-float" version = "5.1.0" @@ -5860,6 +6085,16 @@ dependencies = [ "syn 
2.0.106", ] +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64 0.22.1", + "serde_core", +] + [[package]] name = "pem-rfc7468" version = "0.7.0" @@ -7505,7 +7740,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ "dyn-clone", - "schemars_derive", + "schemars_derive 0.8.22", "serde", "serde_json", ] @@ -7530,6 +7765,7 @@ checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" dependencies = [ "dyn-clone", "ref-cast", + "schemars_derive 1.0.4", "serde", "serde_json", ] @@ -7546,6 +7782,18 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "schemars_derive" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d020396d1d138dc19f1165df7545479dcd58d93810dc5d646a16e55abefa80" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.106", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -7687,6 +7935,16 @@ dependencies = [ "typeid", ] +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float 2.10.1", + "serde", +] + [[package]] name = "serde_cbor" version = "0.11.2" @@ -8866,6 +9124,7 @@ dependencies = [ "futures-sink", "futures-util", "pin-project-lite", + "slab", "tokio", ] @@ -9182,12 +9441,14 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ + "base64 0.22.1", "bitflags 2.9.4", "bytes", "futures-util", "http 1.3.1", "http-body 1.0.1", "iri-string", + "mime", "pin-project-lite", "tower 0.5.2", "tower-layer", diff --git a/components/src/dynamo/vllm/main.py b/components/src/dynamo/vllm/main.py index 4edbd5b05e..24af44b701 100644 --- a/components/src/dynamo/vllm/main.py +++ b/components/src/dynamo/vllm/main.py @@ -8,7 +8,7 @@ from typing import Optional import uvloop -from kvbm.vllm_integration.consolidator_config import get_consolidator_endpoints +# from kvbm.vllm_integration.consolidator_config import get_consolidator_endpoints from prometheus_client import REGISTRY from vllm.distributed.kv_events import ZmqEventPublisher from vllm.usage.usage_lib import UsageContext diff --git a/k8s-test/LOCAL_TESTING.md b/k8s-test/LOCAL_TESTING.md new file mode 100644 index 0000000000..2d4b48801d --- /dev/null +++ b/k8s-test/LOCAL_TESTING.md @@ -0,0 +1,264 @@ +# Local Testing Mode + +This guide explains how to test the Kubernetes discovery client with a **local metadata server** while watching **real Kubernetes resources**. + +## Overview + +The local testing mode allows you to: +- ✅ Watch real Kubernetes EndpointSlices +- ✅ Connect to a local metadata server (on localhost) instead of pod IPs +- ✅ Test the full discovery flow with your actual metadata implementation +- ✅ Debug and iterate quickly without deploying to Kubernetes + +## How It Works + +When `DYN_LOCAL_KUBE_TEST=1` is set: +1. The discovery client watches Kubernetes for EndpointSlices (as normal) +2. When a pod is discovered, it parses the pod name for a port number +3. 
If the pod name ends with `-<port>` (e.g., `dynamo-test-worker-8080`), it connects to `localhost:<port>` instead of the pod IP +4. Your local metadata server running on that port receives the request + +## Setup + +### 1. Create a Test Pod and Service + +Create a pod and service with a specific port number: + +```bash +cd k8s-test + +# Create pod and service with default labels +./create-local-test-pod.sh 8080 + +# Or with custom Kubernetes namespace +./create-local-test-pod.sh 8080 my-k8s-namespace + +# Or with custom Dynamo namespace and component labels +./create-local-test-pod.sh 8080 discovery hello_world backend +``` + +**Arguments:** +1. `port` - Port number (required) - used in pod name and for localhost connection +2. `k8s-namespace` - Kubernetes namespace (default: `discovery`) +3. `dynamo-namespace` - Value for `dynamo.nvidia.com/namespace` label (default: `test-namespace`) +4. `dynamo-component` - Value for `dynamo.nvidia.com/component` label (default: `test-component`) + +This creates: +- A pod named `dynamo-test-worker-<port>` +- A service named `dynamo-test-service-<port>` +- An EndpointSlice (automatically created by Kubernetes for the service) + +### 2. Start Your Local Metadata Server + +Start your metadata server on the port you specified: + +```bash +# Example with the system status server +cargo run --bin your-app -- --port 8080 +``` + +Make sure your server exposes the `/metadata` endpoint that returns a JSON-serialized `DiscoveryMetadata` structure. + +### 3. Run Tests in Local Mode + +Set the environment variable and run your tests: + +```bash +export DYN_LOCAL_KUBE_TEST=1 +cargo test --test kube_client_integration test_watch_all_endpoints -- --ignored --nocapture +``` + +You should see logs like: +``` +Local test mode: using localhost:8080 for pod dynamo-test-worker-8080 +Fetching metadata from http://localhost:8080/metadata +``` + +## Multiple Local Servers + +You can create multiple test pods with different ports and labels: + +```bash +# Create pods for different components +./create-local-test-pod.sh 8080 discovery hello_world frontend +./create-local-test-pod.sh 8081 discovery hello_world backend +./create-local-test-pod.sh 8082 discovery hello_world worker +``` + +Then run multiple metadata servers on different ports: + +```bash +# Terminal 1 - Frontend server +export PORT=8080 +export POD_NAME=dynamo-test-worker-8080 +export POD_NAMESPACE=discovery +your-server --component frontend + +# Terminal 2 - Backend server +export PORT=8081 +export POD_NAME=dynamo-test-worker-8081 +export POD_NAMESPACE=discovery +your-server --component backend + +# Terminal 3 - Worker server +export PORT=8082 +export POD_NAME=dynamo-test-worker-8082 +export POD_NAMESPACE=discovery +your-server --component worker +``` + +The discovery client will discover all three and connect to the appropriate localhost port for each! + +## Pod Name Format + +The pod name MUST end with `-<port>` where `<port>` is a valid port number: + +✅ Valid: +- `dynamo-test-worker-8080` +- `my-service-9000` +- `test-pod-3000` + +❌ Invalid: +- `dynamo-test-worker` (no port) +- `dynamo-test-worker-abc` (not a number) +- `8080-worker` (port not at the end) + +The helper script automatically creates pods with the correct naming format. + +## Example: Testing the Discovery Flow with hello_world + +Here's a complete example using the `hello_world` app: + +```bash +# 1. Create test pod with hello_world labels +cd k8s-test +./create-local-test-pod.sh 9000 discovery hello_world backend + +# 2.
Start your server (in another terminal) +cd ../examples/custom_backend/hello_world + +# Set environment variables for the server +export PORT=9000 +export DYN_SYSTEM_PORT=$PORT +export DYN_LOCAL_KUBE_TEST=1 # Not needed for server, but harmless +export POD_NAME=dynamo-test-worker-$PORT +export POD_NAMESPACE=discovery +export DYN_DISCOVERY_BACKEND=kubernetes + +# Run the server +python3 -m hello_world + +# 3. In another terminal, run the client +export PORT=9009 # Different port for client +export DYN_SYSTEM_PORT=$PORT +export DYN_LOCAL_KUBE_TEST=1 # IMPORTANT: Client needs this! +export POD_NAME=dynamo-test-worker-$PORT +export POD_NAMESPACE=discovery +export DYN_DISCOVERY_BACKEND=kubernetes + +python3 -m client +``` + +You should see: +1. The server registers endpoint `hello_world/backend/generate` with its local metadata +2. The client discovers the pod `dynamo-test-worker-9000` from Kubernetes +3. The client connects to `http://localhost:9000/metadata` (not the pod IP!) +4. The server responds with its registered metadata +5. The client emits an `Added` event and can now make requests + +This lets you: +- ✅ Debug both server and client locally +- ✅ See actual Kubernetes discovery in action +- ✅ Test with real metadata exchange +- ✅ Iterate quickly without container builds + +## Cleanup + +Delete test resources when done: + +```bash +# Delete specific pod and service +kubectl delete pod/dynamo-test-worker-9000 --namespace=discovery +kubectl delete service/dynamo-test-service-9000 --namespace=discovery + +# Or delete multiple at once +kubectl delete pod/dynamo-test-worker-8080 service/dynamo-test-service-8080 --namespace=discovery +kubectl delete pod/dynamo-test-worker-8081 service/dynamo-test-service-8081 --namespace=discovery +``` + +Or delete all local test resources at once: + +```bash +kubectl delete pods,services -l app=dynamo-local-test --namespace=discovery +``` + +## Troubleshooting + +### "Connection refused" to localhost + +**Problem:** The discovery client can't connect to your local metadata server. + +**Solution:** +- Ensure your metadata server is running on the correct port +- Check that the port matches the pod name (e.g., pod `...-8080` → server on port 8080) +- Verify your server exposes the `/metadata` endpoint + +### Pod name doesn't have a port + +**Problem:** You created a pod without using the helper script and the name doesn't end with a port number. + +**Solution:** +- Delete the pod: `kubectl delete pod/<pod-name>` +- Use the helper script: `./create-local-test-pod.sh 8080` +- Or manually create a pod with a name ending in `-<port>` + +### Still connecting to pod IP instead of localhost + +**Problem:** The environment variable isn't set. + +**Solution:** +```bash +export DYN_LOCAL_KUBE_TEST=1 +# Verify it's set +echo $DYN_LOCAL_KUBE_TEST # Should print: 1 +``` + +### Metadata format errors + +**Problem:** Your local server returns data that doesn't match the expected `DiscoveryMetadata` format. + +**Solution:** +Check the logs for JSON parsing errors. Your `/metadata` endpoint should return: +```json +{ + "endpoints": { + "namespace/component/endpoint": { + "Endpoint": { + "namespace": "test-namespace", + "component": "test-component", + "endpoint": "test-endpoint", + "instance_id": 12345, + "transport": {"NatsTcp": "nats://localhost:4222"} + } + } + }, + "model_cards": {} +} +``` + +## Production vs.
Local Testing + +| Mode | Environment Var | Connection | Use Case | +|------|----------------|------------|----------| +| Production | (none) | Pod IP:port | Real deployment | +| Mock | (test only) | No HTTP calls | Unit tests | +| Local Testing | `DYN_LOCAL_KUBE_TEST=1` | localhost:port | Integration testing with local server | + +## Benefits + +- 🚀 **Fast iteration**: No need to rebuild/redeploy containers +- 🐛 **Easy debugging**: Use debuggers, logging, etc. on your local server +- 🧪 **Full integration**: Test with real Kubernetes resources +- 💰 **Cost effective**: No cloud resources needed for testing +- ⚡ **Quick validation**: Test changes to metadata format instantly + diff --git a/k8s-test/README.md b/k8s-test/README.md new file mode 100644 index 0000000000..2d22e60a9d --- /dev/null +++ b/k8s-test/README.md @@ -0,0 +1,547 @@ +# Kubernetes Discovery Integration Tests + +This directory contains integration tests for the Dynamo Kubernetes discovery client. These tests verify that our Rust code can correctly interact with the Kubernetes API to list and watch EndpointSlices. + +## Prerequisites + +1. **Kubernetes Cluster Access**: You need a running Kubernetes cluster with `kubectl` configured + - Local: Docker Desktop, Minikube, Kind, k3s, etc. + - Cloud: GKE, EKS, AKS, etc. + +2. **Admin/Sufficient Permissions**: Your current kubectl context should have permissions to: + - Create/delete Deployments and Services + - List/watch EndpointSlices + +3. **Rust Environment**: Cargo with the dynamo-runtime crate compiled + +## Quick Start + +### 1. Deploy Test Resources + +```bash +cd k8s-test + +# Deploy to default namespace +./deploy.sh + +# Or deploy to a specific namespace +./deploy.sh my-namespace +``` + +This will: +- Create the namespace if it doesn't exist +- Create a deployment with 3 nginx pods +- Create a service that generates EndpointSlices +- Wait for pods to be ready +- Show the current status + +**Examples:** +```bash +./deploy.sh # Deploy to 'default' namespace +./deploy.sh test-namespace # Deploy to 'test-namespace' +./deploy.sh production # Deploy to 'production' namespace +``` + +### 2. Run Integration Tests + +There are two test suites: + +#### A. Raw Kubernetes API Tests (kube_discovery_integration) + +These tests verify the raw Kubernetes API interactions work correctly: + +```bash +# Run all raw K8s tests +cargo test --test kube_discovery_integration -- --ignored --nocapture + +# Or run individual tests: +cargo test --test kube_discovery_integration test_kube_client_connection -- --ignored --nocapture +cargo test --test kube_discovery_integration test_list_endpointslices -- --ignored --nocapture +cargo test --test kube_discovery_integration test_watch_endpointslices -- --ignored --nocapture +cargo test --test kube_discovery_integration test_discovery_simulation -- --ignored --nocapture +``` + +#### B. 
KubeDiscoveryClient Tests (kube_client_integration) **[RECOMMENDED]** + +These tests verify the actual `KubeDiscoveryClient` implementation: + +```bash +# Run all KubeDiscoveryClient tests (sequential for clean output) +cargo test --test kube_client_integration -- --ignored --nocapture --test-threads=1 + +# Or run individual tests: + +# Test client creation +cargo test --test kube_client_integration test_client_creation -- --ignored --nocapture + +# Test list() method +cargo test --test kube_client_integration test_list_all_endpoints -- --ignored --nocapture +cargo test --test kube_client_integration test_list_namespaced_endpoints -- --ignored --nocapture +cargo test --test kube_client_integration test_list_component_endpoints -- --ignored --nocapture + +# Test list_and_watch() method +cargo test --test kube_client_integration test_watch_all_endpoints -- --ignored --nocapture +cargo test --test kube_client_integration test_watch_namespaced_endpoints -- --ignored --nocapture +cargo test --test kube_client_integration test_watch_receives_k8s_events -- --ignored --nocapture +``` + +**Note:** The `--test-threads=1` flag ensures tests run sequentially, preventing output from multiple tests from being interleaved. This makes the output much more readable, especially for watch tests that print events over time. + +**Note:** The `KubeDiscoveryClient` tests use **mock metadata** mode, which means they skip actual HTTP calls to pods and return mock `DiscoveryMetadata` instead. This allows the tests to verify: +- ✅ Kubernetes API interactions (listing/watching EndpointSlices) +- ✅ Endpoint extraction from EndpointSlices +- ✅ Discovery event flow (Added/Removed events) +- ✅ The full discovery pipeline + +Without needing actual metadata servers running in pods. This makes tests fast, reliable, and easy to run. + +#### Alternative: Using the Test Runner Script + +You can also use the `run-tests.sh` script for a more convenient workflow: + +```bash +cd k8s-test + +# Run all client tests (checks default namespace) +./run-tests.sh + +# Run specific test +./run-tests.sh client test_list_all_endpoints + +# Run tests and check a specific namespace +./run-tests.sh client "" my-namespace + +# Run all test suites +./run-tests.sh all +``` + +The script will: +- Check if kubectl is configured +- Verify test resources exist in the specified namespace +- Run the requested tests +- Provide helpful error messages if resources aren't deployed + +### 3. Clean Up + +```bash +# Clean up from default namespace +./cleanup.sh + +# Or clean up from a specific namespace +./cleanup.sh my-namespace +``` + +**Examples:** +```bash +./cleanup.sh # Clean up from 'default' namespace +./cleanup.sh test-namespace # Clean up from 'test-namespace' +./cleanup.sh production # Clean up from 'production' namespace +``` + +**Note:** The cleanup script does not delete the namespace itself. To delete the namespace: +```bash +kubectl delete namespace my-namespace +``` + +## Test Descriptions + +### KubeDiscoveryClient Tests (Recommended) + +These tests exercise the actual `KubeDiscoveryClient` methods that will be used in production. + +#### `test_client_creation` +Verifies that we can create a `KubeDiscoveryClient` for testing. + +**What it tests:** +- Client instantiation +- Instance ID generation from pod name + +**Expected output:** +``` +🔌 Testing KubeDiscoveryClient creation... +✅ Client created with instance_id: abc123def456 +``` + +#### `test_list_all_endpoints` +Tests the `list()` method with `DiscoveryKey::AllEndpoints`. 
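For orientation, a minimal sketch of what this kind of integration test can look like is shown below. The import path and constructor for `KubeDiscoveryClient`, and the exact return type of `list()`, are assumptions for illustration; the real tests in `kube_client_integration` are the source of truth.

```rust
// Sketch of an ignored integration test against a live cluster (assumed API).
// `DiscoveryKey` and `DiscoveryInstance` come from dynamo_runtime::discovery;
// the `KubeDiscoveryClient` path and its `new()` constructor are placeholders.
use dynamo_runtime::discovery::{DiscoveryInstance, DiscoveryKey};
// use dynamo_runtime::discovery::kube::KubeDiscoveryClient; // path assumed

#[tokio::test]
#[ignore] // requires kubectl access and the k8s-test resources deployed
async fn sketch_list_all_endpoints() -> anyhow::Result<()> {
    // Hypothetical constructor; the real client reads POD_NAMESPACE from the env.
    let client = KubeDiscoveryClient::new().await?;
    let instances: Vec<DiscoveryInstance> = client.list(DiscoveryKey::AllEndpoints).await?;
    println!("Found {} instances", instances.len());
    Ok(())
}
```
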
+ +**What it tests:** +- Calling `KubeDiscoveryClient::list()` +- EndpointSlice querying without label filters +- Metadata fetching workflow (will fail gracefully without metadata server) + +**Expected output:** +``` +📋 Testing list all endpoints... + Note: This will try to fetch metadata from pods via HTTP, + which will likely fail unless pods are running the metadata server. + The test verifies the Kubernetes API calls work correctly. +Calling list() with key=AllEndpoints +✅ list() succeeded + Found 0 instances +✅ List test completed (K8s API calls work) +``` + +#### `test_list_namespaced_endpoints` & `test_list_component_endpoints` +Test the `list()` method with label-based filtering. + +**What it tests:** +- Label selector generation from `DiscoveryKey` +- Filtered EndpointSlice queries + +#### `test_watch_all_endpoints` & `test_watch_namespaced_endpoints` +Test the `list_and_watch()` method which creates a streaming watch. + +**What it tests:** +- Creating a watch stream from `KubeDiscoveryClient` +- Receiving discovery events (Added/Removed) +- Watch lifecycle management + +**Expected output:** +``` +👀 Testing watch all endpoints... + This test will watch for 5 seconds +Calling list_and_watch() with key=AllEndpoints +📡 Watch stream started... +⏰ Timeout reached +✅ Watch test completed (0 events received) + Note: Events are only emitted when pods are discovered + and their metadata can be fetched via HTTP +``` + +#### `test_watch_receives_k8s_events` +Verifies the Kubernetes watcher integration is functioning. + +**What it tests:** +- Watch stream receives at least one event +- K8s watcher initialization +- Stream lifecycle + +### Raw Kubernetes API Tests + +These tests verify low-level Kubernetes API interactions. + +#### `test_kube_client_connection` +Verifies that we can create a Kubernetes client and connect to the cluster. + +**What it tests:** +- Client creation from default kubeconfig +- Basic API connectivity by listing namespaces + +**Expected output:** +``` +🔌 Testing Kubernetes client connection... +✅ Successfully connected to Kubernetes cluster +📋 Found X namespaces +✅ Kubernetes API is accessible +``` + +### `test_list_endpointslices` +Tests listing all EndpointSlices in the default namespace. + +**What it tests:** +- EndpointSlice API access +- Parsing EndpointSlice structures +- Extracting endpoint information (pod names, IPs, readiness) + +**Expected output:** +``` +📋 Testing EndpointSlice listing... +📊 Found X EndpointSlices in default namespace + • dynamo-test-service-abcde (service: dynamo-test-service, endpoints: 3) + [0] pod=dynamo-test-worker-xxx, ready=true, addresses=["10.1.2.3"] + [1] pod=dynamo-test-worker-yyy, ready=true, addresses=["10.1.2.4"] + [2] pod=dynamo-test-worker-zzz, ready=true, addresses=["10.1.2.5"] +✅ EndpointSlice listing test completed +``` + +### `test_list_with_labels` +Tests listing EndpointSlices with label selectors (like our discovery client does). + +**What it tests:** +- Label selector functionality +- Filtering EndpointSlices by labels + +**Important:** EndpointSlices are created by Services, not Deployments. The EndpointSlices will have labels from the Service, not from the pod labels. The test uses `kubernetes.io/service-name=dynamo-test-service` which is automatically added by Kubernetes. + +**Expected output:** +``` +🏷️ Testing EndpointSlice listing with label selector... 
+Using label selector: kubernetes.io/service-name=dynamo-test-service +📊 Found X EndpointSlices matching labels + • dynamo-test-service-abcde (endpoints: 3) +✅ Label selector test completed +``` + +### `test_watch_endpointslices` +Tests the Kubernetes watch mechanism for EndpointSlices. + +**What it tests:** +- Creating a watch stream +- Receiving watch events (Init, InitApply, Apply, Delete, InitDone) +- Event types and their contents + +**Expected output:** +``` +👀 Testing EndpointSlice watching... + This test will watch for 10 seconds or 5 events, whichever comes first +📡 Watch stream started... + [1] 🚀 Init - watch stream starting + [2] 🔄 InitApply: dynamo-test-service-xxx (endpoints: 3) + [3] ✅ InitDone - initial list complete +📊 Reached max events (5), stopping watch +✅ Watch test completed (5 events received) +``` + +### `test_watch_with_labels` +Tests watching EndpointSlices with a label selector. + +**What it tests:** +- Watch with label filtering +- Receiving only relevant events + +**Expected output:** +``` +👀 Testing EndpointSlice watching with label selector... + This test will watch for 5 seconds or until InitDone +Using label selector: kubernetes.io/service-name=dynamo-test-service +📡 Watch stream started... + [1] 🚀 Init - watch stream starting + [2] 🔄 InitApply: dynamo-test-service-xxx (endpoints: 3) + [3] ✅ InitDone - initial list complete +📊 InitDone received, stopping watch +✅ Watch with labels test completed (3 events received) +``` + +### `test_discovery_simulation` +Comprehensive test that simulates the full discovery client behavior. + +**What it tests:** +- Complete discovery flow: watch → extract endpoints → track instances +- Pod name hashing (instance ID generation) +- Ready state filtering +- Duplicate detection + +**Expected output:** +``` +🔍 Testing discovery client simulation... + This simulates how our KubeDiscoveryClient list_and_watch works +Label selector: kubernetes.io/service-name=dynamo-test-service +📡 Starting watch stream... 
+ 🚀 Watch stream initialized + 📦 Processing EndpointSlice: dynamo-test-service-xxx + ✅ New endpoint: pod=dynamo-test-worker-xxx, instance_id=abc123, addresses=["10.1.2.3"] + ✅ New endpoint: pod=dynamo-test-worker-yyy, instance_id=def456, addresses=["10.1.2.4"] + ✅ New endpoint: pod=dynamo-test-worker-zzz, instance_id=789abc, addresses=["10.1.2.5"] + ✅ Initial sync complete + 📊 Discovered 3 unique endpoints +✅ Discovery simulation completed +📊 Total unique endpoints discovered: 3 +``` + +## Architecture Overview + +``` +┌─────────────────────────────────────────┐ +│ Kubernetes Cluster │ +│ │ +│ Namespace: default (POD_NAMESPACE) │ +│ ┌─────────────────────────────────┐ │ +│ │ Deployment: dynamo-test-worker │ │ +│ │ Replicas: 3 │ │ +│ │ Labels: │ │ +│ │ app=dynamo-test │ │ +│ │ component=worker │ │ +│ └─────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────┐ │ +│ │ Pods (3 replicas) │ │ +│ │ - dynamo-test-worker-xxx │ │ +│ │ - dynamo-test-worker-yyy │ │ +│ │ - dynamo-test-worker-zzz │ │ +│ └─────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────┐ │ +│ │ Service: dynamo-test-service │ │ +│ │ Type: ClusterIP │ │ +│ │ Selector: app=dynamo-test │ │ +│ └─────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────┐ │ +│ │ EndpointSlices (auto-created) │ │ +│ │ Labels: │ │ +│ │ kubernetes.io/service-name: │ │ +│ │ dynamo-test-service │ │ +│ │ Endpoints: [pod IPs + status] │ │ +│ └─────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────┘ + │ + │ Kubernetes API + │ (List/Watch - namespace scoped) + ▼ +┌─────────────────────────────────────────┐ +│ Integration Tests (Rust) │ +│ - test_list_endpointslices │ +│ - test_watch_endpointslices │ +│ - test_discovery_simulation │ +└─────────────────────────────────────────┘ +``` + +**Important:** The `KubeDiscoveryClient` is **namespace-scoped**. It only watches EndpointSlices in the namespace specified by the `POD_NAMESPACE` environment variable. This provides: +- ✅ Better security (no cluster-wide access needed) +- ✅ Better performance (fewer resources to watch) +- ✅ Namespace isolation (pods only discover within their namespace) + +## Troubleshooting + +### "Failed to create Kubernetes client" + +**Cause:** kubectl is not configured or kubeconfig is invalid + +**Solution:** +```bash +# Check kubectl connection +kubectl cluster-info + +# Check current context +kubectl config current-context + +# If needed, set context +kubectl config use-context +``` + +### "No EndpointSlices found" + +**Cause:** Test resources not deployed + +**Solution:** +```bash +cd k8s-test +./deploy.sh + +# Verify resources exist +kubectl get endpointslices -l kubernetes.io/service-name=dynamo-test-service +``` + +### "Pods not ready" + +**Cause:** Pods are still starting or failing + +**Solution:** +```bash +# Check pod status +kubectl get pods -l app=dynamo-test + +# Check pod events +kubectl describe pod + +# Check pod logs +kubectl logs +``` + +### "No endpoints discovered" + +**Cause:** Pods might not be ready yet + +**Solution:** +```bash +# Wait for pods to be ready +kubectl wait --for=condition=ready pod -l app=dynamo-test --timeout=60s + +# Check pod readiness +kubectl get pods -l app=dynamo-test -o wide +``` + +## Notes + +### Namespace Configuration + +The `KubeDiscoveryClient` reads the `POD_NAMESPACE` environment variable to determine which namespace to watch. 
This is automatically set by Kubernetes when you use the downward API: + +```yaml +env: +- name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace +``` + +The client will **only** watch EndpointSlices within this namespace. It does not have cluster-wide access. + +### Why EndpointSlices? + +Kubernetes automatically creates EndpointSlices for Services. EndpointSlices track: +- Pod IPs +- Pod readiness state +- Pod names (via targetRef) +- Port information + +This makes them perfect for service discovery. + +### Labels on EndpointSlices + +**Important:** EndpointSlices inherit labels from the Service, not from Pods. The most reliable label to use is: +- `kubernetes.io/service-name=<service-name>` (automatically added) + +If you want custom labels on EndpointSlices, add them to the Service, not the Pods. + +### Difference from Production + +These integration tests skip the HTTP metadata fetching part. In production: +1. Watch EndpointSlice → get pod IPs +2. HTTP GET `http://<pod-ip>:8080/metadata` → get registration data +3. Cache and return discovery instances + +For these tests, we only verify step 1 works correctly. + +## Local Testing Mode + +Want to test with a **real metadata server** running locally? See **[LOCAL_TESTING.md](LOCAL_TESTING.md)** for detailed instructions. + +Quick start: +```bash +# 1. Create a test pod and service with custom labels +./create-local-test-pod.sh 9000 discovery hello_world backend +# ^port ^k8s-ns ^dynamo-ns ^component + +# This creates: +# - Pod: dynamo-test-worker-9000 +# - Service: dynamo-test-service-9000 +# - EndpointSlice: (auto-created by K8s) + +# 2. Start your metadata server locally (in another terminal) +export PORT=9000 +export DYN_SYSTEM_PORT=$PORT +export POD_NAME=dynamo-test-worker-$PORT +export POD_NAMESPACE=discovery +export DYN_DISCOVERY_BACKEND=kubernetes +python3 -m your_app + +# 3. Run your client in local mode +export DYN_LOCAL_KUBE_TEST=1 # Key: makes client connect to localhost! +export POD_NAMESPACE=discovery +export DYN_DISCOVERY_BACKEND=kubernetes +python3 -m your_client +``` + +This allows you to: +- ✅ Test with real Kubernetes resources +- ✅ Debug your metadata server locally +- ✅ See full discovery flow with actual metadata exchange +- ✅ Iterate quickly without deploying to K8s + +The client discovers the pod from K8s but connects to `localhost:9000` for metadata! + +## Next Steps + +After these tests pass: +1. Test with real metadata servers using local testing mode (see above) +2. Test error handling (network failures, timeouts, etc.) +3. Test scale (100s of pods) +4. Test label selector edge cases +5. Add RBAC roles and test with restricted permissions + diff --git a/k8s-test/cleanup.sh b/k8s-test/cleanup.sh new file mode 100755 index 0000000000..745d8c9a3c --- /dev/null +++ b/k8s-test/cleanup.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Clean up test resources from Kubernetes cluster + +set -e + +# Parse namespace argument (default to "default") +NAMESPACE="${1:-default}" + +echo "🧹 Cleaning up Dynamo test resources from namespace: $NAMESPACE" + +# Delete manifests +kubectl delete -f manifests/test-deployment.yaml --namespace="$NAMESPACE" --ignore-not-found=true + +echo "" +echo "✅ Test resources cleaned up from namespace: $NAMESPACE!" +echo "" +echo "Note: The namespace itself was not deleted.
To delete it, run:" +echo " kubectl delete namespace $NAMESPACE" + diff --git a/k8s-test/create-local-test-pod.sh b/k8s-test/create-local-test-pod.sh new file mode 100755 index 0000000000..60b8aa1b35 --- /dev/null +++ b/k8s-test/create-local-test-pod.sh @@ -0,0 +1,157 @@ +#!/bin/bash +# Create a pod and service for local testing with DYN_LOCAL_KUBE_TEST +# The pod name will be in format: dynamo-test-worker- +# This allows the discovery client to connect to localhost: for the metadata server + +set -e + +# Parse arguments +PORT="${1:-9000}" +K8S_NAMESPACE="${2:-discovery}" +DYNAMO_NAMESPACE="${3:-hello_world}" +DYNAMO_COMPONENT="${4:-backend}" + +if [ -z "$PORT" ]; then + echo "Usage: $0 [k8s-namespace] [dynamo-namespace] [dynamo-component]" + echo "" + echo "Creates a pod and service that will be discovered by the Kubernetes client." + echo "When DYN_LOCAL_KUBE_TEST is set, the client will connect to localhost:" + echo "for the metadata endpoint instead of the pod IP." + echo "" + echo "Arguments:" + echo " port - Port number to use (required)" + echo " k8s-namespace - Kubernetes namespace (default: discovery)" + echo " dynamo-namespace - Dynamo namespace label (default: hello_world)" + echo " dynamo-component - Dynamo component label (default: backend)" + echo "" + echo "Examples:" + echo " $0 8080 # backend component (default)" + echo " $0 8081 discovery # backend in discovery namespace" + echo " $0 8082 discovery hello_world backend # Explicit backend component" + echo " $0 8083 discovery hello_world prefill # prefill component" + echo " $0 8084 discovery dynamo frontend # frontend component" + echo "" + echo "After creating the pod, run your metadata server locally:" + echo " # In one terminal:" + echo " your-metadata-server --port $PORT" + echo "" + echo " # In another terminal:" + echo " export DYN_LOCAL_KUBE_TEST=1" + echo " cargo test --test kube_client_integration test_watch_all_endpoints -- --ignored --nocapture" + exit 1 +fi + +POD_NAME="dynamo-test-worker-${PORT}" +SERVICE_NAME="dynamo-test-${DYNAMO_COMPONENT}" + +echo "🚀 Creating local test resources in K8s namespace: $K8S_NAMESPACE" +echo " Pod name: $POD_NAME" +echo " Service name: $SERVICE_NAME (component: $DYNAMO_COMPONENT)" +echo " Port: $PORT" +echo " Dynamo namespace: $DYNAMO_NAMESPACE" +echo " Dynamo component: $DYNAMO_COMPONENT" +echo "" + +# Create namespace if it doesn't exist +if ! kubectl get namespace "$K8S_NAMESPACE" &> /dev/null; then + echo "📦 Creating Kubernetes namespace: $K8S_NAMESPACE" + kubectl create namespace "$K8S_NAMESPACE" +fi + +# Create the pod and service using kubectl +cat < /dev/null; then + echo "📦 Creating namespace: $NAMESPACE" + kubectl create namespace "$NAMESPACE" +else + echo "✅ Namespace $NAMESPACE already exists" +fi + +echo "" +echo "Applying manifests..." + +# Apply manifests with namespace override +kubectl apply -f manifests/test-deployment.yaml --namespace="$NAMESPACE" + +echo "" +echo "✅ Resources deployed!" +echo "" +echo "Waiting for pods to be ready..." +kubectl wait --for=condition=ready pod -l app=dynamo-test --namespace="$NAMESPACE" --timeout=60s + +echo "" +echo "📊 Current status in namespace $NAMESPACE:" +kubectl get deployment dynamo-test-worker --namespace="$NAMESPACE" +kubectl get service dynamo-test-service --namespace="$NAMESPACE" +kubectl get pods -l app=dynamo-test --namespace="$NAMESPACE" +kubectl get endpointslices -l kubernetes.io/service-name=dynamo-test-service --namespace="$NAMESPACE" + +echo "" +echo "✅ Test environment is ready in namespace: $NAMESPACE!" 
+echo "" +echo "To run tests against this namespace, set POD_NAMESPACE=$NAMESPACE in your test client" + diff --git a/k8s-test/manifests/test-deployment.yaml b/k8s-test/manifests/test-deployment.yaml new file mode 100644 index 0000000000..fb64aeb2f6 --- /dev/null +++ b/k8s-test/manifests/test-deployment.yaml @@ -0,0 +1,65 @@ +--- +# Test deployment for Dynamo discovery integration testing +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dynamo-test-worker + labels: + app: dynamo-test +spec: + replicas: 3 + selector: + matchLabels: + app: dynamo-test + component: worker + template: + metadata: + labels: + app: dynamo-test + component: worker + dynamo.nvidia.com/namespace: "test-namespace" + dynamo.nvidia.com/component: "test-component" + spec: + containers: + - name: worker + image: nginx:alpine # Simple container for testing + ports: + - containerPort: 8080 + name: http + protocol: TCP + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + readinessProbe: + httpGet: + path: / + port: 80 + initialDelaySeconds: 2 + periodSeconds: 5 +--- +# Service to create EndpointSlices +apiVersion: v1 +kind: Service +metadata: + name: dynamo-test-service + labels: + app: dynamo-test + dynamo.nvidia.com/namespace: "test-namespace" + dynamo.nvidia.com/component: "test-component" +spec: + selector: + app: dynamo-test + component: worker + ports: + - port: 8080 + targetPort: 80 + protocol: TCP + name: http + type: ClusterIP + diff --git a/k8s-test/run-tests.sh b/k8s-test/run-tests.sh new file mode 100644 index 0000000000..461f0c97af --- /dev/null +++ b/k8s-test/run-tests.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# Run integration tests for Kubernetes discovery client + +set -e + +echo "🧪 Running Kubernetes Discovery Integration Tests" +echo "" + +# Check if kubectl is configured +if ! kubectl cluster-info &> /dev/null; then + echo "❌ kubectl is not configured or cluster is not accessible" + echo " Please ensure you have access to a Kubernetes cluster" + exit 1 +fi + +echo "✅ kubectl is configured" +echo " Cluster: $(kubectl config current-context)" +echo "" + +# Parse command line arguments +TEST_SUITE="${1:-kube_client}" +TEST_NAME="${2:-}" +NAMESPACE="${3:-default}" + +echo "🔍 Checking for test resources in namespace: $NAMESPACE" + +# Check if test resources are deployed +PODS=$(kubectl get pods -l app=dynamo-test --namespace="$NAMESPACE" --no-headers 2>/dev/null | wc -l) +if [ "$PODS" -eq 0 ]; then + echo "⚠️ Test resources not deployed in namespace: $NAMESPACE" + echo " Run ./deploy.sh $NAMESPACE to create test resources" + echo " (Tests will still run but may not find any endpoints)" + echo "" +else + echo "✅ Found $PODS test pods in namespace: $NAMESPACE" + echo "" +fi + +case "$TEST_SUITE" in + "client"|"kube_client") + echo "Running KubeDiscoveryClient tests..." + if [ -n "$TEST_NAME" ]; then + cargo test --test kube_client_integration "$TEST_NAME" -- --ignored --nocapture --test-threads=1 + else + cargo test --test kube_client_integration -- --ignored --nocapture --test-threads=1 + fi + ;; + "raw"|"kube_api") + echo "Running raw Kubernetes API tests..." + if [ -n "$TEST_NAME" ]; then + cargo test --test kube_discovery_integration "$TEST_NAME" -- --ignored --nocapture --test-threads=1 + else + cargo test --test kube_discovery_integration -- --ignored --nocapture --test-threads=1 + fi + ;; + "all") + echo "Running all integration tests..." 
+ cargo test --test kube_client_integration -- --ignored --nocapture --test-threads=1 + echo "" + echo "---" + echo "" + cargo test --test kube_discovery_integration -- --ignored --nocapture --test-threads=1 + ;; + *) + echo "Usage: $0 [client|raw|all] [test_name] [namespace]" + echo "" + echo "Arguments:" + echo " test_suite - Which test suite to run (default: client)" + echo " test_name - Specific test to run (optional)" + echo " namespace - Kubernetes namespace to check (default: default)" + echo "" + echo "Test suites:" + echo " client (default) - Run KubeDiscoveryClient tests (recommended)" + echo " raw - Run raw Kubernetes API tests" + echo " all - Run all integration tests" + echo "" + echo "Examples:" + echo " $0 # Run client tests (default namespace)" + echo " $0 client test_list_all_endpoints # Run specific client test" + echo " $0 client test_list_all_endpoints my-namespace # Run test, check my-namespace" + echo " $0 raw test_list_endpointslices # Run specific raw API test" + echo " $0 all \"\" my-namespace # Run all tests, check my-namespace" + exit 1 + ;; +esac + +echo "" +echo "✅ Tests completed" +echo "" +echo "Note: Tests check for resources in namespace: $NAMESPACE" +echo " The actual KubeDiscoveryClient namespace is determined by POD_NAMESPACE env var in test code" + diff --git a/lib/bindings/python/Cargo.lock b/lib/bindings/python/Cargo.lock index 839caf2d5e..854303e791 100644 --- a/lib/bindings/python/Cargo.lock +++ b/lib/bindings/python/Cargo.lock @@ -55,6 +55,12 @@ dependencies = [ "equator", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -173,6 +179,18 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "async-broadcast" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435a87a52755b8f27fcf321ac4f04b2802e337c8c4872923137471ec39c37532" +dependencies = [ + "event-listener", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + [[package]] name = "async-channel" version = "2.5.0" @@ -467,6 +485,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "backon" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" +dependencies = [ + "fastrand", + "gloo-timers", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.75" @@ -1144,8 +1173,18 @@ version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + +[[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core 0.21.3", + "darling_macro 0.21.3", ] [[package]] @@ -1162,13 +1201,38 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "darling_core" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" 
+dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.106", +] + [[package]] name = "darling_macro" version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ - "darling_core", + "darling_core 0.20.11", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core 0.21.3", "quote", "syn 2.0.106", ] @@ -1265,7 +1329,7 @@ version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" dependencies = [ - "darling", + "darling 0.20.11", "proc-macro2", "quote", "syn 2.0.106", @@ -1287,7 +1351,16 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" dependencies = [ - "derive_more-impl", + "derive_more-impl 1.0.0", +] + +[[package]] +name = "derive_more" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +dependencies = [ + "derive_more-impl 2.0.1", ] [[package]] @@ -1302,6 +1375,17 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "derive_more-impl" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "dialoguer" version = "0.11.0" @@ -1457,6 +1541,7 @@ dependencies = [ "async_zmq", "axum", "axum-server", + "base64 0.22.1", "bincode 2.0.1", "bitflags 2.9.3", "blake3", @@ -1494,6 +1579,7 @@ dependencies = [ "rand 0.9.2", "rayon", "regex", + "reqwest", "rmp-serde", "rustls", "serde", @@ -1601,6 +1687,8 @@ dependencies = [ "figment", "futures", "humantime", + "k8s-openapi", + "kube", "local-ip-address", "log", "nid", @@ -1615,6 +1703,8 @@ dependencies = [ "rand 0.9.2", "rayon", "regex", + "reqwest", + "schemars 1.0.4", "serde", "serde_json", "socket2 0.5.10", @@ -2381,6 +2471,18 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "h2" version = "0.4.12" @@ -2435,6 +2537,8 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ + "allocator-api2", + "equivalent", "foldhash", ] @@ -2497,6 +2601,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "hostname" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56f203cd1c76362b69e3863fd987520ac36cf70a8c92627449b2f64a8cf7d65" +dependencies = [ + "cfg-if 1.0.3", + "libc", + "windows-link 0.1.3", +] + [[package]] name = "http" version = "1.3.1" @@ -2581,6 +2696,7 @@ dependencies 
= [ "http", "hyper", "hyper-util", + "log", "rustls", "rustls-native-certs 0.8.1", "rustls-pki-types", @@ -3045,6 +3161,18 @@ dependencies = [ "unicode-general-category", ] +[[package]] +name = "json-patch" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f300e415e2134745ef75f04562dd0145405c2f7fd92065db029ac4b16b57fe90" +dependencies = [ + "jsonptr", + "serde", + "serde_json", + "thiserror 1.0.69", +] + [[package]] name = "json5" version = "0.4.1" @@ -3056,6 +3184,29 @@ dependencies = [ "serde", ] +[[package]] +name = "jsonpath-rust" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c00ae348f9f8fd2d09f82a98ca381c60df9e0820d8d79fce43e649b4dc3128b" +dependencies = [ + "pest", + "pest_derive", + "regex", + "serde_json", + "thiserror 2.0.16", +] + +[[package]] +name = "jsonptr" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5a3cc660ba5d72bce0b3bb295bf20847ccbb40fd423f3f05b61273672e561fe" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "jwalk" version = "0.8.1" @@ -3066,6 +3217,19 @@ dependencies = [ "rayon", ] +[[package]] +name = "k8s-openapi" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d13f06d5326a915becaffabdfab75051b8cdc260c2a5c06c0e90226ede89a692" +dependencies = [ + "base64 0.22.1", + "chrono", + "schemars 1.0.4", + "serde", + "serde_json", +] + [[package]] name = "kernel32-sys" version = "0.2.2" @@ -3076,6 +3240,115 @@ dependencies = [ "winapi-build", ] +[[package]] +name = "kube" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e7bb0b6a46502cc20e4575b6ff401af45cfea150b34ba272a3410b78aa014e" +dependencies = [ + "k8s-openapi", + "kube-client", + "kube-core", + "kube-derive", + "kube-runtime", +] + +[[package]] +name = "kube-client" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4987d57a184d2b5294fdad3d7fc7f278899469d21a4da39a8f6ca16426567a36" +dependencies = [ + "base64 0.22.1", + "bytes", + "chrono", + "either", + "futures", + "home", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-timeout", + "hyper-util", + "jsonpath-rust", + "k8s-openapi", + "kube-core", + "pem", + "rustls", + "secrecy", + "serde", + "serde_json", + "serde_yaml", + "thiserror 2.0.16", + "tokio", + "tokio-util", + "tower", + "tower-http", + "tracing", +] + +[[package]] +name = "kube-core" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914bbb770e7bb721a06e3538c0edd2babed46447d128f7c21caa68747060ee73" +dependencies = [ + "chrono", + "derive_more 2.0.1", + "form_urlencoded", + "http", + "json-patch", + "k8s-openapi", + "schemars 1.0.4", + "serde", + "serde-value", + "serde_json", + "thiserror 2.0.16", +] + +[[package]] +name = "kube-derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03dee8252be137772a6ab3508b81cd797dee62ee771112a2453bc85cbbe150d2" +dependencies = [ + "darling 0.21.3", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn 2.0.106", +] + +[[package]] +name = "kube-runtime" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aea4de4b562c5cc89ab10300bb63474ae1fa57ff5a19275f2e26401a323e3fd" +dependencies = [ + "ahash", + "async-broadcast", + "async-stream", + "backon", + "educe", + "futures", + 
"hashbrown 0.15.5", + "hostname", + "json-patch", + "k8s-openapi", + "kube-client", + "parking_lot", + "pin-project", + "serde", + "serde_json", + "thiserror 2.0.16", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "lalrpop-util" version = "0.20.2" @@ -3256,7 +3529,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d149aaa2965d70381709d9df4c7ee1fc0de1c614a4efc2ee356f5e43d68749f8" dependencies = [ - "derive_more", + "derive_more 1.0.0", "malachite", "num-integer", "num-traits", @@ -3940,6 +4213,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-multimap" version = "0.7.3" @@ -4020,6 +4302,16 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64 0.22.1", + "serde_core", +] + [[package]] name = "pem-rfc7468" version = "0.7.0" @@ -5338,10 +5630,23 @@ checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" dependencies = [ "dyn-clone", "ref-cast", + "schemars_derive", "serde", "serde_json", ] +[[package]] +name = "schemars_derive" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d020396d1d138dc19f1165df7545479dcd58d93810dc5d646a16e55abefa80" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.106", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -5414,10 +5719,11 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ + "serde_core", "serde_derive", ] @@ -5432,11 +5738,41 @@ dependencies = [ "typeid", ] +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float", + "serde", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", @@ -5542,7 +5878,7 
@@ version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" dependencies = [ - "darling", + "darling 0.20.11", "proc-macro2", "quote", "syn 2.0.106", @@ -6138,6 +6474,7 @@ dependencies = [ "futures-sink", "futures-util", "pin-project-lite", + "slab", "tokio", ] @@ -6367,12 +6704,14 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ + "base64 0.22.1", "bitflags 2.9.3", "bytes", "futures-util", "http", "http-body", "iri-string", + "mime", "pin-project-lite", "tower", "tower-layer", @@ -6836,7 +7175,7 @@ version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7df16e474ef958526d1205f6dda359fdfab79d9aa6d54bafcb92dcd07673dca" dependencies = [ - "darling", + "darling 0.20.11", "once_cell", "proc-macro-error2", "proc-macro2", diff --git a/lib/llm/src/discovery/watcher.rs b/lib/llm/src/discovery/watcher.rs index 00412422ad..9dd6f6b688 100644 --- a/lib/llm/src/discovery/watcher.rs +++ b/lib/llm/src/discovery/watcher.rs @@ -3,25 +3,26 @@ use std::sync::Arc; use tokio::sync::mpsc::Sender; +use tokio::sync::Notify; use anyhow::Context as _; -use tokio::sync::{Notify, mpsc::Receiver}; +use futures::StreamExt; use dynamo_runtime::{ DistributedRuntime, + discovery::{DiscoveryEvent, DiscoveryInstance, DiscoveryKey, DiscoveryStream}, pipeline::{ ManyOut, Operator, RouterMode, SegmentSource, ServiceBackend, SingleIn, Source, network::egress::push_router::PushRouter, }, protocols::{EndpointId, annotated::Annotated}, - storage::key_value_store::WatchEvent, }; use crate::{ backend::Backend, entrypoint, kv_router::{KvRouterConfig, PrefillRouter}, - model_card::{self, ModelDeploymentCard}, + model_card::ModelDeploymentCard, model_type::{ModelInput, ModelType}, preprocessor::{OpenAIPreprocessor, PreprocessedEmbeddingRequest, prompt::PromptFormatter}, protocols::{ @@ -99,17 +100,45 @@ impl ModelWatcher { } /// Common watch logic with optional namespace filtering - pub async fn watch(&self, mut events_rx: Receiver, target_namespace: Option<&str>) { + pub async fn watch(&self, mut discovery_stream: DiscoveryStream, target_namespace: Option<&str>) { let global_namespace = target_namespace.is_none_or(is_global_namespace); - while let Some(event) = events_rx.recv().await { + while let Some(result) = discovery_stream.next().await { + let event = match result { + Ok(event) => event, + Err(err) => { + tracing::error!(%err, "Error in discovery stream"); + continue; + } + }; + match event { - WatchEvent::Put(kv) => { - let key = kv.key_str(); - let endpoint_id = match key_extract(key) { - Ok((eid, _)) => eid, - Err(err) => { - tracing::error!(%key, %err, "Failed extracting EndpointId from key. Ignoring instance."); + DiscoveryEvent::Added(instance) => { + // Extract EndpointId, instance_id, and card from the discovery instance + let (endpoint_id, instance_id, mut card) = match &instance { + DiscoveryInstance::ModelCard { + namespace, + component, + endpoint, + instance_id, + .. 
+ } => { + let eid = EndpointId { + namespace: namespace.clone(), + component: component.clone(), + name: endpoint.clone(), + }; + + match instance.deserialize_model_card::() { + Ok(card) => (eid, *instance_id, card), + Err(err) => { + tracing::error!(%err, instance_id, "Failed to deserialize model card"); + continue; + } + } + } + _ => { + tracing::error!("Unexpected discovery instance type (expected ModelCard)"); continue; } }; @@ -127,21 +156,6 @@ impl ModelWatcher { continue; } - let mut card = match serde_json::from_slice::(kv.value()) { - Ok(card) => card, - Err(err) => { - match kv.value_str() { - Ok(value) => { - tracing::error!(%err, value, "Invalid JSON in model card") - } - Err(value_str_err) => { - tracing::error!(original_error = %err, %value_str_err, "Invalid UTF-8 string in model card, expected JSON") - } - } - continue; - } - }; - // If we already have a worker for this model, and the ModelDeploymentCard // cards don't match, alert, and don't add the new instance let can_add = @@ -164,7 +178,10 @@ impl ModelWatcher { continue; } - match self.handle_put(key, &endpoint_id, &mut card).await { + // Use instance_id as the HashMap key (simpler and sufficient since keys are opaque) + let key = format!("{:x}", instance_id); + + match self.handle_put(&key, &endpoint_id, &mut card).await { Ok(()) => { tracing::info!( model_name = card.name(), @@ -183,10 +200,12 @@ impl ModelWatcher { } } } - WatchEvent::Delete(kv) => { - let deleted_key = kv.key_str(); + DiscoveryEvent::Removed(instance_id) => { + // Use instance_id hex as the HashMap key (matches what we saved with) + let key = format!("{:x}", instance_id); + match self - .handle_delete(deleted_key, target_namespace, global_namespace) + .handle_delete(&key, target_namespace, global_namespace) .await { Ok(Some(model_name)) => { @@ -212,6 +231,8 @@ impl ModelWatcher { target_namespace: Option<&str>, is_global_namespace: bool, ) -> anyhow::Result> { + tracing::warn!("DISCOVERY_VALIDATION: handle_delete: key={}", key); + let card = match self.manager.remove_model_card(key) { Some(card) => card, None => { @@ -303,6 +324,8 @@ impl ModelWatcher { endpoint_id: &EndpointId, card: &mut ModelDeploymentCard, ) -> anyhow::Result<()> { + tracing::warn!("DISCOVERY_VALIDATION: handle_put: key={}", key); + card.download_config().await?; let component = self @@ -559,35 +582,37 @@ impl ModelWatcher { /// All the registered ModelDeploymentCard with the EndpointId they are attached to, one per instance async fn all_cards(&self) -> anyhow::Result> { - let store = self.drt.store(); - let Some(card_bucket) = store.get_bucket(model_card::ROOT_PATH).await? 
else { - // no cards - return Ok(vec![]); - }; - let entries = card_bucket.entries().await?; + let discovery = self.drt.discovery_client(); + let instances = discovery.list(DiscoveryKey::AllModelCards).await?; - let mut results = Vec::with_capacity(entries.len()); - for (key, card_bytes) in entries { - let r = match serde_json::from_slice::(&card_bytes) { + let mut results = Vec::with_capacity(instances.len()); + for instance in instances { + match instance.deserialize_model_card::() { Ok(card) => { - let maybe_endpoint_id = - key_extract(&key).map(|(endpoint_id, _instance_id)| endpoint_id); - let endpoint_id = match maybe_endpoint_id { - Ok(eid) => eid, - Err(err) => { - tracing::error!(%err, "Skipping invalid key, not string or not EndpointId"); + // Extract EndpointId from the instance + let endpoint_id = match &instance { + dynamo_runtime::discovery::DiscoveryInstance::ModelCard { + namespace, + component, + endpoint, + .. + } => EndpointId { + namespace: namespace.clone(), + component: component.clone(), + name: endpoint.clone(), + }, + _ => { + tracing::error!("Unexpected discovery instance type (expected ModelCard)"); continue; } }; - (endpoint_id, card) + results.push((endpoint_id, card)); } Err(err) => { - let value = String::from_utf8_lossy(&card_bytes); - tracing::error!(%err, %value, "Invalid JSON in model card"); + tracing::error!(%err, "Failed to deserialize model card"); continue; } - }; - results.push(r); + } } Ok(results) } @@ -612,40 +637,4 @@ impl ModelWatcher { } } -/// The ModelDeploymentCard is published in store with a key like "v1/mdc/dynamo/backend/generate/694d9981145a61ad". -/// Extract the EndpointId and instance_id from that. -fn key_extract(s: &str) -> anyhow::Result<(EndpointId, String)> { - if !s.starts_with(model_card::ROOT_PATH) { - anyhow::bail!("Invalid format: expected model card ROOT_PATH segment in {s}"); - } - let parts: Vec<&str> = s.split('/').collect(); - - // Need at least prefix model_card::ROOT_PATH (2 parts) + namespace, component, name (3 parts) - if parts.len() <= 5 { - anyhow::bail!("Invalid format: not enough path segments in {s}"); - } - let endpoint_id = EndpointId { - namespace: parts[2].to_string(), - component: parts[3].to_string(), - name: parts[4].to_string(), - }; - Ok((endpoint_id, parts[parts.len() - 1].to_string())) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_key_extract() { - let input = format!( - "{}/dynamo/backend/generate/694d9981145a61ad", - model_card::ROOT_PATH - ); - let (endpoint_id, _) = key_extract(&input).unwrap(); - assert_eq!(endpoint_id.namespace, "dynamo"); - assert_eq!(endpoint_id.component, "backend"); - assert_eq!(endpoint_id.name, "generate"); - } -} diff --git a/lib/llm/src/discovery/worker_monitor.rs b/lib/llm/src/discovery/worker_monitor.rs index bc43dd38bf..3e0e6b7031 100644 --- a/lib/llm/src/discovery/worker_monitor.rs +++ b/lib/llm/src/discovery/worker_monitor.rs @@ -3,12 +3,12 @@ use crate::kv_router::KV_METRICS_SUBJECT; use crate::kv_router::scoring::LoadEvent; -use crate::model_card::{self, ModelDeploymentCard}; +use crate::model_card::ModelDeploymentCard; use dynamo_runtime::component::Client; +use dynamo_runtime::discovery::{watch_and_extract_field, DiscoveryKey}; use dynamo_runtime::pipeline::{WorkerLoadMonitor, async_trait}; use dynamo_runtime::traits::DistributedRuntimeProvider; use dynamo_runtime::traits::events::EventSubscriber; -use dynamo_runtime::utils::typed_prefix_watcher::{key_extractors, watch_prefix_with_extraction}; use std::collections::HashMap; use 
std::sync::{Arc, RwLock}; use tokio_stream::StreamExt; @@ -79,21 +79,13 @@ impl WorkerLoadMonitor for KvWorkerMonitor { let endpoint = &self.client.endpoint; let component = endpoint.component(); - let Some(etcd_client) = component.drt().etcd_client() else { - // Static mode, no monitoring needed - return Ok(()); - }; - - // Watch for runtime config updates from model deployment cards - let runtime_configs_watcher = watch_prefix_with_extraction( - etcd_client, - model_card::ROOT_PATH, - key_extractors::lease_id, - |card: ModelDeploymentCard| Some(card.runtime_config), - component.drt().child_token(), - ) - .await?; - let mut config_events_rx = runtime_configs_watcher.receiver(); + // Watch for runtime config updates from model deployment cards via discovery interface + let discovery = component.drt().discovery_client(); + let discovery_stream = discovery.list_and_watch(DiscoveryKey::AllModelCards).await?; + let mut config_events_rx = watch_and_extract_field( + discovery_stream, + |card: ModelDeploymentCard| card.runtime_config, + ); // Subscribe to KV metrics events let mut kv_metrics_rx = component.namespace().subscribe(KV_METRICS_SUBJECT).await?; @@ -117,6 +109,21 @@ impl WorkerLoadMonitor for KvWorkerMonitor { // Handle runtime config updates _ = config_events_rx.changed() => { let runtime_configs = config_events_rx.borrow().clone(); + + tracing::warn!( + worker_count = runtime_configs.len(), + "DISCOVERY: Runtime config updates received" + ); + + // Log detailed config state for comparison + let config_details: Vec<(u64, Option)> = runtime_configs + .iter() + .map(|(&lease_id, config)| (lease_id, config.total_kv_blocks)) + .collect(); + tracing::warn!( + "DISCOVERY_VALIDATION: config_state: configs={:?}", + config_details + ); let mut states = worker_load_states.write().unwrap(); states.retain(|lease_id, _| runtime_configs.contains_key(lease_id)); diff --git a/lib/llm/src/entrypoint/input/common.rs b/lib/llm/src/entrypoint/input/common.rs index df382b3b62..feac5d442a 100644 --- a/lib/llm/src/entrypoint/input/common.rs +++ b/lib/llm/src/entrypoint/input/common.rs @@ -10,7 +10,7 @@ use crate::{ entrypoint::{self, EngineConfig}, kv_router::{KvPushRouter, KvRouter, PrefillRouter}, migration::Migration, - model_card::{self, ModelDeploymentCard}, + model_card::ModelDeploymentCard, preprocessor::{OpenAIPreprocessor, prompt::PromptFormatter}, protocols::common::llm_backend::{BackendOutput, LLMEngineOutput, PreprocessedRequest}, request_template::RequestTemplate, @@ -62,19 +62,19 @@ pub async fn prepare_engine( EngineConfig::Dynamic(local_model) => { let distributed_runtime = DistributedRuntime::from_settings(runtime.clone()).await?; - let store = Arc::new(distributed_runtime.store().clone()); let model_manager = Arc::new(ModelManager::new()); let watch_obj = Arc::new(ModelWatcher::new( - distributed_runtime, + distributed_runtime.clone(), model_manager.clone(), dynamo_runtime::pipeline::RouterMode::RoundRobin, None, None, )); - let (_, receiver) = store.watch(model_card::ROOT_PATH, None, runtime.primary_token()); + let discovery = distributed_runtime.discovery_client(); + let discovery_stream = discovery.list_and_watch(dynamo_runtime::discovery::DiscoveryKey::AllModelCards).await?; let inner_watch_obj = watch_obj.clone(); let _watcher_task = tokio::spawn(async move { - inner_watch_obj.watch(receiver, None).await; + inner_watch_obj.watch(discovery_stream, None).await; }); tracing::info!("Waiting for remote model.."); diff --git a/lib/llm/src/entrypoint/input/grpc.rs 
b/lib/llm/src/entrypoint/input/grpc.rs index 8693c4d1d1..e1653e7e15 100644 --- a/lib/llm/src/entrypoint/input/grpc.rs +++ b/lib/llm/src/entrypoint/input/grpc.rs @@ -9,14 +9,13 @@ use crate::{ entrypoint::{self, EngineConfig, input::common}, grpc::service::kserve, kv_router::KvRouterConfig, - model_card, namespace::is_global_namespace, types::openai::{ chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse}, completions::{NvCreateCompletionRequest, NvCreateCompletionResponse}, }, }; -use dynamo_runtime::{DistributedRuntime, Runtime, storage::key_value_store::KeyValueStoreManager}; +use dynamo_runtime::{DistributedRuntime, Runtime}; use dynamo_runtime::{distributed::DistributedConfig, pipeline::RouterMode}; /// Build and run an KServe gRPC service @@ -28,7 +27,6 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul let grpc_service = match engine_config { EngineConfig::Dynamic(_) => { let distributed_runtime = DistributedRuntime::from_settings(runtime.clone()).await?; - let store = Arc::new(distributed_runtime.store().clone()); let grpc_service = grpc_service_builder.build()?; let router_config = engine_config.local_model().router_config(); // Listen for models registering themselves, add them to gRPC service @@ -41,7 +39,6 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul run_watcher( distributed_runtime, grpc_service.state().manager_clone(), - store, router_config.router_mode, Some(router_config.kv_router_config), router_config.busy_threshold, @@ -164,34 +161,32 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul /// Spawns a task that watches for new models in store, /// and registers them with the ModelManager so that the HTTP service can use them. -#[allow(clippy::too_many_arguments)] async fn run_watcher( runtime: DistributedRuntime, model_manager: Arc, - store: Arc, router_mode: RouterMode, kv_router_config: Option, busy_threshold: Option, target_namespace: Option, ) -> anyhow::Result<()> { - let cancellation_token = runtime.primary_token(); let watch_obj = ModelWatcher::new( - runtime, + runtime.clone(), model_manager, router_mode, kv_router_config, busy_threshold, ); tracing::debug!("Waiting for remote model"); - let (_, receiver) = store.watch(model_card::ROOT_PATH, None, cancellation_token); + let discovery = runtime.discovery_client(); + let discovery_stream = discovery.list_and_watch(dynamo_runtime::discovery::DiscoveryKey::AllModelCards).await?; // [gluo NOTE] This is different from http::run_watcher where it alters the HTTP service // endpoint being exposed, gRPC doesn't have the same concept as the KServe service // only has one kind of inference endpoint. 
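As context for the watcher task spawned just below: `list_and_watch` returns a `DiscoveryStream` that the `ModelWatcher` drains with `StreamExt::next`. The following is a minimal, illustrative sketch (not part of the patch) of that consumption pattern; it assumes the `DiscoveryStream`, `DiscoveryEvent`, and `DiscoveryInstance` types from `dynamo_runtime::discovery` introduced elsewhere in this diff, and `drain_discovery` is a hypothetical helper rather than an API of the crate.

use dynamo_runtime::discovery::{DiscoveryEvent, DiscoveryInstance, DiscoveryStream};
use futures::StreamExt;

// Hypothetical consumer: drain a discovery stream and react to Added/Removed
// events. This mirrors the shape of ModelWatcher::watch in this diff, with the
// model-card handling replaced by logging.
async fn drain_discovery(mut stream: DiscoveryStream) {
    while let Some(result) = stream.next().await {
        // Each stream item is a Result; errors are logged and skipped, as the
        // watcher does, so a transient failure does not end the loop.
        let event = match result {
            Ok(event) => event,
            Err(err) => {
                tracing::error!(%err, "error in discovery stream");
                continue;
            }
        };
        match event {
            DiscoveryEvent::Added(DiscoveryInstance::Endpoint(instance)) => {
                tracing::info!(instance_id = instance.instance_id, "endpoint added");
            }
            DiscoveryEvent::Added(_) => {
                tracing::debug!("non-endpoint instance added (e.g. a model card)");
            }
            DiscoveryEvent::Removed(instance_id) => {
                tracing::info!(instance_id, "instance removed");
            }
        }
    }
}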
- // Pass the sender to the watcher + // Pass the discovery stream to the watcher let _watcher_task = tokio::spawn(async move { - watch_obj.watch(receiver, target_namespace.as_deref()).await; + watch_obj.watch(discovery_stream, target_namespace.as_deref()).await; }); Ok(()) diff --git a/lib/llm/src/entrypoint/input/http.rs b/lib/llm/src/entrypoint/input/http.rs index 88b4e3e979..e95c7eef36 100644 --- a/lib/llm/src/entrypoint/input/http.rs +++ b/lib/llm/src/entrypoint/input/http.rs @@ -10,14 +10,12 @@ use crate::{ entrypoint::{self, EngineConfig, input::common}, http::service::service_v2::{self, HttpService}, kv_router::KvRouterConfig, - model_card, namespace::is_global_namespace, types::openai::{ chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse}, completions::{NvCreateCompletionRequest, NvCreateCompletionResponse}, }, }; -use dynamo_runtime::storage::key_value_store::KeyValueStoreManager; use dynamo_runtime::{DistributedRuntime, Runtime}; use dynamo_runtime::{distributed::DistributedConfig, pipeline::RouterMode}; @@ -67,7 +65,6 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul // This allows the /health endpoint to query store for active instances http_service_builder = http_service_builder.store(distributed_runtime.store().clone()); let http_service = http_service_builder.build()?; - let store = Arc::new(distributed_runtime.store().clone()); let router_config = engine_config.local_model().router_config(); // Listen for models registering themselves, add them to HTTP service @@ -82,7 +79,6 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul run_watcher( distributed_runtime, http_service.state().manager_clone(), - store, router_config.router_mode, Some(router_config.kv_router_config), router_config.busy_threshold, @@ -273,7 +269,6 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul async fn run_watcher( runtime: DistributedRuntime, model_manager: Arc, - store: Arc, router_mode: RouterMode, kv_router_config: Option, busy_threshold: Option, @@ -281,16 +276,16 @@ async fn run_watcher( http_service: Arc, metrics: Arc, ) -> anyhow::Result<()> { - let cancellation_token = runtime.primary_token(); let mut watch_obj = ModelWatcher::new( - runtime, + runtime.clone(), model_manager, router_mode, kv_router_config, busy_threshold, ); tracing::debug!("Waiting for remote model"); - let (_, receiver) = store.watch(model_card::ROOT_PATH, None, cancellation_token); + let discovery = runtime.discovery_client(); + let discovery_stream = discovery.list_and_watch(dynamo_runtime::discovery::DiscoveryKey::AllModelCards).await?; // Create a channel to receive model type updates let (tx, mut rx) = tokio::sync::mpsc::channel(32); @@ -304,9 +299,9 @@ async fn run_watcher( } }); - // Pass the sender to the watcher + // Pass the discovery stream to the watcher let _watcher_task = tokio::spawn(async move { - watch_obj.watch(receiver, target_namespace.as_deref()).await; + watch_obj.watch(discovery_stream, target_namespace.as_deref()).await; }); Ok(()) diff --git a/lib/llm/src/http/service/clear_kv_blocks.rs b/lib/llm/src/http/service/clear_kv_blocks.rs index ee1cc3bc3e..b734b60480 100644 --- a/lib/llm/src/http/service/clear_kv_blocks.rs +++ b/lib/llm/src/http/service/clear_kv_blocks.rs @@ -6,7 +6,7 @@ use axum::{http::Method, response::IntoResponse, routing::post, Json, Router}; use serde_json::json; use std::sync::Arc; -use dynamo_runtime::{pipeline::PushRouter, stream::StreamExt}; +use 
dynamo_runtime::{discovery::DiscoveryKey, pipeline::PushRouter, stream::StreamExt}; pub const CLEAR_KV_ENDPOINT: &str = "clear_kv_blocks"; @@ -150,7 +150,14 @@ async fn clear_kv_blocks_handler( } }; - let instances = match component_obj.list_instances().await { + let discovery_client = distributed.discovery_client(); + let discovery_key = DiscoveryKey::Endpoint { + namespace: namespace.clone(), + component: component.clone(), + endpoint: CLEAR_KV_ENDPOINT.to_string(), + }; + + let discovery_instances = match discovery_client.list(discovery_key).await { Ok(instances) => instances, Err(e) => { add_worker_result( @@ -165,11 +172,11 @@ async fn clear_kv_blocks_handler( } }; - if instances.is_empty() { + if discovery_instances.is_empty() { add_worker_result( false, entry_name, - "No instances found for worker group", + "No instances found for clear_kv_blocks endpoint", namespace, component, None, @@ -177,30 +184,12 @@ async fn clear_kv_blocks_handler( continue; } - let instances_filtered = instances - .clone() + let instances_filtered: Vec = discovery_instances .into_iter() - .filter(|instance| instance.endpoint == CLEAR_KV_ENDPOINT) - .collect::>(); - - if instances_filtered.is_empty() { - let found_endpoints: Vec = instances - .iter() - .map(|instance| instance.endpoint.clone()) - .collect(); - add_worker_result( - false, - entry_name, - &format!( - "Worker group doesn't support clear_kv_blocks. Supported endpoints: {}", - found_endpoints.join(", ") - ), - namespace, - component, - None, - ); - continue; - } + .map(|di| match di { + dynamo_runtime::discovery::DiscoveryInstance::Endpoint(instance) => instance, + }) + .collect(); for instance in &instances_filtered { let instance_name = format!("{}-instance-{}", entry.name, instance.id()); diff --git a/lib/llm/src/http/service/health.rs b/lib/llm/src/http/service/health.rs index 5f007a9bd4..5e4e9deb5f 100644 --- a/lib/llm/src/http/service/health.rs +++ b/lib/llm/src/http/service/health.rs @@ -52,14 +52,13 @@ async fn live_handler( async fn health_handler( axum::extract::State(state): axum::extract::State>, ) -> impl IntoResponse { - let instances = match list_all_instances(state.store()).await { + let instances = match list_all_instances(state.discovery_client()).await { Ok(instances) => instances, Err(err) => { - tracing::warn!(%err, "Failed to fetch instances from store"); + tracing::warn!(%err, "Failed to fetch instances from discovery client"); vec![] } }; - let mut endpoints: Vec = instances .iter() .map(|instance| instance.endpoint_id().as_url()) diff --git a/lib/llm/src/http/service/service_v2.rs b/lib/llm/src/http/service/service_v2.rs index ae18a67bdb..40d5007fbc 100644 --- a/lib/llm/src/http/service/service_v2.rs +++ b/lib/llm/src/http/service/service_v2.rs @@ -18,6 +18,7 @@ use crate::request_template::RequestTemplate; use anyhow::Result; use axum_server::tls_rustls::RustlsConfig; use derive_builder::Builder; +use dynamo_runtime::discovery::{DiscoveryClient, KVStoreDiscoveryClient}; use dynamo_runtime::logging::make_request_span; use dynamo_runtime::metrics::prometheus_names::name_prefix; use dynamo_runtime::storage::key_value_store::KeyValueStoreManager; @@ -31,6 +32,7 @@ pub struct State { metrics: Arc, manager: Arc, store: KeyValueStoreManager, + discovery_client: Arc, flags: StateFlags, } @@ -72,10 +74,21 @@ impl StateFlags { impl State { pub fn new(manager: Arc, store: KeyValueStoreManager) -> Self { + // Initialize discovery client backed by KV store + // Create a cancellation token for the discovery client's watch streams + 
let discovery_client = { + let cancel_token = CancellationToken::new(); + Arc::new(KVStoreDiscoveryClient::new( + store.clone(), + cancel_token, + )) as Arc + }; + Self { manager, metrics: Arc::new(Metrics::default()), store, + discovery_client, flags: StateFlags { chat_endpoints_enabled: AtomicBool::new(false), cmpl_endpoints_enabled: AtomicBool::new(false), @@ -102,6 +115,10 @@ impl State { &self.store } + pub fn discovery_client(&self) -> Arc { + self.discovery_client.clone() + } + // TODO pub fn sse_keep_alive(&self) -> Option { None diff --git a/lib/llm/src/kv_router.rs b/lib/llm/src/kv_router.rs index 63b3c0c8c0..26ca4a4c49 100644 --- a/lib/llm/src/kv_router.rs +++ b/lib/llm/src/kv_router.rs @@ -9,13 +9,13 @@ use anyhow::Result; use derive_builder::Builder; use dynamo_runtime::{ component::{Component, InstanceSource}, + discovery::{watch_and_extract_field, DiscoveryKey}, pipeline::{ AsyncEngine, AsyncEngineContextProvider, Error, ManyOut, PushRouter, ResponseStream, SingleIn, async_trait, }, - prelude::*, protocols::annotated::Annotated, - utils::typed_prefix_watcher::{key_extractors, watch_prefix_with_extraction}, + traits::DistributedRuntimeProvider, }; use futures::stream::{self, StreamExt}; use serde::{Deserialize, Serialize}; @@ -47,7 +47,7 @@ use crate::{ subscriber::start_kv_router_background, }, local_model::runtime_config::ModelRuntimeConfig, - model_card::{self, ModelDeploymentCard}, + model_card::ModelDeploymentCard, preprocessor::PreprocessedRequest, protocols::common::llm_backend::LLMEngineOutput, }; @@ -235,22 +235,18 @@ impl KvRouter { } }; - // Create runtime config watcher using the generic etcd watcher - // TODO: Migrate to discovery_client() once it exposes kv_get_and_watch_prefix functionality - let etcd_client = component - .drt() - .etcd_client() - .expect("Cannot KV route without etcd client"); - - let runtime_configs_watcher = watch_prefix_with_extraction( - etcd_client, - &format!("{}/{}", model_card::ROOT_PATH, component.path()), - key_extractors::lease_id, - |card: ModelDeploymentCard| Some(card.runtime_config), - cancellation_token.clone(), - ) - .await?; - let runtime_configs_rx = runtime_configs_watcher.receiver(); + // Watch for runtime config updates via discovery interface + let discovery = component.drt().discovery_client(); + let discovery_key = DiscoveryKey::EndpointModelCards { + namespace: component.namespace().name().to_string(), + component: component.name().to_string(), + endpoint: "generate".to_string(), + }; + let discovery_stream = discovery.list_and_watch(discovery_key).await?; + let runtime_configs_rx = watch_and_extract_field( + discovery_stream, + |card: ModelDeploymentCard| card.runtime_config, + ); let indexer = if kv_router_config.overlap_score_weight == 0.0 { // When overlap_score_weight is zero, we don't need to track prefixes diff --git a/lib/llm/src/kv_router/scheduler.rs b/lib/llm/src/kv_router/scheduler.rs index 9a90b49116..7e9addc436 100644 --- a/lib/llm/src/kv_router/scheduler.rs +++ b/lib/llm/src/kv_router/scheduler.rs @@ -162,6 +162,16 @@ impl KvScheduler { let new_instances = instances_monitor_rx.borrow_and_update().clone(); let new_configs = configs_monitor_rx.borrow_and_update().clone(); + // Log config state for comparison + let config_details: Vec<(u64, Option)> = new_configs + .iter() + .map(|(&worker_id, config)| (worker_id, config.total_kv_blocks)) + .collect(); + tracing::warn!( + "DISCOVERY_VALIDATION: scheduler_config_state: configs={:?}", + config_details + ); + // Build the new workers_with_configs map let mut 
new_workers_with_configs = HashMap::new(); for instance in &new_instances { diff --git a/lib/llm/src/kv_router/subscriber.rs b/lib/llm/src/kv_router/subscriber.rs index dbdc4da69a..7051327be0 100644 --- a/lib/llm/src/kv_router/subscriber.rs +++ b/lib/llm/src/kv_router/subscriber.rs @@ -8,6 +8,7 @@ use std::{collections::HashSet, time::Duration}; use anyhow::Result; use dynamo_runtime::{ component::Component, + discovery::DiscoveryKey, prelude::*, traits::events::EventPublisher, transports::{ @@ -15,6 +16,7 @@ use dynamo_runtime::{ nats::{NatsQueue, Slug}, }, }; +use futures::StreamExt; use tokio::sync::{mpsc, oneshot}; use tokio_util::sync::CancellationToken; @@ -248,10 +250,13 @@ pub async fn start_kv_router_background( // Get the generate endpoint and watch for instance deletions let generate_endpoint = component.endpoint("generate"); - let (_instance_prefix, _instance_watcher, mut instance_event_rx) = etcd_client - .kv_get_and_watch_prefix(generate_endpoint.etcd_root()) - .await? - .dissolve(); + let discovery_client = component.drt().discovery_client(); + let discovery_key = DiscoveryKey::Endpoint { + namespace: component.namespace().name().to_string(), + component: component.name().to_string(), + endpoint: "generate".to_string(), + }; + let mut instance_event_stream = discovery_client.list_and_watch(discovery_key).await?; // Get instances_rx for tracking current workers let client = generate_endpoint.client().await?; @@ -299,25 +304,21 @@ pub async fn start_kv_router_background( } // Handle generate endpoint instance deletion events - Some(event) = instance_event_rx.recv() => { - let WatchEvent::Delete(kv) = event else { + Some(discovery_event_result) = instance_event_stream.next() => { + let Ok(discovery_event) = discovery_event_result else { continue; }; - let key = String::from_utf8_lossy(kv.key()); - - let Some(worker_id_str) = key.split(&['/', ':'][..]).next_back() else { - tracing::warn!("Could not extract worker ID from instance key: {key}"); + let dynamo_runtime::discovery::DiscoveryEvent::Removed(worker_id) = discovery_event else { continue; }; - // Parse as hexadecimal (base 16) - let Ok(worker_id) = u64::from_str_radix(worker_id_str, 16) else { - tracing::warn!("Could not parse worker ID from instance key: {key}"); - continue; - }; + tracing::warn!( + worker_id = worker_id, + "DISCOVERY: Generate endpoint instance removed, removing worker" + ); - tracing::info!("Generate endpoint instance deleted, removing worker {worker_id}"); + tracing::warn!("DISCOVERY_VALIDATION: remove_worker_tx: worker_id={}", worker_id); if let Err(e) = remove_worker_tx.send(worker_id).await { tracing::warn!("Failed to send worker removal for worker {worker_id}: {e}"); } diff --git a/lib/llm/src/local_model.rs b/lib/llm/src/local_model.rs index de869047c5..a307449397 100644 --- a/lib/llm/src/local_model.rs +++ b/lib/llm/src/local_model.rs @@ -5,14 +5,14 @@ use std::fs; use std::path::{Path, PathBuf}; use dynamo_runtime::component::Endpoint; +use dynamo_runtime::discovery::DiscoverySpec; use dynamo_runtime::protocols::EndpointId; use dynamo_runtime::slug::Slug; -use dynamo_runtime::storage::key_value_store::Key; use dynamo_runtime::traits::DistributedRuntimeProvider; use crate::entrypoint::RouterConfig; use crate::mocker::protocols::MockEngineArgs; -use crate::model_card::{self, ModelDeploymentCard}; +use crate::model_card::ModelDeploymentCard; use crate::model_type::{ModelInput, ModelType}; use crate::request_template::RequestTemplate; @@ -413,13 +413,24 @@ impl LocalModel { self.card.model_type = 
model_type; self.card.model_input = model_input; - // Publish the Model Deployment Card to KV store - let card_store = endpoint.drt().store(); - let key = Key::from_raw(endpoint.unique_path(card_store.connection_id())); - - let _outcome = card_store - .publish(model_card::ROOT_PATH, None, &key, &mut self.card) - .await?; + // Register the Model Deployment Card via discovery interface + let discovery = endpoint.drt().discovery_client(); + let spec = DiscoverySpec::from_model_card( + endpoint.component().namespace().name().to_string(), + endpoint.component().name().to_string(), + endpoint.name().to_string(), + &self.card, + )?; + let _instance = discovery.register(spec).await?; + + tracing::warn!( + "DISCOVERY_VALIDATION: model_card_registered: namespace={}, component={}, endpoint={}, model_name={}", + endpoint.component().namespace().name(), + endpoint.component().name(), + endpoint.name(), + self.card.name() + ); + Ok(()) } } diff --git a/lib/llm/tests/http_metrics.rs b/lib/llm/tests/http_metrics.rs index 36a34be2f1..e3bd1bc5b4 100644 --- a/lib/llm/tests/http_metrics.rs +++ b/lib/llm/tests/http_metrics.rs @@ -295,8 +295,10 @@ mod integration_tests { use super::*; use dynamo_llm::{ discovery::ModelWatcher, engines::make_echo_engine, entrypoint::EngineConfig, - local_model::LocalModelBuilder, model_card, + local_model::LocalModelBuilder, }; + use dynamo_runtime::discovery::DiscoveryKey; + use dynamo_runtime::traits::DistributedRuntimeProvider; use dynamo_runtime::DistributedRuntime; use dynamo_runtime::pipeline::RouterMode; use std::sync::Arc; @@ -333,7 +335,7 @@ mod integration_tests { .build() .unwrap(); - // Set up model watcher to discover models from etcd (like production) + // Set up model watcher to discover models via discovery interface (like production) // This is crucial for the polling task to find model entries let model_watcher = ModelWatcher::new( @@ -343,17 +345,16 @@ mod integration_tests { None, None, ); - // Start watching etcd for model registrations - let store = Arc::new(distributed_runtime.store().clone()); - let (_, receiver) = store.watch( - model_card::ROOT_PATH, - None, - distributed_runtime.primary_token(), - ); + // Start watching for model registrations via discovery interface + let discovery = distributed_runtime.discovery_client(); + let discovery_stream = discovery + .list_and_watch(DiscoveryKey::AllModelCards) + .await + .unwrap(); - // Spawn watcher task to discover models from etcd + // Spawn watcher task to discover models let _watcher_task = tokio::spawn(async move { - model_watcher.watch(receiver, None).await; + model_watcher.watch(discovery_stream, None).await; }); // Set up the engine following the StaticFull pattern from http.rs diff --git a/lib/runtime/Cargo.toml b/lib/runtime/Cargo.toml index cd774ba16e..b11a570b4d 100644 --- a/lib/runtime/Cargo.toml +++ b/lib/runtime/Cargo.toml @@ -39,6 +39,7 @@ humantime = { workspace = true } parking_lot = { workspace = true } prometheus = { workspace = true } rand = { workspace = true } +reqwest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } tokio = { workspace = true } @@ -74,6 +75,11 @@ regex = { version = "1" } socket2 = { version = "0.5.8" } tokio-rayon = { version = "2.1" } +# Kubernetes discovery backend +kube = { version = "2.0.1", default-features = false, features = ["runtime", "derive", "client", "rustls-tls", "aws-lc-rs"] } +k8s-openapi = { version = "0.26.0", features = ["latest", "schemars"] } +schemars = { version = "1" } + [dev-dependencies] assert_matches = 
{ version = "1.5.0" } criterion = { version = "0.5", features = ["async_tokio"] } diff --git a/lib/runtime/src/component.rs b/lib/runtime/src/component.rs index a97193928a..f695b67f0f 100644 --- a/lib/runtime/src/component.rs +++ b/lib/runtime/src/component.rs @@ -75,7 +75,7 @@ pub use client::{Client, InstanceSource}; /// An instance is namespace+component+endpoint+lease_id and must be unique. pub const INSTANCE_ROOT_PATH: &str = "v1/instances"; -#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)] #[serde(rename_all = "snake_case")] pub enum TransportType { NatsTcp(String), @@ -278,21 +278,24 @@ impl Component { } pub async fn list_instances(&self) -> anyhow::Result> { - let client = self.drt.store(); - let Some(bucket) = client.get_bucket(&self.instance_root()).await? else { - return Ok(vec![]); + let discovery_client = self.drt.discovery_client(); + + let discovery_key = crate::discovery::DiscoveryKey::ComponentEndpoints { + namespace: self.namespace.name(), + component: self.name.clone(), }; - let entries = bucket.entries().await?; - let mut instances = Vec::with_capacity(entries.len()); - for (name, bytes) in entries.into_iter() { - let val = match serde_json::from_slice::(&bytes) { - Ok(val) => val, - Err(err) => { - anyhow::bail!("Error converting storage response to Instance: {err}. {name}",); - } - }; - instances.push(val); - } + + let discovery_instances = discovery_client.list(discovery_key).await?; + + // Extract Instance from DiscoveryInstance::Endpoint wrapper + let mut instances: Vec = discovery_instances + .into_iter() + .filter_map(|di| match di { + crate::discovery::DiscoveryInstance::Endpoint(instance) => Some(instance), + _ => None, // Ignore all other variants (ModelCard, etc.) 
+ }) + .collect(); + instances.sort(); Ok(instances) } diff --git a/lib/runtime/src/component/client.rs b/lib/runtime/src/component/client.rs index 987c5002d8..6c7734af02 100644 --- a/lib/runtime/src/component/client.rs +++ b/lib/runtime/src/component/client.rs @@ -6,14 +6,12 @@ use crate::pipeline::{ SingleIn, }; use arc_swap::ArcSwap; +use futures::StreamExt; use std::collections::HashMap; use std::sync::Arc; use tokio::net::unix::pipe::Receiver; -use crate::{ - pipeline::async_trait, - transports::etcd::{Client as EtcdClient, WatchEvent}, -}; +use crate::{pipeline::async_trait, transports::etcd::Client as EtcdClient}; use super::*; @@ -67,23 +65,21 @@ impl Client { // Client with auto-discover instances using etcd pub(crate) async fn new_dynamic(endpoint: Endpoint) -> Result { + tracing::debug!("Client::new_dynamic: Creating dynamic client for endpoint: {}", endpoint.path()); const INSTANCE_REFRESH_PERIOD: Duration = Duration::from_secs(1); - // create live endpoint watcher - let Some(etcd_client) = &endpoint.component.drt.etcd_client else { - anyhow::bail!("Attempt to create a dynamic client on a static endpoint"); - }; - - let instance_source = - Self::get_or_create_dynamic_instance_source(etcd_client, &endpoint).await?; + let instance_source = Self::get_or_create_dynamic_instance_source(&endpoint).await?; + tracing::debug!("Client::new_dynamic: Got instance source for endpoint: {}", endpoint.path()); let client = Client { - endpoint, + endpoint: endpoint.clone(), instance_source: instance_source.clone(), instance_avail: Arc::new(ArcSwap::from(Arc::new(vec![]))), instance_free: Arc::new(ArcSwap::from(Arc::new(vec![]))), }; + tracing::debug!("Client::new_dynamic: Starting instance source monitor for endpoint: {}", endpoint.path()); client.monitor_instance_source(); + tracing::debug!("Client::new_dynamic: Successfully created dynamic client for endpoint: {}", endpoint.path()); Ok(client) } @@ -118,17 +114,47 @@ impl Client { /// Wait for at least one Instance to be available for this Endpoint pub async fn wait_for_instances(&self) -> Result> { + tracing::debug!( + "wait_for_instances: Starting wait for endpoint: {}", + self.endpoint.path() + ); let mut instances: Vec = vec![]; if let InstanceSource::Dynamic(mut rx) = self.instance_source.as_ref().clone() { // wait for there to be 1 or more endpoints + let mut iteration = 0; loop { instances = rx.borrow_and_update().to_vec(); + tracing::debug!( + "wait_for_instances: iteration={}, current_instance_count={}, endpoint={}", + iteration, + instances.len(), + self.endpoint.path() + ); if instances.is_empty() { + tracing::debug!( + "wait_for_instances: No instances yet, waiting for change notification for endpoint: {}", + self.endpoint.path() + ); rx.changed().await?; + tracing::debug!( + "wait_for_instances: Change notification received for endpoint: {}", + self.endpoint.path() + ); } else { + tracing::info!( + "wait_for_instances: Found {} instance(s) for endpoint: {}", + instances.len(), + self.endpoint.path() + ); break; } + iteration += 1; } + } else { + tracing::debug!( + "wait_for_instances: Static instance source, no dynamic discovery for endpoint: {}", + self.endpoint.path() + ); } Ok(instances) } @@ -164,14 +190,17 @@ impl Client { fn monitor_instance_source(&self) { let cancel_token = self.endpoint.drt().primary_token(); let client = self.clone(); + let endpoint_path = self.endpoint.path(); + tracing::debug!("monitor_instance_source: Starting monitor for endpoint: {}", endpoint_path); tokio::task::spawn(async move { let mut rx = 
match client.instance_source.as_ref() { InstanceSource::Static => { - tracing::error!("Static instance source is not watchable"); + tracing::error!("monitor_instance_source: Static instance source is not watchable"); return; } InstanceSource::Dynamic(rx) => rx.clone(), }; + let mut iteration = 0; while !cancel_token.is_cancelled() { let instance_ids: Vec = rx .borrow_and_update() @@ -179,107 +208,177 @@ impl Client { .map(|instance| instance.id()) .collect(); + tracing::debug!( + "monitor_instance_source: iteration={}, instance_count={}, instance_ids={:?}, endpoint={}", + iteration, + instance_ids.len(), + instance_ids, + endpoint_path + ); + // TODO: this resets both tracked available and free instances client.instance_avail.store(Arc::new(instance_ids.clone())); - client.instance_free.store(Arc::new(instance_ids)); + client.instance_free.store(Arc::new(instance_ids.clone())); - tracing::debug!("instance source updated"); + tracing::warn!( + "DISCOVERY_VALIDATION: endpoint={}, instance_avail={:?}, instance_free={:?}", + endpoint_path, + instance_ids, + instance_ids + ); + + tracing::debug!("monitor_instance_source: instance source updated, endpoint={}", endpoint_path); if let Err(err) = rx.changed().await { - tracing::error!("The Sender is dropped: {}", err); + tracing::error!("monitor_instance_source: The Sender is dropped: {}, endpoint={}", err, endpoint_path); cancel_token.cancel(); } + iteration += 1; } + tracing::debug!("monitor_instance_source: Monitor loop exiting for endpoint: {}", endpoint_path); }); } async fn get_or_create_dynamic_instance_source( - etcd_client: &EtcdClient, endpoint: &Endpoint, ) -> Result> { let drt = endpoint.drt(); let instance_sources = drt.instance_sources(); let mut instance_sources = instance_sources.lock().await; + tracing::debug!( + "get_or_create_dynamic_instance_source: Checking cache for endpoint: {}", + endpoint.path() + ); + if let Some(instance_source) = instance_sources.get(endpoint) { if let Some(instance_source) = instance_source.upgrade() { + tracing::debug!( + "get_or_create_dynamic_instance_source: Found cached instance source for endpoint: {}", + endpoint.path() + ); return Ok(instance_source); } else { + tracing::debug!( + "get_or_create_dynamic_instance_source: Cached instance source was dropped, removing for endpoint: {}", + endpoint.path() + ); instance_sources.remove(endpoint); } } - let prefix_watcher = etcd_client - .kv_get_and_watch_prefix(endpoint.etcd_root()) - .await?; + tracing::debug!( + "get_or_create_dynamic_instance_source: Creating new instance source for endpoint: {}", + endpoint.path() + ); + + let discovery_client = drt.discovery_client(); + let discovery_key = crate::discovery::DiscoveryKey::Endpoint { + namespace: endpoint.component.namespace.name.clone(), + component: endpoint.component.name.clone(), + endpoint: endpoint.name.clone(), + }; + + tracing::debug!( + "get_or_create_dynamic_instance_source: Calling discovery_client.list_and_watch for key: {:?}", + discovery_key + ); - let (prefix, _watcher, mut kv_event_rx) = prefix_watcher.dissolve(); + let mut discovery_stream = discovery_client.list_and_watch(discovery_key.clone()).await?; + + tracing::debug!( + "get_or_create_dynamic_instance_source: Got discovery stream for key: {:?}", + discovery_key + ); let (watch_tx, watch_rx) = tokio::sync::watch::channel(vec![]); let secondary = endpoint.component.drt.runtime.secondary().clone(); - // this task should be included in the registry - // currently this is created once per client, but this object/task should only be 
instantiated - // once per worker/instance secondary.spawn(async move { - tracing::debug!("Starting endpoint watcher for prefix: {}", prefix); - let mut map = HashMap::new(); + tracing::debug!("endpoint_watcher: Starting for discovery key: {:?}", discovery_key); + let mut map: HashMap = HashMap::new(); + let mut event_count = 0; loop { - let kv_event = tokio::select! { + let discovery_event = tokio::select! { _ = watch_tx.closed() => { - tracing::debug!("all watchers have closed; shutting down endpoint watcher for prefix: {prefix}"); + tracing::debug!("endpoint_watcher: all watchers have closed; shutting down for discovery key: {:?}", discovery_key); break; } - kv_event = kv_event_rx.recv() => { - match kv_event { - Some(kv_event) => kv_event, + discovery_event = discovery_stream.next() => { + tracing::debug!("endpoint_watcher: Received stream event for discovery key: {:?}", discovery_key); + match discovery_event { + Some(Ok(event)) => { + tracing::debug!("endpoint_watcher: Got Ok event: {:?}", event); + event + }, + Some(Err(e)) => { + tracing::error!("endpoint_watcher: discovery stream error: {}; shutting down for discovery key: {:?}", e, discovery_key); + break; + } None => { - tracing::debug!("watch stream has closed; shutting down endpoint watcher for prefix: {prefix}"); + tracing::debug!("endpoint_watcher: watch stream has closed; shutting down for discovery key: {:?}", discovery_key); break; } } } }; - match kv_event { - WatchEvent::Put(kv) => { - let key = String::from_utf8(kv.key().to_vec()); - let val = serde_json::from_slice::(kv.value()); - if let (Ok(key), Ok(val)) = (key, val) { - map.insert(key.clone(), val); - } else { - tracing::error!("Unable to parse put endpoint event; shutting down endpoint watcher for prefix: {prefix}"); - break; - } - } - WatchEvent::Delete(kv) => { - match String::from_utf8(kv.key().to_vec()) { - Ok(key) => { map.remove(&key); } - Err(_) => { - tracing::error!("Unable to parse delete endpoint event; shutting down endpoint watcher for prefix: {}", prefix); - break; + event_count += 1; + tracing::debug!("endpoint_watcher: Processing event #{} for discovery key: {:?}", event_count, discovery_key); + + match discovery_event { + crate::discovery::DiscoveryEvent::Added(discovery_instance) => { + match discovery_instance { + crate::discovery::DiscoveryInstance::Endpoint(instance) => { + tracing::info!( + "endpoint_watcher: Added endpoint instance_id={}, namespace={}, component={}, endpoint={}", + instance.instance_id, + instance.namespace, + instance.component, + instance.endpoint + ); + map.insert(instance.instance_id, instance); + } + _ => { + tracing::debug!("endpoint_watcher: Ignoring non-endpoint instance (ModelCard, etc.) 
for discovery key: {:?}", discovery_key); } } } + crate::discovery::DiscoveryEvent::Removed(instance_id) => { + tracing::info!( + "endpoint_watcher: Removed instance_id={} for discovery key: {:?}", + instance_id, + discovery_key + ); + map.remove(&instance_id); + } } let instances: Vec = map.values().cloned().collect(); + tracing::debug!( + "endpoint_watcher: Current map size={}, sending update for discovery key: {:?}", + instances.len(), + discovery_key + ); if watch_tx.send(instances).is_err() { - tracing::debug!("Unable to send watch updates; shutting down endpoint watcher for prefix: {}", prefix); + tracing::debug!("endpoint_watcher: Unable to send watch updates; shutting down for discovery key: {:?}", discovery_key); break; } - } - tracing::debug!("Completed endpoint watcher for prefix: {prefix}"); + tracing::debug!("endpoint_watcher: Completed for discovery key: {:?}, total events processed: {}", discovery_key, event_count); let _ = watch_tx.send(vec![]); }); let instance_source = Arc::new(InstanceSource::Dynamic(watch_rx)); instance_sources.insert(endpoint.clone(), Arc::downgrade(&instance_source)); + tracing::debug!( + "get_or_create_dynamic_instance_source: Successfully created and cached instance source for endpoint: {}", + endpoint.path() + ); Ok(instance_source) } } diff --git a/lib/runtime/src/component/endpoint.rs b/lib/runtime/src/component/endpoint.rs index baeb46683f..90b4d8e6e2 100644 --- a/lib/runtime/src/component/endpoint.rs +++ b/lib/runtime/src/component/endpoint.rs @@ -118,8 +118,6 @@ impl EndpointConfigBuilder { let endpoint_name = endpoint.name.clone(); let system_health = endpoint.drt().system_health.clone(); let subject = endpoint.subject_to(connection_id); - let etcd_path = endpoint.etcd_path_with_lease_id(connection_id); - let etcd_client = endpoint.component.drt.etcd_client.clone(); // Register health check target in SystemHealth if provided if let Some(health_check_payload) = &health_check_payload { @@ -193,24 +191,19 @@ impl EndpointConfigBuilder { result }); - // make the components service endpoint discovery in etcd - - // client.register_service() - let info = Instance { + // Register this endpoint instance in the discovery plane + // The discovery interface abstracts storage backend (etcd, k8s, etc) and provides + // consistent registration/discovery across the system. + let discovery_client = endpoint.drt().discovery_client(); + + let discovery_spec = crate::discovery::DiscoverySpec::Endpoint { + namespace: namespace_name.clone(), component: component_name.clone(), endpoint: endpoint_name.clone(), - namespace: namespace_name.clone(), - instance_id: connection_id, - transport: TransportType::NatsTcp(subject), + transport: TransportType::NatsTcp(subject.clone()), }; - let info = serde_json::to_vec_pretty(&info)?; - - if let Some(etcd_client) = &etcd_client - && let Err(e) = etcd_client - .kv_create(&etcd_path, info, Some(connection_id)) - .await - { + if let Err(e) = discovery_client.register(discovery_spec).await { tracing::error!( component_name, endpoint_name, @@ -222,6 +215,15 @@ impl EndpointConfigBuilder { "Unable to register service for discovery. 
Check discovery service status" )); } + + tracing::warn!( + "DISCOVERY_VALIDATION: endpoint_registered: namespace={}, component={}, endpoint={}, instance_id={}", + namespace_name, + component_name, + endpoint_name, + connection_id + ); + task.await??; Ok(()) diff --git a/lib/runtime/src/discovery/kube.rs b/lib/runtime/src/discovery/kube.rs new file mode 100644 index 0000000000..2499a81d88 --- /dev/null +++ b/lib/runtime/src/discovery/kube.rs @@ -0,0 +1,1119 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use crate::{CancellationToken, Result}; +use async_trait::async_trait; +use std::collections::{HashMap, HashSet}; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; +use tokio::sync::RwLock; +use uuid; + +use super::{DiscoveryClient, DiscoveryEvent, DiscoveryInstance, DiscoveryKey, DiscoverySpec, DiscoveryStream}; +use k8s_openapi::api::discovery::v1::EndpointSlice; +use kube::{ + Api, Client as KubeClient, + api::ListParams, + runtime::{watcher, watcher::Config, reflector, WatchStreamExt}, +}; + +/// Hash a pod name to get a consistent instance ID +pub fn hash_pod_name(pod_name: &str) -> u64 { + use std::collections::hash_map::DefaultHasher; + let mut hasher = DefaultHasher::new(); + pod_name.hash(&mut hasher); + hasher.finish() +} + +/// Key for organizing metadata internally +/// Format: "namespace/component/endpoint" +fn make_endpoint_key(namespace: &str, component: &str, endpoint: &str) -> String { + format!("{}/{}/{}", namespace, component, endpoint) +} + +/// Metadata stored on each pod and exposed via HTTP endpoint +/// This struct holds all discovery registrations for this pod instance +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct DiscoveryMetadata { + /// Registered endpoint instances (key: "namespace/component/endpoint") + endpoints: HashMap, + /// Registered model card instances (key: "namespace/component/endpoint") + model_cards: HashMap, +} + +impl DiscoveryMetadata { + /// Create a new empty metadata store + pub fn new() -> Self { + Self { + endpoints: HashMap::new(), + model_cards: HashMap::new(), + } + } + + /// Register an endpoint instance + pub fn register_endpoint(&mut self, instance: DiscoveryInstance) -> Result<()> { + if let DiscoveryInstance::Endpoint(ref inst) = instance { + let key = make_endpoint_key(&inst.namespace, &inst.component, &inst.endpoint); + self.endpoints.insert(key, instance); + Ok(()) + } else { + crate::raise!("Cannot register non-endpoint instance as endpoint") + } + } + + /// Register a model card instance + pub fn register_model_card(&mut self, instance: DiscoveryInstance) -> Result<()> { + if let DiscoveryInstance::ModelCard { + ref namespace, + ref component, + ref endpoint, + .. 
+ } = instance + { + let key = make_endpoint_key(namespace, component, endpoint); + self.model_cards.insert(key, instance); + Ok(()) + } else { + crate::raise!("Cannot register non-model-card instance as model card") + } + } + + /// Get all registered endpoints + pub fn get_all_endpoints(&self) -> Vec { + self.endpoints.values().cloned().collect() + } + + /// Get all registered model cards + pub fn get_all_model_cards(&self) -> Vec { + self.model_cards.values().cloned().collect() + } + + /// Get all registered instances (endpoints and model cards) + pub fn get_all(&self) -> Vec { + self.endpoints + .values() + .chain(self.model_cards.values()) + .cloned() + .collect() + } +} + +impl Default for DiscoveryMetadata { + fn default() -> Self { + Self::new() + } +} + +/// Cached metadata from a remote pod +struct CachedMetadata { + metadata: Arc, + pod_ip: String, + fetched_at: std::time::Instant, +} + +/// Pod information extracted from environment +#[derive(Debug, Clone)] +struct PodInfo { + pod_name: String, + pod_namespace: String, + system_port: u16, +} + +impl PodInfo { + /// Discover pod information from environment variables + fn from_env() -> Result { + let pod_name = std::env::var("POD_NAME") + .map_err(|_| crate::error!("POD_NAME environment variable not set"))?; + + let pod_namespace = std::env::var("POD_NAMESPACE") + .unwrap_or_else(|_| { + tracing::warn!("POD_NAMESPACE not set, defaulting to 'default'"); + "default".to_string() + }); + + // Read system server port from config + let config = crate::config::RuntimeConfig::from_settings().unwrap_or_default(); + let system_port = config.system_port; + + Ok(Self { + pod_name, + pod_namespace, + system_port, + }) + } +} + +/// Discovery client implementation backed by Kubernetes EndpointSlices +#[derive(Clone)] +pub struct KubeDiscoveryClient { + /// Instance ID derived from pod name hash + instance_id: u64, + /// Local pod's metadata (shared with system server) + metadata: Arc>, + /// HTTP client for fetching remote metadata + http_client: reqwest::Client, + /// Cache of remote pod metadata (instance_id -> metadata) + metadata_cache: Arc>>, + /// Pod information + pod_info: PodInfo, + /// Cancellation token + cancel_token: CancellationToken, + /// Kubernetes client + kube_client: KubeClient, + /// Mock mode for testing (skips HTTP calls and returns mock metadata) + mock_metadata: bool, +} + +impl KubeDiscoveryClient { + /// Create a new Kubernetes discovery client + /// + /// # Arguments + /// * `metadata` - Shared metadata store (also used by system server) + /// * `cancel_token` - Cancellation token for shutdown + pub async fn new( + metadata: Arc>, + cancel_token: CancellationToken, + ) -> Result { + let pod_info = PodInfo::from_env()?; + let instance_id = hash_pod_name(&pod_info.pod_name); + + tracing::info!( + "Initializing KubeDiscoveryClient: pod_name={}, instance_id={:x}, namespace={}", + pod_info.pod_name, + instance_id, + pod_info.pod_namespace + ); + + let http_client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .map_err(|e| crate::error!("Failed to create HTTP client: {}", e))?; + + let kube_client = KubeClient::try_default() + .await + .map_err(|e| crate::error!("Failed to create Kubernetes client: {}", e))?; + + Ok(Self { + instance_id, + metadata, + http_client, + metadata_cache: Arc::new(RwLock::new(HashMap::new())), + pod_info, + cancel_token, + kube_client, + mock_metadata: false, + }) + } + + /// Create a new client for testing (doesn't require environment variables) + /// + 
/// This method is intended for testing only and bypasses the normal + /// environment variable requirements. When `mock_metadata` is true, + /// HTTP calls are skipped and mock metadata is returned. + #[doc(hidden)] + pub async fn new_for_testing( + kube_client: KubeClient, + pod_name: String, + pod_namespace: String, + mock_metadata: bool, + ) -> Result { + let instance_id = hash_pod_name(&pod_name); + let metadata = Arc::new(RwLock::new(DiscoveryMetadata::new())); + let cancel_token = CancellationToken::new(); + + let http_client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .map_err(|e| crate::error!("Failed to create HTTP client: {}", e))?; + + let pod_info = PodInfo { + pod_name, + pod_namespace, + system_port: 8080, + }; + + Ok(Self { + instance_id, + metadata, + http_client, + metadata_cache: Arc::new(RwLock::new(HashMap::new())), + pod_info, + cancel_token, + kube_client, + mock_metadata, + }) + } + + /// Generate mock metadata for testing + /// Returns a DiscoveryMetadata with one endpoint instance + fn create_mock_metadata(pod_name: &str) -> DiscoveryMetadata { + use crate::component::{Instance, TransportType}; + + let mut metadata = DiscoveryMetadata::new(); + let instance_id = hash_pod_name(pod_name); + + // Create a mock endpoint instance + let endpoint = DiscoveryInstance::Endpoint(Instance { + namespace: "test-namespace".to_string(), + component: "test-component".to_string(), + endpoint: "test-endpoint".to_string(), + instance_id, + transport: TransportType::NatsTcp("nats://test:4222".to_string()), + }); + + // Ignore errors in mock data creation + let _ = metadata.register_endpoint(endpoint); + + metadata + } + + /// Get metadata for a remote pod, using cache if available + async fn get_metadata(&self, pod_name: &str, pod_ip: &str) -> Result> { + let instance_id = hash_pod_name(pod_name); + + // Mock mode: return mock metadata without HTTP calls + if self.mock_metadata { + tracing::debug!( + "Mock mode: returning mock metadata for pod_name={}, instance_id={:x}", + pod_name, + instance_id + ); + let metadata = Self::create_mock_metadata(pod_name); + return Ok(Arc::new(metadata)); + } + + // Local test mode: parse port from pod name and use localhost + let target_host = if std::env::var("DYN_LOCAL_KUBE_TEST").is_ok() { + if let Some(port) = Self::parse_port_from_pod_name(pod_name) { + tracing::info!( + "Local test mode: using localhost:{} for pod {}", + port, + pod_name + ); + format!("localhost:{}", port) + } else { + tracing::warn!( + "Local test mode enabled but couldn't parse port from pod name: {}", + pod_name + ); + format!("{}:{}", pod_ip, self.pod_info.system_port) + } + } else { + format!("{}:{}", pod_ip, self.pod_info.system_port) + }; + + // Fast path: check cache + { + let cache = self.metadata_cache.read().await; + if let Some(cached) = cache.get(&instance_id) { + tracing::debug!( + "Cache hit for pod_name={}, instance_id={:x}", + pod_name, + instance_id + ); + return Ok(cached.metadata.clone()); + } + } + + // Cache miss: fetch from remote pod + tracing::debug!( + "Cache miss for pod_name={}, instance_id={:x}, fetching from {}", + pod_name, + instance_id, + target_host + ); + self.fetch_and_cache_from_host(instance_id, pod_name, &target_host).await + } + + /// Parse port number from pod name (format: pod-name-) + /// Returns Some(port) if successfully parsed, None otherwise + fn parse_port_from_pod_name(pod_name: &str) -> Option { + // Split by '-' and try to parse the last segment as a port number + 
pod_name.rsplit('-') + .next() + .and_then(|last| last.parse::().ok()) + } + + /// Fetch metadata from a remote pod and cache it + async fn fetch_and_cache_from_host( + &self, + instance_id: u64, + pod_name: &str, + target_host: &str, + ) -> Result> { + let url = format!("http://{}/metadata", target_host); + + tracing::debug!("Fetching metadata from {}", url); + + let response = self + .http_client + .get(&url) + .send() + .await + .map_err(|e| crate::error!("Failed to fetch metadata from {}: {}", url, e))?; + + let metadata: DiscoveryMetadata = response + .json() + .await + .map_err(|e| crate::error!("Failed to parse metadata from {}: {}", url, e))?; + + let metadata = Arc::new(metadata); + + // Store in cache + { + let mut cache = self.metadata_cache.write().await; + // Check again in case another task inserted while we were fetching + if let Some(existing) = cache.get(&instance_id) { + tracing::debug!( + "Another task cached metadata for instance_id={:x} while we were fetching", + instance_id + ); + return Ok(existing.metadata.clone()); + } + + cache.insert( + instance_id, + CachedMetadata { + metadata: metadata.clone(), + pod_ip: target_host.to_string(), + fetched_at: std::time::Instant::now(), + }, + ); + + tracing::debug!( + "Cached metadata for pod_name={}, instance_id={:x}", + pod_name, + instance_id + ); + } + + Ok(metadata) + } + + /// Invalidate cache entry for a given instance + async fn invalidate_cache(&self, instance_id: u64) { + let mut cache = self.metadata_cache.write().await; + if cache.remove(&instance_id).is_some() { + tracing::debug!("Invalidated cache for instance_id={:x}", instance_id); + } + } + + /// Build label selector for Kubernetes EndpointSlices from DiscoveryKey + fn build_label_selector(key: &DiscoveryKey) -> String { + match key { + DiscoveryKey::AllEndpoints => String::new(), + DiscoveryKey::NamespacedEndpoints { namespace } => { + format!("dynamo.nvidia.com/namespace={}", namespace) + } + DiscoveryKey::ComponentEndpoints { namespace, component } => { + format!("dynamo.nvidia.com/namespace={},dynamo.nvidia.com/component={}", namespace, component) + } + DiscoveryKey::Endpoint { namespace, component, .. } => { + format!("dynamo.nvidia.com/namespace={},dynamo.nvidia.com/component={}", namespace, component) + } + DiscoveryKey::AllModelCards => String::new(), + DiscoveryKey::NamespacedModelCards { namespace } => { + format!("dynamo.nvidia.com/namespace={}", namespace) + } + DiscoveryKey::ComponentModelCards { namespace, component } => { + format!("dynamo.nvidia.com/namespace={},dynamo.nvidia.com/component={}", namespace, component) + } + DiscoveryKey::EndpointModelCards { namespace, component, .. 
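            // For reference, ComponentEndpoints { namespace: "prod", component: "router" }
            // (values illustrative) yields
            // "dynamo.nvidia.com/namespace=prod,dynamo.nvidia.com/component=router".
            // Endpoint-level keys reuse the same two labels; narrowing to a single
            // endpoint happens later in filter_metadata(), not via the selector.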
} => { + format!("dynamo.nvidia.com/namespace={},dynamo.nvidia.com/component={}", namespace, component) + } + } + } + + /// Extract ready endpoints from an EndpointSlice + /// Returns (pod_name, pod_ip) pairs + fn extract_ready_endpoints(slice: &EndpointSlice) -> Vec<(String, String)> { + let mut result = Vec::new(); + + let endpoints = &slice.endpoints; + + for endpoint in endpoints { + // Check if endpoint is ready + let is_ready = endpoint.conditions.as_ref() + .and_then(|c| c.ready) + .unwrap_or(false); + + if !is_ready { + continue; + } + + // Get pod name from targetRef + let pod_name = match endpoint.target_ref.as_ref() { + Some(target_ref) => target_ref.name.as_deref().unwrap_or(""), + None => continue, + }; + + if pod_name.is_empty() { + continue; + } + + // Get IP addresses + for ip in &endpoint.addresses { + result.push((pod_name.to_string(), ip.clone())); + } + } + + result + } + + /// Extract instance IDs from an EndpointSlice (only ready endpoints) + fn extract_instance_ids(slice: &EndpointSlice) -> HashSet { + let mut ids = HashSet::new(); + + let endpoints = &slice.endpoints; + + for endpoint in endpoints { + // Only count ready endpoints + let is_ready = endpoint.conditions.as_ref() + .and_then(|c| c.ready) + .unwrap_or(false); + + if !is_ready { + continue; + } + + if let Some(target_ref) = &endpoint.target_ref { + if let Some(pod_name) = &target_ref.name { + ids.insert(hash_pod_name(pod_name)); + } + } + } + + ids + } + + /// Extract endpoint information from an EndpointSlice + /// Returns (instance_id, pod_name, pod_ip) tuples for ready endpoints + fn extract_endpoint_info(slice: &EndpointSlice) -> Vec<(u64, String, String)> { + let mut result = Vec::new(); + + let endpoints = &slice.endpoints; + + for endpoint in endpoints { + // Check if endpoint is ready + let is_ready = endpoint.conditions.as_ref() + .and_then(|c| c.ready) + .unwrap_or(false); + + if !is_ready { + continue; + } + + // Get pod name from targetRef + let pod_name = match endpoint.target_ref.as_ref() { + Some(target_ref) => target_ref.name.as_deref().unwrap_or(""), + None => continue, + }; + + if pod_name.is_empty() { + continue; + } + + let instance_id = hash_pod_name(pod_name); + + // Get IP addresses + for ip in &endpoint.addresses { + result.push((instance_id, pod_name.to_string(), ip.clone())); + } + } + + result + } + + /// Filter metadata instances by DiscoveryKey + fn filter_metadata( + metadata: &DiscoveryMetadata, + key: &DiscoveryKey, + _instance_id: u64, + ) -> Vec { + let mut result = Vec::new(); + + match key { + DiscoveryKey::AllEndpoints => { + result.extend(metadata.get_all_endpoints()); + } + DiscoveryKey::NamespacedEndpoints { namespace } => { + for instance in metadata.get_all_endpoints() { + if let DiscoveryInstance::Endpoint(ref inst) = instance { + if &inst.namespace == namespace { + result.push(instance); + } + } + } + } + DiscoveryKey::ComponentEndpoints { namespace, component } => { + for instance in metadata.get_all_endpoints() { + if let DiscoveryInstance::Endpoint(ref inst) = instance { + if &inst.namespace == namespace && &inst.component == component { + result.push(instance); + } + } + } + } + DiscoveryKey::Endpoint { namespace, component, endpoint } => { + for instance in metadata.get_all_endpoints() { + if let DiscoveryInstance::Endpoint(ref inst) = instance { + if &inst.namespace == namespace + && &inst.component == component + && &inst.endpoint == endpoint { + result.push(instance); + } + } + } + } + DiscoveryKey::AllModelCards => { + 
result.extend(metadata.get_all_model_cards()); + } + DiscoveryKey::NamespacedModelCards { namespace } => { + for instance in metadata.get_all_model_cards() { + if let DiscoveryInstance::ModelCard { namespace: ns, .. } = &instance { + if ns == namespace { + result.push(instance); + } + } + } + } + DiscoveryKey::ComponentModelCards { namespace, component } => { + for instance in metadata.get_all_model_cards() { + if let DiscoveryInstance::ModelCard { + namespace: ns, + component: comp, + .. + } = &instance { + if ns == namespace && comp == component { + result.push(instance); + } + } + } + } + DiscoveryKey::EndpointModelCards { namespace, component, endpoint } => { + for instance in metadata.get_all_model_cards() { + if let DiscoveryInstance::ModelCard { + namespace: ns, + component: comp, + endpoint: ep, + .. + } = &instance { + if ns == namespace && comp == component && ep == endpoint { + result.push(instance); + } + } + } + } + } + + result + } +} + +#[async_trait] +impl DiscoveryClient for KubeDiscoveryClient { + fn instance_id(&self) -> u64 { + self.instance_id + } + + async fn register(&self, spec: DiscoverySpec) -> Result { + let instance_id = self.instance_id(); + let instance = spec.with_instance_id(instance_id); + + tracing::debug!( + "Registering instance: {:?} with instance_id={:x}", + instance, + instance_id + ); + + // Write to local metadata + let mut metadata = self.metadata.write().await; + match &instance { + DiscoveryInstance::Endpoint(inst) => { + tracing::info!( + "Registered endpoint: namespace={}, component={}, endpoint={}, instance_id={:x}", + inst.namespace, + inst.component, + inst.endpoint, + instance_id + ); + metadata.register_endpoint(instance.clone())?; + } + DiscoveryInstance::ModelCard { + namespace, + component, + endpoint, + .. 
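            // Sketch of how a caller reaches this arm; `MyCard` stands in for any
            // Serialize-able card type and is not part of this patch:
            //
            //   let spec = DiscoverySpec::from_model_card(
            //       "ns".to_string(), "comp".to_string(), "ep".to_string(), &my_card)?;
            //   let registered = discovery_client.register(spec).await?;
            //   let card_back: MyCard = registered.deserialize_model_card()?;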
+ } => { + tracing::info!( + "Registered model card: namespace={}, component={}, endpoint={}, instance_id={:x}", + namespace, + component, + endpoint, + instance_id + ); + metadata.register_model_card(instance.clone())?; + } + } + + Ok(instance) + } + + async fn list(&self, key: DiscoveryKey) -> Result> { + use futures::StreamExt; + + tracing::debug!("KubeDiscoveryClient::list called with key={:?}", key); + + // Build label selector + let label_selector = Self::build_label_selector(&key); + tracing::debug!("Using label selector: {}", label_selector); + + // Query EndpointSlices in our namespace only + let endpoint_slices: Api = Api::namespaced( + self.kube_client.clone(), + &self.pod_info.pod_namespace, + ); + let mut list_params = ListParams::default(); + if !label_selector.is_empty() { + list_params = list_params.labels(&label_selector); + } + + tracing::debug!( + "Listing EndpointSlices in namespace: {}", + self.pod_info.pod_namespace + ); + + let slices = endpoint_slices + .list(&list_params) + .await + .map_err(|e| crate::error!("Failed to list EndpointSlices: {}", e))?; + + tracing::debug!("Found {} EndpointSlices", slices.items.len()); + + // Extract ready endpoints + let ready_endpoints: Vec<(String, String)> = slices + .items + .iter() + .flat_map(Self::extract_ready_endpoints) + .collect(); + + tracing::debug!("Found {} ready endpoints", ready_endpoints.len()); + + // Fetch metadata concurrently with rate limiting + let metadata_futures = ready_endpoints.into_iter().map(|(pod_name, pod_ip)| { + let client = self.clone(); + async move { + match client.get_metadata(&pod_name, &pod_ip).await { + Ok(metadata) => Some((hash_pod_name(&pod_name), metadata)), + Err(e) => { + tracing::warn!( + "Failed to fetch metadata from pod {} ({}): {}", + pod_name, + pod_ip, + e + ); + None + } + } + } + }); + + let results: Vec<_> = futures::stream::iter(metadata_futures) + .buffer_unordered(20) + .collect() + .await; + + // Filter and collect instances + let mut instances = Vec::new(); + for result in results { + if let Some((instance_id, metadata)) = result { + let filtered = Self::filter_metadata(&metadata, &key, instance_id); + instances.extend(filtered); + } + } + + tracing::info!( + "KubeDiscoveryClient::list returning {} instances for key={:?}", + instances.len(), + key + ); + + Ok(instances) + } + + async fn list_and_watch(&self, key: DiscoveryKey) -> Result { + use futures::{StreamExt, future}; + use tokio::sync::mpsc; + + tracing::info!( + "KubeDiscoveryClient::list_and_watch started for key={:?} in namespace={}", + key, + self.pod_info.pod_namespace + ); + + // Build label selector + let label_selector = Self::build_label_selector(&key); + + // Create EndpointSlice API and watcher (scoped to our namespace) + let endpoint_slices: Api = Api::namespaced( + self.kube_client.clone(), + &self.pod_info.pod_namespace, + ); + let mut watch_config = Config::default(); + if !label_selector.is_empty() { + watch_config = watch_config.labels(&label_selector); + } + + tracing::debug!( + "Watching EndpointSlices in namespace: {} with label selector: {:?}", + self.pod_info.pod_namespace, + label_selector + ); + + // Create reflector to maintain complete current state + let (reader, writer) = reflector::store(); + + // Generate unique stream identifier for tracing + let stream_id = uuid::Uuid::new_v4(); + + // Set up reflector stream that polls forever to keep store updated + let reflector_stream = reflector(writer, watcher(endpoint_slices, watch_config)) + .default_backoff() + .touched_objects() + 
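            // `reflector(writer, watcher(...))` keeps `reader` populated with the full
            // current set of EndpointSlices matching the selector; this stream only has
            // to be drained so the store stays fresh. The monitor task below then reads
            // `reader.state()` on a fixed interval and diffs instance ids itself rather
            // than interpreting individual watch events.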
.for_each(move |res| { + future::ready(match res { + Ok(obj) => { + tracing::debug!( + stream_id = %stream_id, + slice_name = obj.metadata.name.as_deref().unwrap_or("unknown"), + "Reflector updated" + ); + } + Err(e) => { + tracing::warn!( + stream_id = %stream_id, + error = %e, + "Reflector error" + ); + } + }) + }); + + // Spawn background task to poll reflector forever + tokio::spawn(reflector_stream); + + // Track known instances for diffing + let known_instances = Arc::new(RwLock::new(HashSet::::new())); + let client = self.clone(); + let key_clone = key.clone(); + + // Create channel for emitting discovery events + let (tx, rx) = mpsc::unbounded_channel(); + + // Spawn task that watches the reflector store and emits events + tokio::spawn(async move { + let mut interval = tokio::time::interval(tokio::time::Duration::from_millis(500)); + + tracing::debug!( + stream_id = %stream_id, + "Store monitor started for key={:?}", + key_clone + ); + + loop { + interval.tick().await; + + // Get complete current state from reflector + let all_slices: Vec = reader.state() + .iter() + .map(|arc_slice| arc_slice.as_ref().clone()) + .collect(); + + // Debug: print all slices + // let slice_names: Vec = all_slices.iter() + // .map(|s| s.metadata.name.as_deref().unwrap_or("unnamed").to_string()) + // .collect(); + // tracing::debug!( + // stream_id = %stream_id, + // slice_count = all_slices.len(), + // slices = ?slice_names, + // "Store monitor tick - all slices" + // ); + + // Extract ALL current instances from ALL slices + let current_instances: HashSet = all_slices.iter() + .flat_map(Self::extract_instance_ids) + .collect(); + + // Build endpoint info map for fetching + let mut endpoint_info_map = HashMap::new(); + for slice in &all_slices { + let endpoint_infos = Self::extract_endpoint_info(slice); + for (instance_id, pod_name, pod_ip) in endpoint_infos { + endpoint_info_map.entry(instance_id) + .or_insert((pod_name, pod_ip)); + } + } + + // Diff against previous state + let prev_instances = known_instances.read().await.clone(); + let added: Vec<_> = current_instances.difference(&prev_instances).copied().collect(); + let removed: Vec<_> = prev_instances.difference(¤t_instances).copied().collect(); + + if !added.is_empty() || !removed.is_empty() { + tracing::debug!( + stream_id = %stream_id, + added = added.len(), + removed = removed.len(), + total = current_instances.len(), + "State diff computed" + ); + } + + // Update known_instances before fetching + *known_instances.write().await = current_instances.clone(); + + // Fetch metadata for new instances concurrently + let fetch_futures: Vec<_> = added.iter().filter_map(|&instance_id| { + endpoint_info_map.get(&instance_id).map(|(pod_name, pod_ip)| { + let client = client.clone(); + let pod_name = pod_name.clone(); + let pod_ip = pod_ip.clone(); + let key_clone = key_clone.clone(); + let known_instances = known_instances.clone(); + + async move { + match client.get_metadata(&pod_name, &pod_ip).await { + Ok(metadata) => { + // Fetch-after-delete guard: check if still in known set + if known_instances.read().await.contains(&instance_id) { + let instances = Self::filter_metadata(&metadata, &key_clone, instance_id); + Some((instance_id, instances)) + } else { + tracing::debug!( + stream_id = %stream_id, + instance_id = format!("{:x}", instance_id), + "Instance removed before fetch completed, skipping" + ); + None + } + } + Err(e) => { + tracing::warn!( + stream_id = %stream_id, + pod_name = %pod_name, + error = %e, + "Failed to fetch metadata" + ); + None 
+ } + } + } + }) + }).collect(); + + // Fetch concurrently and emit Added events + let results: Vec<_> = futures::stream::iter(fetch_futures) + .buffer_unordered(20) + .collect() + .await; + + for result in results { + if let Some((_instance_id, instances)) = result { + for instance in instances { + tracing::info!( + stream_id = %stream_id, + instance_id = format!("{:x}", instance.instance_id()), + "Emitting Added event" + ); + if tx.send(Ok(DiscoveryEvent::Added(instance))).is_err() { + tracing::debug!(stream_id = %stream_id, "Receiver dropped, stopping monitor"); + return; + } + } + } + } + + // Emit Removed events + for instance_id in removed { + tracing::info!( + stream_id = %stream_id, + instance_id = format!("{:x}", instance_id), + "Emitting Removed event" + ); + client.invalidate_cache(instance_id).await; + if tx.send(Ok(DiscoveryEvent::Removed(instance_id))).is_err() { + tracing::debug!(stream_id = %stream_id, "Receiver dropped, stopping monitor"); + return; + } + } + } + }); + + // Convert receiver to stream + let stream = tokio_stream::wrappers::UnboundedReceiverStream::new(rx); + Ok(Box::pin(stream)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::component::TransportType; + + #[test] + fn test_hash_consistency() { + let pod_name = "test-pod-123"; + let hash1 = hash_pod_name(pod_name); + let hash2 = hash_pod_name(pod_name); + assert_eq!(hash1, hash2, "Hash should be consistent"); + } + + #[test] + fn test_hash_uniqueness() { + let hash1 = hash_pod_name("pod-1"); + let hash2 = hash_pod_name("pod-2"); + assert_ne!(hash1, hash2, "Different pods should have different hashes"); + } + + #[test] + fn test_metadata_serde() { + let mut metadata = DiscoveryMetadata::new(); + + // Add an endpoint + let instance = DiscoveryInstance::Endpoint(crate::component::Instance { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: "ep1".to_string(), + instance_id: 123, + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }); + + metadata.register_endpoint(instance).unwrap(); + + // Serialize + let json = serde_json::to_string(&metadata).unwrap(); + + // Deserialize + let deserialized: DiscoveryMetadata = serde_json::from_str(&json).unwrap(); + + assert_eq!(deserialized.endpoints.len(), 1); + assert_eq!(deserialized.model_cards.len(), 0); + } + + #[tokio::test] + async fn test_concurrent_registration() { + let metadata = Arc::new(RwLock::new(DiscoveryMetadata::new())); + + // Spawn multiple tasks registering concurrently + let handles: Vec<_> = (0..10) + .map(|i| { + let metadata = metadata.clone(); + tokio::spawn(async move { + let mut meta = metadata.write().await; + let instance = DiscoveryInstance::Endpoint(crate::component::Instance { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: format!("ep{}", i), + instance_id: i, + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }); + meta.register_endpoint(instance).unwrap(); + }) + }) + .collect(); + + // Wait for all to complete + for handle in handles { + handle.await.unwrap(); + } + + // Verify all registrations succeeded + let meta = metadata.read().await; + assert_eq!(meta.endpoints.len(), 10); + } + + #[test] + fn test_endpoint_key() { + let key1 = make_endpoint_key("ns1", "comp1", "ep1"); + let key2 = make_endpoint_key("ns1", "comp1", "ep1"); + let key3 = make_endpoint_key("ns1", "comp1", "ep2"); + + assert_eq!(key1, key2); + assert_ne!(key1, key3); + assert_eq!(key1, "ns1/comp1/ep1"); + } + + #[test] + fn 
test_parse_port_from_pod_name() { + // Valid port numbers + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("dynamo-test-worker-8080"), + Some(8080) + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("my-service-9000"), + Some(9000) + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("test-3000"), + Some(3000) + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("a-b-c-80"), + Some(80) + ); + + // Invalid - no port number at end + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("dynamo-test-worker"), + None + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("8080-worker"), + None // Port at beginning, not end + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("worker-abc"), + None // Not a number + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name(""), + None // Empty string + ); + } + + #[tokio::test] + async fn test_metadata_accessors() { + let mut metadata = DiscoveryMetadata::new(); + + // Register endpoints + for i in 0..3 { + let instance = DiscoveryInstance::Endpoint(crate::component::Instance { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: format!("ep{}", i), + instance_id: i, + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }); + metadata.register_endpoint(instance).unwrap(); + } + + // Register model cards + for i in 0..2 { + let instance = DiscoveryInstance::ModelCard { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: format!("ep{}", i), + instance_id: i, + card_json: serde_json::json!({"model": "test"}), + }; + metadata.register_model_card(instance).unwrap(); + } + + assert_eq!(metadata.get_all_endpoints().len(), 3); + assert_eq!(metadata.get_all_model_cards().len(), 2); + assert_eq!(metadata.get_all().len(), 5); + } +} + diff --git a/lib/runtime/src/discovery/kv_store.rs b/lib/runtime/src/discovery/kv_store.rs new file mode 100644 index 0000000000..e284614e21 --- /dev/null +++ b/lib/runtime/src/discovery/kv_store.rs @@ -0,0 +1,470 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
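// Key layout used by this backend (keys are relative to their bucket and the
// values are the serialized `DiscoveryInstance`):
//
//   bucket "v1/instances": {namespace}/{component}/{endpoint}/{instance_id:x}
//   bucket "v1/mdc":       {namespace}/{component}/{endpoint}/{instance_id:x}
//
// `list()` turns a `DiscoveryKey` into a prefix over these paths, and the watch
// path recovers the instance id of a deleted entry from the trailing hex segment.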
+// SPDX-License-Identifier: Apache-2.0 + +use crate::storage::key_value_store::{KeyValueStoreManager, WatchEvent}; +use crate::{CancellationToken, Result}; +use async_trait::async_trait; +use futures::{Stream, StreamExt}; +use std::pin::Pin; +use std::sync::Arc; + +use super::{DiscoveryClient, DiscoveryEvent, DiscoveryInstance, DiscoveryKey, DiscoverySpec, DiscoveryStream}; + +const INSTANCES_BUCKET: &str = "v1/instances"; +const MODEL_CARDS_BUCKET: &str = "v1/mdc"; + +/// Discovery client implementation backed by a KeyValueStore +pub struct KVStoreDiscoveryClient { + store: Arc, + cancel_token: CancellationToken, +} + +impl KVStoreDiscoveryClient { + pub fn new(store: KeyValueStoreManager, cancel_token: CancellationToken) -> Self { + Self { + store: Arc::new(store), + cancel_token, + } + } + + /// Build the key path for an endpoint (relative to bucket, not absolute) + fn endpoint_key(namespace: &str, component: &str, endpoint: &str, instance_id: u64) -> String { + format!("{}/{}/{}/{:x}", namespace, component, endpoint, instance_id) + } + + /// Build the key path for a model card (relative to bucket, not absolute) + fn model_card_key(namespace: &str, component: &str, endpoint: &str, instance_id: u64) -> String { + format!("{}/{}/{}/{:x}", namespace, component, endpoint, instance_id) + } + + /// Extract prefix for querying based on discovery key + fn key_prefix(key: &DiscoveryKey) -> String { + match key { + DiscoveryKey::AllEndpoints => INSTANCES_BUCKET.to_string(), + DiscoveryKey::NamespacedEndpoints { namespace } => { + format!("{}/{}", INSTANCES_BUCKET, namespace) + } + DiscoveryKey::ComponentEndpoints { namespace, component } => { + format!("{}/{}/{}", INSTANCES_BUCKET, namespace, component) + } + DiscoveryKey::Endpoint { namespace, component, endpoint } => { + format!("{}/{}/{}/{}", INSTANCES_BUCKET, namespace, component, endpoint) + } + DiscoveryKey::AllModelCards => MODEL_CARDS_BUCKET.to_string(), + DiscoveryKey::NamespacedModelCards { namespace } => { + format!("{}/{}", MODEL_CARDS_BUCKET, namespace) + } + DiscoveryKey::ComponentModelCards { namespace, component } => { + format!("{}/{}/{}", MODEL_CARDS_BUCKET, namespace, component) + } + DiscoveryKey::EndpointModelCards { namespace, component, endpoint } => { + format!("{}/{}/{}/{}", MODEL_CARDS_BUCKET, namespace, component, endpoint) + } + } + } + + /// Check if a key matches the given discovery key filter + fn matches_prefix(key_str: &str, prefix: &str) -> bool { + key_str.starts_with(prefix) + } + + /// Parse and deserialize a discovery instance from KV store entry + fn parse_instance(value: &[u8]) -> Result { + let instance: DiscoveryInstance = serde_json::from_slice(value)?; + Ok(instance) + } +} + +#[async_trait] +impl DiscoveryClient for KVStoreDiscoveryClient { + fn instance_id(&self) -> u64 { + self.store.connection_id() + } + + async fn register(&self, spec: DiscoverySpec) -> Result { + let instance_id = self.instance_id(); + let instance = spec.with_instance_id(instance_id); + + let (bucket_name, key_path) = match &instance { + DiscoveryInstance::Endpoint(inst) => { + let key = Self::endpoint_key( + &inst.namespace, + &inst.component, + &inst.endpoint, + inst.instance_id, + ); + tracing::debug!( + "KVStoreDiscoveryClient::register: Registering endpoint instance_id={}, namespace={}, component={}, endpoint={}, key={}", + inst.instance_id, + inst.namespace, + inst.component, + inst.endpoint, + key + ); + (INSTANCES_BUCKET, key) + } + DiscoveryInstance::ModelCard { + namespace, + component, + endpoint, + instance_id, + .. 
+ } => { + let key = Self::model_card_key(namespace, component, endpoint, *instance_id); + tracing::debug!( + "KVStoreDiscoveryClient::register: Registering model card instance_id={}, namespace={}, component={}, endpoint={}, key={}", + instance_id, + namespace, + component, + endpoint, + key + ); + (MODEL_CARDS_BUCKET, key) + } + }; + + // Serialize the instance + let instance_json = serde_json::to_vec(&instance)?; + tracing::debug!( + "KVStoreDiscoveryClient::register: Serialized instance to {} bytes for key={}", + instance_json.len(), + key_path + ); + + // Store in the KV store with no TTL (instances persist until explicitly removed) + tracing::debug!( + "KVStoreDiscoveryClient::register: Getting/creating bucket={} for key={}", + bucket_name, + key_path + ); + let bucket = self + .store + .get_or_create_bucket(bucket_name, None) + .await?; + let key = crate::storage::key_value_store::Key::from_raw(key_path.clone()); + + tracing::debug!( + "KVStoreDiscoveryClient::register: Inserting into bucket={}, key={}", + bucket_name, + key_path + ); + // Use revision 0 for initial registration + let outcome = bucket.insert(&key, instance_json.into(), 0).await?; + tracing::info!( + "KVStoreDiscoveryClient::register: Successfully registered instance_id={}, key={}, outcome={:?}", + instance_id, + key_path, + outcome + ); + + Ok(instance) + } + + async fn list(&self, key: DiscoveryKey) -> Result> { + let prefix = Self::key_prefix(&key); + let bucket_name = if prefix.starts_with(INSTANCES_BUCKET) { + INSTANCES_BUCKET + } else { + MODEL_CARDS_BUCKET + }; + + // Get bucket - if it doesn't exist, return empty list + let Some(bucket) = self.store.get_bucket(bucket_name).await? else { + return Ok(Vec::new()); + }; + + // Get all entries from the bucket + let entries = bucket.entries().await?; + + // Filter by prefix and deserialize + let mut instances = Vec::new(); + for (key_str, value) in entries { + if Self::matches_prefix(&key_str, &prefix) { + match Self::parse_instance(&value) { + Ok(instance) => instances.push(instance), + Err(e) => { + tracing::warn!(key = %key_str, error = %e, "Failed to parse discovery instance"); + } + } + } + } + + Ok(instances) + } + + async fn list_and_watch(&self, key: DiscoveryKey) -> Result { + let prefix = Self::key_prefix(&key); + let bucket_name = if prefix.starts_with(INSTANCES_BUCKET) { + INSTANCES_BUCKET + } else { + MODEL_CARDS_BUCKET + }; + + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Starting watch for key={:?}, prefix={}, bucket={}", + key, + prefix, + bucket_name + ); + + // Use the KeyValueStoreManager's watch mechanism + let (_, mut rx) = self.store.clone().watch( + bucket_name, + None, // No TTL + self.cancel_token.clone(), + ); + + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Got watch receiver for bucket={}", + bucket_name + ); + + // Create a stream that filters and transforms WatchEvents to DiscoveryEvents + let stream = async_stream::stream! 
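        // Shape of the translation performed inside the stream below:
        //
        //   WatchEvent::Put(kv)    -> parse a `DiscoveryInstance` from kv.value()
        //                             and emit DiscoveryEvent::Added(instance)
        //   WatchEvent::Delete(kv) -> delete events carry no value, so the instance id
        //                             is parsed from the trailing hex segment of
        //                             kv.key_str() and emitted as
        //                             DiscoveryEvent::Removed(instance_id)
        //
        // Events whose key does not start with the prefix derived from the
        // DiscoveryKey are skipped.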
{ + let mut event_count = 0; + tracing::debug!("KVStoreDiscoveryClient::list_and_watch: Stream started, waiting for events on prefix={}", prefix); + while let Some(event) = rx.recv().await { + event_count += 1; + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Received event #{} for prefix={}", + event_count, + prefix + ); + let discovery_event = match event { + WatchEvent::Put(kv) => { + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Put event, key={}, prefix={}, matches={}", + kv.key_str(), + prefix, + Self::matches_prefix(kv.key_str(), &prefix) + ); + // Check if this key matches our prefix + if !Self::matches_prefix(kv.key_str(), &prefix) { + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Skipping key {} (doesn't match prefix {})", + kv.key_str(), + prefix + ); + continue; + } + + match Self::parse_instance(kv.value()) { + Ok(instance) => { + tracing::info!( + "KVStoreDiscoveryClient::list_and_watch: Emitting Added event for instance_id={}, key={}", + instance.instance_id(), + kv.key_str() + ); + Some(DiscoveryEvent::Added(instance)) + }, + Err(e) => { + tracing::warn!( + key = %kv.key_str(), + error = %e, + "Failed to parse discovery instance from watch event" + ); + None + } + } + } + WatchEvent::Delete(kv) => { + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Delete event, key={}, prefix={}", + kv.key_str(), + prefix + ); + // Check if this key matches our prefix + if !Self::matches_prefix(kv.key_str(), &prefix) { + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Skipping deleted key {} (doesn't match prefix {})", + kv.key_str(), + prefix + ); + continue; + } + + // Extract instance_id from the key path, not the value + // Delete events have empty values in etcd, so we parse the instance_id from the key + // Key format: "v1/instances/namespace/component/endpoint/{instance_id:x}" + let key_parts: Vec<&str> = kv.key_str().split('/').collect(); + match key_parts.last() { + Some(instance_id_hex) => { + match u64::from_str_radix(instance_id_hex, 16) { + Ok(instance_id) => { + tracing::info!( + "KVStoreDiscoveryClient::list_and_watch: Emitting Removed event for instance_id={}, key={}", + instance_id, + kv.key_str() + ); + Some(DiscoveryEvent::Removed(instance_id)) + } + Err(e) => { + tracing::warn!( + key = %kv.key_str(), + error = %e, + "Failed to parse instance_id hex from deleted key" + ); + None + } + } + } + None => { + tracing::warn!( + key = %kv.key_str(), + "Delete event key has no path components" + ); + None + } + } + } + }; + + if let Some(event) = discovery_event { + tracing::debug!("KVStoreDiscoveryClient::list_and_watch: Yielding event: {:?}", event); + yield Ok(event); + } else { + tracing::debug!("KVStoreDiscoveryClient::list_and_watch: Event was filtered out (None)"); + } + } + tracing::debug!("KVStoreDiscoveryClient::list_and_watch: Stream ended after {} events for prefix={}", event_count, prefix); + }; + + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Returning stream for key={:?}", + key + ); + Ok(Box::pin(stream)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::component::TransportType; + + #[tokio::test] + async fn test_kv_store_discovery_register_endpoint() { + let store = KeyValueStoreManager::memory(); + let cancel_token = CancellationToken::new(); + let client = KVStoreDiscoveryClient::new(store, cancel_token); + + let spec = DiscoverySpec::Endpoint { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: "ep1".to_string(), + transport: 
TransportType::NatsTcp("nats://localhost:4222".to_string()), + }; + + let instance = client.register(spec).await.unwrap(); + + match instance { + DiscoveryInstance::Endpoint(inst) => { + assert_eq!(inst.namespace, "test"); + assert_eq!(inst.component, "comp1"); + assert_eq!(inst.endpoint, "ep1"); + } + _ => panic!("Expected Endpoint instance"), + } + } + + #[tokio::test] + async fn test_kv_store_discovery_list() { + let store = KeyValueStoreManager::memory(); + let cancel_token = CancellationToken::new(); + let client = KVStoreDiscoveryClient::new(store, cancel_token); + + // Register multiple endpoints + let spec1 = DiscoverySpec::Endpoint { + namespace: "ns1".to_string(), + component: "comp1".to_string(), + endpoint: "ep1".to_string(), + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }; + client.register(spec1).await.unwrap(); + + let spec2 = DiscoverySpec::Endpoint { + namespace: "ns1".to_string(), + component: "comp1".to_string(), + endpoint: "ep2".to_string(), + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }; + client.register(spec2).await.unwrap(); + + let spec3 = DiscoverySpec::Endpoint { + namespace: "ns2".to_string(), + component: "comp2".to_string(), + endpoint: "ep1".to_string(), + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }; + client.register(spec3).await.unwrap(); + + // List all endpoints + let all = client.list(DiscoveryKey::AllEndpoints).await.unwrap(); + assert_eq!(all.len(), 3); + + // List namespaced endpoints + let ns1 = client + .list(DiscoveryKey::NamespacedEndpoints { + namespace: "ns1".to_string(), + }) + .await + .unwrap(); + assert_eq!(ns1.len(), 2); + + // List component endpoints + let comp1 = client + .list(DiscoveryKey::ComponentEndpoints { + namespace: "ns1".to_string(), + component: "comp1".to_string(), + }) + .await + .unwrap(); + assert_eq!(comp1.len(), 2); + } + + #[tokio::test] + async fn test_kv_store_discovery_watch() { + let store = KeyValueStoreManager::memory(); + let cancel_token = CancellationToken::new(); + let client = Arc::new(KVStoreDiscoveryClient::new(store, cancel_token.clone())); + + // Start watching before registering + let mut stream = client + .list_and_watch(DiscoveryKey::AllEndpoints) + .await + .unwrap(); + + let client_clone = client.clone(); + let register_task = tokio::spawn(async move { + tokio::time::sleep(tokio::time::Duration::from_millis(50)).await; + + let spec = DiscoverySpec::Endpoint { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: "ep1".to_string(), + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }; + client_clone.register(spec).await.unwrap(); + }); + + // Wait for the added event + let event = stream.next().await.unwrap().unwrap(); + match event { + DiscoveryEvent::Added(instance) => { + match instance { + DiscoveryInstance::Endpoint(inst) => { + assert_eq!(inst.namespace, "test"); + assert_eq!(inst.component, "comp1"); + assert_eq!(inst.endpoint, "ep1"); + } + _ => panic!("Expected Endpoint instance"), + } + } + _ => panic!("Expected Added event"), + } + + register_task.await.unwrap(); + cancel_token.cancel(); + } +} + diff --git a/lib/runtime/src/discovery/mock.rs b/lib/runtime/src/discovery/mock.rs index 5ab66168c5..4c3b0f39f6 100644 --- a/lib/runtime/src/discovery/mock.rs +++ b/lib/runtime/src/discovery/mock.rs @@ -46,37 +46,46 @@ impl MockDiscoveryClient { /// Helper function to check if an instance matches a discovery key query fn matches_key(instance: 
&DiscoveryInstance, key: &DiscoveryKey) -> bool { match (instance, key) { - (DiscoveryInstance::Endpoint { .. }, DiscoveryKey::AllEndpoints) => true, + // Endpoint matching + (DiscoveryInstance::Endpoint(_), DiscoveryKey::AllEndpoints) => true, ( - DiscoveryInstance::Endpoint { - namespace: ins_ns, .. - }, + DiscoveryInstance::Endpoint(inst), DiscoveryKey::NamespacedEndpoints { namespace }, - ) => ins_ns == namespace, + ) => &inst.namespace == namespace, ( - DiscoveryInstance::Endpoint { - namespace: ins_ns, - component: ins_comp, - .. - }, + DiscoveryInstance::Endpoint(inst), DiscoveryKey::ComponentEndpoints { namespace, component, }, - ) => ins_ns == namespace && ins_comp == component, + ) => &inst.namespace == namespace && &inst.component == component, ( - DiscoveryInstance::Endpoint { - namespace: ins_ns, - component: ins_comp, - endpoint: ins_ep, - .. - }, + DiscoveryInstance::Endpoint(inst), DiscoveryKey::Endpoint { namespace, component, endpoint, }, - ) => ins_ns == namespace && ins_comp == component && ins_ep == endpoint, + ) => &inst.namespace == namespace && &inst.component == component && &inst.endpoint == endpoint, + + // ModelCard matching + (DiscoveryInstance::ModelCard { .. }, DiscoveryKey::AllModelCards) => true, + ( + DiscoveryInstance::ModelCard { namespace: inst_ns, .. }, + DiscoveryKey::NamespacedModelCards { namespace }, + ) => inst_ns == namespace, + ( + DiscoveryInstance::ModelCard { namespace: inst_ns, component: inst_comp, .. }, + DiscoveryKey::ComponentModelCards { namespace, component }, + ) => inst_ns == namespace && inst_comp == component, + ( + DiscoveryInstance::ModelCard { namespace: inst_ns, component: inst_comp, endpoint: inst_ep, .. }, + DiscoveryKey::EndpointModelCards { namespace, component, endpoint }, + ) => inst_ns == namespace && inst_comp == component && inst_ep == endpoint, + + // Cross-type matches return false + (DiscoveryInstance::Endpoint(_), DiscoveryKey::AllModelCards | DiscoveryKey::NamespacedModelCards { .. } | DiscoveryKey::ComponentModelCards { .. } | DiscoveryKey::EndpointModelCards { .. }) => false, + (DiscoveryInstance::ModelCard { .. }, DiscoveryKey::AllEndpoints | DiscoveryKey::NamespacedEndpoints { .. } | DiscoveryKey::ComponentEndpoints { .. } | DiscoveryKey::Endpoint { .. }) => false, } } @@ -98,6 +107,15 @@ impl DiscoveryClient for MockDiscoveryClient { Ok(instance) } + async fn list(&self, key: DiscoveryKey) -> Result> { + let instances = self.registry.instances.lock().unwrap(); + Ok(instances + .iter() + .filter(|instance| matches_key(instance, &key)) + .cloned() + .collect()) + } + async fn list_and_watch(&self, key: DiscoveryKey) -> Result { use std::collections::HashSet; @@ -118,14 +136,16 @@ impl DiscoveryClient for MockDiscoveryClient { let current_ids: HashSet<_> = current.iter().map(|i| { match i { - DiscoveryInstance::Endpoint { instance_id, .. } => *instance_id, + DiscoveryInstance::Endpoint(inst) => inst.instance_id, + DiscoveryInstance::ModelCard { instance_id, .. } => *instance_id, } }).collect(); // Emit Added events for new instances for instance in current { let id = match &instance { - DiscoveryInstance::Endpoint { instance_id, .. } => *instance_id, + DiscoveryInstance::Endpoint(inst) => inst.instance_id, + DiscoveryInstance::ModelCard { instance_id, .. 
} => *instance_id, }; if known_instances.insert(id) { yield Ok(DiscoveryEvent::Added(instance)); @@ -161,6 +181,7 @@ mod tests { namespace: "test-ns".to_string(), component: "test-comp".to_string(), endpoint: "test-ep".to_string(), + transport: crate::component::TransportType::NatsTcp("test-subject".to_string()), }; let key = DiscoveryKey::Endpoint { @@ -177,8 +198,8 @@ mod tests { let event = stream.next().await.unwrap().unwrap(); match event { - DiscoveryEvent::Added(DiscoveryInstance::Endpoint { instance_id, .. }) => { - assert_eq!(instance_id, 1); + DiscoveryEvent::Added(DiscoveryInstance::Endpoint(inst)) => { + assert_eq!(inst.instance_id, 1); } _ => panic!("Expected Added event for instance-1"), } @@ -188,15 +209,16 @@ mod tests { let event = stream.next().await.unwrap().unwrap(); match event { - DiscoveryEvent::Added(DiscoveryInstance::Endpoint { instance_id, .. }) => { - assert_eq!(instance_id, 2); + DiscoveryEvent::Added(DiscoveryInstance::Endpoint(inst)) => { + assert_eq!(inst.instance_id, 2); } _ => panic!("Expected Added event for instance-2"), } // Remove first instance registry.instances.lock().unwrap().retain(|i| match i { - DiscoveryInstance::Endpoint { instance_id, .. } => *instance_id != 1, + DiscoveryInstance::Endpoint(inst) => inst.instance_id != 1, + DiscoveryInstance::ModelCard { instance_id, .. } => *instance_id != 1, }); let event = stream.next().await.unwrap().unwrap(); diff --git a/lib/runtime/src/discovery/mod.rs b/lib/runtime/src/discovery/mod.rs index 090fff281a..0ac7a6963e 100644 --- a/lib/runtime/src/discovery/mod.rs +++ b/lib/runtime/src/discovery/mod.rs @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +use crate::component::TransportType; use crate::Result; use async_trait::async_trait; use futures::Stream; @@ -10,10 +11,21 @@ use std::pin::Pin; mod mock; pub use mock::{MockDiscoveryClient, SharedMockRegistry}; +mod kv_store; +pub use kv_store::KVStoreDiscoveryClient; + +mod kube; +pub use kube::{KubeDiscoveryClient, DiscoveryMetadata, hash_pod_name}; + +pub mod utils; +pub use utils::watch_and_extract_field; + /// Query key for prefix-based discovery queries /// Supports hierarchical queries from all endpoints down to specific endpoints #[derive(Debug, Clone, PartialEq, Eq, Hash)] +//// rename to query pub enum DiscoveryKey { + /// Query all endpoints in the system AllEndpoints, /// Query all endpoints in a specific namespace @@ -29,28 +41,63 @@ pub enum DiscoveryKey { component: String, endpoint: String, }, - // TODO: Extend to support ModelCard queries: - // - AllModels - // - NamespacedModels { namespace } - // - ComponentModels { namespace, component } - // - Model { namespace, component, model_name } + AllModelCards, + NamespacedModelCards { namespace: String }, + ComponentModelCards { + namespace: String, + component: String, + }, + EndpointModelCards { + namespace: String, + component: String, + endpoint: String, + }, } /// Specification for registering objects in the discovery plane /// Represents the input to the register() operation -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum DiscoverySpec { /// Endpoint specification for registration Endpoint { namespace: String, component: String, endpoint: String, + /// Transport type and routing information + transport: TransportType, + }, + ModelCard { + namespace: String, + component: String, + endpoint: String, + /// ModelDeploymentCard serialized 
as JSON + /// This allows lib/runtime to remain independent of lib/llm types + /// DiscoverySpec.from_model_card() and DiscoveryInstance.deserialize_model_card() are ergonomic helpers to create and deserialize the model card. + card_json: serde_json::Value, }, - // TODO: Add ModelCard variant: - // - ModelCard { namespace, component, model_name, card: ModelDeploymentCard } } impl DiscoverySpec { + /// Creates a ModelCard discovery spec from a serializable type + /// The card will be serialized to JSON to avoid cross-crate dependencies + pub fn from_model_card( + namespace: String, + component: String, + endpoint: String, + card: &T, + ) -> crate::Result + where + T: Serialize, + { + let card_json = serde_json::to_value(card)?; + Ok(Self::ModelCard { + namespace, + component, + endpoint, + card_json, + }) + } + /// Attaches an instance ID to create a DiscoveryInstance pub fn with_instance_id(self, instance_id: u64) -> DiscoveryInstance { match self { @@ -58,11 +105,25 @@ impl DiscoverySpec { namespace, component, endpoint, - } => DiscoveryInstance::Endpoint { + transport, + } => DiscoveryInstance::Endpoint(crate::component::Instance { + namespace, + component, + endpoint, + instance_id, + transport, + }), + Self::ModelCard { + namespace, + component, + endpoint, + card_json, + } => DiscoveryInstance::ModelCard { namespace, component, endpoint, instance_id, + card_json, }, } } @@ -70,18 +131,44 @@ impl DiscoverySpec { /// Registered instances in the discovery plane /// Represents objects that have been successfully registered with an instance ID -#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[serde(tag = "type")] pub enum DiscoveryInstance { - /// Registered endpoint instance - Endpoint { + /// Registered endpoint instance - wraps the component::Instance directly + Endpoint(crate::component::Instance), + ModelCard { namespace: String, component: String, endpoint: String, instance_id: u64, + /// ModelDeploymentCard serialized as JSON + /// This allows lib/runtime to remain independent of lib/llm types + card_json: serde_json::Value, }, - // TODO: Add ModelCard variant: - // - ModelCard { namespace, component, model_name, instance_id, card: ModelDeploymentCard } +} + +impl DiscoveryInstance { + /// Returns the instance ID for this discovery instance + pub fn instance_id(&self) -> u64 { + match self { + Self::Endpoint(inst) => inst.instance_id, + Self::ModelCard { instance_id, .. } => *instance_id, + } + } + + /// Deserializes the model card JSON into the specified type T + /// Returns an error if this is not a ModelCard instance or if deserialization fails + pub fn deserialize_model_card(&self) -> crate::Result + where + T: for<'de> Deserialize<'de>, + { + match self { + Self::ModelCard { card_json, .. } => Ok(serde_json::from_value(card_json.clone())?), + Self::Endpoint(_) => crate::raise!( + "Cannot deserialize model card from Endpoint instance" + ), + } + } } /// Events emitted by the discovery client watch stream @@ -97,6 +184,7 @@ pub enum DiscoveryEvent { pub type DiscoveryStream = Pin> + Send>>; /// Discovery client trait for service discovery across different backends +/// TODO: maybe not discovery client? 
just discovery #[async_trait] pub trait DiscoveryClient: Send + Sync { /// Returns a unique identifier for this worker (e.g lease id if using etcd or generated id for memory store) @@ -106,6 +194,11 @@ pub trait DiscoveryClient: Send + Sync { /// Registers an object in the discovery plane with the instance id async fn register(&self, spec: DiscoverySpec) -> Result; + /// Returns a list of currently registered instances for the given discovery key + /// This is a one-time snapshot without watching for changes + async fn list(&self, key: DiscoveryKey) -> Result>; + /// Returns a stream of discovery events (Added/Removed) for the given discovery key async fn list_and_watch(&self, key: DiscoveryKey) -> Result; } + diff --git a/lib/runtime/src/discovery/utils.rs b/lib/runtime/src/discovery/utils.rs new file mode 100644 index 0000000000..abcd42cf4c --- /dev/null +++ b/lib/runtime/src/discovery/utils.rs @@ -0,0 +1,107 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Utility functions for working with discovery streams + +use serde::Deserialize; + +use super::{DiscoveryEvent, DiscoveryInstance, DiscoveryStream}; + +/// Helper to watch a discovery stream and extract a specific field into a HashMap +/// +/// This helper spawns a background task that: +/// - Deserializes ModelCards from discovery events +/// - Extracts a specific field using the provided extractor function +/// - Maintains a HashMap that auto-updates on Add/Remove events +/// - Returns a watch::Receiver that consumers can use to read the current state +/// +/// # Type Parameters +/// - `T`: The type to deserialize from DiscoveryInstance (e.g., ModelDeploymentCard) +/// - `V`: The extracted field type (e.g., ModelRuntimeConfig) +/// - `F`: The extractor function type +/// +/// # Arguments +/// - `stream`: The discovery event stream to watch +/// - `extractor`: Function that extracts the desired field from the deserialized type +/// +/// # Example +/// ```ignore +/// let stream = discovery.list_and_watch(DiscoveryKey::ComponentModelCards { ... }).await?; +/// let runtime_configs_rx = watch_and_extract_field( +/// stream, +/// |card: ModelDeploymentCard| card.runtime_config, +/// ); +/// +/// // Use it: +/// let configs = runtime_configs_rx.borrow(); +/// if let Some(config) = configs.get(&worker_id) { +/// // Use config... 
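/// // (keys of `configs` are the u64 instance ids reported by
/// //  DiscoveryInstance::instance_id(), i.e. the same ids carried by Removed events)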
+/// } +/// ``` +pub fn watch_and_extract_field( + stream: DiscoveryStream, + extractor: F, +) -> tokio::sync::watch::Receiver> +where + T: for<'de> Deserialize<'de> + 'static, + V: Clone + Send + Sync + 'static, + F: Fn(T) -> V + Send + 'static, +{ + use futures::StreamExt; + use std::collections::HashMap; + + let (tx, rx) = tokio::sync::watch::channel(HashMap::new()); + + tokio::spawn(async move { + let mut state: HashMap = HashMap::new(); + let mut stream = stream; + + while let Some(result) = stream.next().await { + match result { + Ok(DiscoveryEvent::Added(instance)) => { + let instance_id = instance.instance_id(); + + // Deserialize the full instance into type T + let deserialized: T = match instance.deserialize_model_card() { + Ok(d) => d, + Err(e) => { + tracing::warn!( + instance_id, + error = %e, + "Failed to deserialize discovery instance, skipping" + ); + continue; + } + }; + + // Extract the field we care about + let value = extractor(deserialized); + + // Update state and send + state.insert(instance_id, value); + if tx.send(state.clone()).is_err() { + tracing::debug!("watch_and_extract_field receiver dropped, stopping"); + break; + } + } + Ok(DiscoveryEvent::Removed(instance_id)) => { + // Remove from state and send update + state.remove(&instance_id); + if tx.send(state.clone()).is_err() { + tracing::debug!("watch_and_extract_field receiver dropped, stopping"); + break; + } + } + Err(e) => { + tracing::error!(error = %e, "Discovery event stream error in watch_and_extract_field"); + // Continue processing other events + } + } + } + + tracing::debug!("watch_and_extract_field task stopped"); + }); + + rx +} + diff --git a/lib/runtime/src/distributed.rs b/lib/runtime/src/distributed.rs index fd2846a0b9..3731fd982d 100644 --- a/lib/runtime/src/distributed.rs +++ b/lib/runtime/src/distributed.rs @@ -92,12 +92,59 @@ impl DistributedRuntime { let nats_client_for_metrics = nats_client.clone(); - // Initialize discovery client with mock implementation - // TODO: Replace MockDiscoveryClient with KeyValueStoreDiscoveryClient or KubeDiscoveryClient - let discovery_client = { - use crate::discovery::{MockDiscoveryClient, SharedMockRegistry}; - let registry = SharedMockRegistry::new(); - Arc::new(MockDiscoveryClient::new(None, registry)) as Arc + // Initialize discovery client based on backend configuration + let discovery_backend = std::env::var("DYN_DISCOVERY_BACKEND") + .unwrap_or_else(|_| "kv_store".to_string()); + + let (discovery_client, discovery_metadata) = match discovery_backend.as_str() { + "kubernetes" => { + tracing::info!("Initializing Kubernetes discovery backend"); + + // Create shared metadata store + let metadata = Arc::new(tokio::sync::RwLock::new( + crate::discovery::DiscoveryMetadata::new() + )); + + // Create Kubernetes discovery client + match crate::discovery::KubeDiscoveryClient::new( + metadata.clone(), + runtime.primary_token(), + ).await { + Ok(client) => { + tracing::info!("Kubernetes discovery client initialized successfully"); + ( + Arc::new(client) as Arc, + Some(metadata), + ) + } + Err(e) => { + tracing::warn!( + "Failed to initialize Kubernetes discovery client: {}. 
Falling back to KV store.", + e + ); + // Fallback to KV store + use crate::discovery::KVStoreDiscoveryClient; + ( + Arc::new(KVStoreDiscoveryClient::new( + store.clone(), + runtime.primary_token(), + )) as Arc, + None, + ) + } + } + } + _ => { + tracing::info!("Initializing KV store discovery backend"); + use crate::discovery::KVStoreDiscoveryClient; + ( + Arc::new(KVStoreDiscoveryClient::new( + store.clone(), + runtime.primary_token(), + )) as Arc, + None, + ) + } }; let distributed_runtime = Self { @@ -108,6 +155,7 @@ impl DistributedRuntime { tcp_server: Arc::new(OnceCell::new()), system_status_server: Arc::new(OnceLock::new()), discovery_client, + discovery_metadata, component_registry: component::Registry::new(), is_static, instance_sources: Arc::new(Mutex::new(HashMap::new())), @@ -151,6 +199,7 @@ impl DistributedRuntime { port, cancel_token, Arc::new(distributed_runtime.clone()), + distributed_runtime.discovery_metadata.clone(), ) .await { @@ -229,7 +278,7 @@ impl DistributedRuntime { } pub fn connection_id(&self) -> u64 { - self.store.connection_id() + self.discovery_client.instance_id() } pub fn shutdown(&self) { @@ -241,9 +290,10 @@ impl DistributedRuntime { Namespace::new(self.clone(), name.into(), self.is_static) } - /// TODO: Return discovery client when KeyValueDiscoveryClient or KubeDiscoveryClient is implemented - pub fn discovery_client(&self) -> Result> { - Err(error!("Discovery client not implemented!")) + /// Returns the discovery client for service registration and discovery + /// Currently uses MockDiscoveryClient, will be replaced with KeyValueDiscoveryClient or KubeDiscoveryClient + pub fn discovery_client(&self) -> Arc { + self.discovery_client.clone() } pub(crate) fn service_client(&self) -> Option { diff --git a/lib/runtime/src/instances.rs b/lib/runtime/src/instances.rs index 8f9ab0f676..7f875c7669 100644 --- a/lib/runtime/src/instances.rs +++ b/lib/runtime/src/instances.rs @@ -9,26 +9,34 @@ use std::sync::Arc; -use crate::component::{INSTANCE_ROOT_PATH, Instance}; -use crate::storage::key_value_store::{KeyValueStore, KeyValueStoreManager}; -use crate::transports::etcd::Client as EtcdClient; +use crate::component::Instance; +use crate::discovery::{DiscoveryClient, DiscoveryKey}; -pub async fn list_all_instances(client: &KeyValueStoreManager) -> anyhow::Result> { - let Some(bucket) = client.get_bucket(INSTANCE_ROOT_PATH).await? else { - return Ok(vec![]); - }; +pub async fn list_all_instances( + discovery_client: Arc, +) -> anyhow::Result> { + let discovery_instances = discovery_client.list(DiscoveryKey::AllEndpoints).await?; - let entries = bucket.entries().await?; - let mut instances = Vec::with_capacity(entries.len()); - for (name, bytes) in entries.into_iter() { - match serde_json::from_slice::(&bytes) { - Ok(instance) => instances.push(instance), - Err(err) => { - tracing::warn!(%err, key = name, "Failed to parse instance from storage"); - } - } - } - instances.sort(); + let mut instances: Vec = discovery_instances + .into_iter() + .filter_map(|di| match di { + crate::discovery::DiscoveryInstance::Endpoint(instance) => Some(instance), + _ => None, // Ignore all other variants (ModelCard, etc.) 
+ }) + .collect(); + instances.sort(); + + // Log all instances found for comparison + let instance_details: Vec<(u64, &str, &str, &str)> = instances + .iter() + .map(|inst| (inst.instance_id, inst.namespace.as_str(), inst.component.as_str(), inst.endpoint.as_str())) + .collect(); + tracing::warn!( + "DISCOVERY_VALIDATION: all_instances_found: count={}, instances={:?}", + instances.len(), + instance_details + ); + Ok(instances) } diff --git a/lib/runtime/src/lib.rs b/lib/runtime/src/lib.rs index 8a39ed32b4..c08fd6002d 100644 --- a/lib/runtime/src/lib.rs +++ b/lib/runtime/src/lib.rs @@ -99,6 +99,10 @@ pub struct DistributedRuntime { // Service discovery client discovery_client: Arc, + // Discovery metadata (only used for Kubernetes backend) + // Shared with system status server to expose via /metadata endpoint + discovery_metadata: Option>>, + // local registry for components // the registry allows us to use share runtime resources across instances of the same component object. // take for example two instances of a client to the same remote component. The registry allows us to use diff --git a/lib/runtime/src/storage/key_value_store.rs b/lib/runtime/src/storage/key_value_store.rs index 7fc122ec40..9946be790c 100644 --- a/lib/runtime/src/storage/key_value_store.rs +++ b/lib/runtime/src/storage/key_value_store.rs @@ -243,41 +243,140 @@ impl KeyValueStoreManager { ) { let bucket_name = bucket_name.to_string(); let (tx, rx) = tokio::sync::mpsc::channel(128); + tracing::debug!("KeyValueStoreManager.watch: Starting watch for bucket={}", bucket_name); let watch_task = tokio::spawn(async move { - // Start listening for changes but don't poll this yet + tracing::debug!("KeyValueStoreManager.watch: Watch task started for bucket={}", bucket_name); + // Get or create the bucket let bucket = self .0 .get_or_create_bucket(&bucket_name, bucket_ttl) .await?; + tracing::debug!("KeyValueStoreManager.watch: Got bucket for bucket={}", bucket_name); + + // CRITICAL: Get existing entries BEFORE starting the watch to avoid missing entries. + // This handles the race condition where entries might be added between these calls. + // We'll use deduplication to handle any overlap. + let existing_entries = bucket.entries().await?; + let existing_count = existing_entries.len(); + tracing::debug!( + "KeyValueStoreManager.watch: Found {} existing entries in bucket={}", + existing_count, + bucket_name + ); + + // Now start the watch stream for future changes let mut stream = bucket.watch().await?; - - // Send all the existing keys - for (key, bytes) in bucket.entries().await? 
{ + tracing::debug!("KeyValueStoreManager.watch: Got watch stream for bucket={}", bucket_name); + + // Track keys we've sent to deduplicate between existing entries and watch stream + let mut seen_keys = std::collections::HashSet::new(); + + // First, send all existing entries as Put events + for (key, bytes) in existing_entries { + tracing::debug!( + "KeyValueStoreManager.watch: Sending existing entry key={}, size={} bytes for bucket={}", + key, + bytes.len(), + bucket_name + ); + seen_keys.insert(key.clone()); if let Err(err) = tx .send_timeout( - WatchEvent::Put(KeyValue::new(key, bytes)), + WatchEvent::Put(KeyValue::new(key.clone(), bytes)), WATCH_SEND_TIMEOUT, ) .await { - tracing::error!(bucket_name, %err, "KeyValueStoreManager.watch failed adding existing key to channel"); + tracing::error!(bucket_name, %err, key, "KeyValueStoreManager.watch failed sending existing key to channel"); + } else { + tracing::debug!( + "KeyValueStoreManager.watch: Successfully sent existing entry key={} for bucket={}", + key, + bucket_name + ); } } - - // Now block waiting for new entries + tracing::debug!( + "KeyValueStoreManager.watch: Finished sending {} existing entries for bucket={}, now watching for new events", + existing_count, + bucket_name + ); + + // Now forward events from the watch stream with simple deduplication + // Note: The memory backend's watch() already includes existing entries and deduplicates + // internally, so we may receive some duplicates. We'll skip Put events for keys we + // just sent from entries(), but allow Delete events and subsequent updates through. + let mut new_event_count = 0; + let mut dedup_count = 0; loop { let event = tokio::select! { - _ = cancel_token.cancelled() => break, + _ = cancel_token.cancelled() => { + tracing::debug!("KeyValueStoreManager.watch: Cancel token triggered for bucket={}", bucket_name); + break; + } result = stream.next() => match result { - Some(event) => event, - None => break, + Some(event) => { + tracing::debug!( + "KeyValueStoreManager.watch: Received event from stream for bucket={}", + bucket_name + ); + event + }, + None => { + tracing::debug!("KeyValueStoreManager.watch: Stream closed for bucket={}", bucket_name); + break; + } } }; - if let Err(err) = tx.send_timeout(event, WATCH_SEND_TIMEOUT).await { - tracing::error!(bucket_name, %err, "KeyValueStoreManager.watch failed adding new key to channel"); + + // Simple deduplication: For Put events, if we just sent this key from entries(), + // skip it once. For memory backend, this skips the duplicate from its watch stream. + // For etcd backend, this shouldn't trigger since watch only sees new events. + // For subsequent Puts to the same key (updates), we'll send them. 
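                // Worked example: entries() returned key "a" above, so "a" is in
                // seen_keys. If the backend's watch stream then replays Put("a"),
                // seen_keys.remove("a") succeeds and that single event is skipped;
                // a later Put("a") carrying an update no longer finds "a" and is
                // forwarded, and Delete events are always forwarded.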
+ let should_send = match &event { + WatchEvent::Put(kv) => { + let key = kv.key_str(); + if seen_keys.remove(key) { + // We already sent this key from entries(), so skip this one occurrence + dedup_count += 1; + tracing::debug!( + "KeyValueStoreManager.watch: Deduplicating Put for key={} in bucket={} (probably from memory backend's initial yield)", + key, + bucket_name + ); + false + } else { + // Either a new key or an update to a key we've already seen + true + } + } + WatchEvent::Delete(_) => { + // Always send deletes + true + } + }; + + if should_send { + new_event_count += 1; + if let Err(err) = tx.send_timeout(event, WATCH_SEND_TIMEOUT).await { + tracing::error!(bucket_name, %err, "KeyValueStoreManager.watch failed sending new event to channel"); + } else { + tracing::debug!( + "KeyValueStoreManager.watch: Successfully sent new event #{} for bucket={}", + new_event_count, + bucket_name + ); + } } } + tracing::debug!( + "KeyValueStoreManager.watch: Watch task ending for bucket={}, sent {} existing + {} new events (deduplicated {} events)", + bucket_name, + existing_count, + new_event_count, + dedup_count + ); Ok::<(), StoreError>(()) }); (watch_task, rx) diff --git a/lib/runtime/src/system_status_server.rs b/lib/runtime/src/system_status_server.rs index 679d0f9043..ba0840bf9e 100644 --- a/lib/runtime/src/system_status_server.rs +++ b/lib/runtime/src/system_status_server.rs @@ -56,18 +56,33 @@ impl Clone for SystemStatusServerInfo { pub struct SystemStatusState { // global drt registry is for printing out the entire Prometheus format output root_drt: Arc, + // Discovery metadata (only for Kubernetes backend) + discovery_metadata: Option>>, } impl SystemStatusState { /// Create new system status server state with the provided distributed runtime - pub fn new(drt: Arc) -> anyhow::Result { - Ok(Self { root_drt: drt }) + pub fn new( + drt: Arc, + discovery_metadata: Option>>, + ) -> anyhow::Result { + Ok(Self { + root_drt: drt, + discovery_metadata, + }) } /// Get a reference to the distributed runtime pub fn drt(&self) -> &crate::DistributedRuntime { &self.root_drt } + + /// Get a reference to the discovery metadata if available + pub fn discovery_metadata( + &self, + ) -> Option<&Arc>> { + self.discovery_metadata.as_ref() + } } /// Start system status server with metrics support @@ -76,9 +91,10 @@ pub async fn spawn_system_status_server( port: u16, cancel_token: CancellationToken, drt: Arc, + discovery_metadata: Option>>, ) -> anyhow::Result<(std::net::SocketAddr, tokio::task::JoinHandle<()>)> { // Create system status server state with the provided distributed runtime - let server_state = Arc::new(SystemStatusState::new(drt)?); + let server_state = Arc::new(SystemStatusState::new(drt, discovery_metadata)?); let health_path = server_state .drt() .system_health @@ -114,6 +130,13 @@ pub async fn spawn_system_status_server( move || metrics_handler(state) }), ) + .route( + "/metadata", + get({ + let state = Arc::clone(&server_state); + move || metadata_handler(state) + }), + ) .fallback(|| async { tracing::info!("[fallback handler] called"); (StatusCode::NOT_FOUND, "Route not found").into_response() @@ -205,6 +228,43 @@ async fn metrics_handler(state: Arc) -> impl IntoResponse { (StatusCode::OK, response) } +/// Metadata handler for Kubernetes discovery backend +/// Returns the discovery metadata registered by this pod +#[tracing::instrument(skip_all, level = "trace")] +async fn metadata_handler(state: Arc) -> impl IntoResponse { + // Check if discovery metadata is available + let metadata 
= match state.discovery_metadata() { + Some(metadata) => metadata, + None => { + tracing::debug!("Metadata endpoint called but no discovery metadata available"); + return ( + StatusCode::NOT_FOUND, + "Discovery metadata not available (not using Kubernetes backend)".to_string(), + ) + .into_response(); + } + }; + + // Read the metadata + let metadata_guard = metadata.read().await; + + // Serialize to JSON + match serde_json::to_string(&*metadata_guard) { + Ok(json) => { + tracing::trace!("Returning metadata: {} bytes", json.len()); + (StatusCode::OK, json).into_response() + } + Err(e) => { + tracing::error!("Failed to serialize metadata: {}", e); + ( + StatusCode::INTERNAL_SERVER_ERROR, + "Failed to serialize metadata".to_string(), + ) + .into_response() + } + } +} + // Regular tests: cargo test system_status_server --lib #[cfg(test)] mod tests { diff --git a/lib/runtime/tests/kube_client_integration.rs b/lib/runtime/tests/kube_client_integration.rs new file mode 100644 index 0000000000..d28b599c9e --- /dev/null +++ b/lib/runtime/tests/kube_client_integration.rs @@ -0,0 +1,293 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Integration tests for KubeDiscoveryClient +//! +//! These tests require: +//! 1. Access to a Kubernetes cluster (kubectl configured) +//! 2. Test resources deployed (run k8s-test/deploy.sh) +//! +//! Run with: cargo test --test kube_client_integration -- --ignored --nocapture + +use dynamo_runtime::discovery::{ + KubeDiscoveryClient, DiscoveryClient, DiscoveryKey, +}; +use kube::Client; +use futures::StreamExt; + +/// Helper to create a test client with mock metadata +async fn create_test_client() -> Result> { + let kube_client = Client::try_default().await?; + let client = KubeDiscoveryClient::new_for_testing( + kube_client, + "test-pod-123".to_string(), + "discovery".to_string(), + true, // mock_metadata = true (skip HTTP calls, return mock data) + ).await?; + Ok(client) +} + +/// Test basic client creation and instance_id +#[tokio::test] +#[ignore] +async fn test_client_creation() { + println!("🔌 Testing KubeDiscoveryClient creation..."); + + let client = create_test_client().await + .expect("Failed to create test client"); + + let instance_id = client.instance_id(); + println!("✅ Client created with instance_id: {:x}", instance_id); + + assert_ne!(instance_id, 0, "Instance ID should not be zero"); +} + +/// Test listing all endpoints (without label filtering) +#[tokio::test] +#[ignore] +async fn test_list_all_endpoints() { + println!("📋 Testing list all endpoints..."); + println!(" Note: Using mock metadata (no actual HTTP calls to pods)"); + + let client = create_test_client().await + .expect("Failed to create test client"); + + let key = DiscoveryKey::AllEndpoints; + + println!("Calling list() with key={:?}", key); + let result = client.list(key).await; + + match result { + Ok(instances) => { + println!("✅ list() succeeded"); + println!(" Found {} instances", instances.len()); + + for (i, instance) in instances.iter().enumerate() { + println!(" [{}] {:?}", i, instance); + } + } + Err(e) => { + println!("❌ list() failed: {}", e); + } + } + + println!("✅ List test completed"); +} + +/// Test listing endpoints in a specific namespace +#[tokio::test] +#[ignore] +async fn test_list_namespaced_endpoints() { + println!("📋 Testing list namespaced endpoints..."); + + let client = create_test_client().await + .expect("Failed to create test client"); + + let key = 
DiscoveryKey::NamespacedEndpoints {
+        namespace: "test-namespace".to_string(),
+    };
+
+    println!("Calling list() with key={:?}", key);
+    let result = client.list(key).await;
+
+    match result {
+        Ok(instances) => {
+            println!("✅ list() succeeded");
+            println!(" Found {} instances in test-namespace", instances.len());
+        }
+        Err(e) => {
+            println!("⚠️ list() failed: {}", e);
+        }
+    }
+
+    println!("✅ Namespaced list test completed");
+}
+
+/// Test listing endpoints for a specific component
+#[tokio::test]
+#[ignore]
+async fn test_list_component_endpoints() {
+    println!("📋 Testing list component endpoints...");
+
+    let client = create_test_client().await
+        .expect("Failed to create test client");
+
+    let key = DiscoveryKey::ComponentEndpoints {
+        namespace: "test-namespace".to_string(),
+        component: "test-component".to_string(),
+    };
+
+    println!("Calling list() with key={:?}", key);
+    let result = client.list(key).await;
+
+    match result {
+        Ok(instances) => {
+            println!("✅ list() succeeded");
+            println!(" Found {} instances for test-namespace/test-component", instances.len());
+        }
+        Err(e) => {
+            println!("⚠️ list() failed: {}", e);
+        }
+    }
+
+    println!("✅ Component list test completed");
+}
+
+/// Test watching all endpoints
+#[tokio::test]
+#[ignore]
+async fn test_watch_all_endpoints() {
+    println!("👀 Testing watch all endpoints...");
+    println!(" This test will watch for 10 seconds");
+    println!(" Note: Using mock metadata (no actual HTTP calls to pods)");
+
+    let client = create_test_client().await
+        .expect("Failed to create test client");
+
+    let key = DiscoveryKey::AllEndpoints;
+
+    println!("Calling list_and_watch() with key={:?}", key);
+    let stream = client.list_and_watch(key).await
+        .expect("Failed to create watch stream");
+
+    let mut stream = stream;
+    let timeout = tokio::time::Duration::from_secs(10);
+    let deadline = tokio::time::Instant::now() + timeout;
+
+    let mut event_count = 0;
+
+    println!("📡 Watch stream started...");
+
+    loop {
+        tokio::select! {
+            Some(event) = stream.next() => {
+                event_count += 1;
+                match event {
+                    Ok(discovery_event) => {
+                        println!(" [{}] Event: {:?}", event_count, discovery_event);
+                    }
+                    Err(e) => {
+                        println!(" [{}] Error: {}", event_count, e);
+                    }
+                }
+            }
+            _ = tokio::time::sleep_until(deadline) => {
+                println!("⏰ Timeout reached");
+                break;
+            }
+        }
+    }
+
+    println!("✅ Watch test completed ({} events received)", event_count);
+    println!(" With mock metadata, you should see Added events for discovered pods");
+}
+
+/// Test watching namespaced endpoints
+#[tokio::test]
+#[ignore]
+async fn test_watch_namespaced_endpoints() {
+    println!("👀 Testing watch namespaced endpoints...");
+    println!(" This test will watch for 5 seconds");
+
+    let client = create_test_client().await
+        .expect("Failed to create test client");
+
+    let key = DiscoveryKey::NamespacedEndpoints {
+        namespace: "test-namespace".to_string(),
+    };
+
+    println!("Calling list_and_watch() with key={:?}", key);
+    let stream = client.list_and_watch(key).await
+        .expect("Failed to create watch stream");
+
+    let mut stream = stream;
+    let timeout = tokio::time::Duration::from_secs(5);
+    let deadline = tokio::time::Instant::now() + timeout;
+
+    let mut event_count = 0;
+
+    println!("📡 Watch stream started...");
+
+    loop {
+        tokio::select!
{ + Some(event) = stream.next() => { + event_count += 1; + match event { + Ok(discovery_event) => { + println!(" [{}] Event: {:?}", event_count, discovery_event); + } + Err(e) => { + println!(" [{}] Error: {}", event_count, e); + } + } + } + _ = tokio::time::sleep_until(deadline) => { + println!("⏰ Timeout reached"); + break; + } + } + } + + println!("✅ Watch test completed ({} events received)", event_count); +} + +/// Comprehensive test: verify the watch stream receives EndpointSlice events +/// This test verifies that the K8s watcher is working correctly +#[tokio::test] +#[ignore] +async fn test_watch_receives_k8s_events() { + println!("🔍 Testing that watch stream receives Kubernetes events..."); + println!(" This test verifies the K8s watcher layer works correctly"); + println!(" We'll watch for 10 seconds to ensure we get at least Init/InitDone"); + + let client = create_test_client().await + .expect("Failed to create test client"); + + let key = DiscoveryKey::AllEndpoints; + + let stream = client.list_and_watch(key).await + .expect("Failed to create watch stream"); + + let mut stream = stream; + let timeout = tokio::time::Duration::from_secs(10); + let deadline = tokio::time::Instant::now() + timeout; + + let mut received_any_event = false; + + println!("📡 Monitoring watch stream..."); + + loop { + tokio::select! { + Some(event) = stream.next() => { + received_any_event = true; + match event { + Ok(discovery_event) => { + println!(" ✅ Received discovery event: {:?}", discovery_event); + } + Err(e) => { + println!(" ⚠️ Stream error: {}", e); + } + } + // Got at least one event, test passes + break; + } + _ = tokio::time::sleep_until(deadline) => { + println!("⏰ Timeout reached"); + break; + } + } + } + + if received_any_event { + println!("✅ Watch stream is working - received at least one event"); + } else { + println!("⚠️ No events received in 10 seconds"); + println!(" This might be okay if:"); + println!(" - No EndpointSlices exist in the cluster"); + println!(" - Metadata HTTP calls are failing (expected without metadata server)"); + println!(" The K8s watcher itself is still working correctly."); + } + + println!("✅ Test completed"); +} + diff --git a/lib/runtime/tests/kube_discovery_integration.rs b/lib/runtime/tests/kube_discovery_integration.rs new file mode 100644 index 0000000000..a44438fe28 --- /dev/null +++ b/lib/runtime/tests/kube_discovery_integration.rs @@ -0,0 +1,367 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Integration tests for Kubernetes discovery client +//! +//! These tests require: +//! 1. Access to a Kubernetes cluster (kubectl configured) +//! 2. Test resources deployed (run k8s-test/deploy.sh) +//! +//! 
Run with: cargo test --test kube_discovery_integration -- --nocapture + +use futures::StreamExt; +use k8s_openapi::api::discovery::v1::EndpointSlice; +use kube::{Api, Client}; +use kube::runtime::{watcher, watcher::Config}; + +/// Test that we can successfully create a Kubernetes client +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_kube_client_connection -- --ignored +async fn test_kube_client_connection() { + println!("🔌 Testing Kubernetes client connection..."); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client - is kubectl configured?"); + + println!("✅ Successfully connected to Kubernetes cluster"); + + // Try to list namespaces as a connectivity test + let namespaces: Api = Api::all(client); + let ns_list = namespaces.list(&Default::default()).await + .expect("Failed to list namespaces"); + + println!("📋 Found {} namespaces", ns_list.items.len()); + println!("✅ Kubernetes API is accessible"); +} + +/// Test listing EndpointSlices +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_list_endpointslices -- --ignored +async fn test_list_endpointslices() { + println!("📋 Testing EndpointSlice listing..."); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client"); + + let endpoint_slices: Api = Api::namespaced(client, "default"); + + // List all EndpointSlices in default namespace + let list_params = kube::api::ListParams::default(); + let slices = endpoint_slices.list(&list_params).await + .expect("Failed to list EndpointSlices"); + + println!("📊 Found {} EndpointSlices in default namespace", slices.items.len()); + + for slice in &slices.items { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let service = slice.metadata.labels.as_ref() + .and_then(|l| l.get("kubernetes.io/service-name")) + .map(|s| s.as_str()) + .unwrap_or(""); + + let endpoint_count = slice.endpoints.len(); + + println!(" • {} (service: {}, endpoints: {})", name, service, endpoint_count); + + // Show endpoint details + for (i, endpoint) in slice.endpoints.iter().enumerate() { + let ready = endpoint.conditions.as_ref() + .and_then(|c| c.ready) + .unwrap_or(false); + let addresses = &endpoint.addresses; + let pod_name = endpoint.target_ref.as_ref() + .and_then(|t| t.name.as_ref()) + .map(|n| n.as_str()) + .unwrap_or(""); + + println!(" [{}] pod={}, ready={}, addresses={:?}", + i, pod_name, ready, addresses); + } + } + + println!("✅ EndpointSlice listing test completed"); +} + +/// Test listing EndpointSlices with label selector (like our discovery client does) +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_list_with_labels -- --ignored +async fn test_list_with_labels() { + println!("🏷️ Testing EndpointSlice listing with label selector..."); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client"); + + let endpoint_slices: Api = Api::all(client); + + // Test the label selector we use in our discovery client + let label_selector = "dynamo.nvidia.com/namespace=test-namespace,dynamo.nvidia.com/component=test-component"; + println!("Using label selector: {}", label_selector); + + let list_params = kube::api::ListParams::default() + .labels(label_selector); + + let slices = endpoint_slices.list(&list_params).await + .expect("Failed to list EndpointSlices with labels"); + + println!("📊 Found {} EndpointSlices matching labels", slices.items.len()); + 
+ if slices.items.is_empty() { + println!("⚠️ No EndpointSlices found with Dynamo labels."); + println!(" Make sure test resources are deployed: ./k8s-test/deploy.sh"); + println!(" Note: Kubernetes creates EndpointSlices automatically,"); + println!(" but pod labels don't flow to EndpointSlices by default."); + } + + for slice in &slices.items { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let endpoint_count = slice.endpoints.len(); + println!(" • {} (endpoints: {})", name, endpoint_count); + } + + println!("✅ Label selector test completed"); +} + +/// Test watching EndpointSlices for changes +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_watch_endpointslices -- --ignored +async fn test_watch_endpointslices() { + println!("👀 Testing EndpointSlice watching..."); + println!(" This test will watch for 10 seconds or 5 events, whichever comes first"); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client"); + + let endpoint_slices: Api = Api::namespaced(client, "default"); + + // Create watcher + let watch_config = Config::default(); + let mut watch_stream = Box::pin(watcher(endpoint_slices, watch_config)); + + println!("📡 Watch stream started..."); + + let mut event_count = 0; + let max_events = 5; + let timeout = tokio::time::Duration::from_secs(10); + let deadline = tokio::time::Instant::now() + timeout; + + loop { + tokio::select! { + Some(event) = watch_stream.next() => { + event_count += 1; + match event { + Ok(watcher::Event::Apply(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let endpoint_count = slice.endpoints.len(); + println!(" [{}] ✅ Apply: {} (endpoints: {})", event_count, name, endpoint_count); + } + Ok(watcher::Event::InitApply(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let endpoint_count = slice.endpoints.len(); + println!(" [{}] 🔄 InitApply: {} (endpoints: {})", event_count, name, endpoint_count); + } + Ok(watcher::Event::Delete(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + println!(" [{}] ❌ Delete: {}", event_count, name); + } + Ok(watcher::Event::Init) => { + println!(" [{}] 🚀 Init - watch stream starting", event_count); + } + Ok(watcher::Event::InitDone) => { + println!(" [{}] ✅ InitDone - initial list complete", event_count); + } + Err(e) => { + println!(" [{}] ⚠️ Error: {}", event_count, e); + } + } + + if event_count >= max_events { + println!("📊 Reached max events ({}), stopping watch", max_events); + break; + } + } + _ = tokio::time::sleep_until(deadline) => { + println!("⏰ Timeout reached ({}s), stopping watch", timeout.as_secs()); + break; + } + } + } + + println!("✅ Watch test completed ({} events received)", event_count); +} + +/// Test watching EndpointSlices with label selector +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_watch_with_labels -- --ignored +async fn test_watch_with_labels() { + println!("👀 Testing EndpointSlice watching with label selector..."); + println!(" This test will watch for 5 seconds or until InitDone"); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client"); + + let endpoint_slices: Api = Api::all(client); + + // Watch with our discovery labels + let label_selector = "kubernetes.io/service-name=dynamo-test-service"; + println!("Using label selector: {}", label_selector); + + let watch_config = Config::default() + .labels(label_selector); + let mut 
watch_stream = Box::pin(watcher(endpoint_slices, watch_config)); + + println!("📡 Watch stream started..."); + + let mut event_count = 0; + let timeout = tokio::time::Duration::from_secs(5); + let deadline = tokio::time::Instant::now() + timeout; + let mut init_done = false; + + loop { + tokio::select! { + Some(event) = watch_stream.next() => { + event_count += 1; + match event { + Ok(watcher::Event::Apply(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let endpoint_count = slice.endpoints.len(); + println!(" [{}] ✅ Apply: {} (endpoints: {})", event_count, name, endpoint_count); + } + Ok(watcher::Event::InitApply(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let endpoint_count = slice.endpoints.len(); + println!(" [{}] 🔄 InitApply: {} (endpoints: {})", event_count, name, endpoint_count); + } + Ok(watcher::Event::Delete(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + println!(" [{}] ❌ Delete: {}", event_count, name); + } + Ok(watcher::Event::Init) => { + println!(" [{}] 🚀 Init - watch stream starting", event_count); + } + Ok(watcher::Event::InitDone) => { + println!(" [{}] ✅ InitDone - initial list complete", event_count); + init_done = true; + } + Err(e) => { + println!(" [{}] ⚠️ Error: {}", event_count, e); + } + } + + if init_done { + println!("📊 InitDone received, stopping watch"); + break; + } + } + _ = tokio::time::sleep_until(deadline) => { + println!("⏰ Timeout reached ({}s), stopping watch", timeout.as_secs()); + break; + } + } + } + + println!("✅ Watch with labels test completed ({} events received)", event_count); +} + +/// Comprehensive test that simulates our discovery client behavior +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_discovery_simulation -- --ignored +async fn test_discovery_simulation() { + println!("🔍 Testing discovery client simulation..."); + println!(" This simulates how our KubeDiscoveryClient list_and_watch works"); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client"); + + let endpoint_slices: Api = Api::all(client); + + // Use service name label (EndpointSlices automatically get this label) + let label_selector = "kubernetes.io/service-name=dynamo-test-service"; + println!("Label selector: {}", label_selector); + + let watch_config = Config::default() + .labels(label_selector); + let mut watch_stream = Box::pin(watcher(endpoint_slices, watch_config)); + + println!("📡 Starting watch stream..."); + + let mut seen_endpoints = std::collections::HashSet::new(); + let timeout = tokio::time::Duration::from_secs(10); + let deadline = tokio::time::Instant::now() + timeout; + + loop { + tokio::select! 
{ + Some(event) = watch_stream.next() => { + match event { + Ok(watcher::Event::Apply(slice)) | Ok(watcher::Event::InitApply(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + println!(" 📦 Processing EndpointSlice: {}", name); + + // Extract endpoints (simulate our discovery logic) + for endpoint in &slice.endpoints { + let ready = endpoint.conditions.as_ref() + .and_then(|c| c.ready) + .unwrap_or(false); + + if !ready { + continue; + } + + let pod_name = endpoint.target_ref.as_ref() + .and_then(|t| t.name.as_ref()) + .map(|n| n.as_str()) + .unwrap_or_default(); + + if pod_name.is_empty() { + continue; + } + + // Hash the pod name (simulate instance_id generation) + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + let mut hasher = DefaultHasher::new(); + pod_name.hash(&mut hasher); + let instance_id = hasher.finish(); + + if seen_endpoints.insert(instance_id) { + let addresses = &endpoint.addresses; + println!(" ✅ New endpoint: pod={}, instance_id={:x}, addresses={:?}", + pod_name, instance_id, addresses); + } + } + } + Ok(watcher::Event::Delete(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + println!(" ❌ EndpointSlice deleted: {}", name); + } + Ok(watcher::Event::Init) => { + println!(" 🚀 Watch stream initialized"); + } + Ok(watcher::Event::InitDone) => { + println!(" ✅ Initial sync complete"); + println!(" 📊 Discovered {} unique endpoints", seen_endpoints.len()); + break; + } + Err(e) => { + eprintln!(" ⚠️ Watch error: {}", e); + } + } + } + _ = tokio::time::sleep_until(deadline) => { + println!("⏰ Timeout reached"); + break; + } + } + } + + println!("✅ Discovery simulation completed"); + println!("📊 Total unique endpoints discovered: {}", seen_endpoints.len()); + + assert!(seen_endpoints.len() > 0, "Should have discovered at least one endpoint"); +} +
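
For reference, a minimal sketch of building the label selector strings exercised by test_list_with_labels. The dynamo.nvidia.com/namespace and dynamo.nvidia.com/component label keys are taken verbatim from that test; the helper name and signature are illustrative only and are not an API introduced by this patch.

// Sketch: assemble the selector strings used in the label-selector tests above.
// `None, None` yields an empty selector (match everything), loosely mirroring
// DiscoveryKey::AllEndpoints vs. the namespaced/component variants.
fn label_selector(namespace: Option<&str>, component: Option<&str>) -> String {
    let mut parts = Vec::new();
    if let Some(ns) = namespace {
        parts.push(format!("dynamo.nvidia.com/namespace={ns}"));
    }
    if let Some(comp) = component {
        parts.push(format!("dynamo.nvidia.com/component={comp}"));
    }
    parts.join(",")
}

For example, label_selector(Some("test-namespace"), Some("test-component")) produces the exact string passed to ListParams::labels in test_list_with_labels.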
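A possible caller for the new /metadata route is sketched below. It is illustrative only: the peer address, the use of reqwest (with its json feature) plus anyhow, and decoding into serde_json::Value rather than the concrete discovery metadata type are assumptions, not part of this patch. The only behavior taken from the handler above is that it returns 404 when no Kubernetes discovery metadata is registered and a JSON body otherwise.

// Sketch: fetch the discovery metadata a peer exposes on its system status server.
use std::time::Duration;

async fn fetch_peer_metadata(addr: &str) -> anyhow::Result<Option<serde_json::Value>> {
    let url = format!("http://{addr}/metadata");
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(2))
        .build()?;
    let resp = client.get(&url).send().await?;
    if resp.status() == reqwest::StatusCode::NOT_FOUND {
        // The handler returns 404 when the pod is not using the Kubernetes backend.
        return Ok(None);
    }
    let resp = resp.error_for_status()?;
    Ok(Some(resp.json::<serde_json::Value>().await?))
}

With a system status server listening on, say, 10.0.0.12:9090 (an example address), fetch_peer_metadata("10.0.0.12:9090") returns Ok(None) for pods not using the Kubernetes backend and the registered metadata as JSON otherwise.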