diff --git a/.gitignore b/.gitignore index a706c5b098..3a59f21e4f 100644 --- a/.gitignore +++ b/.gitignore @@ -109,3 +109,4 @@ profiling_results* # Direnv .envrc +rebuild.sh \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 138677bc65..3fcc631249 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -226,6 +226,18 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" +[[package]] +name = "async-broadcast" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435a87a52755b8f27fcf321ac4f04b2802e337c8c4872923137471ec39c37532" +dependencies = [ + "event-listener", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + [[package]] name = "async-channel" version = "2.5.0" @@ -578,6 +590,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "backon" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" +dependencies = [ + "fastrand", + "gloo-timers", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.75" @@ -2366,6 +2389,8 @@ dependencies = [ "futures", "humantime", "jsonschema", + "k8s-openapi", + "kube", "local-ip-address", "log", "nid", @@ -2382,6 +2407,7 @@ dependencies = [ "regex", "reqwest 0.12.23", "rstest 0.23.0", + "schemars 1.0.4", "serde", "serde_json", "socket2 0.5.10", @@ -3336,6 +3362,18 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "h2" version = "0.3.27" @@ -3504,6 +3542,26 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.0", +] + +[[package]] +name = "hostname" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56f203cd1c76362b69e3863fd987520ac36cf70a8c92627449b2f64a8cf7d65" +dependencies = [ + "cfg-if 1.0.3", + "libc", + "windows-link 0.1.3", +] + [[package]] name = "hound" version = "3.5.1" @@ -3676,6 +3734,7 @@ dependencies = [ "http 1.3.1", "hyper 1.7.0", "hyper-util", + "log", "rustls", "rustls-native-certs 0.8.1", "rustls-pki-types", @@ -4221,6 +4280,18 @@ dependencies = [ "unicode-general-category", ] +[[package]] +name = "json-patch" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f300e415e2134745ef75f04562dd0145405c2f7fd92065db029ac4b16b57fe90" +dependencies = [ + "jsonptr", + "serde", + "serde_json", + "thiserror 1.0.69", +] + [[package]] name = "json5" version = "0.4.1" @@ -4232,6 +4303,29 @@ dependencies = [ "serde", ] +[[package]] +name = "jsonpath-rust" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c00ae348f9f8fd2d09f82a98ca381c60df9e0820d8d79fce43e649b4dc3128b" +dependencies = [ + "pest", + "pest_derive", + "regex", + "serde_json", + "thiserror 2.0.16", +] + +[[package]] +name = "jsonptr" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a5a3cc660ba5d72bce0b3bb295bf20847ccbb40fd423f3f05b61273672e561fe" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "jsonschema" version = "0.17.1" @@ -4272,6 +4366,19 @@ dependencies = [ "rayon", ] +[[package]] +name = "k8s-openapi" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d13f06d5326a915becaffabdfab75051b8cdc260c2a5c06c0e90226ede89a692" +dependencies = [ + "base64 0.22.1", + "chrono", + "schemars 1.0.4", + "serde", + "serde_json", +] + [[package]] name = "kernel32-sys" version = "0.2.2" @@ -4282,6 +4389,115 @@ dependencies = [ "winapi-build", ] +[[package]] +name = "kube" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e7bb0b6a46502cc20e4575b6ff401af45cfea150b34ba272a3410b78aa014e" +dependencies = [ + "k8s-openapi", + "kube-client", + "kube-core", + "kube-derive", + "kube-runtime", +] + +[[package]] +name = "kube-client" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4987d57a184d2b5294fdad3d7fc7f278899469d21a4da39a8f6ca16426567a36" +dependencies = [ + "base64 0.22.1", + "bytes", + "chrono", + "either", + "futures", + "home", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "hyper 1.7.0", + "hyper-rustls", + "hyper-timeout", + "hyper-util", + "jsonpath-rust", + "k8s-openapi", + "kube-core", + "pem", + "rustls", + "secrecy", + "serde", + "serde_json", + "serde_yaml", + "thiserror 2.0.16", + "tokio", + "tokio-util", + "tower 0.5.2", + "tower-http", + "tracing", +] + +[[package]] +name = "kube-core" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914bbb770e7bb721a06e3538c0edd2babed46447d128f7c21caa68747060ee73" +dependencies = [ + "chrono", + "derive_more 2.0.1", + "form_urlencoded", + "http 1.3.1", + "json-patch", + "k8s-openapi", + "schemars 1.0.4", + "serde", + "serde-value", + "serde_json", + "thiserror 2.0.16", +] + +[[package]] +name = "kube-derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03dee8252be137772a6ab3508b81cd797dee62ee771112a2453bc85cbbe150d2" +dependencies = [ + "darling 0.21.3", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn 2.0.106", +] + +[[package]] +name = "kube-runtime" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aea4de4b562c5cc89ab10300bb63474ae1fa57ff5a19275f2e26401a323e3fd" +dependencies = [ + "ahash", + "async-broadcast", + "async-stream", + "backon", + "educe", + "futures", + "hashbrown 0.15.5", + "hostname", + "json-patch", + "k8s-openapi", + "kube-client", + "parking_lot", + "pin-project", + "serde", + "serde_json", + "thiserror 2.0.16", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "kvbm-py3" version = "0.1.0" @@ -4918,7 +5134,7 @@ dependencies = [ "num-traits", "objc", "once_cell", - "ordered-float", + "ordered-float 5.1.0", "parking_lot", "radix_trie", "rand 0.9.2", @@ -5755,6 +5971,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-float" version = "5.1.0" @@ -5860,6 +6085,16 @@ dependencies = [ "syn 
2.0.106", ] +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64 0.22.1", + "serde_core", +] + [[package]] name = "pem-rfc7468" version = "0.7.0" @@ -7505,7 +7740,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ "dyn-clone", - "schemars_derive", + "schemars_derive 0.8.22", "serde", "serde_json", ] @@ -7530,6 +7765,7 @@ checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" dependencies = [ "dyn-clone", "ref-cast", + "schemars_derive 1.0.4", "serde", "serde_json", ] @@ -7546,6 +7782,18 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "schemars_derive" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d020396d1d138dc19f1165df7545479dcd58d93810dc5d646a16e55abefa80" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.106", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -7687,6 +7935,16 @@ dependencies = [ "typeid", ] +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float 2.10.1", + "serde", +] + [[package]] name = "serde_cbor" version = "0.11.2" @@ -8866,6 +9124,7 @@ dependencies = [ "futures-sink", "futures-util", "pin-project-lite", + "slab", "tokio", ] @@ -9182,12 +9441,14 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ + "base64 0.22.1", "bitflags 2.9.4", "bytes", "futures-util", "http 1.3.1", "http-body 1.0.1", "iri-string", + "mime", "pin-project-lite", "tower 0.5.2", "tower-layer", diff --git a/components/src/dynamo/vllm/main.py b/components/src/dynamo/vllm/main.py index 4edbd5b05e..24af44b701 100644 --- a/components/src/dynamo/vllm/main.py +++ b/components/src/dynamo/vllm/main.py @@ -8,7 +8,7 @@ from typing import Optional import uvloop -from kvbm.vllm_integration.consolidator_config import get_consolidator_endpoints +# from kvbm.vllm_integration.consolidator_config import get_consolidator_endpoints from prometheus_client import REGISTRY from vllm.distributed.kv_events import ZmqEventPublisher from vllm.usage.usage_lib import UsageContext diff --git a/k8s-test/LOCAL_TESTING.md b/k8s-test/LOCAL_TESTING.md new file mode 100644 index 0000000000..2d4b48801d --- /dev/null +++ b/k8s-test/LOCAL_TESTING.md @@ -0,0 +1,264 @@ +# Local Testing Mode + +This guide explains how to test the Kubernetes discovery client with a **local metadata server** while watching **real Kubernetes resources**. + +## Overview + +The local testing mode allows you to: +- ✅ Watch real Kubernetes EndpointSlices +- ✅ Connect to a local metadata server (on localhost) instead of pod IPs +- ✅ Test the full discovery flow with your actual metadata implementation +- ✅ Debug and iterate quickly without deploying to Kubernetes + +## How It Works + +When `DYN_LOCAL_KUBE_TEST=1` is set: +1. The discovery client watches Kubernetes for EndpointSlices (as normal) +2. When a pod is discovered, it parses the pod name for a port number +3. 
If the pod name ends with `-<port>` (e.g., `dynamo-test-worker-8080`), it connects to `localhost:<port>` instead of the pod IP +4. Your local metadata server running on that port receives the request + +## Setup + +### 1. Create a Test Pod and Service + +Create a pod and service with a specific port number: + +```bash +cd k8s-test + +# Create pod and service with default labels +./create-local-test-pod.sh 8080 + +# Or with custom Kubernetes namespace +./create-local-test-pod.sh 8080 my-k8s-namespace + +# Or with custom Dynamo namespace and component labels +./create-local-test-pod.sh 8080 discovery hello_world backend +``` + +**Arguments:** +1. `port` - Port number (required) - used in pod name and for localhost connection +2. `k8s-namespace` - Kubernetes namespace (default: `discovery`) +3. `dynamo-namespace` - Value for `dynamo.nvidia.com/namespace` label (default: `test-namespace`) +4. `dynamo-component` - Value for `dynamo.nvidia.com/component` label (default: `test-component`) + +This creates: +- A pod named `dynamo-test-worker-<port>` +- A service named `dynamo-test-service-<port>` +- An EndpointSlice (automatically created by Kubernetes for the service) + +### 2. Start Your Local Metadata Server + +Start your metadata server on the port you specified: + +```bash +# Example with the system status server +cargo run --bin your-app -- --port 8080 +``` + +Make sure your server exposes the `/metadata` endpoint that returns a JSON-serialized `DiscoveryMetadata` structure. + +### 3. Run Tests in Local Mode + +Set the environment variable and run your tests: + +```bash +export DYN_LOCAL_KUBE_TEST=1 +cargo test --test kube_client_integration test_watch_all_endpoints -- --ignored --nocapture +``` + +You should see logs like: +``` +Local test mode: using localhost:8080 for pod dynamo-test-worker-8080 +Fetching metadata from http://localhost:8080/metadata +``` + +## Multiple Local Servers + +You can create multiple test pods with different ports and labels: + +```bash +# Create pods for different components +./create-local-test-pod.sh 8080 discovery hello_world frontend +./create-local-test-pod.sh 8081 discovery hello_world backend +./create-local-test-pod.sh 8082 discovery hello_world worker +``` + +Then run multiple metadata servers on different ports: + +```bash +# Terminal 1 - Frontend server +export PORT=8080 +export POD_NAME=dynamo-test-worker-8080 +export POD_NAMESPACE=discovery +your-server --component frontend + +# Terminal 2 - Backend server +export PORT=8081 +export POD_NAME=dynamo-test-worker-8081 +export POD_NAMESPACE=discovery +your-server --component backend + +# Terminal 3 - Worker server +export PORT=8082 +export POD_NAME=dynamo-test-worker-8082 +export POD_NAMESPACE=discovery +your-server --component worker +``` + +The discovery client will discover all three and connect to the appropriate localhost port for each! + +## Pod Name Format + +The pod name MUST end with `-<port>` where `<port>` is a valid port number: + +✅ Valid: +- `dynamo-test-worker-8080` +- `my-service-9000` +- `test-pod-3000` + +❌ Invalid: +- `dynamo-test-worker` (no port) +- `dynamo-test-worker-abc` (not a number) +- `8080-worker` (port not at the end) + +The helper script automatically creates pods with the correct naming format. + +## Example: Testing the Discovery Flow with hello_world + +Here's a complete example using the `hello_world` app: + +```bash +# 1. Create test pod with hello_world labels +cd k8s-test +./create-local-test-pod.sh 9000 discovery hello_world backend + +# 2.
Start your server (in another terminal) +cd ../examples/custom_backend/hello_world + +# Set environment variables for the server +export PORT=9000 +export DYN_SYSTEM_PORT=$PORT +export DYN_LOCAL_KUBE_TEST=1 # Not needed for server, but harmless +export POD_NAME=dynamo-test-worker-$PORT +export POD_NAMESPACE=discovery +export DYN_DISCOVERY_BACKEND=kubernetes + +# Run the server +python3 -m hello_world + +# 3. In another terminal, run the client +export PORT=9009 # Different port for client +export DYN_SYSTEM_PORT=$PORT +export DYN_LOCAL_KUBE_TEST=1 # IMPORTANT: Client needs this! +export POD_NAME=dynamo-test-worker-$PORT +export POD_NAMESPACE=discovery +export DYN_DISCOVERY_BACKEND=kubernetes + +python3 -m client +``` + +You should see: +1. The server registers endpoint `hello_world/backend/generate` with its local metadata +2. The client discovers the pod `dynamo-test-worker-9000` from Kubernetes +3. The client connects to `http://localhost:9000/metadata` (not the pod IP!) +4. The server responds with its registered metadata +5. The client emits an `Added` event and can now make requests + +This lets you: +- ✅ Debug both server and client locally +- ✅ See actual Kubernetes discovery in action +- ✅ Test with real metadata exchange +- ✅ Iterate quickly without container builds + +## Cleanup + +Delete test resources when done: + +```bash +# Delete specific pod and service +kubectl delete pod/dynamo-test-worker-9000 --namespace=discovery +kubectl delete service/dynamo-test-service-9000 --namespace=discovery + +# Or delete multiple at once +kubectl delete pod/dynamo-test-worker-8080 service/dynamo-test-service-8080 --namespace=discovery +kubectl delete pod/dynamo-test-worker-8081 service/dynamo-test-service-8081 --namespace=discovery +``` + +Or delete all local test resources at once: + +```bash +kubectl delete pods,services -l app=dynamo-local-test --namespace=discovery +``` + +## Troubleshooting + +### "Connection refused" to localhost + +**Problem:** The discovery client can't connect to your local metadata server. + +**Solution:** +- Ensure your metadata server is running on the correct port +- Check that the port matches the pod name (e.g., pod `...-8080` → server on port 8080) +- Verify your server exposes the `/metadata` endpoint + +### Pod name doesn't have a port + +**Problem:** You created a pod without using the helper script and the name doesn't end with a port number. + +**Solution:** +- Delete the pod: `kubectl delete pod/<pod-name>` +- Use the helper script: `./create-local-test-pod.sh 8080` +- Or manually create a pod with a name ending in `-<port>` + +### Still connecting to pod IP instead of localhost + +**Problem:** The environment variable isn't set. + +**Solution:** +```bash +export DYN_LOCAL_KUBE_TEST=1 +# Verify it's set +echo $DYN_LOCAL_KUBE_TEST # Should print: 1 +``` + +### Metadata format errors + +**Problem:** Your local server returns data that doesn't match the expected `DiscoveryMetadata` format. + +**Solution:** +Check the logs for JSON parsing errors. Your `/metadata` endpoint should return: +```json +{ + "endpoints": { + "namespace/component/endpoint": { + "Endpoint": { + "namespace": "test-namespace", + "component": "test-component", + "endpoint": "test-endpoint", + "instance_id": 12345, + "transport": {"NatsTcp": "nats://localhost:4222"} + } + } + }, + "model_cards": {} +} +``` + +## Production vs.
Local Testing + +| Mode | Environment Var | Connection | Use Case | +|------|----------------|------------|----------| +| Production | (none) | Pod IP:port | Real deployment | +| Mock | (test only) | No HTTP calls | Unit tests | +| Local Testing | `DYN_LOCAL_KUBE_TEST=1` | localhost:port | Integration testing with local server | + +## Benefits + +- 🚀 **Fast iteration**: No need to rebuild/redeploy containers +- 🐛 **Easy debugging**: Use debuggers, logging, etc. on your local server +- 🧪 **Full integration**: Test with real Kubernetes resources +- 💰 **Cost effective**: No cloud resources needed for testing +- ⚡ **Quick validation**: Test changes to metadata format instantly + diff --git a/k8s-test/README.md b/k8s-test/README.md new file mode 100644 index 0000000000..2d22e60a9d --- /dev/null +++ b/k8s-test/README.md @@ -0,0 +1,547 @@ +# Kubernetes Discovery Integration Tests + +This directory contains integration tests for the Dynamo Kubernetes discovery client. These tests verify that our Rust code can correctly interact with the Kubernetes API to list and watch EndpointSlices. + +## Prerequisites + +1. **Kubernetes Cluster Access**: You need a running Kubernetes cluster with `kubectl` configured + - Local: Docker Desktop, Minikube, Kind, k3s, etc. + - Cloud: GKE, EKS, AKS, etc. + +2. **Admin/Sufficient Permissions**: Your current kubectl context should have permissions to: + - Create/delete Deployments and Services + - List/watch EndpointSlices + +3. **Rust Environment**: Cargo with the dynamo-runtime crate compiled + +## Quick Start + +### 1. Deploy Test Resources + +```bash +cd k8s-test + +# Deploy to default namespace +./deploy.sh + +# Or deploy to a specific namespace +./deploy.sh my-namespace +``` + +This will: +- Create the namespace if it doesn't exist +- Create a deployment with 3 nginx pods +- Create a service that generates EndpointSlices +- Wait for pods to be ready +- Show the current status + +**Examples:** +```bash +./deploy.sh # Deploy to 'default' namespace +./deploy.sh test-namespace # Deploy to 'test-namespace' +./deploy.sh production # Deploy to 'production' namespace +``` + +### 2. Run Integration Tests + +There are two test suites: + +#### A. Raw Kubernetes API Tests (kube_discovery_integration) + +These tests verify the raw Kubernetes API interactions work correctly: + +```bash +# Run all raw K8s tests +cargo test --test kube_discovery_integration -- --ignored --nocapture + +# Or run individual tests: +cargo test --test kube_discovery_integration test_kube_client_connection -- --ignored --nocapture +cargo test --test kube_discovery_integration test_list_endpointslices -- --ignored --nocapture +cargo test --test kube_discovery_integration test_watch_endpointslices -- --ignored --nocapture +cargo test --test kube_discovery_integration test_discovery_simulation -- --ignored --nocapture +``` + +#### B. 
KubeDiscoveryClient Tests (kube_client_integration) **[RECOMMENDED]** + +These tests verify the actual `KubeDiscoveryClient` implementation: + +```bash +# Run all KubeDiscoveryClient tests (sequential for clean output) +cargo test --test kube_client_integration -- --ignored --nocapture --test-threads=1 + +# Or run individual tests: + +# Test client creation +cargo test --test kube_client_integration test_client_creation -- --ignored --nocapture + +# Test list() method +cargo test --test kube_client_integration test_list_all_endpoints -- --ignored --nocapture +cargo test --test kube_client_integration test_list_namespaced_endpoints -- --ignored --nocapture +cargo test --test kube_client_integration test_list_component_endpoints -- --ignored --nocapture + +# Test list_and_watch() method +cargo test --test kube_client_integration test_watch_all_endpoints -- --ignored --nocapture +cargo test --test kube_client_integration test_watch_namespaced_endpoints -- --ignored --nocapture +cargo test --test kube_client_integration test_watch_receives_k8s_events -- --ignored --nocapture +``` + +**Note:** The `--test-threads=1` flag ensures tests run sequentially, preventing output from multiple tests from being interleaved. This makes the output much more readable, especially for watch tests that print events over time. + +**Note:** The `KubeDiscoveryClient` tests use **mock metadata** mode, which means they skip actual HTTP calls to pods and return mock `DiscoveryMetadata` instead. This allows the tests to verify: +- ✅ Kubernetes API interactions (listing/watching EndpointSlices) +- ✅ Endpoint extraction from EndpointSlices +- ✅ Discovery event flow (Added/Removed events) +- ✅ The full discovery pipeline + +Without needing actual metadata servers running in pods. This makes tests fast, reliable, and easy to run. + +#### Alternative: Using the Test Runner Script + +You can also use the `run-tests.sh` script for a more convenient workflow: + +```bash +cd k8s-test + +# Run all client tests (checks default namespace) +./run-tests.sh + +# Run specific test +./run-tests.sh client test_list_all_endpoints + +# Run tests and check a specific namespace +./run-tests.sh client "" my-namespace + +# Run all test suites +./run-tests.sh all +``` + +The script will: +- Check if kubectl is configured +- Verify test resources exist in the specified namespace +- Run the requested tests +- Provide helpful error messages if resources aren't deployed + +### 3. Clean Up + +```bash +# Clean up from default namespace +./cleanup.sh + +# Or clean up from a specific namespace +./cleanup.sh my-namespace +``` + +**Examples:** +```bash +./cleanup.sh # Clean up from 'default' namespace +./cleanup.sh test-namespace # Clean up from 'test-namespace' +./cleanup.sh production # Clean up from 'production' namespace +``` + +**Note:** The cleanup script does not delete the namespace itself. To delete the namespace: +```bash +kubectl delete namespace my-namespace +``` + +## Test Descriptions + +### KubeDiscoveryClient Tests (Recommended) + +These tests exercise the actual `KubeDiscoveryClient` methods that will be used in production. + +#### `test_client_creation` +Verifies that we can create a `KubeDiscoveryClient` for testing. + +**What it tests:** +- Client instantiation +- Instance ID generation from pod name + +**Expected output:** +``` +🔌 Testing KubeDiscoveryClient creation... +✅ Client created with instance_id: abc123def456 +``` + +#### `test_list_all_endpoints` +Tests the `list()` method with `DiscoveryKey::AllEndpoints`. 
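For orientation, a minimal sketch of what this kind of integration test can look like is shown below. The import path and constructor for `KubeDiscoveryClient`, and the exact return type of `list()`, are assumptions for illustration; the real tests in `kube_client_integration` are the source of truth.

```rust
// Sketch of an ignored integration test against a live cluster (assumed API).
// `DiscoveryKey` and `DiscoveryInstance` come from dynamo_runtime::discovery;
// the `KubeDiscoveryClient` path and its `new()` constructor are placeholders.
use dynamo_runtime::discovery::{DiscoveryInstance, DiscoveryKey};
// use dynamo_runtime::discovery::kube::KubeDiscoveryClient; // path assumed

#[tokio::test]
#[ignore] // requires kubectl access and the k8s-test resources deployed
async fn sketch_list_all_endpoints() -> anyhow::Result<()> {
    // Hypothetical constructor; the real client reads POD_NAMESPACE from the env.
    let client = KubeDiscoveryClient::new().await?;
    let instances: Vec<DiscoveryInstance> = client.list(DiscoveryKey::AllEndpoints).await?;
    println!("Found {} instances", instances.len());
    Ok(())
}
```
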
+ +**What it tests:** +- Calling `KubeDiscoveryClient::list()` +- EndpointSlice querying without label filters +- Metadata fetching workflow (will fail gracefully without metadata server) + +**Expected output:** +``` +📋 Testing list all endpoints... + Note: This will try to fetch metadata from pods via HTTP, + which will likely fail unless pods are running the metadata server. + The test verifies the Kubernetes API calls work correctly. +Calling list() with key=AllEndpoints +✅ list() succeeded + Found 0 instances +✅ List test completed (K8s API calls work) +``` + +#### `test_list_namespaced_endpoints` & `test_list_component_endpoints` +Test the `list()` method with label-based filtering. + +**What it tests:** +- Label selector generation from `DiscoveryKey` +- Filtered EndpointSlice queries + +#### `test_watch_all_endpoints` & `test_watch_namespaced_endpoints` +Test the `list_and_watch()` method which creates a streaming watch. + +**What it tests:** +- Creating a watch stream from `KubeDiscoveryClient` +- Receiving discovery events (Added/Removed) +- Watch lifecycle management + +**Expected output:** +``` +👀 Testing watch all endpoints... + This test will watch for 5 seconds +Calling list_and_watch() with key=AllEndpoints +📡 Watch stream started... +⏰ Timeout reached +✅ Watch test completed (0 events received) + Note: Events are only emitted when pods are discovered + and their metadata can be fetched via HTTP +``` + +#### `test_watch_receives_k8s_events` +Verifies the Kubernetes watcher integration is functioning. + +**What it tests:** +- Watch stream receives at least one event +- K8s watcher initialization +- Stream lifecycle + +### Raw Kubernetes API Tests + +These tests verify low-level Kubernetes API interactions. + +#### `test_kube_client_connection` +Verifies that we can create a Kubernetes client and connect to the cluster. + +**What it tests:** +- Client creation from default kubeconfig +- Basic API connectivity by listing namespaces + +**Expected output:** +``` +🔌 Testing Kubernetes client connection... +✅ Successfully connected to Kubernetes cluster +📋 Found X namespaces +✅ Kubernetes API is accessible +``` + +### `test_list_endpointslices` +Tests listing all EndpointSlices in the default namespace. + +**What it tests:** +- EndpointSlice API access +- Parsing EndpointSlice structures +- Extracting endpoint information (pod names, IPs, readiness) + +**Expected output:** +``` +📋 Testing EndpointSlice listing... +📊 Found X EndpointSlices in default namespace + • dynamo-test-service-abcde (service: dynamo-test-service, endpoints: 3) + [0] pod=dynamo-test-worker-xxx, ready=true, addresses=["10.1.2.3"] + [1] pod=dynamo-test-worker-yyy, ready=true, addresses=["10.1.2.4"] + [2] pod=dynamo-test-worker-zzz, ready=true, addresses=["10.1.2.5"] +✅ EndpointSlice listing test completed +``` + +### `test_list_with_labels` +Tests listing EndpointSlices with label selectors (like our discovery client does). + +**What it tests:** +- Label selector functionality +- Filtering EndpointSlices by labels + +**Important:** EndpointSlices are created by Services, not Deployments. The EndpointSlices will have labels from the Service, not from the pod labels. The test uses `kubernetes.io/service-name=dynamo-test-service` which is automatically added by Kubernetes. + +**Expected output:** +``` +🏷️ Testing EndpointSlice listing with label selector... 
+Using label selector: kubernetes.io/service-name=dynamo-test-service +📊 Found X EndpointSlices matching labels + • dynamo-test-service-abcde (endpoints: 3) +✅ Label selector test completed +``` + +### `test_watch_endpointslices` +Tests the Kubernetes watch mechanism for EndpointSlices. + +**What it tests:** +- Creating a watch stream +- Receiving watch events (Init, InitApply, Apply, Delete, InitDone) +- Event types and their contents + +**Expected output:** +``` +👀 Testing EndpointSlice watching... + This test will watch for 10 seconds or 5 events, whichever comes first +📡 Watch stream started... + [1] 🚀 Init - watch stream starting + [2] 🔄 InitApply: dynamo-test-service-xxx (endpoints: 3) + [3] ✅ InitDone - initial list complete +📊 Reached max events (5), stopping watch +✅ Watch test completed (5 events received) +``` + +### `test_watch_with_labels` +Tests watching EndpointSlices with a label selector. + +**What it tests:** +- Watch with label filtering +- Receiving only relevant events + +**Expected output:** +``` +👀 Testing EndpointSlice watching with label selector... + This test will watch for 5 seconds or until InitDone +Using label selector: kubernetes.io/service-name=dynamo-test-service +📡 Watch stream started... + [1] 🚀 Init - watch stream starting + [2] 🔄 InitApply: dynamo-test-service-xxx (endpoints: 3) + [3] ✅ InitDone - initial list complete +📊 InitDone received, stopping watch +✅ Watch with labels test completed (3 events received) +``` + +### `test_discovery_simulation` +Comprehensive test that simulates the full discovery client behavior. + +**What it tests:** +- Complete discovery flow: watch → extract endpoints → track instances +- Pod name hashing (instance ID generation) +- Ready state filtering +- Duplicate detection + +**Expected output:** +``` +🔍 Testing discovery client simulation... + This simulates how our KubeDiscoveryClient list_and_watch works +Label selector: kubernetes.io/service-name=dynamo-test-service +📡 Starting watch stream... 
+ 🚀 Watch stream initialized + 📦 Processing EndpointSlice: dynamo-test-service-xxx + ✅ New endpoint: pod=dynamo-test-worker-xxx, instance_id=abc123, addresses=["10.1.2.3"] + ✅ New endpoint: pod=dynamo-test-worker-yyy, instance_id=def456, addresses=["10.1.2.4"] + ✅ New endpoint: pod=dynamo-test-worker-zzz, instance_id=789abc, addresses=["10.1.2.5"] + ✅ Initial sync complete + 📊 Discovered 3 unique endpoints +✅ Discovery simulation completed +📊 Total unique endpoints discovered: 3 +``` + +## Architecture Overview + +``` +┌─────────────────────────────────────────┐ +│ Kubernetes Cluster │ +│ │ +│ Namespace: default (POD_NAMESPACE) │ +│ ┌─────────────────────────────────┐ │ +│ │ Deployment: dynamo-test-worker │ │ +│ │ Replicas: 3 │ │ +│ │ Labels: │ │ +│ │ app=dynamo-test │ │ +│ │ component=worker │ │ +│ └─────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────┐ │ +│ │ Pods (3 replicas) │ │ +│ │ - dynamo-test-worker-xxx │ │ +│ │ - dynamo-test-worker-yyy │ │ +│ │ - dynamo-test-worker-zzz │ │ +│ └─────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────┐ │ +│ │ Service: dynamo-test-service │ │ +│ │ Type: ClusterIP │ │ +│ │ Selector: app=dynamo-test │ │ +│ └─────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────┐ │ +│ │ EndpointSlices (auto-created) │ │ +│ │ Labels: │ │ +│ │ kubernetes.io/service-name: │ │ +│ │ dynamo-test-service │ │ +│ │ Endpoints: [pod IPs + status] │ │ +│ └─────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────┘ + │ + │ Kubernetes API + │ (List/Watch - namespace scoped) + ▼ +┌─────────────────────────────────────────┐ +│ Integration Tests (Rust) │ +│ - test_list_endpointslices │ +│ - test_watch_endpointslices │ +│ - test_discovery_simulation │ +└─────────────────────────────────────────┘ +``` + +**Important:** The `KubeDiscoveryClient` is **namespace-scoped**. It only watches EndpointSlices in the namespace specified by the `POD_NAMESPACE` environment variable. This provides: +- ✅ Better security (no cluster-wide access needed) +- ✅ Better performance (fewer resources to watch) +- ✅ Namespace isolation (pods only discover within their namespace) + +## Troubleshooting + +### "Failed to create Kubernetes client" + +**Cause:** kubectl is not configured or kubeconfig is invalid + +**Solution:** +```bash +# Check kubectl connection +kubectl cluster-info + +# Check current context +kubectl config current-context + +# If needed, set context +kubectl config use-context +``` + +### "No EndpointSlices found" + +**Cause:** Test resources not deployed + +**Solution:** +```bash +cd k8s-test +./deploy.sh + +# Verify resources exist +kubectl get endpointslices -l kubernetes.io/service-name=dynamo-test-service +``` + +### "Pods not ready" + +**Cause:** Pods are still starting or failing + +**Solution:** +```bash +# Check pod status +kubectl get pods -l app=dynamo-test + +# Check pod events +kubectl describe pod + +# Check pod logs +kubectl logs +``` + +### "No endpoints discovered" + +**Cause:** Pods might not be ready yet + +**Solution:** +```bash +# Wait for pods to be ready +kubectl wait --for=condition=ready pod -l app=dynamo-test --timeout=60s + +# Check pod readiness +kubectl get pods -l app=dynamo-test -o wide +``` + +## Notes + +### Namespace Configuration + +The `KubeDiscoveryClient` reads the `POD_NAMESPACE` environment variable to determine which namespace to watch. 
This is automatically set by Kubernetes when you use the downward API: + +```yaml +env: +- name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace +``` + +The client will **only** watch EndpointSlices within this namespace. It does not have cluster-wide access. + +### Why EndpointSlices? + +Kubernetes automatically creates EndpointSlices for Services. EndpointSlices track: +- Pod IPs +- Pod readiness state +- Pod names (via targetRef) +- Port information + +This makes them perfect for service discovery. + +### Labels on EndpointSlices + +**Important:** EndpointSlices inherit labels from the Service, not from Pods. The most reliable label to use is: +- `kubernetes.io/service-name=<service-name>` (automatically added) + +If you want custom labels on EndpointSlices, add them to the Service, not the Pods. + +### Difference from Production + +These integration tests skip the HTTP metadata fetching part. In production: +1. Watch EndpointSlice → get pod IPs +2. HTTP GET `http://<pod-ip>:8080/metadata` → get registration data +3. Cache and return discovery instances + +For these tests, we only verify step 1 works correctly. + +## Local Testing Mode + +Want to test with a **real metadata server** running locally? See **[LOCAL_TESTING.md](LOCAL_TESTING.md)** for detailed instructions. + +Quick start: +```bash +# 1. Create a test pod and service with custom labels +./create-local-test-pod.sh 9000 discovery hello_world backend +# ^port ^k8s-ns ^dynamo-ns ^component + +# This creates: +# - Pod: dynamo-test-worker-9000 +# - Service: dynamo-test-service-9000 +# - EndpointSlice: (auto-created by K8s) + +# 2. Start your metadata server locally (in another terminal) +export PORT=9000 +export DYN_SYSTEM_PORT=$PORT +export POD_NAME=dynamo-test-worker-$PORT +export POD_NAMESPACE=discovery +export DYN_DISCOVERY_BACKEND=kubernetes +python3 -m your_app + +# 3. Run your client in local mode +export DYN_LOCAL_KUBE_TEST=1 # Key: makes client connect to localhost! +export POD_NAMESPACE=discovery +export DYN_DISCOVERY_BACKEND=kubernetes +python3 -m your_client +``` + +This allows you to: +- ✅ Test with real Kubernetes resources +- ✅ Debug your metadata server locally +- ✅ See full discovery flow with actual metadata exchange +- ✅ Iterate quickly without deploying to K8s + +The client discovers the pod from K8s but connects to `localhost:9000` for metadata! + +## Next Steps + +After these tests pass: +1. Test with real metadata servers using local testing mode (see above) +2. Test error handling (network failures, timeouts, etc.) +3. Test scale (100s of pods) +4. Test label selector edge cases +5. Add RBAC roles and test with restricted permissions + diff --git a/k8s-test/cleanup.sh b/k8s-test/cleanup.sh new file mode 100755 index 0000000000..745d8c9a3c --- /dev/null +++ b/k8s-test/cleanup.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Clean up test resources from Kubernetes cluster + +set -e + +# Parse namespace argument (default to "default") +NAMESPACE="${1:-default}" + +echo "🧹 Cleaning up Dynamo test resources from namespace: $NAMESPACE" + +# Delete manifests +kubectl delete -f manifests/test-deployment.yaml --namespace="$NAMESPACE" --ignore-not-found=true + +echo "" +echo "✅ Test resources cleaned up from namespace: $NAMESPACE!" +echo "" +echo "Note: The namespace itself was not deleted.
To delete it, run:" +echo " kubectl delete namespace $NAMESPACE" + diff --git a/k8s-test/create-local-test-pod.sh b/k8s-test/create-local-test-pod.sh new file mode 100755 index 0000000000..60b8aa1b35 --- /dev/null +++ b/k8s-test/create-local-test-pod.sh @@ -0,0 +1,157 @@ +#!/bin/bash +# Create a pod and service for local testing with DYN_LOCAL_KUBE_TEST +# The pod name will be in format: dynamo-test-worker- +# This allows the discovery client to connect to localhost: for the metadata server + +set -e + +# Parse arguments +PORT="${1:-9000}" +K8S_NAMESPACE="${2:-discovery}" +DYNAMO_NAMESPACE="${3:-hello_world}" +DYNAMO_COMPONENT="${4:-backend}" + +if [ -z "$PORT" ]; then + echo "Usage: $0 [k8s-namespace] [dynamo-namespace] [dynamo-component]" + echo "" + echo "Creates a pod and service that will be discovered by the Kubernetes client." + echo "When DYN_LOCAL_KUBE_TEST is set, the client will connect to localhost:" + echo "for the metadata endpoint instead of the pod IP." + echo "" + echo "Arguments:" + echo " port - Port number to use (required)" + echo " k8s-namespace - Kubernetes namespace (default: discovery)" + echo " dynamo-namespace - Dynamo namespace label (default: hello_world)" + echo " dynamo-component - Dynamo component label (default: backend)" + echo "" + echo "Examples:" + echo " $0 8080 # backend component (default)" + echo " $0 8081 discovery # backend in discovery namespace" + echo " $0 8082 discovery hello_world backend # Explicit backend component" + echo " $0 8083 discovery hello_world prefill # prefill component" + echo " $0 8084 discovery dynamo frontend # frontend component" + echo "" + echo "After creating the pod, run your metadata server locally:" + echo " # In one terminal:" + echo " your-metadata-server --port $PORT" + echo "" + echo " # In another terminal:" + echo " export DYN_LOCAL_KUBE_TEST=1" + echo " cargo test --test kube_client_integration test_watch_all_endpoints -- --ignored --nocapture" + exit 1 +fi + +POD_NAME="dynamo-test-worker-${PORT}" +SERVICE_NAME="dynamo-test-${DYNAMO_COMPONENT}" + +echo "🚀 Creating local test resources in K8s namespace: $K8S_NAMESPACE" +echo " Pod name: $POD_NAME" +echo " Service name: $SERVICE_NAME (component: $DYNAMO_COMPONENT)" +echo " Port: $PORT" +echo " Dynamo namespace: $DYNAMO_NAMESPACE" +echo " Dynamo component: $DYNAMO_COMPONENT" +echo "" + +# Create namespace if it doesn't exist +if ! kubectl get namespace "$K8S_NAMESPACE" &> /dev/null; then + echo "📦 Creating Kubernetes namespace: $K8S_NAMESPACE" + kubectl create namespace "$K8S_NAMESPACE" +fi + +# Create the pod and service using kubectl +cat < /dev/null; then + echo "📦 Creating namespace: $NAMESPACE" + kubectl create namespace "$NAMESPACE" +else + echo "✅ Namespace $NAMESPACE already exists" +fi + +echo "" +echo "Applying manifests..." + +# Apply manifests with namespace override +kubectl apply -f manifests/test-deployment.yaml --namespace="$NAMESPACE" + +echo "" +echo "✅ Resources deployed!" +echo "" +echo "Waiting for pods to be ready..." +kubectl wait --for=condition=ready pod -l app=dynamo-test --namespace="$NAMESPACE" --timeout=60s + +echo "" +echo "📊 Current status in namespace $NAMESPACE:" +kubectl get deployment dynamo-test-worker --namespace="$NAMESPACE" +kubectl get service dynamo-test-service --namespace="$NAMESPACE" +kubectl get pods -l app=dynamo-test --namespace="$NAMESPACE" +kubectl get endpointslices -l kubernetes.io/service-name=dynamo-test-service --namespace="$NAMESPACE" + +echo "" +echo "✅ Test environment is ready in namespace: $NAMESPACE!" 
+echo "" +echo "To run tests against this namespace, set POD_NAMESPACE=$NAMESPACE in your test client" + diff --git a/k8s-test/manifests/test-deployment.yaml b/k8s-test/manifests/test-deployment.yaml new file mode 100644 index 0000000000..fb64aeb2f6 --- /dev/null +++ b/k8s-test/manifests/test-deployment.yaml @@ -0,0 +1,65 @@ +--- +# Test deployment for Dynamo discovery integration testing +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dynamo-test-worker + labels: + app: dynamo-test +spec: + replicas: 3 + selector: + matchLabels: + app: dynamo-test + component: worker + template: + metadata: + labels: + app: dynamo-test + component: worker + dynamo.nvidia.com/namespace: "test-namespace" + dynamo.nvidia.com/component: "test-component" + spec: + containers: + - name: worker + image: nginx:alpine # Simple container for testing + ports: + - containerPort: 8080 + name: http + protocol: TCP + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + readinessProbe: + httpGet: + path: / + port: 80 + initialDelaySeconds: 2 + periodSeconds: 5 +--- +# Service to create EndpointSlices +apiVersion: v1 +kind: Service +metadata: + name: dynamo-test-service + labels: + app: dynamo-test + dynamo.nvidia.com/namespace: "test-namespace" + dynamo.nvidia.com/component: "test-component" +spec: + selector: + app: dynamo-test + component: worker + ports: + - port: 8080 + targetPort: 80 + protocol: TCP + name: http + type: ClusterIP + diff --git a/k8s-test/run-tests.sh b/k8s-test/run-tests.sh new file mode 100644 index 0000000000..461f0c97af --- /dev/null +++ b/k8s-test/run-tests.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# Run integration tests for Kubernetes discovery client + +set -e + +echo "🧪 Running Kubernetes Discovery Integration Tests" +echo "" + +# Check if kubectl is configured +if ! kubectl cluster-info &> /dev/null; then + echo "❌ kubectl is not configured or cluster is not accessible" + echo " Please ensure you have access to a Kubernetes cluster" + exit 1 +fi + +echo "✅ kubectl is configured" +echo " Cluster: $(kubectl config current-context)" +echo "" + +# Parse command line arguments +TEST_SUITE="${1:-kube_client}" +TEST_NAME="${2:-}" +NAMESPACE="${3:-default}" + +echo "🔍 Checking for test resources in namespace: $NAMESPACE" + +# Check if test resources are deployed +PODS=$(kubectl get pods -l app=dynamo-test --namespace="$NAMESPACE" --no-headers 2>/dev/null | wc -l) +if [ "$PODS" -eq 0 ]; then + echo "⚠️ Test resources not deployed in namespace: $NAMESPACE" + echo " Run ./deploy.sh $NAMESPACE to create test resources" + echo " (Tests will still run but may not find any endpoints)" + echo "" +else + echo "✅ Found $PODS test pods in namespace: $NAMESPACE" + echo "" +fi + +case "$TEST_SUITE" in + "client"|"kube_client") + echo "Running KubeDiscoveryClient tests..." + if [ -n "$TEST_NAME" ]; then + cargo test --test kube_client_integration "$TEST_NAME" -- --ignored --nocapture --test-threads=1 + else + cargo test --test kube_client_integration -- --ignored --nocapture --test-threads=1 + fi + ;; + "raw"|"kube_api") + echo "Running raw Kubernetes API tests..." + if [ -n "$TEST_NAME" ]; then + cargo test --test kube_discovery_integration "$TEST_NAME" -- --ignored --nocapture --test-threads=1 + else + cargo test --test kube_discovery_integration -- --ignored --nocapture --test-threads=1 + fi + ;; + "all") + echo "Running all integration tests..." 
+ cargo test --test kube_client_integration -- --ignored --nocapture --test-threads=1 + echo "" + echo "---" + echo "" + cargo test --test kube_discovery_integration -- --ignored --nocapture --test-threads=1 + ;; + *) + echo "Usage: $0 [client|raw|all] [test_name] [namespace]" + echo "" + echo "Arguments:" + echo " test_suite - Which test suite to run (default: client)" + echo " test_name - Specific test to run (optional)" + echo " namespace - Kubernetes namespace to check (default: default)" + echo "" + echo "Test suites:" + echo " client (default) - Run KubeDiscoveryClient tests (recommended)" + echo " raw - Run raw Kubernetes API tests" + echo " all - Run all integration tests" + echo "" + echo "Examples:" + echo " $0 # Run client tests (default namespace)" + echo " $0 client test_list_all_endpoints # Run specific client test" + echo " $0 client test_list_all_endpoints my-namespace # Run test, check my-namespace" + echo " $0 raw test_list_endpointslices # Run specific raw API test" + echo " $0 all \"\" my-namespace # Run all tests, check my-namespace" + exit 1 + ;; +esac + +echo "" +echo "✅ Tests completed" +echo "" +echo "Note: Tests check for resources in namespace: $NAMESPACE" +echo " The actual KubeDiscoveryClient namespace is determined by POD_NAMESPACE env var in test code" + diff --git a/lib/bindings/python/Cargo.lock b/lib/bindings/python/Cargo.lock index 839caf2d5e..854303e791 100644 --- a/lib/bindings/python/Cargo.lock +++ b/lib/bindings/python/Cargo.lock @@ -55,6 +55,12 @@ dependencies = [ "equator", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -173,6 +179,18 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "async-broadcast" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435a87a52755b8f27fcf321ac4f04b2802e337c8c4872923137471ec39c37532" +dependencies = [ + "event-listener", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + [[package]] name = "async-channel" version = "2.5.0" @@ -467,6 +485,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "backon" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" +dependencies = [ + "fastrand", + "gloo-timers", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.75" @@ -1144,8 +1173,18 @@ version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + +[[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core 0.21.3", + "darling_macro 0.21.3", ] [[package]] @@ -1162,13 +1201,38 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "darling_core" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" 
+dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.106", +] + [[package]] name = "darling_macro" version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ - "darling_core", + "darling_core 0.20.11", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core 0.21.3", "quote", "syn 2.0.106", ] @@ -1265,7 +1329,7 @@ version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" dependencies = [ - "darling", + "darling 0.20.11", "proc-macro2", "quote", "syn 2.0.106", @@ -1287,7 +1351,16 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" dependencies = [ - "derive_more-impl", + "derive_more-impl 1.0.0", +] + +[[package]] +name = "derive_more" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +dependencies = [ + "derive_more-impl 2.0.1", ] [[package]] @@ -1302,6 +1375,17 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "derive_more-impl" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "dialoguer" version = "0.11.0" @@ -1457,6 +1541,7 @@ dependencies = [ "async_zmq", "axum", "axum-server", + "base64 0.22.1", "bincode 2.0.1", "bitflags 2.9.3", "blake3", @@ -1494,6 +1579,7 @@ dependencies = [ "rand 0.9.2", "rayon", "regex", + "reqwest", "rmp-serde", "rustls", "serde", @@ -1601,6 +1687,8 @@ dependencies = [ "figment", "futures", "humantime", + "k8s-openapi", + "kube", "local-ip-address", "log", "nid", @@ -1615,6 +1703,8 @@ dependencies = [ "rand 0.9.2", "rayon", "regex", + "reqwest", + "schemars 1.0.4", "serde", "serde_json", "socket2 0.5.10", @@ -2381,6 +2471,18 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "h2" version = "0.4.12" @@ -2435,6 +2537,8 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ + "allocator-api2", + "equivalent", "foldhash", ] @@ -2497,6 +2601,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "hostname" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a56f203cd1c76362b69e3863fd987520ac36cf70a8c92627449b2f64a8cf7d65" +dependencies = [ + "cfg-if 1.0.3", + "libc", + "windows-link 0.1.3", +] + [[package]] name = "http" version = "1.3.1" @@ -2581,6 +2696,7 @@ dependencies 
= [ "http", "hyper", "hyper-util", + "log", "rustls", "rustls-native-certs 0.8.1", "rustls-pki-types", @@ -3045,6 +3161,18 @@ dependencies = [ "unicode-general-category", ] +[[package]] +name = "json-patch" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f300e415e2134745ef75f04562dd0145405c2f7fd92065db029ac4b16b57fe90" +dependencies = [ + "jsonptr", + "serde", + "serde_json", + "thiserror 1.0.69", +] + [[package]] name = "json5" version = "0.4.1" @@ -3056,6 +3184,29 @@ dependencies = [ "serde", ] +[[package]] +name = "jsonpath-rust" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c00ae348f9f8fd2d09f82a98ca381c60df9e0820d8d79fce43e649b4dc3128b" +dependencies = [ + "pest", + "pest_derive", + "regex", + "serde_json", + "thiserror 2.0.16", +] + +[[package]] +name = "jsonptr" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5a3cc660ba5d72bce0b3bb295bf20847ccbb40fd423f3f05b61273672e561fe" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "jwalk" version = "0.8.1" @@ -3066,6 +3217,19 @@ dependencies = [ "rayon", ] +[[package]] +name = "k8s-openapi" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d13f06d5326a915becaffabdfab75051b8cdc260c2a5c06c0e90226ede89a692" +dependencies = [ + "base64 0.22.1", + "chrono", + "schemars 1.0.4", + "serde", + "serde_json", +] + [[package]] name = "kernel32-sys" version = "0.2.2" @@ -3076,6 +3240,115 @@ dependencies = [ "winapi-build", ] +[[package]] +name = "kube" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e7bb0b6a46502cc20e4575b6ff401af45cfea150b34ba272a3410b78aa014e" +dependencies = [ + "k8s-openapi", + "kube-client", + "kube-core", + "kube-derive", + "kube-runtime", +] + +[[package]] +name = "kube-client" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4987d57a184d2b5294fdad3d7fc7f278899469d21a4da39a8f6ca16426567a36" +dependencies = [ + "base64 0.22.1", + "bytes", + "chrono", + "either", + "futures", + "home", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-timeout", + "hyper-util", + "jsonpath-rust", + "k8s-openapi", + "kube-core", + "pem", + "rustls", + "secrecy", + "serde", + "serde_json", + "serde_yaml", + "thiserror 2.0.16", + "tokio", + "tokio-util", + "tower", + "tower-http", + "tracing", +] + +[[package]] +name = "kube-core" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914bbb770e7bb721a06e3538c0edd2babed46447d128f7c21caa68747060ee73" +dependencies = [ + "chrono", + "derive_more 2.0.1", + "form_urlencoded", + "http", + "json-patch", + "k8s-openapi", + "schemars 1.0.4", + "serde", + "serde-value", + "serde_json", + "thiserror 2.0.16", +] + +[[package]] +name = "kube-derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03dee8252be137772a6ab3508b81cd797dee62ee771112a2453bc85cbbe150d2" +dependencies = [ + "darling 0.21.3", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn 2.0.106", +] + +[[package]] +name = "kube-runtime" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aea4de4b562c5cc89ab10300bb63474ae1fa57ff5a19275f2e26401a323e3fd" +dependencies = [ + "ahash", + "async-broadcast", + "async-stream", + "backon", + "educe", + "futures", + 
"hashbrown 0.15.5", + "hostname", + "json-patch", + "k8s-openapi", + "kube-client", + "parking_lot", + "pin-project", + "serde", + "serde_json", + "thiserror 2.0.16", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "lalrpop-util" version = "0.20.2" @@ -3256,7 +3529,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d149aaa2965d70381709d9df4c7ee1fc0de1c614a4efc2ee356f5e43d68749f8" dependencies = [ - "derive_more", + "derive_more 1.0.0", "malachite", "num-integer", "num-traits", @@ -3940,6 +4213,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-multimap" version = "0.7.3" @@ -4020,6 +4302,16 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64 0.22.1", + "serde_core", +] + [[package]] name = "pem-rfc7468" version = "0.7.0" @@ -5338,10 +5630,23 @@ checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" dependencies = [ "dyn-clone", "ref-cast", + "schemars_derive", "serde", "serde_json", ] +[[package]] +name = "schemars_derive" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d020396d1d138dc19f1165df7545479dcd58d93810dc5d646a16e55abefa80" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.106", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -5414,10 +5719,11 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ + "serde_core", "serde_derive", ] @@ -5432,11 +5738,41 @@ dependencies = [ "typeid", ] +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float", + "serde", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", @@ -5542,7 +5878,7 
@@ version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" dependencies = [ - "darling", + "darling 0.20.11", "proc-macro2", "quote", "syn 2.0.106", @@ -6138,6 +6474,7 @@ dependencies = [ "futures-sink", "futures-util", "pin-project-lite", + "slab", "tokio", ] @@ -6367,12 +6704,14 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ + "base64 0.22.1", "bitflags 2.9.3", "bytes", "futures-util", "http", "http-body", "iri-string", + "mime", "pin-project-lite", "tower", "tower-layer", @@ -6836,7 +7175,7 @@ version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7df16e474ef958526d1205f6dda359fdfab79d9aa6d54bafcb92dcd07673dca" dependencies = [ - "darling", + "darling 0.20.11", "once_cell", "proc-macro-error2", "proc-macro2", diff --git a/lib/llm/src/discovery/watcher.rs b/lib/llm/src/discovery/watcher.rs index 00412422ad..9dd6f6b688 100644 --- a/lib/llm/src/discovery/watcher.rs +++ b/lib/llm/src/discovery/watcher.rs @@ -3,25 +3,26 @@ use std::sync::Arc; use tokio::sync::mpsc::Sender; +use tokio::sync::Notify; use anyhow::Context as _; -use tokio::sync::{Notify, mpsc::Receiver}; +use futures::StreamExt; use dynamo_runtime::{ DistributedRuntime, + discovery::{DiscoveryEvent, DiscoveryInstance, DiscoveryKey, DiscoveryStream}, pipeline::{ ManyOut, Operator, RouterMode, SegmentSource, ServiceBackend, SingleIn, Source, network::egress::push_router::PushRouter, }, protocols::{EndpointId, annotated::Annotated}, - storage::key_value_store::WatchEvent, }; use crate::{ backend::Backend, entrypoint, kv_router::{KvRouterConfig, PrefillRouter}, - model_card::{self, ModelDeploymentCard}, + model_card::ModelDeploymentCard, model_type::{ModelInput, ModelType}, preprocessor::{OpenAIPreprocessor, PreprocessedEmbeddingRequest, prompt::PromptFormatter}, protocols::{ @@ -99,17 +100,45 @@ impl ModelWatcher { } /// Common watch logic with optional namespace filtering - pub async fn watch(&self, mut events_rx: Receiver, target_namespace: Option<&str>) { + pub async fn watch(&self, mut discovery_stream: DiscoveryStream, target_namespace: Option<&str>) { let global_namespace = target_namespace.is_none_or(is_global_namespace); - while let Some(event) = events_rx.recv().await { + while let Some(result) = discovery_stream.next().await { + let event = match result { + Ok(event) => event, + Err(err) => { + tracing::error!(%err, "Error in discovery stream"); + continue; + } + }; + match event { - WatchEvent::Put(kv) => { - let key = kv.key_str(); - let endpoint_id = match key_extract(key) { - Ok((eid, _)) => eid, - Err(err) => { - tracing::error!(%key, %err, "Failed extracting EndpointId from key. Ignoring instance."); + DiscoveryEvent::Added(instance) => { + // Extract EndpointId, instance_id, and card from the discovery instance + let (endpoint_id, instance_id, mut card) = match &instance { + DiscoveryInstance::ModelCard { + namespace, + component, + endpoint, + instance_id, + .. 
+ } => { + let eid = EndpointId { + namespace: namespace.clone(), + component: component.clone(), + name: endpoint.clone(), + }; + + match instance.deserialize_model_card::() { + Ok(card) => (eid, *instance_id, card), + Err(err) => { + tracing::error!(%err, instance_id, "Failed to deserialize model card"); + continue; + } + } + } + _ => { + tracing::error!("Unexpected discovery instance type (expected ModelCard)"); continue; } }; @@ -127,21 +156,6 @@ impl ModelWatcher { continue; } - let mut card = match serde_json::from_slice::(kv.value()) { - Ok(card) => card, - Err(err) => { - match kv.value_str() { - Ok(value) => { - tracing::error!(%err, value, "Invalid JSON in model card") - } - Err(value_str_err) => { - tracing::error!(original_error = %err, %value_str_err, "Invalid UTF-8 string in model card, expected JSON") - } - } - continue; - } - }; - // If we already have a worker for this model, and the ModelDeploymentCard // cards don't match, alert, and don't add the new instance let can_add = @@ -164,7 +178,10 @@ impl ModelWatcher { continue; } - match self.handle_put(key, &endpoint_id, &mut card).await { + // Use instance_id as the HashMap key (simpler and sufficient since keys are opaque) + let key = format!("{:x}", instance_id); + + match self.handle_put(&key, &endpoint_id, &mut card).await { Ok(()) => { tracing::info!( model_name = card.name(), @@ -183,10 +200,12 @@ impl ModelWatcher { } } } - WatchEvent::Delete(kv) => { - let deleted_key = kv.key_str(); + DiscoveryEvent::Removed(instance_id) => { + // Use instance_id hex as the HashMap key (matches what we saved with) + let key = format!("{:x}", instance_id); + match self - .handle_delete(deleted_key, target_namespace, global_namespace) + .handle_delete(&key, target_namespace, global_namespace) .await { Ok(Some(model_name)) => { @@ -212,6 +231,8 @@ impl ModelWatcher { target_namespace: Option<&str>, is_global_namespace: bool, ) -> anyhow::Result> { + tracing::warn!("DISCOVERY_VALIDATION: handle_delete: key={}", key); + let card = match self.manager.remove_model_card(key) { Some(card) => card, None => { @@ -303,6 +324,8 @@ impl ModelWatcher { endpoint_id: &EndpointId, card: &mut ModelDeploymentCard, ) -> anyhow::Result<()> { + tracing::warn!("DISCOVERY_VALIDATION: handle_put: key={}", key); + card.download_config().await?; let component = self @@ -559,35 +582,37 @@ impl ModelWatcher { /// All the registered ModelDeploymentCard with the EndpointId they are attached to, one per instance async fn all_cards(&self) -> anyhow::Result> { - let store = self.drt.store(); - let Some(card_bucket) = store.get_bucket(model_card::ROOT_PATH).await? 
else { - // no cards - return Ok(vec![]); - }; - let entries = card_bucket.entries().await?; + let discovery = self.drt.discovery_client(); + let instances = discovery.list(DiscoveryKey::AllModelCards).await?; - let mut results = Vec::with_capacity(entries.len()); - for (key, card_bytes) in entries { - let r = match serde_json::from_slice::(&card_bytes) { + let mut results = Vec::with_capacity(instances.len()); + for instance in instances { + match instance.deserialize_model_card::() { Ok(card) => { - let maybe_endpoint_id = - key_extract(&key).map(|(endpoint_id, _instance_id)| endpoint_id); - let endpoint_id = match maybe_endpoint_id { - Ok(eid) => eid, - Err(err) => { - tracing::error!(%err, "Skipping invalid key, not string or not EndpointId"); + // Extract EndpointId from the instance + let endpoint_id = match &instance { + dynamo_runtime::discovery::DiscoveryInstance::ModelCard { + namespace, + component, + endpoint, + .. + } => EndpointId { + namespace: namespace.clone(), + component: component.clone(), + name: endpoint.clone(), + }, + _ => { + tracing::error!("Unexpected discovery instance type (expected ModelCard)"); continue; } }; - (endpoint_id, card) + results.push((endpoint_id, card)); } Err(err) => { - let value = String::from_utf8_lossy(&card_bytes); - tracing::error!(%err, %value, "Invalid JSON in model card"); + tracing::error!(%err, "Failed to deserialize model card"); continue; } - }; - results.push(r); + } } Ok(results) } @@ -612,40 +637,4 @@ impl ModelWatcher { } } -/// The ModelDeploymentCard is published in store with a key like "v1/mdc/dynamo/backend/generate/694d9981145a61ad". -/// Extract the EndpointId and instance_id from that. -fn key_extract(s: &str) -> anyhow::Result<(EndpointId, String)> { - if !s.starts_with(model_card::ROOT_PATH) { - anyhow::bail!("Invalid format: expected model card ROOT_PATH segment in {s}"); - } - let parts: Vec<&str> = s.split('/').collect(); - - // Need at least prefix model_card::ROOT_PATH (2 parts) + namespace, component, name (3 parts) - if parts.len() <= 5 { - anyhow::bail!("Invalid format: not enough path segments in {s}"); - } - let endpoint_id = EndpointId { - namespace: parts[2].to_string(), - component: parts[3].to_string(), - name: parts[4].to_string(), - }; - Ok((endpoint_id, parts[parts.len() - 1].to_string())) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_key_extract() { - let input = format!( - "{}/dynamo/backend/generate/694d9981145a61ad", - model_card::ROOT_PATH - ); - let (endpoint_id, _) = key_extract(&input).unwrap(); - assert_eq!(endpoint_id.namespace, "dynamo"); - assert_eq!(endpoint_id.component, "backend"); - assert_eq!(endpoint_id.name, "generate"); - } -} diff --git a/lib/llm/src/discovery/worker_monitor.rs b/lib/llm/src/discovery/worker_monitor.rs index bc43dd38bf..3e0e6b7031 100644 --- a/lib/llm/src/discovery/worker_monitor.rs +++ b/lib/llm/src/discovery/worker_monitor.rs @@ -3,12 +3,12 @@ use crate::kv_router::KV_METRICS_SUBJECT; use crate::kv_router::scoring::LoadEvent; -use crate::model_card::{self, ModelDeploymentCard}; +use crate::model_card::ModelDeploymentCard; use dynamo_runtime::component::Client; +use dynamo_runtime::discovery::{watch_and_extract_field, DiscoveryKey}; use dynamo_runtime::pipeline::{WorkerLoadMonitor, async_trait}; use dynamo_runtime::traits::DistributedRuntimeProvider; use dynamo_runtime::traits::events::EventSubscriber; -use dynamo_runtime::utils::typed_prefix_watcher::{key_extractors, watch_prefix_with_extraction}; use std::collections::HashMap; use 
std::sync::{Arc, RwLock}; use tokio_stream::StreamExt; @@ -79,21 +79,13 @@ impl WorkerLoadMonitor for KvWorkerMonitor { let endpoint = &self.client.endpoint; let component = endpoint.component(); - let Some(etcd_client) = component.drt().etcd_client() else { - // Static mode, no monitoring needed - return Ok(()); - }; - - // Watch for runtime config updates from model deployment cards - let runtime_configs_watcher = watch_prefix_with_extraction( - etcd_client, - model_card::ROOT_PATH, - key_extractors::lease_id, - |card: ModelDeploymentCard| Some(card.runtime_config), - component.drt().child_token(), - ) - .await?; - let mut config_events_rx = runtime_configs_watcher.receiver(); + // Watch for runtime config updates from model deployment cards via discovery interface + let discovery = component.drt().discovery_client(); + let discovery_stream = discovery.list_and_watch(DiscoveryKey::AllModelCards).await?; + let mut config_events_rx = watch_and_extract_field( + discovery_stream, + |card: ModelDeploymentCard| card.runtime_config, + ); // Subscribe to KV metrics events let mut kv_metrics_rx = component.namespace().subscribe(KV_METRICS_SUBJECT).await?; @@ -117,6 +109,21 @@ impl WorkerLoadMonitor for KvWorkerMonitor { // Handle runtime config updates _ = config_events_rx.changed() => { let runtime_configs = config_events_rx.borrow().clone(); + + tracing::warn!( + worker_count = runtime_configs.len(), + "DISCOVERY: Runtime config updates received" + ); + + // Log detailed config state for comparison + let config_details: Vec<(u64, Option)> = runtime_configs + .iter() + .map(|(&lease_id, config)| (lease_id, config.total_kv_blocks)) + .collect(); + tracing::warn!( + "DISCOVERY_VALIDATION: config_state: configs={:?}", + config_details + ); let mut states = worker_load_states.write().unwrap(); states.retain(|lease_id, _| runtime_configs.contains_key(lease_id)); diff --git a/lib/llm/src/entrypoint/input/common.rs b/lib/llm/src/entrypoint/input/common.rs index df382b3b62..feac5d442a 100644 --- a/lib/llm/src/entrypoint/input/common.rs +++ b/lib/llm/src/entrypoint/input/common.rs @@ -10,7 +10,7 @@ use crate::{ entrypoint::{self, EngineConfig}, kv_router::{KvPushRouter, KvRouter, PrefillRouter}, migration::Migration, - model_card::{self, ModelDeploymentCard}, + model_card::ModelDeploymentCard, preprocessor::{OpenAIPreprocessor, prompt::PromptFormatter}, protocols::common::llm_backend::{BackendOutput, LLMEngineOutput, PreprocessedRequest}, request_template::RequestTemplate, @@ -62,19 +62,19 @@ pub async fn prepare_engine( EngineConfig::Dynamic(local_model) => { let distributed_runtime = DistributedRuntime::from_settings(runtime.clone()).await?; - let store = Arc::new(distributed_runtime.store().clone()); let model_manager = Arc::new(ModelManager::new()); let watch_obj = Arc::new(ModelWatcher::new( - distributed_runtime, + distributed_runtime.clone(), model_manager.clone(), dynamo_runtime::pipeline::RouterMode::RoundRobin, None, None, )); - let (_, receiver) = store.watch(model_card::ROOT_PATH, None, runtime.primary_token()); + let discovery = distributed_runtime.discovery_client(); + let discovery_stream = discovery.list_and_watch(dynamo_runtime::discovery::DiscoveryKey::AllModelCards).await?; let inner_watch_obj = watch_obj.clone(); let _watcher_task = tokio::spawn(async move { - inner_watch_obj.watch(receiver, None).await; + inner_watch_obj.watch(discovery_stream, None).await; }); tracing::info!("Waiting for remote model.."); diff --git a/lib/llm/src/entrypoint/input/grpc.rs 
b/lib/llm/src/entrypoint/input/grpc.rs index 8693c4d1d1..e1653e7e15 100644 --- a/lib/llm/src/entrypoint/input/grpc.rs +++ b/lib/llm/src/entrypoint/input/grpc.rs @@ -9,14 +9,13 @@ use crate::{ entrypoint::{self, EngineConfig, input::common}, grpc::service::kserve, kv_router::KvRouterConfig, - model_card, namespace::is_global_namespace, types::openai::{ chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse}, completions::{NvCreateCompletionRequest, NvCreateCompletionResponse}, }, }; -use dynamo_runtime::{DistributedRuntime, Runtime, storage::key_value_store::KeyValueStoreManager}; +use dynamo_runtime::{DistributedRuntime, Runtime}; use dynamo_runtime::{distributed::DistributedConfig, pipeline::RouterMode}; /// Build and run an KServe gRPC service @@ -28,7 +27,6 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul let grpc_service = match engine_config { EngineConfig::Dynamic(_) => { let distributed_runtime = DistributedRuntime::from_settings(runtime.clone()).await?; - let store = Arc::new(distributed_runtime.store().clone()); let grpc_service = grpc_service_builder.build()?; let router_config = engine_config.local_model().router_config(); // Listen for models registering themselves, add them to gRPC service @@ -41,7 +39,6 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul run_watcher( distributed_runtime, grpc_service.state().manager_clone(), - store, router_config.router_mode, Some(router_config.kv_router_config), router_config.busy_threshold, @@ -164,34 +161,32 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul /// Spawns a task that watches for new models in store, /// and registers them with the ModelManager so that the HTTP service can use them. -#[allow(clippy::too_many_arguments)] async fn run_watcher( runtime: DistributedRuntime, model_manager: Arc, - store: Arc, router_mode: RouterMode, kv_router_config: Option, busy_threshold: Option, target_namespace: Option, ) -> anyhow::Result<()> { - let cancellation_token = runtime.primary_token(); let watch_obj = ModelWatcher::new( - runtime, + runtime.clone(), model_manager, router_mode, kv_router_config, busy_threshold, ); tracing::debug!("Waiting for remote model"); - let (_, receiver) = store.watch(model_card::ROOT_PATH, None, cancellation_token); + let discovery = runtime.discovery_client(); + let discovery_stream = discovery.list_and_watch(dynamo_runtime::discovery::DiscoveryKey::AllModelCards).await?; // [gluo NOTE] This is different from http::run_watcher where it alters the HTTP service // endpoint being exposed, gRPC doesn't have the same concept as the KServe service // only has one kind of inference endpoint. 
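As context for the watcher task spawned just below: `list_and_watch` returns a `DiscoveryStream` that the `ModelWatcher` drains with `StreamExt::next`. The following is a minimal, illustrative sketch (not part of the patch) of that consumption pattern; it assumes the `DiscoveryStream`, `DiscoveryEvent`, and `DiscoveryInstance` types from `dynamo_runtime::discovery` introduced elsewhere in this diff, and `drain_discovery` is a hypothetical helper rather than an API of the crate.

use dynamo_runtime::discovery::{DiscoveryEvent, DiscoveryInstance, DiscoveryStream};
use futures::StreamExt;

// Hypothetical consumer: drain a discovery stream and react to Added/Removed
// events. This mirrors the shape of ModelWatcher::watch in this diff, with the
// model-card handling replaced by logging.
async fn drain_discovery(mut stream: DiscoveryStream) {
    while let Some(result) = stream.next().await {
        // Each stream item is a Result; errors are logged and skipped, as the
        // watcher does, so a transient failure does not end the loop.
        let event = match result {
            Ok(event) => event,
            Err(err) => {
                tracing::error!(%err, "error in discovery stream");
                continue;
            }
        };
        match event {
            DiscoveryEvent::Added(DiscoveryInstance::Endpoint(instance)) => {
                tracing::info!(instance_id = instance.instance_id, "endpoint added");
            }
            DiscoveryEvent::Added(_) => {
                tracing::debug!("non-endpoint instance added (e.g. a model card)");
            }
            DiscoveryEvent::Removed(instance_id) => {
                tracing::info!(instance_id, "instance removed");
            }
        }
    }
}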
- // Pass the sender to the watcher + // Pass the discovery stream to the watcher let _watcher_task = tokio::spawn(async move { - watch_obj.watch(receiver, target_namespace.as_deref()).await; + watch_obj.watch(discovery_stream, target_namespace.as_deref()).await; }); Ok(()) diff --git a/lib/llm/src/entrypoint/input/http.rs b/lib/llm/src/entrypoint/input/http.rs index 88b4e3e979..e95c7eef36 100644 --- a/lib/llm/src/entrypoint/input/http.rs +++ b/lib/llm/src/entrypoint/input/http.rs @@ -10,14 +10,12 @@ use crate::{ entrypoint::{self, EngineConfig, input::common}, http::service::service_v2::{self, HttpService}, kv_router::KvRouterConfig, - model_card, namespace::is_global_namespace, types::openai::{ chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse}, completions::{NvCreateCompletionRequest, NvCreateCompletionResponse}, }, }; -use dynamo_runtime::storage::key_value_store::KeyValueStoreManager; use dynamo_runtime::{DistributedRuntime, Runtime}; use dynamo_runtime::{distributed::DistributedConfig, pipeline::RouterMode}; @@ -67,7 +65,6 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul // This allows the /health endpoint to query store for active instances http_service_builder = http_service_builder.store(distributed_runtime.store().clone()); let http_service = http_service_builder.build()?; - let store = Arc::new(distributed_runtime.store().clone()); let router_config = engine_config.local_model().router_config(); // Listen for models registering themselves, add them to HTTP service @@ -82,7 +79,6 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul run_watcher( distributed_runtime, http_service.state().manager_clone(), - store, router_config.router_mode, Some(router_config.kv_router_config), router_config.busy_threshold, @@ -273,7 +269,6 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul async fn run_watcher( runtime: DistributedRuntime, model_manager: Arc, - store: Arc, router_mode: RouterMode, kv_router_config: Option, busy_threshold: Option, @@ -281,16 +276,16 @@ async fn run_watcher( http_service: Arc, metrics: Arc, ) -> anyhow::Result<()> { - let cancellation_token = runtime.primary_token(); let mut watch_obj = ModelWatcher::new( - runtime, + runtime.clone(), model_manager, router_mode, kv_router_config, busy_threshold, ); tracing::debug!("Waiting for remote model"); - let (_, receiver) = store.watch(model_card::ROOT_PATH, None, cancellation_token); + let discovery = runtime.discovery_client(); + let discovery_stream = discovery.list_and_watch(dynamo_runtime::discovery::DiscoveryKey::AllModelCards).await?; // Create a channel to receive model type updates let (tx, mut rx) = tokio::sync::mpsc::channel(32); @@ -304,9 +299,9 @@ async fn run_watcher( } }); - // Pass the sender to the watcher + // Pass the discovery stream to the watcher let _watcher_task = tokio::spawn(async move { - watch_obj.watch(receiver, target_namespace.as_deref()).await; + watch_obj.watch(discovery_stream, target_namespace.as_deref()).await; }); Ok(()) diff --git a/lib/llm/src/http/service/clear_kv_blocks.rs b/lib/llm/src/http/service/clear_kv_blocks.rs index ee1cc3bc3e..b734b60480 100644 --- a/lib/llm/src/http/service/clear_kv_blocks.rs +++ b/lib/llm/src/http/service/clear_kv_blocks.rs @@ -6,7 +6,7 @@ use axum::{http::Method, response::IntoResponse, routing::post, Json, Router}; use serde_json::json; use std::sync::Arc; -use dynamo_runtime::{pipeline::PushRouter, stream::StreamExt}; +use 
dynamo_runtime::{discovery::DiscoveryKey, pipeline::PushRouter, stream::StreamExt}; pub const CLEAR_KV_ENDPOINT: &str = "clear_kv_blocks"; @@ -150,7 +150,14 @@ async fn clear_kv_blocks_handler( } }; - let instances = match component_obj.list_instances().await { + let discovery_client = distributed.discovery_client(); + let discovery_key = DiscoveryKey::Endpoint { + namespace: namespace.clone(), + component: component.clone(), + endpoint: CLEAR_KV_ENDPOINT.to_string(), + }; + + let discovery_instances = match discovery_client.list(discovery_key).await { Ok(instances) => instances, Err(e) => { add_worker_result( @@ -165,11 +172,11 @@ async fn clear_kv_blocks_handler( } }; - if instances.is_empty() { + if discovery_instances.is_empty() { add_worker_result( false, entry_name, - "No instances found for worker group", + "No instances found for clear_kv_blocks endpoint", namespace, component, None, @@ -177,30 +184,12 @@ async fn clear_kv_blocks_handler( continue; } - let instances_filtered = instances - .clone() + let instances_filtered: Vec = discovery_instances .into_iter() - .filter(|instance| instance.endpoint == CLEAR_KV_ENDPOINT) - .collect::>(); - - if instances_filtered.is_empty() { - let found_endpoints: Vec = instances - .iter() - .map(|instance| instance.endpoint.clone()) - .collect(); - add_worker_result( - false, - entry_name, - &format!( - "Worker group doesn't support clear_kv_blocks. Supported endpoints: {}", - found_endpoints.join(", ") - ), - namespace, - component, - None, - ); - continue; - } + .map(|di| match di { + dynamo_runtime::discovery::DiscoveryInstance::Endpoint(instance) => instance, + }) + .collect(); for instance in &instances_filtered { let instance_name = format!("{}-instance-{}", entry.name, instance.id()); diff --git a/lib/llm/src/http/service/health.rs b/lib/llm/src/http/service/health.rs index 5f007a9bd4..5e4e9deb5f 100644 --- a/lib/llm/src/http/service/health.rs +++ b/lib/llm/src/http/service/health.rs @@ -52,14 +52,13 @@ async fn live_handler( async fn health_handler( axum::extract::State(state): axum::extract::State>, ) -> impl IntoResponse { - let instances = match list_all_instances(state.store()).await { + let instances = match list_all_instances(state.discovery_client()).await { Ok(instances) => instances, Err(err) => { - tracing::warn!(%err, "Failed to fetch instances from store"); + tracing::warn!(%err, "Failed to fetch instances from discovery client"); vec![] } }; - let mut endpoints: Vec = instances .iter() .map(|instance| instance.endpoint_id().as_url()) diff --git a/lib/llm/src/http/service/service_v2.rs b/lib/llm/src/http/service/service_v2.rs index ae18a67bdb..40d5007fbc 100644 --- a/lib/llm/src/http/service/service_v2.rs +++ b/lib/llm/src/http/service/service_v2.rs @@ -18,6 +18,7 @@ use crate::request_template::RequestTemplate; use anyhow::Result; use axum_server::tls_rustls::RustlsConfig; use derive_builder::Builder; +use dynamo_runtime::discovery::{DiscoveryClient, KVStoreDiscoveryClient}; use dynamo_runtime::logging::make_request_span; use dynamo_runtime::metrics::prometheus_names::name_prefix; use dynamo_runtime::storage::key_value_store::KeyValueStoreManager; @@ -31,6 +32,7 @@ pub struct State { metrics: Arc, manager: Arc, store: KeyValueStoreManager, + discovery_client: Arc, flags: StateFlags, } @@ -72,10 +74,21 @@ impl StateFlags { impl State { pub fn new(manager: Arc, store: KeyValueStoreManager) -> Self { + // Initialize discovery client backed by KV store + // Create a cancellation token for the discovery client's watch streams + 
let discovery_client = { + let cancel_token = CancellationToken::new(); + Arc::new(KVStoreDiscoveryClient::new( + store.clone(), + cancel_token, + )) as Arc + }; + Self { manager, metrics: Arc::new(Metrics::default()), store, + discovery_client, flags: StateFlags { chat_endpoints_enabled: AtomicBool::new(false), cmpl_endpoints_enabled: AtomicBool::new(false), @@ -102,6 +115,10 @@ impl State { &self.store } + pub fn discovery_client(&self) -> Arc { + self.discovery_client.clone() + } + // TODO pub fn sse_keep_alive(&self) -> Option { None diff --git a/lib/llm/src/kv_router.rs b/lib/llm/src/kv_router.rs index 63b3c0c8c0..26ca4a4c49 100644 --- a/lib/llm/src/kv_router.rs +++ b/lib/llm/src/kv_router.rs @@ -9,13 +9,13 @@ use anyhow::Result; use derive_builder::Builder; use dynamo_runtime::{ component::{Component, InstanceSource}, + discovery::{watch_and_extract_field, DiscoveryKey}, pipeline::{ AsyncEngine, AsyncEngineContextProvider, Error, ManyOut, PushRouter, ResponseStream, SingleIn, async_trait, }, - prelude::*, protocols::annotated::Annotated, - utils::typed_prefix_watcher::{key_extractors, watch_prefix_with_extraction}, + traits::DistributedRuntimeProvider, }; use futures::stream::{self, StreamExt}; use serde::{Deserialize, Serialize}; @@ -47,7 +47,7 @@ use crate::{ subscriber::start_kv_router_background, }, local_model::runtime_config::ModelRuntimeConfig, - model_card::{self, ModelDeploymentCard}, + model_card::ModelDeploymentCard, preprocessor::PreprocessedRequest, protocols::common::llm_backend::LLMEngineOutput, }; @@ -235,22 +235,18 @@ impl KvRouter { } }; - // Create runtime config watcher using the generic etcd watcher - // TODO: Migrate to discovery_client() once it exposes kv_get_and_watch_prefix functionality - let etcd_client = component - .drt() - .etcd_client() - .expect("Cannot KV route without etcd client"); - - let runtime_configs_watcher = watch_prefix_with_extraction( - etcd_client, - &format!("{}/{}", model_card::ROOT_PATH, component.path()), - key_extractors::lease_id, - |card: ModelDeploymentCard| Some(card.runtime_config), - cancellation_token.clone(), - ) - .await?; - let runtime_configs_rx = runtime_configs_watcher.receiver(); + // Watch for runtime config updates via discovery interface + let discovery = component.drt().discovery_client(); + let discovery_key = DiscoveryKey::EndpointModelCards { + namespace: component.namespace().name().to_string(), + component: component.name().to_string(), + endpoint: "generate".to_string(), + }; + let discovery_stream = discovery.list_and_watch(discovery_key).await?; + let runtime_configs_rx = watch_and_extract_field( + discovery_stream, + |card: ModelDeploymentCard| card.runtime_config, + ); let indexer = if kv_router_config.overlap_score_weight == 0.0 { // When overlap_score_weight is zero, we don't need to track prefixes diff --git a/lib/llm/src/kv_router/scheduler.rs b/lib/llm/src/kv_router/scheduler.rs index 9a90b49116..7e9addc436 100644 --- a/lib/llm/src/kv_router/scheduler.rs +++ b/lib/llm/src/kv_router/scheduler.rs @@ -162,6 +162,16 @@ impl KvScheduler { let new_instances = instances_monitor_rx.borrow_and_update().clone(); let new_configs = configs_monitor_rx.borrow_and_update().clone(); + // Log config state for comparison + let config_details: Vec<(u64, Option)> = new_configs + .iter() + .map(|(&worker_id, config)| (worker_id, config.total_kv_blocks)) + .collect(); + tracing::warn!( + "DISCOVERY_VALIDATION: scheduler_config_state: configs={:?}", + config_details + ); + // Build the new workers_with_configs map let mut 
new_workers_with_configs = HashMap::new(); for instance in &new_instances { diff --git a/lib/llm/src/kv_router/subscriber.rs b/lib/llm/src/kv_router/subscriber.rs index dbdc4da69a..7051327be0 100644 --- a/lib/llm/src/kv_router/subscriber.rs +++ b/lib/llm/src/kv_router/subscriber.rs @@ -8,6 +8,7 @@ use std::{collections::HashSet, time::Duration}; use anyhow::Result; use dynamo_runtime::{ component::Component, + discovery::DiscoveryKey, prelude::*, traits::events::EventPublisher, transports::{ @@ -15,6 +16,7 @@ use dynamo_runtime::{ nats::{NatsQueue, Slug}, }, }; +use futures::StreamExt; use tokio::sync::{mpsc, oneshot}; use tokio_util::sync::CancellationToken; @@ -248,10 +250,13 @@ pub async fn start_kv_router_background( // Get the generate endpoint and watch for instance deletions let generate_endpoint = component.endpoint("generate"); - let (_instance_prefix, _instance_watcher, mut instance_event_rx) = etcd_client - .kv_get_and_watch_prefix(generate_endpoint.etcd_root()) - .await? - .dissolve(); + let discovery_client = component.drt().discovery_client(); + let discovery_key = DiscoveryKey::Endpoint { + namespace: component.namespace().name().to_string(), + component: component.name().to_string(), + endpoint: "generate".to_string(), + }; + let mut instance_event_stream = discovery_client.list_and_watch(discovery_key).await?; // Get instances_rx for tracking current workers let client = generate_endpoint.client().await?; @@ -299,25 +304,21 @@ pub async fn start_kv_router_background( } // Handle generate endpoint instance deletion events - Some(event) = instance_event_rx.recv() => { - let WatchEvent::Delete(kv) = event else { + Some(discovery_event_result) = instance_event_stream.next() => { + let Ok(discovery_event) = discovery_event_result else { continue; }; - let key = String::from_utf8_lossy(kv.key()); - - let Some(worker_id_str) = key.split(&['/', ':'][..]).next_back() else { - tracing::warn!("Could not extract worker ID from instance key: {key}"); + let dynamo_runtime::discovery::DiscoveryEvent::Removed(worker_id) = discovery_event else { continue; }; - // Parse as hexadecimal (base 16) - let Ok(worker_id) = u64::from_str_radix(worker_id_str, 16) else { - tracing::warn!("Could not parse worker ID from instance key: {key}"); - continue; - }; + tracing::warn!( + worker_id = worker_id, + "DISCOVERY: Generate endpoint instance removed, removing worker" + ); - tracing::info!("Generate endpoint instance deleted, removing worker {worker_id}"); + tracing::warn!("DISCOVERY_VALIDATION: remove_worker_tx: worker_id={}", worker_id); if let Err(e) = remove_worker_tx.send(worker_id).await { tracing::warn!("Failed to send worker removal for worker {worker_id}: {e}"); } diff --git a/lib/llm/src/local_model.rs b/lib/llm/src/local_model.rs index de869047c5..a307449397 100644 --- a/lib/llm/src/local_model.rs +++ b/lib/llm/src/local_model.rs @@ -5,14 +5,14 @@ use std::fs; use std::path::{Path, PathBuf}; use dynamo_runtime::component::Endpoint; +use dynamo_runtime::discovery::DiscoverySpec; use dynamo_runtime::protocols::EndpointId; use dynamo_runtime::slug::Slug; -use dynamo_runtime::storage::key_value_store::Key; use dynamo_runtime::traits::DistributedRuntimeProvider; use crate::entrypoint::RouterConfig; use crate::mocker::protocols::MockEngineArgs; -use crate::model_card::{self, ModelDeploymentCard}; +use crate::model_card::ModelDeploymentCard; use crate::model_type::{ModelInput, ModelType}; use crate::request_template::RequestTemplate; @@ -413,13 +413,24 @@ impl LocalModel { self.card.model_type = 
model_type; self.card.model_input = model_input; - // Publish the Model Deployment Card to KV store - let card_store = endpoint.drt().store(); - let key = Key::from_raw(endpoint.unique_path(card_store.connection_id())); - - let _outcome = card_store - .publish(model_card::ROOT_PATH, None, &key, &mut self.card) - .await?; + // Register the Model Deployment Card via discovery interface + let discovery = endpoint.drt().discovery_client(); + let spec = DiscoverySpec::from_model_card( + endpoint.component().namespace().name().to_string(), + endpoint.component().name().to_string(), + endpoint.name().to_string(), + &self.card, + )?; + let _instance = discovery.register(spec).await?; + + tracing::warn!( + "DISCOVERY_VALIDATION: model_card_registered: namespace={}, component={}, endpoint={}, model_name={}", + endpoint.component().namespace().name(), + endpoint.component().name(), + endpoint.name(), + self.card.name() + ); + Ok(()) } } diff --git a/lib/llm/tests/http_metrics.rs b/lib/llm/tests/http_metrics.rs index 36a34be2f1..e3bd1bc5b4 100644 --- a/lib/llm/tests/http_metrics.rs +++ b/lib/llm/tests/http_metrics.rs @@ -295,8 +295,10 @@ mod integration_tests { use super::*; use dynamo_llm::{ discovery::ModelWatcher, engines::make_echo_engine, entrypoint::EngineConfig, - local_model::LocalModelBuilder, model_card, + local_model::LocalModelBuilder, }; + use dynamo_runtime::discovery::DiscoveryKey; + use dynamo_runtime::traits::DistributedRuntimeProvider; use dynamo_runtime::DistributedRuntime; use dynamo_runtime::pipeline::RouterMode; use std::sync::Arc; @@ -333,7 +335,7 @@ mod integration_tests { .build() .unwrap(); - // Set up model watcher to discover models from etcd (like production) + // Set up model watcher to discover models via discovery interface (like production) // This is crucial for the polling task to find model entries let model_watcher = ModelWatcher::new( @@ -343,17 +345,16 @@ mod integration_tests { None, None, ); - // Start watching etcd for model registrations - let store = Arc::new(distributed_runtime.store().clone()); - let (_, receiver) = store.watch( - model_card::ROOT_PATH, - None, - distributed_runtime.primary_token(), - ); + // Start watching for model registrations via discovery interface + let discovery = distributed_runtime.discovery_client(); + let discovery_stream = discovery + .list_and_watch(DiscoveryKey::AllModelCards) + .await + .unwrap(); - // Spawn watcher task to discover models from etcd + // Spawn watcher task to discover models let _watcher_task = tokio::spawn(async move { - model_watcher.watch(receiver, None).await; + model_watcher.watch(discovery_stream, None).await; }); // Set up the engine following the StaticFull pattern from http.rs diff --git a/lib/runtime/Cargo.toml b/lib/runtime/Cargo.toml index cd774ba16e..b11a570b4d 100644 --- a/lib/runtime/Cargo.toml +++ b/lib/runtime/Cargo.toml @@ -39,6 +39,7 @@ humantime = { workspace = true } parking_lot = { workspace = true } prometheus = { workspace = true } rand = { workspace = true } +reqwest = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } tokio = { workspace = true } @@ -74,6 +75,11 @@ regex = { version = "1" } socket2 = { version = "0.5.8" } tokio-rayon = { version = "2.1" } +# Kubernetes discovery backend +kube = { version = "2.0.1", default-features = false, features = ["runtime", "derive", "client", "rustls-tls", "aws-lc-rs"] } +k8s-openapi = { version = "0.26.0", features = ["latest", "schemars"] } +schemars = { version = "1" } + [dev-dependencies] assert_matches = 
{ version = "1.5.0" } criterion = { version = "0.5", features = ["async_tokio"] } diff --git a/lib/runtime/src/component.rs b/lib/runtime/src/component.rs index a97193928a..f695b67f0f 100644 --- a/lib/runtime/src/component.rs +++ b/lib/runtime/src/component.rs @@ -75,7 +75,7 @@ pub use client::{Client, InstanceSource}; /// An instance is namespace+component+endpoint+lease_id and must be unique. pub const INSTANCE_ROOT_PATH: &str = "v1/instances"; -#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)] #[serde(rename_all = "snake_case")] pub enum TransportType { NatsTcp(String), @@ -278,21 +278,24 @@ impl Component { } pub async fn list_instances(&self) -> anyhow::Result> { - let client = self.drt.store(); - let Some(bucket) = client.get_bucket(&self.instance_root()).await? else { - return Ok(vec![]); + let discovery_client = self.drt.discovery_client(); + + let discovery_key = crate::discovery::DiscoveryKey::ComponentEndpoints { + namespace: self.namespace.name(), + component: self.name.clone(), }; - let entries = bucket.entries().await?; - let mut instances = Vec::with_capacity(entries.len()); - for (name, bytes) in entries.into_iter() { - let val = match serde_json::from_slice::(&bytes) { - Ok(val) => val, - Err(err) => { - anyhow::bail!("Error converting storage response to Instance: {err}. {name}",); - } - }; - instances.push(val); - } + + let discovery_instances = discovery_client.list(discovery_key).await?; + + // Extract Instance from DiscoveryInstance::Endpoint wrapper + let mut instances: Vec = discovery_instances + .into_iter() + .filter_map(|di| match di { + crate::discovery::DiscoveryInstance::Endpoint(instance) => Some(instance), + _ => None, // Ignore all other variants (ModelCard, etc.) 
+ }) + .collect(); + instances.sort(); Ok(instances) } diff --git a/lib/runtime/src/component/client.rs b/lib/runtime/src/component/client.rs index 987c5002d8..6c7734af02 100644 --- a/lib/runtime/src/component/client.rs +++ b/lib/runtime/src/component/client.rs @@ -6,14 +6,12 @@ use crate::pipeline::{ SingleIn, }; use arc_swap::ArcSwap; +use futures::StreamExt; use std::collections::HashMap; use std::sync::Arc; use tokio::net::unix::pipe::Receiver; -use crate::{ - pipeline::async_trait, - transports::etcd::{Client as EtcdClient, WatchEvent}, -}; +use crate::{pipeline::async_trait, transports::etcd::Client as EtcdClient}; use super::*; @@ -67,23 +65,21 @@ impl Client { // Client with auto-discover instances using etcd pub(crate) async fn new_dynamic(endpoint: Endpoint) -> Result { + tracing::debug!("Client::new_dynamic: Creating dynamic client for endpoint: {}", endpoint.path()); const INSTANCE_REFRESH_PERIOD: Duration = Duration::from_secs(1); - // create live endpoint watcher - let Some(etcd_client) = &endpoint.component.drt.etcd_client else { - anyhow::bail!("Attempt to create a dynamic client on a static endpoint"); - }; - - let instance_source = - Self::get_or_create_dynamic_instance_source(etcd_client, &endpoint).await?; + let instance_source = Self::get_or_create_dynamic_instance_source(&endpoint).await?; + tracing::debug!("Client::new_dynamic: Got instance source for endpoint: {}", endpoint.path()); let client = Client { - endpoint, + endpoint: endpoint.clone(), instance_source: instance_source.clone(), instance_avail: Arc::new(ArcSwap::from(Arc::new(vec![]))), instance_free: Arc::new(ArcSwap::from(Arc::new(vec![]))), }; + tracing::debug!("Client::new_dynamic: Starting instance source monitor for endpoint: {}", endpoint.path()); client.monitor_instance_source(); + tracing::debug!("Client::new_dynamic: Successfully created dynamic client for endpoint: {}", endpoint.path()); Ok(client) } @@ -118,17 +114,47 @@ impl Client { /// Wait for at least one Instance to be available for this Endpoint pub async fn wait_for_instances(&self) -> Result> { + tracing::debug!( + "wait_for_instances: Starting wait for endpoint: {}", + self.endpoint.path() + ); let mut instances: Vec = vec![]; if let InstanceSource::Dynamic(mut rx) = self.instance_source.as_ref().clone() { // wait for there to be 1 or more endpoints + let mut iteration = 0; loop { instances = rx.borrow_and_update().to_vec(); + tracing::debug!( + "wait_for_instances: iteration={}, current_instance_count={}, endpoint={}", + iteration, + instances.len(), + self.endpoint.path() + ); if instances.is_empty() { + tracing::debug!( + "wait_for_instances: No instances yet, waiting for change notification for endpoint: {}", + self.endpoint.path() + ); rx.changed().await?; + tracing::debug!( + "wait_for_instances: Change notification received for endpoint: {}", + self.endpoint.path() + ); } else { + tracing::info!( + "wait_for_instances: Found {} instance(s) for endpoint: {}", + instances.len(), + self.endpoint.path() + ); break; } + iteration += 1; } + } else { + tracing::debug!( + "wait_for_instances: Static instance source, no dynamic discovery for endpoint: {}", + self.endpoint.path() + ); } Ok(instances) } @@ -164,14 +190,17 @@ impl Client { fn monitor_instance_source(&self) { let cancel_token = self.endpoint.drt().primary_token(); let client = self.clone(); + let endpoint_path = self.endpoint.path(); + tracing::debug!("monitor_instance_source: Starting monitor for endpoint: {}", endpoint_path); tokio::task::spawn(async move { let mut rx = 
match client.instance_source.as_ref() { InstanceSource::Static => { - tracing::error!("Static instance source is not watchable"); + tracing::error!("monitor_instance_source: Static instance source is not watchable"); return; } InstanceSource::Dynamic(rx) => rx.clone(), }; + let mut iteration = 0; while !cancel_token.is_cancelled() { let instance_ids: Vec = rx .borrow_and_update() @@ -179,107 +208,177 @@ impl Client { .map(|instance| instance.id()) .collect(); + tracing::debug!( + "monitor_instance_source: iteration={}, instance_count={}, instance_ids={:?}, endpoint={}", + iteration, + instance_ids.len(), + instance_ids, + endpoint_path + ); + // TODO: this resets both tracked available and free instances client.instance_avail.store(Arc::new(instance_ids.clone())); - client.instance_free.store(Arc::new(instance_ids)); + client.instance_free.store(Arc::new(instance_ids.clone())); - tracing::debug!("instance source updated"); + tracing::warn!( + "DISCOVERY_VALIDATION: endpoint={}, instance_avail={:?}, instance_free={:?}", + endpoint_path, + instance_ids, + instance_ids + ); + + tracing::debug!("monitor_instance_source: instance source updated, endpoint={}", endpoint_path); if let Err(err) = rx.changed().await { - tracing::error!("The Sender is dropped: {}", err); + tracing::error!("monitor_instance_source: The Sender is dropped: {}, endpoint={}", err, endpoint_path); cancel_token.cancel(); } + iteration += 1; } + tracing::debug!("monitor_instance_source: Monitor loop exiting for endpoint: {}", endpoint_path); }); } async fn get_or_create_dynamic_instance_source( - etcd_client: &EtcdClient, endpoint: &Endpoint, ) -> Result> { let drt = endpoint.drt(); let instance_sources = drt.instance_sources(); let mut instance_sources = instance_sources.lock().await; + tracing::debug!( + "get_or_create_dynamic_instance_source: Checking cache for endpoint: {}", + endpoint.path() + ); + if let Some(instance_source) = instance_sources.get(endpoint) { if let Some(instance_source) = instance_source.upgrade() { + tracing::debug!( + "get_or_create_dynamic_instance_source: Found cached instance source for endpoint: {}", + endpoint.path() + ); return Ok(instance_source); } else { + tracing::debug!( + "get_or_create_dynamic_instance_source: Cached instance source was dropped, removing for endpoint: {}", + endpoint.path() + ); instance_sources.remove(endpoint); } } - let prefix_watcher = etcd_client - .kv_get_and_watch_prefix(endpoint.etcd_root()) - .await?; + tracing::debug!( + "get_or_create_dynamic_instance_source: Creating new instance source for endpoint: {}", + endpoint.path() + ); + + let discovery_client = drt.discovery_client(); + let discovery_key = crate::discovery::DiscoveryKey::Endpoint { + namespace: endpoint.component.namespace.name.clone(), + component: endpoint.component.name.clone(), + endpoint: endpoint.name.clone(), + }; + + tracing::debug!( + "get_or_create_dynamic_instance_source: Calling discovery_client.list_and_watch for key: {:?}", + discovery_key + ); - let (prefix, _watcher, mut kv_event_rx) = prefix_watcher.dissolve(); + let mut discovery_stream = discovery_client.list_and_watch(discovery_key.clone()).await?; + + tracing::debug!( + "get_or_create_dynamic_instance_source: Got discovery stream for key: {:?}", + discovery_key + ); let (watch_tx, watch_rx) = tokio::sync::watch::channel(vec![]); let secondary = endpoint.component.drt.runtime.secondary().clone(); - // this task should be included in the registry - // currently this is created once per client, but this object/task should only be 
instantiated - // once per worker/instance secondary.spawn(async move { - tracing::debug!("Starting endpoint watcher for prefix: {}", prefix); - let mut map = HashMap::new(); + tracing::debug!("endpoint_watcher: Starting for discovery key: {:?}", discovery_key); + let mut map: HashMap = HashMap::new(); + let mut event_count = 0; loop { - let kv_event = tokio::select! { + let discovery_event = tokio::select! { _ = watch_tx.closed() => { - tracing::debug!("all watchers have closed; shutting down endpoint watcher for prefix: {prefix}"); + tracing::debug!("endpoint_watcher: all watchers have closed; shutting down for discovery key: {:?}", discovery_key); break; } - kv_event = kv_event_rx.recv() => { - match kv_event { - Some(kv_event) => kv_event, + discovery_event = discovery_stream.next() => { + tracing::debug!("endpoint_watcher: Received stream event for discovery key: {:?}", discovery_key); + match discovery_event { + Some(Ok(event)) => { + tracing::debug!("endpoint_watcher: Got Ok event: {:?}", event); + event + }, + Some(Err(e)) => { + tracing::error!("endpoint_watcher: discovery stream error: {}; shutting down for discovery key: {:?}", e, discovery_key); + break; + } None => { - tracing::debug!("watch stream has closed; shutting down endpoint watcher for prefix: {prefix}"); + tracing::debug!("endpoint_watcher: watch stream has closed; shutting down for discovery key: {:?}", discovery_key); break; } } } }; - match kv_event { - WatchEvent::Put(kv) => { - let key = String::from_utf8(kv.key().to_vec()); - let val = serde_json::from_slice::(kv.value()); - if let (Ok(key), Ok(val)) = (key, val) { - map.insert(key.clone(), val); - } else { - tracing::error!("Unable to parse put endpoint event; shutting down endpoint watcher for prefix: {prefix}"); - break; - } - } - WatchEvent::Delete(kv) => { - match String::from_utf8(kv.key().to_vec()) { - Ok(key) => { map.remove(&key); } - Err(_) => { - tracing::error!("Unable to parse delete endpoint event; shutting down endpoint watcher for prefix: {}", prefix); - break; + event_count += 1; + tracing::debug!("endpoint_watcher: Processing event #{} for discovery key: {:?}", event_count, discovery_key); + + match discovery_event { + crate::discovery::DiscoveryEvent::Added(discovery_instance) => { + match discovery_instance { + crate::discovery::DiscoveryInstance::Endpoint(instance) => { + tracing::info!( + "endpoint_watcher: Added endpoint instance_id={}, namespace={}, component={}, endpoint={}", + instance.instance_id, + instance.namespace, + instance.component, + instance.endpoint + ); + map.insert(instance.instance_id, instance); + } + _ => { + tracing::debug!("endpoint_watcher: Ignoring non-endpoint instance (ModelCard, etc.) 
for discovery key: {:?}", discovery_key); } } } + crate::discovery::DiscoveryEvent::Removed(instance_id) => { + tracing::info!( + "endpoint_watcher: Removed instance_id={} for discovery key: {:?}", + instance_id, + discovery_key + ); + map.remove(&instance_id); + } } let instances: Vec = map.values().cloned().collect(); + tracing::debug!( + "endpoint_watcher: Current map size={}, sending update for discovery key: {:?}", + instances.len(), + discovery_key + ); if watch_tx.send(instances).is_err() { - tracing::debug!("Unable to send watch updates; shutting down endpoint watcher for prefix: {}", prefix); + tracing::debug!("endpoint_watcher: Unable to send watch updates; shutting down for discovery key: {:?}", discovery_key); break; } - } - tracing::debug!("Completed endpoint watcher for prefix: {prefix}"); + tracing::debug!("endpoint_watcher: Completed for discovery key: {:?}, total events processed: {}", discovery_key, event_count); let _ = watch_tx.send(vec![]); }); let instance_source = Arc::new(InstanceSource::Dynamic(watch_rx)); instance_sources.insert(endpoint.clone(), Arc::downgrade(&instance_source)); + tracing::debug!( + "get_or_create_dynamic_instance_source: Successfully created and cached instance source for endpoint: {}", + endpoint.path() + ); Ok(instance_source) } } diff --git a/lib/runtime/src/component/endpoint.rs b/lib/runtime/src/component/endpoint.rs index baeb46683f..90b4d8e6e2 100644 --- a/lib/runtime/src/component/endpoint.rs +++ b/lib/runtime/src/component/endpoint.rs @@ -118,8 +118,6 @@ impl EndpointConfigBuilder { let endpoint_name = endpoint.name.clone(); let system_health = endpoint.drt().system_health.clone(); let subject = endpoint.subject_to(connection_id); - let etcd_path = endpoint.etcd_path_with_lease_id(connection_id); - let etcd_client = endpoint.component.drt.etcd_client.clone(); // Register health check target in SystemHealth if provided if let Some(health_check_payload) = &health_check_payload { @@ -193,24 +191,19 @@ impl EndpointConfigBuilder { result }); - // make the components service endpoint discovery in etcd - - // client.register_service() - let info = Instance { + // Register this endpoint instance in the discovery plane + // The discovery interface abstracts storage backend (etcd, k8s, etc) and provides + // consistent registration/discovery across the system. + let discovery_client = endpoint.drt().discovery_client(); + + let discovery_spec = crate::discovery::DiscoverySpec::Endpoint { + namespace: namespace_name.clone(), component: component_name.clone(), endpoint: endpoint_name.clone(), - namespace: namespace_name.clone(), - instance_id: connection_id, - transport: TransportType::NatsTcp(subject), + transport: TransportType::NatsTcp(subject.clone()), }; - let info = serde_json::to_vec_pretty(&info)?; - - if let Some(etcd_client) = &etcd_client - && let Err(e) = etcd_client - .kv_create(&etcd_path, info, Some(connection_id)) - .await - { + if let Err(e) = discovery_client.register(discovery_spec).await { tracing::error!( component_name, endpoint_name, @@ -222,6 +215,15 @@ impl EndpointConfigBuilder { "Unable to register service for discovery. 
Check discovery service status" )); } + + tracing::warn!( + "DISCOVERY_VALIDATION: endpoint_registered: namespace={}, component={}, endpoint={}, instance_id={}", + namespace_name, + component_name, + endpoint_name, + connection_id + ); + task.await??; Ok(()) diff --git a/lib/runtime/src/discovery/kube.rs b/lib/runtime/src/discovery/kube.rs new file mode 100644 index 0000000000..2499a81d88 --- /dev/null +++ b/lib/runtime/src/discovery/kube.rs @@ -0,0 +1,1119 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use crate::{CancellationToken, Result}; +use async_trait::async_trait; +use std::collections::{HashMap, HashSet}; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; +use tokio::sync::RwLock; +use uuid; + +use super::{DiscoveryClient, DiscoveryEvent, DiscoveryInstance, DiscoveryKey, DiscoverySpec, DiscoveryStream}; +use k8s_openapi::api::discovery::v1::EndpointSlice; +use kube::{ + Api, Client as KubeClient, + api::ListParams, + runtime::{watcher, watcher::Config, reflector, WatchStreamExt}, +}; + +/// Hash a pod name to get a consistent instance ID +pub fn hash_pod_name(pod_name: &str) -> u64 { + use std::collections::hash_map::DefaultHasher; + let mut hasher = DefaultHasher::new(); + pod_name.hash(&mut hasher); + hasher.finish() +} + +/// Key for organizing metadata internally +/// Format: "namespace/component/endpoint" +fn make_endpoint_key(namespace: &str, component: &str, endpoint: &str) -> String { + format!("{}/{}/{}", namespace, component, endpoint) +} + +/// Metadata stored on each pod and exposed via HTTP endpoint +/// This struct holds all discovery registrations for this pod instance +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct DiscoveryMetadata { + /// Registered endpoint instances (key: "namespace/component/endpoint") + endpoints: HashMap, + /// Registered model card instances (key: "namespace/component/endpoint") + model_cards: HashMap, +} + +impl DiscoveryMetadata { + /// Create a new empty metadata store + pub fn new() -> Self { + Self { + endpoints: HashMap::new(), + model_cards: HashMap::new(), + } + } + + /// Register an endpoint instance + pub fn register_endpoint(&mut self, instance: DiscoveryInstance) -> Result<()> { + if let DiscoveryInstance::Endpoint(ref inst) = instance { + let key = make_endpoint_key(&inst.namespace, &inst.component, &inst.endpoint); + self.endpoints.insert(key, instance); + Ok(()) + } else { + crate::raise!("Cannot register non-endpoint instance as endpoint") + } + } + + /// Register a model card instance + pub fn register_model_card(&mut self, instance: DiscoveryInstance) -> Result<()> { + if let DiscoveryInstance::ModelCard { + ref namespace, + ref component, + ref endpoint, + .. 
+ } = instance + { + let key = make_endpoint_key(namespace, component, endpoint); + self.model_cards.insert(key, instance); + Ok(()) + } else { + crate::raise!("Cannot register non-model-card instance as model card") + } + } + + /// Get all registered endpoints + pub fn get_all_endpoints(&self) -> Vec { + self.endpoints.values().cloned().collect() + } + + /// Get all registered model cards + pub fn get_all_model_cards(&self) -> Vec { + self.model_cards.values().cloned().collect() + } + + /// Get all registered instances (endpoints and model cards) + pub fn get_all(&self) -> Vec { + self.endpoints + .values() + .chain(self.model_cards.values()) + .cloned() + .collect() + } +} + +impl Default for DiscoveryMetadata { + fn default() -> Self { + Self::new() + } +} + +/// Cached metadata from a remote pod +struct CachedMetadata { + metadata: Arc, + pod_ip: String, + fetched_at: std::time::Instant, +} + +/// Pod information extracted from environment +#[derive(Debug, Clone)] +struct PodInfo { + pod_name: String, + pod_namespace: String, + system_port: u16, +} + +impl PodInfo { + /// Discover pod information from environment variables + fn from_env() -> Result { + let pod_name = std::env::var("POD_NAME") + .map_err(|_| crate::error!("POD_NAME environment variable not set"))?; + + let pod_namespace = std::env::var("POD_NAMESPACE") + .unwrap_or_else(|_| { + tracing::warn!("POD_NAMESPACE not set, defaulting to 'default'"); + "default".to_string() + }); + + // Read system server port from config + let config = crate::config::RuntimeConfig::from_settings().unwrap_or_default(); + let system_port = config.system_port; + + Ok(Self { + pod_name, + pod_namespace, + system_port, + }) + } +} + +/// Discovery client implementation backed by Kubernetes EndpointSlices +#[derive(Clone)] +pub struct KubeDiscoveryClient { + /// Instance ID derived from pod name hash + instance_id: u64, + /// Local pod's metadata (shared with system server) + metadata: Arc>, + /// HTTP client for fetching remote metadata + http_client: reqwest::Client, + /// Cache of remote pod metadata (instance_id -> metadata) + metadata_cache: Arc>>, + /// Pod information + pod_info: PodInfo, + /// Cancellation token + cancel_token: CancellationToken, + /// Kubernetes client + kube_client: KubeClient, + /// Mock mode for testing (skips HTTP calls and returns mock metadata) + mock_metadata: bool, +} + +impl KubeDiscoveryClient { + /// Create a new Kubernetes discovery client + /// + /// # Arguments + /// * `metadata` - Shared metadata store (also used by system server) + /// * `cancel_token` - Cancellation token for shutdown + pub async fn new( + metadata: Arc>, + cancel_token: CancellationToken, + ) -> Result { + let pod_info = PodInfo::from_env()?; + let instance_id = hash_pod_name(&pod_info.pod_name); + + tracing::info!( + "Initializing KubeDiscoveryClient: pod_name={}, instance_id={:x}, namespace={}", + pod_info.pod_name, + instance_id, + pod_info.pod_namespace + ); + + let http_client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .map_err(|e| crate::error!("Failed to create HTTP client: {}", e))?; + + let kube_client = KubeClient::try_default() + .await + .map_err(|e| crate::error!("Failed to create Kubernetes client: {}", e))?; + + Ok(Self { + instance_id, + metadata, + http_client, + metadata_cache: Arc::new(RwLock::new(HashMap::new())), + pod_info, + cancel_token, + kube_client, + mock_metadata: false, + }) + } + + /// Create a new client for testing (doesn't require environment variables) + /// + 
/// This method is intended for testing only and bypasses the normal + /// environment variable requirements. When `mock_metadata` is true, + /// HTTP calls are skipped and mock metadata is returned. + #[doc(hidden)] + pub async fn new_for_testing( + kube_client: KubeClient, + pod_name: String, + pod_namespace: String, + mock_metadata: bool, + ) -> Result { + let instance_id = hash_pod_name(&pod_name); + let metadata = Arc::new(RwLock::new(DiscoveryMetadata::new())); + let cancel_token = CancellationToken::new(); + + let http_client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) + .build() + .map_err(|e| crate::error!("Failed to create HTTP client: {}", e))?; + + let pod_info = PodInfo { + pod_name, + pod_namespace, + system_port: 8080, + }; + + Ok(Self { + instance_id, + metadata, + http_client, + metadata_cache: Arc::new(RwLock::new(HashMap::new())), + pod_info, + cancel_token, + kube_client, + mock_metadata, + }) + } + + /// Generate mock metadata for testing + /// Returns a DiscoveryMetadata with one endpoint instance + fn create_mock_metadata(pod_name: &str) -> DiscoveryMetadata { + use crate::component::{Instance, TransportType}; + + let mut metadata = DiscoveryMetadata::new(); + let instance_id = hash_pod_name(pod_name); + + // Create a mock endpoint instance + let endpoint = DiscoveryInstance::Endpoint(Instance { + namespace: "test-namespace".to_string(), + component: "test-component".to_string(), + endpoint: "test-endpoint".to_string(), + instance_id, + transport: TransportType::NatsTcp("nats://test:4222".to_string()), + }); + + // Ignore errors in mock data creation + let _ = metadata.register_endpoint(endpoint); + + metadata + } + + /// Get metadata for a remote pod, using cache if available + async fn get_metadata(&self, pod_name: &str, pod_ip: &str) -> Result> { + let instance_id = hash_pod_name(pod_name); + + // Mock mode: return mock metadata without HTTP calls + if self.mock_metadata { + tracing::debug!( + "Mock mode: returning mock metadata for pod_name={}, instance_id={:x}", + pod_name, + instance_id + ); + let metadata = Self::create_mock_metadata(pod_name); + return Ok(Arc::new(metadata)); + } + + // Local test mode: parse port from pod name and use localhost + let target_host = if std::env::var("DYN_LOCAL_KUBE_TEST").is_ok() { + if let Some(port) = Self::parse_port_from_pod_name(pod_name) { + tracing::info!( + "Local test mode: using localhost:{} for pod {}", + port, + pod_name + ); + format!("localhost:{}", port) + } else { + tracing::warn!( + "Local test mode enabled but couldn't parse port from pod name: {}", + pod_name + ); + format!("{}:{}", pod_ip, self.pod_info.system_port) + } + } else { + format!("{}:{}", pod_ip, self.pod_info.system_port) + }; + + // Fast path: check cache + { + let cache = self.metadata_cache.read().await; + if let Some(cached) = cache.get(&instance_id) { + tracing::debug!( + "Cache hit for pod_name={}, instance_id={:x}", + pod_name, + instance_id + ); + return Ok(cached.metadata.clone()); + } + } + + // Cache miss: fetch from remote pod + tracing::debug!( + "Cache miss for pod_name={}, instance_id={:x}, fetching from {}", + pod_name, + instance_id, + target_host + ); + self.fetch_and_cache_from_host(instance_id, pod_name, &target_host).await + } + + /// Parse port number from pod name (format: pod-name-) + /// Returns Some(port) if successfully parsed, None otherwise + fn parse_port_from_pod_name(pod_name: &str) -> Option { + // Split by '-' and try to parse the last segment as a port number + 
pod_name.rsplit('-') + .next() + .and_then(|last| last.parse::().ok()) + } + + /// Fetch metadata from a remote pod and cache it + async fn fetch_and_cache_from_host( + &self, + instance_id: u64, + pod_name: &str, + target_host: &str, + ) -> Result> { + let url = format!("http://{}/metadata", target_host); + + tracing::debug!("Fetching metadata from {}", url); + + let response = self + .http_client + .get(&url) + .send() + .await + .map_err(|e| crate::error!("Failed to fetch metadata from {}: {}", url, e))?; + + let metadata: DiscoveryMetadata = response + .json() + .await + .map_err(|e| crate::error!("Failed to parse metadata from {}: {}", url, e))?; + + let metadata = Arc::new(metadata); + + // Store in cache + { + let mut cache = self.metadata_cache.write().await; + // Check again in case another task inserted while we were fetching + if let Some(existing) = cache.get(&instance_id) { + tracing::debug!( + "Another task cached metadata for instance_id={:x} while we were fetching", + instance_id + ); + return Ok(existing.metadata.clone()); + } + + cache.insert( + instance_id, + CachedMetadata { + metadata: metadata.clone(), + pod_ip: target_host.to_string(), + fetched_at: std::time::Instant::now(), + }, + ); + + tracing::debug!( + "Cached metadata for pod_name={}, instance_id={:x}", + pod_name, + instance_id + ); + } + + Ok(metadata) + } + + /// Invalidate cache entry for a given instance + async fn invalidate_cache(&self, instance_id: u64) { + let mut cache = self.metadata_cache.write().await; + if cache.remove(&instance_id).is_some() { + tracing::debug!("Invalidated cache for instance_id={:x}", instance_id); + } + } + + /// Build label selector for Kubernetes EndpointSlices from DiscoveryKey + fn build_label_selector(key: &DiscoveryKey) -> String { + match key { + DiscoveryKey::AllEndpoints => String::new(), + DiscoveryKey::NamespacedEndpoints { namespace } => { + format!("dynamo.nvidia.com/namespace={}", namespace) + } + DiscoveryKey::ComponentEndpoints { namespace, component } => { + format!("dynamo.nvidia.com/namespace={},dynamo.nvidia.com/component={}", namespace, component) + } + DiscoveryKey::Endpoint { namespace, component, .. } => { + format!("dynamo.nvidia.com/namespace={},dynamo.nvidia.com/component={}", namespace, component) + } + DiscoveryKey::AllModelCards => String::new(), + DiscoveryKey::NamespacedModelCards { namespace } => { + format!("dynamo.nvidia.com/namespace={}", namespace) + } + DiscoveryKey::ComponentModelCards { namespace, component } => { + format!("dynamo.nvidia.com/namespace={},dynamo.nvidia.com/component={}", namespace, component) + } + DiscoveryKey::EndpointModelCards { namespace, component, .. 
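            // For reference, ComponentEndpoints { namespace: "prod", component: "router" }
            // (values illustrative) yields
            // "dynamo.nvidia.com/namespace=prod,dynamo.nvidia.com/component=router".
            // Endpoint-level keys reuse the same two labels; narrowing to a single
            // endpoint happens later in filter_metadata(), not via the selector.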
} => { + format!("dynamo.nvidia.com/namespace={},dynamo.nvidia.com/component={}", namespace, component) + } + } + } + + /// Extract ready endpoints from an EndpointSlice + /// Returns (pod_name, pod_ip) pairs + fn extract_ready_endpoints(slice: &EndpointSlice) -> Vec<(String, String)> { + let mut result = Vec::new(); + + let endpoints = &slice.endpoints; + + for endpoint in endpoints { + // Check if endpoint is ready + let is_ready = endpoint.conditions.as_ref() + .and_then(|c| c.ready) + .unwrap_or(false); + + if !is_ready { + continue; + } + + // Get pod name from targetRef + let pod_name = match endpoint.target_ref.as_ref() { + Some(target_ref) => target_ref.name.as_deref().unwrap_or(""), + None => continue, + }; + + if pod_name.is_empty() { + continue; + } + + // Get IP addresses + for ip in &endpoint.addresses { + result.push((pod_name.to_string(), ip.clone())); + } + } + + result + } + + /// Extract instance IDs from an EndpointSlice (only ready endpoints) + fn extract_instance_ids(slice: &EndpointSlice) -> HashSet { + let mut ids = HashSet::new(); + + let endpoints = &slice.endpoints; + + for endpoint in endpoints { + // Only count ready endpoints + let is_ready = endpoint.conditions.as_ref() + .and_then(|c| c.ready) + .unwrap_or(false); + + if !is_ready { + continue; + } + + if let Some(target_ref) = &endpoint.target_ref { + if let Some(pod_name) = &target_ref.name { + ids.insert(hash_pod_name(pod_name)); + } + } + } + + ids + } + + /// Extract endpoint information from an EndpointSlice + /// Returns (instance_id, pod_name, pod_ip) tuples for ready endpoints + fn extract_endpoint_info(slice: &EndpointSlice) -> Vec<(u64, String, String)> { + let mut result = Vec::new(); + + let endpoints = &slice.endpoints; + + for endpoint in endpoints { + // Check if endpoint is ready + let is_ready = endpoint.conditions.as_ref() + .and_then(|c| c.ready) + .unwrap_or(false); + + if !is_ready { + continue; + } + + // Get pod name from targetRef + let pod_name = match endpoint.target_ref.as_ref() { + Some(target_ref) => target_ref.name.as_deref().unwrap_or(""), + None => continue, + }; + + if pod_name.is_empty() { + continue; + } + + let instance_id = hash_pod_name(pod_name); + + // Get IP addresses + for ip in &endpoint.addresses { + result.push((instance_id, pod_name.to_string(), ip.clone())); + } + } + + result + } + + /// Filter metadata instances by DiscoveryKey + fn filter_metadata( + metadata: &DiscoveryMetadata, + key: &DiscoveryKey, + _instance_id: u64, + ) -> Vec { + let mut result = Vec::new(); + + match key { + DiscoveryKey::AllEndpoints => { + result.extend(metadata.get_all_endpoints()); + } + DiscoveryKey::NamespacedEndpoints { namespace } => { + for instance in metadata.get_all_endpoints() { + if let DiscoveryInstance::Endpoint(ref inst) = instance { + if &inst.namespace == namespace { + result.push(instance); + } + } + } + } + DiscoveryKey::ComponentEndpoints { namespace, component } => { + for instance in metadata.get_all_endpoints() { + if let DiscoveryInstance::Endpoint(ref inst) = instance { + if &inst.namespace == namespace && &inst.component == component { + result.push(instance); + } + } + } + } + DiscoveryKey::Endpoint { namespace, component, endpoint } => { + for instance in metadata.get_all_endpoints() { + if let DiscoveryInstance::Endpoint(ref inst) = instance { + if &inst.namespace == namespace + && &inst.component == component + && &inst.endpoint == endpoint { + result.push(instance); + } + } + } + } + DiscoveryKey::AllModelCards => { + 
result.extend(metadata.get_all_model_cards()); + } + DiscoveryKey::NamespacedModelCards { namespace } => { + for instance in metadata.get_all_model_cards() { + if let DiscoveryInstance::ModelCard { namespace: ns, .. } = &instance { + if ns == namespace { + result.push(instance); + } + } + } + } + DiscoveryKey::ComponentModelCards { namespace, component } => { + for instance in metadata.get_all_model_cards() { + if let DiscoveryInstance::ModelCard { + namespace: ns, + component: comp, + .. + } = &instance { + if ns == namespace && comp == component { + result.push(instance); + } + } + } + } + DiscoveryKey::EndpointModelCards { namespace, component, endpoint } => { + for instance in metadata.get_all_model_cards() { + if let DiscoveryInstance::ModelCard { + namespace: ns, + component: comp, + endpoint: ep, + .. + } = &instance { + if ns == namespace && comp == component && ep == endpoint { + result.push(instance); + } + } + } + } + } + + result + } +} + +#[async_trait] +impl DiscoveryClient for KubeDiscoveryClient { + fn instance_id(&self) -> u64 { + self.instance_id + } + + async fn register(&self, spec: DiscoverySpec) -> Result { + let instance_id = self.instance_id(); + let instance = spec.with_instance_id(instance_id); + + tracing::debug!( + "Registering instance: {:?} with instance_id={:x}", + instance, + instance_id + ); + + // Write to local metadata + let mut metadata = self.metadata.write().await; + match &instance { + DiscoveryInstance::Endpoint(inst) => { + tracing::info!( + "Registered endpoint: namespace={}, component={}, endpoint={}, instance_id={:x}", + inst.namespace, + inst.component, + inst.endpoint, + instance_id + ); + metadata.register_endpoint(instance.clone())?; + } + DiscoveryInstance::ModelCard { + namespace, + component, + endpoint, + .. 
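            // Sketch of how a caller reaches this arm; `MyCard` stands in for any
            // Serialize-able card type and is not part of this patch:
            //
            //   let spec = DiscoverySpec::from_model_card(
            //       "ns".to_string(), "comp".to_string(), "ep".to_string(), &my_card)?;
            //   let registered = discovery_client.register(spec).await?;
            //   let card_back: MyCard = registered.deserialize_model_card()?;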
+ } => { + tracing::info!( + "Registered model card: namespace={}, component={}, endpoint={}, instance_id={:x}", + namespace, + component, + endpoint, + instance_id + ); + metadata.register_model_card(instance.clone())?; + } + } + + Ok(instance) + } + + async fn list(&self, key: DiscoveryKey) -> Result> { + use futures::StreamExt; + + tracing::debug!("KubeDiscoveryClient::list called with key={:?}", key); + + // Build label selector + let label_selector = Self::build_label_selector(&key); + tracing::debug!("Using label selector: {}", label_selector); + + // Query EndpointSlices in our namespace only + let endpoint_slices: Api = Api::namespaced( + self.kube_client.clone(), + &self.pod_info.pod_namespace, + ); + let mut list_params = ListParams::default(); + if !label_selector.is_empty() { + list_params = list_params.labels(&label_selector); + } + + tracing::debug!( + "Listing EndpointSlices in namespace: {}", + self.pod_info.pod_namespace + ); + + let slices = endpoint_slices + .list(&list_params) + .await + .map_err(|e| crate::error!("Failed to list EndpointSlices: {}", e))?; + + tracing::debug!("Found {} EndpointSlices", slices.items.len()); + + // Extract ready endpoints + let ready_endpoints: Vec<(String, String)> = slices + .items + .iter() + .flat_map(Self::extract_ready_endpoints) + .collect(); + + tracing::debug!("Found {} ready endpoints", ready_endpoints.len()); + + // Fetch metadata concurrently with rate limiting + let metadata_futures = ready_endpoints.into_iter().map(|(pod_name, pod_ip)| { + let client = self.clone(); + async move { + match client.get_metadata(&pod_name, &pod_ip).await { + Ok(metadata) => Some((hash_pod_name(&pod_name), metadata)), + Err(e) => { + tracing::warn!( + "Failed to fetch metadata from pod {} ({}): {}", + pod_name, + pod_ip, + e + ); + None + } + } + } + }); + + let results: Vec<_> = futures::stream::iter(metadata_futures) + .buffer_unordered(20) + .collect() + .await; + + // Filter and collect instances + let mut instances = Vec::new(); + for result in results { + if let Some((instance_id, metadata)) = result { + let filtered = Self::filter_metadata(&metadata, &key, instance_id); + instances.extend(filtered); + } + } + + tracing::info!( + "KubeDiscoveryClient::list returning {} instances for key={:?}", + instances.len(), + key + ); + + Ok(instances) + } + + async fn list_and_watch(&self, key: DiscoveryKey) -> Result { + use futures::{StreamExt, future}; + use tokio::sync::mpsc; + + tracing::info!( + "KubeDiscoveryClient::list_and_watch started for key={:?} in namespace={}", + key, + self.pod_info.pod_namespace + ); + + // Build label selector + let label_selector = Self::build_label_selector(&key); + + // Create EndpointSlice API and watcher (scoped to our namespace) + let endpoint_slices: Api = Api::namespaced( + self.kube_client.clone(), + &self.pod_info.pod_namespace, + ); + let mut watch_config = Config::default(); + if !label_selector.is_empty() { + watch_config = watch_config.labels(&label_selector); + } + + tracing::debug!( + "Watching EndpointSlices in namespace: {} with label selector: {:?}", + self.pod_info.pod_namespace, + label_selector + ); + + // Create reflector to maintain complete current state + let (reader, writer) = reflector::store(); + + // Generate unique stream identifier for tracing + let stream_id = uuid::Uuid::new_v4(); + + // Set up reflector stream that polls forever to keep store updated + let reflector_stream = reflector(writer, watcher(endpoint_slices, watch_config)) + .default_backoff() + .touched_objects() + 
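            // `reflector(writer, watcher(...))` keeps `reader` populated with the full
            // current set of EndpointSlices matching the selector; this stream only has
            // to be drained so the store stays fresh. The monitor task below then reads
            // `reader.state()` on a fixed interval and diffs instance ids itself rather
            // than interpreting individual watch events.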
.for_each(move |res| { + future::ready(match res { + Ok(obj) => { + tracing::debug!( + stream_id = %stream_id, + slice_name = obj.metadata.name.as_deref().unwrap_or("unknown"), + "Reflector updated" + ); + } + Err(e) => { + tracing::warn!( + stream_id = %stream_id, + error = %e, + "Reflector error" + ); + } + }) + }); + + // Spawn background task to poll reflector forever + tokio::spawn(reflector_stream); + + // Track known instances for diffing + let known_instances = Arc::new(RwLock::new(HashSet::::new())); + let client = self.clone(); + let key_clone = key.clone(); + + // Create channel for emitting discovery events + let (tx, rx) = mpsc::unbounded_channel(); + + // Spawn task that watches the reflector store and emits events + tokio::spawn(async move { + let mut interval = tokio::time::interval(tokio::time::Duration::from_millis(500)); + + tracing::debug!( + stream_id = %stream_id, + "Store monitor started for key={:?}", + key_clone + ); + + loop { + interval.tick().await; + + // Get complete current state from reflector + let all_slices: Vec = reader.state() + .iter() + .map(|arc_slice| arc_slice.as_ref().clone()) + .collect(); + + // Debug: print all slices + // let slice_names: Vec = all_slices.iter() + // .map(|s| s.metadata.name.as_deref().unwrap_or("unnamed").to_string()) + // .collect(); + // tracing::debug!( + // stream_id = %stream_id, + // slice_count = all_slices.len(), + // slices = ?slice_names, + // "Store monitor tick - all slices" + // ); + + // Extract ALL current instances from ALL slices + let current_instances: HashSet = all_slices.iter() + .flat_map(Self::extract_instance_ids) + .collect(); + + // Build endpoint info map for fetching + let mut endpoint_info_map = HashMap::new(); + for slice in &all_slices { + let endpoint_infos = Self::extract_endpoint_info(slice); + for (instance_id, pod_name, pod_ip) in endpoint_infos { + endpoint_info_map.entry(instance_id) + .or_insert((pod_name, pod_ip)); + } + } + + // Diff against previous state + let prev_instances = known_instances.read().await.clone(); + let added: Vec<_> = current_instances.difference(&prev_instances).copied().collect(); + let removed: Vec<_> = prev_instances.difference(¤t_instances).copied().collect(); + + if !added.is_empty() || !removed.is_empty() { + tracing::debug!( + stream_id = %stream_id, + added = added.len(), + removed = removed.len(), + total = current_instances.len(), + "State diff computed" + ); + } + + // Update known_instances before fetching + *known_instances.write().await = current_instances.clone(); + + // Fetch metadata for new instances concurrently + let fetch_futures: Vec<_> = added.iter().filter_map(|&instance_id| { + endpoint_info_map.get(&instance_id).map(|(pod_name, pod_ip)| { + let client = client.clone(); + let pod_name = pod_name.clone(); + let pod_ip = pod_ip.clone(); + let key_clone = key_clone.clone(); + let known_instances = known_instances.clone(); + + async move { + match client.get_metadata(&pod_name, &pod_ip).await { + Ok(metadata) => { + // Fetch-after-delete guard: check if still in known set + if known_instances.read().await.contains(&instance_id) { + let instances = Self::filter_metadata(&metadata, &key_clone, instance_id); + Some((instance_id, instances)) + } else { + tracing::debug!( + stream_id = %stream_id, + instance_id = format!("{:x}", instance_id), + "Instance removed before fetch completed, skipping" + ); + None + } + } + Err(e) => { + tracing::warn!( + stream_id = %stream_id, + pod_name = %pod_name, + error = %e, + "Failed to fetch metadata" + ); + None 
+ } + } + } + }) + }).collect(); + + // Fetch concurrently and emit Added events + let results: Vec<_> = futures::stream::iter(fetch_futures) + .buffer_unordered(20) + .collect() + .await; + + for result in results { + if let Some((_instance_id, instances)) = result { + for instance in instances { + tracing::info!( + stream_id = %stream_id, + instance_id = format!("{:x}", instance.instance_id()), + "Emitting Added event" + ); + if tx.send(Ok(DiscoveryEvent::Added(instance))).is_err() { + tracing::debug!(stream_id = %stream_id, "Receiver dropped, stopping monitor"); + return; + } + } + } + } + + // Emit Removed events + for instance_id in removed { + tracing::info!( + stream_id = %stream_id, + instance_id = format!("{:x}", instance_id), + "Emitting Removed event" + ); + client.invalidate_cache(instance_id).await; + if tx.send(Ok(DiscoveryEvent::Removed(instance_id))).is_err() { + tracing::debug!(stream_id = %stream_id, "Receiver dropped, stopping monitor"); + return; + } + } + } + }); + + // Convert receiver to stream + let stream = tokio_stream::wrappers::UnboundedReceiverStream::new(rx); + Ok(Box::pin(stream)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::component::TransportType; + + #[test] + fn test_hash_consistency() { + let pod_name = "test-pod-123"; + let hash1 = hash_pod_name(pod_name); + let hash2 = hash_pod_name(pod_name); + assert_eq!(hash1, hash2, "Hash should be consistent"); + } + + #[test] + fn test_hash_uniqueness() { + let hash1 = hash_pod_name("pod-1"); + let hash2 = hash_pod_name("pod-2"); + assert_ne!(hash1, hash2, "Different pods should have different hashes"); + } + + #[test] + fn test_metadata_serde() { + let mut metadata = DiscoveryMetadata::new(); + + // Add an endpoint + let instance = DiscoveryInstance::Endpoint(crate::component::Instance { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: "ep1".to_string(), + instance_id: 123, + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }); + + metadata.register_endpoint(instance).unwrap(); + + // Serialize + let json = serde_json::to_string(&metadata).unwrap(); + + // Deserialize + let deserialized: DiscoveryMetadata = serde_json::from_str(&json).unwrap(); + + assert_eq!(deserialized.endpoints.len(), 1); + assert_eq!(deserialized.model_cards.len(), 0); + } + + #[tokio::test] + async fn test_concurrent_registration() { + let metadata = Arc::new(RwLock::new(DiscoveryMetadata::new())); + + // Spawn multiple tasks registering concurrently + let handles: Vec<_> = (0..10) + .map(|i| { + let metadata = metadata.clone(); + tokio::spawn(async move { + let mut meta = metadata.write().await; + let instance = DiscoveryInstance::Endpoint(crate::component::Instance { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: format!("ep{}", i), + instance_id: i, + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }); + meta.register_endpoint(instance).unwrap(); + }) + }) + .collect(); + + // Wait for all to complete + for handle in handles { + handle.await.unwrap(); + } + + // Verify all registrations succeeded + let meta = metadata.read().await; + assert_eq!(meta.endpoints.len(), 10); + } + + #[test] + fn test_endpoint_key() { + let key1 = make_endpoint_key("ns1", "comp1", "ep1"); + let key2 = make_endpoint_key("ns1", "comp1", "ep1"); + let key3 = make_endpoint_key("ns1", "comp1", "ep2"); + + assert_eq!(key1, key2); + assert_ne!(key1, key3); + assert_eq!(key1, "ns1/comp1/ep1"); + } + + #[test] + fn 
test_parse_port_from_pod_name() { + // Valid port numbers + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("dynamo-test-worker-8080"), + Some(8080) + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("my-service-9000"), + Some(9000) + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("test-3000"), + Some(3000) + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("a-b-c-80"), + Some(80) + ); + + // Invalid - no port number at end + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("dynamo-test-worker"), + None + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("8080-worker"), + None // Port at beginning, not end + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name("worker-abc"), + None // Not a number + ); + assert_eq!( + KubeDiscoveryClient::parse_port_from_pod_name(""), + None // Empty string + ); + } + + #[tokio::test] + async fn test_metadata_accessors() { + let mut metadata = DiscoveryMetadata::new(); + + // Register endpoints + for i in 0..3 { + let instance = DiscoveryInstance::Endpoint(crate::component::Instance { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: format!("ep{}", i), + instance_id: i, + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }); + metadata.register_endpoint(instance).unwrap(); + } + + // Register model cards + for i in 0..2 { + let instance = DiscoveryInstance::ModelCard { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: format!("ep{}", i), + instance_id: i, + card_json: serde_json::json!({"model": "test"}), + }; + metadata.register_model_card(instance).unwrap(); + } + + assert_eq!(metadata.get_all_endpoints().len(), 3); + assert_eq!(metadata.get_all_model_cards().len(), 2); + assert_eq!(metadata.get_all().len(), 5); + } +} + diff --git a/lib/runtime/src/discovery/kv_store.rs b/lib/runtime/src/discovery/kv_store.rs new file mode 100644 index 0000000000..e284614e21 --- /dev/null +++ b/lib/runtime/src/discovery/kv_store.rs @@ -0,0 +1,470 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
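// Key layout used by this backend (keys are relative to their bucket and the
// values are the serialized `DiscoveryInstance`):
//
//   bucket "v1/instances": {namespace}/{component}/{endpoint}/{instance_id:x}
//   bucket "v1/mdc":       {namespace}/{component}/{endpoint}/{instance_id:x}
//
// `list()` turns a `DiscoveryKey` into a prefix over these paths, and the watch
// path recovers the instance id of a deleted entry from the trailing hex segment.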
+// SPDX-License-Identifier: Apache-2.0 + +use crate::storage::key_value_store::{KeyValueStoreManager, WatchEvent}; +use crate::{CancellationToken, Result}; +use async_trait::async_trait; +use futures::{Stream, StreamExt}; +use std::pin::Pin; +use std::sync::Arc; + +use super::{DiscoveryClient, DiscoveryEvent, DiscoveryInstance, DiscoveryKey, DiscoverySpec, DiscoveryStream}; + +const INSTANCES_BUCKET: &str = "v1/instances"; +const MODEL_CARDS_BUCKET: &str = "v1/mdc"; + +/// Discovery client implementation backed by a KeyValueStore +pub struct KVStoreDiscoveryClient { + store: Arc, + cancel_token: CancellationToken, +} + +impl KVStoreDiscoveryClient { + pub fn new(store: KeyValueStoreManager, cancel_token: CancellationToken) -> Self { + Self { + store: Arc::new(store), + cancel_token, + } + } + + /// Build the key path for an endpoint (relative to bucket, not absolute) + fn endpoint_key(namespace: &str, component: &str, endpoint: &str, instance_id: u64) -> String { + format!("{}/{}/{}/{:x}", namespace, component, endpoint, instance_id) + } + + /// Build the key path for a model card (relative to bucket, not absolute) + fn model_card_key(namespace: &str, component: &str, endpoint: &str, instance_id: u64) -> String { + format!("{}/{}/{}/{:x}", namespace, component, endpoint, instance_id) + } + + /// Extract prefix for querying based on discovery key + fn key_prefix(key: &DiscoveryKey) -> String { + match key { + DiscoveryKey::AllEndpoints => INSTANCES_BUCKET.to_string(), + DiscoveryKey::NamespacedEndpoints { namespace } => { + format!("{}/{}", INSTANCES_BUCKET, namespace) + } + DiscoveryKey::ComponentEndpoints { namespace, component } => { + format!("{}/{}/{}", INSTANCES_BUCKET, namespace, component) + } + DiscoveryKey::Endpoint { namespace, component, endpoint } => { + format!("{}/{}/{}/{}", INSTANCES_BUCKET, namespace, component, endpoint) + } + DiscoveryKey::AllModelCards => MODEL_CARDS_BUCKET.to_string(), + DiscoveryKey::NamespacedModelCards { namespace } => { + format!("{}/{}", MODEL_CARDS_BUCKET, namespace) + } + DiscoveryKey::ComponentModelCards { namespace, component } => { + format!("{}/{}/{}", MODEL_CARDS_BUCKET, namespace, component) + } + DiscoveryKey::EndpointModelCards { namespace, component, endpoint } => { + format!("{}/{}/{}/{}", MODEL_CARDS_BUCKET, namespace, component, endpoint) + } + } + } + + /// Check if a key matches the given discovery key filter + fn matches_prefix(key_str: &str, prefix: &str) -> bool { + key_str.starts_with(prefix) + } + + /// Parse and deserialize a discovery instance from KV store entry + fn parse_instance(value: &[u8]) -> Result { + let instance: DiscoveryInstance = serde_json::from_slice(value)?; + Ok(instance) + } +} + +#[async_trait] +impl DiscoveryClient for KVStoreDiscoveryClient { + fn instance_id(&self) -> u64 { + self.store.connection_id() + } + + async fn register(&self, spec: DiscoverySpec) -> Result { + let instance_id = self.instance_id(); + let instance = spec.with_instance_id(instance_id); + + let (bucket_name, key_path) = match &instance { + DiscoveryInstance::Endpoint(inst) => { + let key = Self::endpoint_key( + &inst.namespace, + &inst.component, + &inst.endpoint, + inst.instance_id, + ); + tracing::debug!( + "KVStoreDiscoveryClient::register: Registering endpoint instance_id={}, namespace={}, component={}, endpoint={}, key={}", + inst.instance_id, + inst.namespace, + inst.component, + inst.endpoint, + key + ); + (INSTANCES_BUCKET, key) + } + DiscoveryInstance::ModelCard { + namespace, + component, + endpoint, + instance_id, + .. 
+ } => { + let key = Self::model_card_key(namespace, component, endpoint, *instance_id); + tracing::debug!( + "KVStoreDiscoveryClient::register: Registering model card instance_id={}, namespace={}, component={}, endpoint={}, key={}", + instance_id, + namespace, + component, + endpoint, + key + ); + (MODEL_CARDS_BUCKET, key) + } + }; + + // Serialize the instance + let instance_json = serde_json::to_vec(&instance)?; + tracing::debug!( + "KVStoreDiscoveryClient::register: Serialized instance to {} bytes for key={}", + instance_json.len(), + key_path + ); + + // Store in the KV store with no TTL (instances persist until explicitly removed) + tracing::debug!( + "KVStoreDiscoveryClient::register: Getting/creating bucket={} for key={}", + bucket_name, + key_path + ); + let bucket = self + .store + .get_or_create_bucket(bucket_name, None) + .await?; + let key = crate::storage::key_value_store::Key::from_raw(key_path.clone()); + + tracing::debug!( + "KVStoreDiscoveryClient::register: Inserting into bucket={}, key={}", + bucket_name, + key_path + ); + // Use revision 0 for initial registration + let outcome = bucket.insert(&key, instance_json.into(), 0).await?; + tracing::info!( + "KVStoreDiscoveryClient::register: Successfully registered instance_id={}, key={}, outcome={:?}", + instance_id, + key_path, + outcome + ); + + Ok(instance) + } + + async fn list(&self, key: DiscoveryKey) -> Result> { + let prefix = Self::key_prefix(&key); + let bucket_name = if prefix.starts_with(INSTANCES_BUCKET) { + INSTANCES_BUCKET + } else { + MODEL_CARDS_BUCKET + }; + + // Get bucket - if it doesn't exist, return empty list + let Some(bucket) = self.store.get_bucket(bucket_name).await? else { + return Ok(Vec::new()); + }; + + // Get all entries from the bucket + let entries = bucket.entries().await?; + + // Filter by prefix and deserialize + let mut instances = Vec::new(); + for (key_str, value) in entries { + if Self::matches_prefix(&key_str, &prefix) { + match Self::parse_instance(&value) { + Ok(instance) => instances.push(instance), + Err(e) => { + tracing::warn!(key = %key_str, error = %e, "Failed to parse discovery instance"); + } + } + } + } + + Ok(instances) + } + + async fn list_and_watch(&self, key: DiscoveryKey) -> Result { + let prefix = Self::key_prefix(&key); + let bucket_name = if prefix.starts_with(INSTANCES_BUCKET) { + INSTANCES_BUCKET + } else { + MODEL_CARDS_BUCKET + }; + + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Starting watch for key={:?}, prefix={}, bucket={}", + key, + prefix, + bucket_name + ); + + // Use the KeyValueStoreManager's watch mechanism + let (_, mut rx) = self.store.clone().watch( + bucket_name, + None, // No TTL + self.cancel_token.clone(), + ); + + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Got watch receiver for bucket={}", + bucket_name + ); + + // Create a stream that filters and transforms WatchEvents to DiscoveryEvents + let stream = async_stream::stream! 
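        // Shape of the translation performed inside the stream below:
        //
        //   WatchEvent::Put(kv)    -> parse a `DiscoveryInstance` from kv.value()
        //                             and emit DiscoveryEvent::Added(instance)
        //   WatchEvent::Delete(kv) -> delete events carry no value, so the instance id
        //                             is parsed from the trailing hex segment of
        //                             kv.key_str() and emitted as
        //                             DiscoveryEvent::Removed(instance_id)
        //
        // Events whose key does not start with the prefix derived from the
        // DiscoveryKey are skipped.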
{ + let mut event_count = 0; + tracing::debug!("KVStoreDiscoveryClient::list_and_watch: Stream started, waiting for events on prefix={}", prefix); + while let Some(event) = rx.recv().await { + event_count += 1; + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Received event #{} for prefix={}", + event_count, + prefix + ); + let discovery_event = match event { + WatchEvent::Put(kv) => { + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Put event, key={}, prefix={}, matches={}", + kv.key_str(), + prefix, + Self::matches_prefix(kv.key_str(), &prefix) + ); + // Check if this key matches our prefix + if !Self::matches_prefix(kv.key_str(), &prefix) { + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Skipping key {} (doesn't match prefix {})", + kv.key_str(), + prefix + ); + continue; + } + + match Self::parse_instance(kv.value()) { + Ok(instance) => { + tracing::info!( + "KVStoreDiscoveryClient::list_and_watch: Emitting Added event for instance_id={}, key={}", + instance.instance_id(), + kv.key_str() + ); + Some(DiscoveryEvent::Added(instance)) + }, + Err(e) => { + tracing::warn!( + key = %kv.key_str(), + error = %e, + "Failed to parse discovery instance from watch event" + ); + None + } + } + } + WatchEvent::Delete(kv) => { + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Delete event, key={}, prefix={}", + kv.key_str(), + prefix + ); + // Check if this key matches our prefix + if !Self::matches_prefix(kv.key_str(), &prefix) { + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Skipping deleted key {} (doesn't match prefix {})", + kv.key_str(), + prefix + ); + continue; + } + + // Extract instance_id from the key path, not the value + // Delete events have empty values in etcd, so we parse the instance_id from the key + // Key format: "v1/instances/namespace/component/endpoint/{instance_id:x}" + let key_parts: Vec<&str> = kv.key_str().split('/').collect(); + match key_parts.last() { + Some(instance_id_hex) => { + match u64::from_str_radix(instance_id_hex, 16) { + Ok(instance_id) => { + tracing::info!( + "KVStoreDiscoveryClient::list_and_watch: Emitting Removed event for instance_id={}, key={}", + instance_id, + kv.key_str() + ); + Some(DiscoveryEvent::Removed(instance_id)) + } + Err(e) => { + tracing::warn!( + key = %kv.key_str(), + error = %e, + "Failed to parse instance_id hex from deleted key" + ); + None + } + } + } + None => { + tracing::warn!( + key = %kv.key_str(), + "Delete event key has no path components" + ); + None + } + } + } + }; + + if let Some(event) = discovery_event { + tracing::debug!("KVStoreDiscoveryClient::list_and_watch: Yielding event: {:?}", event); + yield Ok(event); + } else { + tracing::debug!("KVStoreDiscoveryClient::list_and_watch: Event was filtered out (None)"); + } + } + tracing::debug!("KVStoreDiscoveryClient::list_and_watch: Stream ended after {} events for prefix={}", event_count, prefix); + }; + + tracing::debug!( + "KVStoreDiscoveryClient::list_and_watch: Returning stream for key={:?}", + key + ); + Ok(Box::pin(stream)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::component::TransportType; + + #[tokio::test] + async fn test_kv_store_discovery_register_endpoint() { + let store = KeyValueStoreManager::memory(); + let cancel_token = CancellationToken::new(); + let client = KVStoreDiscoveryClient::new(store, cancel_token); + + let spec = DiscoverySpec::Endpoint { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: "ep1".to_string(), + transport: 
TransportType::NatsTcp("nats://localhost:4222".to_string()), + }; + + let instance = client.register(spec).await.unwrap(); + + match instance { + DiscoveryInstance::Endpoint(inst) => { + assert_eq!(inst.namespace, "test"); + assert_eq!(inst.component, "comp1"); + assert_eq!(inst.endpoint, "ep1"); + } + _ => panic!("Expected Endpoint instance"), + } + } + + #[tokio::test] + async fn test_kv_store_discovery_list() { + let store = KeyValueStoreManager::memory(); + let cancel_token = CancellationToken::new(); + let client = KVStoreDiscoveryClient::new(store, cancel_token); + + // Register multiple endpoints + let spec1 = DiscoverySpec::Endpoint { + namespace: "ns1".to_string(), + component: "comp1".to_string(), + endpoint: "ep1".to_string(), + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }; + client.register(spec1).await.unwrap(); + + let spec2 = DiscoverySpec::Endpoint { + namespace: "ns1".to_string(), + component: "comp1".to_string(), + endpoint: "ep2".to_string(), + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }; + client.register(spec2).await.unwrap(); + + let spec3 = DiscoverySpec::Endpoint { + namespace: "ns2".to_string(), + component: "comp2".to_string(), + endpoint: "ep1".to_string(), + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }; + client.register(spec3).await.unwrap(); + + // List all endpoints + let all = client.list(DiscoveryKey::AllEndpoints).await.unwrap(); + assert_eq!(all.len(), 3); + + // List namespaced endpoints + let ns1 = client + .list(DiscoveryKey::NamespacedEndpoints { + namespace: "ns1".to_string(), + }) + .await + .unwrap(); + assert_eq!(ns1.len(), 2); + + // List component endpoints + let comp1 = client + .list(DiscoveryKey::ComponentEndpoints { + namespace: "ns1".to_string(), + component: "comp1".to_string(), + }) + .await + .unwrap(); + assert_eq!(comp1.len(), 2); + } + + #[tokio::test] + async fn test_kv_store_discovery_watch() { + let store = KeyValueStoreManager::memory(); + let cancel_token = CancellationToken::new(); + let client = Arc::new(KVStoreDiscoveryClient::new(store, cancel_token.clone())); + + // Start watching before registering + let mut stream = client + .list_and_watch(DiscoveryKey::AllEndpoints) + .await + .unwrap(); + + let client_clone = client.clone(); + let register_task = tokio::spawn(async move { + tokio::time::sleep(tokio::time::Duration::from_millis(50)).await; + + let spec = DiscoverySpec::Endpoint { + namespace: "test".to_string(), + component: "comp1".to_string(), + endpoint: "ep1".to_string(), + transport: TransportType::NatsTcp("nats://localhost:4222".to_string()), + }; + client_clone.register(spec).await.unwrap(); + }); + + // Wait for the added event + let event = stream.next().await.unwrap().unwrap(); + match event { + DiscoveryEvent::Added(instance) => { + match instance { + DiscoveryInstance::Endpoint(inst) => { + assert_eq!(inst.namespace, "test"); + assert_eq!(inst.component, "comp1"); + assert_eq!(inst.endpoint, "ep1"); + } + _ => panic!("Expected Endpoint instance"), + } + } + _ => panic!("Expected Added event"), + } + + register_task.await.unwrap(); + cancel_token.cancel(); + } +} + diff --git a/lib/runtime/src/discovery/mock.rs b/lib/runtime/src/discovery/mock.rs index 5ab66168c5..4c3b0f39f6 100644 --- a/lib/runtime/src/discovery/mock.rs +++ b/lib/runtime/src/discovery/mock.rs @@ -46,37 +46,46 @@ impl MockDiscoveryClient { /// Helper function to check if an instance matches a discovery key query fn matches_key(instance: 
&DiscoveryInstance, key: &DiscoveryKey) -> bool { match (instance, key) { - (DiscoveryInstance::Endpoint { .. }, DiscoveryKey::AllEndpoints) => true, + // Endpoint matching + (DiscoveryInstance::Endpoint(_), DiscoveryKey::AllEndpoints) => true, ( - DiscoveryInstance::Endpoint { - namespace: ins_ns, .. - }, + DiscoveryInstance::Endpoint(inst), DiscoveryKey::NamespacedEndpoints { namespace }, - ) => ins_ns == namespace, + ) => &inst.namespace == namespace, ( - DiscoveryInstance::Endpoint { - namespace: ins_ns, - component: ins_comp, - .. - }, + DiscoveryInstance::Endpoint(inst), DiscoveryKey::ComponentEndpoints { namespace, component, }, - ) => ins_ns == namespace && ins_comp == component, + ) => &inst.namespace == namespace && &inst.component == component, ( - DiscoveryInstance::Endpoint { - namespace: ins_ns, - component: ins_comp, - endpoint: ins_ep, - .. - }, + DiscoveryInstance::Endpoint(inst), DiscoveryKey::Endpoint { namespace, component, endpoint, }, - ) => ins_ns == namespace && ins_comp == component && ins_ep == endpoint, + ) => &inst.namespace == namespace && &inst.component == component && &inst.endpoint == endpoint, + + // ModelCard matching + (DiscoveryInstance::ModelCard { .. }, DiscoveryKey::AllModelCards) => true, + ( + DiscoveryInstance::ModelCard { namespace: inst_ns, .. }, + DiscoveryKey::NamespacedModelCards { namespace }, + ) => inst_ns == namespace, + ( + DiscoveryInstance::ModelCard { namespace: inst_ns, component: inst_comp, .. }, + DiscoveryKey::ComponentModelCards { namespace, component }, + ) => inst_ns == namespace && inst_comp == component, + ( + DiscoveryInstance::ModelCard { namespace: inst_ns, component: inst_comp, endpoint: inst_ep, .. }, + DiscoveryKey::EndpointModelCards { namespace, component, endpoint }, + ) => inst_ns == namespace && inst_comp == component && inst_ep == endpoint, + + // Cross-type matches return false + (DiscoveryInstance::Endpoint(_), DiscoveryKey::AllModelCards | DiscoveryKey::NamespacedModelCards { .. } | DiscoveryKey::ComponentModelCards { .. } | DiscoveryKey::EndpointModelCards { .. }) => false, + (DiscoveryInstance::ModelCard { .. }, DiscoveryKey::AllEndpoints | DiscoveryKey::NamespacedEndpoints { .. } | DiscoveryKey::ComponentEndpoints { .. } | DiscoveryKey::Endpoint { .. }) => false, } } @@ -98,6 +107,15 @@ impl DiscoveryClient for MockDiscoveryClient { Ok(instance) } + async fn list(&self, key: DiscoveryKey) -> Result> { + let instances = self.registry.instances.lock().unwrap(); + Ok(instances + .iter() + .filter(|instance| matches_key(instance, &key)) + .cloned() + .collect()) + } + async fn list_and_watch(&self, key: DiscoveryKey) -> Result { use std::collections::HashSet; @@ -118,14 +136,16 @@ impl DiscoveryClient for MockDiscoveryClient { let current_ids: HashSet<_> = current.iter().map(|i| { match i { - DiscoveryInstance::Endpoint { instance_id, .. } => *instance_id, + DiscoveryInstance::Endpoint(inst) => inst.instance_id, + DiscoveryInstance::ModelCard { instance_id, .. } => *instance_id, } }).collect(); // Emit Added events for new instances for instance in current { let id = match &instance { - DiscoveryInstance::Endpoint { instance_id, .. } => *instance_id, + DiscoveryInstance::Endpoint(inst) => inst.instance_id, + DiscoveryInstance::ModelCard { instance_id, .. 
} => *instance_id, }; if known_instances.insert(id) { yield Ok(DiscoveryEvent::Added(instance)); @@ -161,6 +181,7 @@ mod tests { namespace: "test-ns".to_string(), component: "test-comp".to_string(), endpoint: "test-ep".to_string(), + transport: crate::component::TransportType::NatsTcp("test-subject".to_string()), }; let key = DiscoveryKey::Endpoint { @@ -177,8 +198,8 @@ mod tests { let event = stream.next().await.unwrap().unwrap(); match event { - DiscoveryEvent::Added(DiscoveryInstance::Endpoint { instance_id, .. }) => { - assert_eq!(instance_id, 1); + DiscoveryEvent::Added(DiscoveryInstance::Endpoint(inst)) => { + assert_eq!(inst.instance_id, 1); } _ => panic!("Expected Added event for instance-1"), } @@ -188,15 +209,16 @@ mod tests { let event = stream.next().await.unwrap().unwrap(); match event { - DiscoveryEvent::Added(DiscoveryInstance::Endpoint { instance_id, .. }) => { - assert_eq!(instance_id, 2); + DiscoveryEvent::Added(DiscoveryInstance::Endpoint(inst)) => { + assert_eq!(inst.instance_id, 2); } _ => panic!("Expected Added event for instance-2"), } // Remove first instance registry.instances.lock().unwrap().retain(|i| match i { - DiscoveryInstance::Endpoint { instance_id, .. } => *instance_id != 1, + DiscoveryInstance::Endpoint(inst) => inst.instance_id != 1, + DiscoveryInstance::ModelCard { instance_id, .. } => *instance_id != 1, }); let event = stream.next().await.unwrap().unwrap(); diff --git a/lib/runtime/src/discovery/mod.rs b/lib/runtime/src/discovery/mod.rs index 090fff281a..0ac7a6963e 100644 --- a/lib/runtime/src/discovery/mod.rs +++ b/lib/runtime/src/discovery/mod.rs @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +use crate::component::TransportType; use crate::Result; use async_trait::async_trait; use futures::Stream; @@ -10,10 +11,21 @@ use std::pin::Pin; mod mock; pub use mock::{MockDiscoveryClient, SharedMockRegistry}; +mod kv_store; +pub use kv_store::KVStoreDiscoveryClient; + +mod kube; +pub use kube::{KubeDiscoveryClient, DiscoveryMetadata, hash_pod_name}; + +pub mod utils; +pub use utils::watch_and_extract_field; + /// Query key for prefix-based discovery queries /// Supports hierarchical queries from all endpoints down to specific endpoints #[derive(Debug, Clone, PartialEq, Eq, Hash)] +//// rename to query pub enum DiscoveryKey { + /// Query all endpoints in the system AllEndpoints, /// Query all endpoints in a specific namespace @@ -29,28 +41,63 @@ pub enum DiscoveryKey { component: String, endpoint: String, }, - // TODO: Extend to support ModelCard queries: - // - AllModels - // - NamespacedModels { namespace } - // - ComponentModels { namespace, component } - // - Model { namespace, component, model_name } + AllModelCards, + NamespacedModelCards { namespace: String }, + ComponentModelCards { + namespace: String, + component: String, + }, + EndpointModelCards { + namespace: String, + component: String, + endpoint: String, + }, } /// Specification for registering objects in the discovery plane /// Represents the input to the register() operation -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum DiscoverySpec { /// Endpoint specification for registration Endpoint { namespace: String, component: String, endpoint: String, + /// Transport type and routing information + transport: TransportType, + }, + ModelCard { + namespace: String, + component: String, + endpoint: String, + /// ModelDeploymentCard serialized 
as JSON + /// This allows lib/runtime to remain independent of lib/llm types + /// DiscoverySpec.from_model_card() and DiscoveryInstance.deserialize_model_card() are ergonomic helpers to create and deserialize the model card. + card_json: serde_json::Value, }, - // TODO: Add ModelCard variant: - // - ModelCard { namespace, component, model_name, card: ModelDeploymentCard } } impl DiscoverySpec { + /// Creates a ModelCard discovery spec from a serializable type + /// The card will be serialized to JSON to avoid cross-crate dependencies + pub fn from_model_card( + namespace: String, + component: String, + endpoint: String, + card: &T, + ) -> crate::Result + where + T: Serialize, + { + let card_json = serde_json::to_value(card)?; + Ok(Self::ModelCard { + namespace, + component, + endpoint, + card_json, + }) + } + /// Attaches an instance ID to create a DiscoveryInstance pub fn with_instance_id(self, instance_id: u64) -> DiscoveryInstance { match self { @@ -58,11 +105,25 @@ impl DiscoverySpec { namespace, component, endpoint, - } => DiscoveryInstance::Endpoint { + transport, + } => DiscoveryInstance::Endpoint(crate::component::Instance { + namespace, + component, + endpoint, + instance_id, + transport, + }), + Self::ModelCard { + namespace, + component, + endpoint, + card_json, + } => DiscoveryInstance::ModelCard { namespace, component, endpoint, instance_id, + card_json, }, } } @@ -70,18 +131,44 @@ impl DiscoverySpec { /// Registered instances in the discovery plane /// Represents objects that have been successfully registered with an instance ID -#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[serde(tag = "type")] pub enum DiscoveryInstance { - /// Registered endpoint instance - Endpoint { + /// Registered endpoint instance - wraps the component::Instance directly + Endpoint(crate::component::Instance), + ModelCard { namespace: String, component: String, endpoint: String, instance_id: u64, + /// ModelDeploymentCard serialized as JSON + /// This allows lib/runtime to remain independent of lib/llm types + card_json: serde_json::Value, }, - // TODO: Add ModelCard variant: - // - ModelCard { namespace, component, model_name, instance_id, card: ModelDeploymentCard } +} + +impl DiscoveryInstance { + /// Returns the instance ID for this discovery instance + pub fn instance_id(&self) -> u64 { + match self { + Self::Endpoint(inst) => inst.instance_id, + Self::ModelCard { instance_id, .. } => *instance_id, + } + } + + /// Deserializes the model card JSON into the specified type T + /// Returns an error if this is not a ModelCard instance or if deserialization fails + pub fn deserialize_model_card(&self) -> crate::Result + where + T: for<'de> Deserialize<'de>, + { + match self { + Self::ModelCard { card_json, .. } => Ok(serde_json::from_value(card_json.clone())?), + Self::Endpoint(_) => crate::raise!( + "Cannot deserialize model card from Endpoint instance" + ), + } + } } /// Events emitted by the discovery client watch stream @@ -97,6 +184,7 @@ pub enum DiscoveryEvent { pub type DiscoveryStream = Pin> + Send>>; /// Discovery client trait for service discovery across different backends +/// TODO: maybe not discovery client? 
just discovery #[async_trait] pub trait DiscoveryClient: Send + Sync { /// Returns a unique identifier for this worker (e.g lease id if using etcd or generated id for memory store) @@ -106,6 +194,11 @@ pub trait DiscoveryClient: Send + Sync { /// Registers an object in the discovery plane with the instance id async fn register(&self, spec: DiscoverySpec) -> Result; + /// Returns a list of currently registered instances for the given discovery key + /// This is a one-time snapshot without watching for changes + async fn list(&self, key: DiscoveryKey) -> Result>; + /// Returns a stream of discovery events (Added/Removed) for the given discovery key async fn list_and_watch(&self, key: DiscoveryKey) -> Result; } + diff --git a/lib/runtime/src/discovery/utils.rs b/lib/runtime/src/discovery/utils.rs new file mode 100644 index 0000000000..abcd42cf4c --- /dev/null +++ b/lib/runtime/src/discovery/utils.rs @@ -0,0 +1,107 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Utility functions for working with discovery streams + +use serde::Deserialize; + +use super::{DiscoveryEvent, DiscoveryInstance, DiscoveryStream}; + +/// Helper to watch a discovery stream and extract a specific field into a HashMap +/// +/// This helper spawns a background task that: +/// - Deserializes ModelCards from discovery events +/// - Extracts a specific field using the provided extractor function +/// - Maintains a HashMap that auto-updates on Add/Remove events +/// - Returns a watch::Receiver that consumers can use to read the current state +/// +/// # Type Parameters +/// - `T`: The type to deserialize from DiscoveryInstance (e.g., ModelDeploymentCard) +/// - `V`: The extracted field type (e.g., ModelRuntimeConfig) +/// - `F`: The extractor function type +/// +/// # Arguments +/// - `stream`: The discovery event stream to watch +/// - `extractor`: Function that extracts the desired field from the deserialized type +/// +/// # Example +/// ```ignore +/// let stream = discovery.list_and_watch(DiscoveryKey::ComponentModelCards { ... }).await?; +/// let runtime_configs_rx = watch_and_extract_field( +/// stream, +/// |card: ModelDeploymentCard| card.runtime_config, +/// ); +/// +/// // Use it: +/// let configs = runtime_configs_rx.borrow(); +/// if let Some(config) = configs.get(&worker_id) { +/// // Use config... 
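/// // (keys of `configs` are the u64 instance ids reported by
/// //  DiscoveryInstance::instance_id(), i.e. the same ids carried by Removed events)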
+/// } +/// ``` +pub fn watch_and_extract_field( + stream: DiscoveryStream, + extractor: F, +) -> tokio::sync::watch::Receiver> +where + T: for<'de> Deserialize<'de> + 'static, + V: Clone + Send + Sync + 'static, + F: Fn(T) -> V + Send + 'static, +{ + use futures::StreamExt; + use std::collections::HashMap; + + let (tx, rx) = tokio::sync::watch::channel(HashMap::new()); + + tokio::spawn(async move { + let mut state: HashMap = HashMap::new(); + let mut stream = stream; + + while let Some(result) = stream.next().await { + match result { + Ok(DiscoveryEvent::Added(instance)) => { + let instance_id = instance.instance_id(); + + // Deserialize the full instance into type T + let deserialized: T = match instance.deserialize_model_card() { + Ok(d) => d, + Err(e) => { + tracing::warn!( + instance_id, + error = %e, + "Failed to deserialize discovery instance, skipping" + ); + continue; + } + }; + + // Extract the field we care about + let value = extractor(deserialized); + + // Update state and send + state.insert(instance_id, value); + if tx.send(state.clone()).is_err() { + tracing::debug!("watch_and_extract_field receiver dropped, stopping"); + break; + } + } + Ok(DiscoveryEvent::Removed(instance_id)) => { + // Remove from state and send update + state.remove(&instance_id); + if tx.send(state.clone()).is_err() { + tracing::debug!("watch_and_extract_field receiver dropped, stopping"); + break; + } + } + Err(e) => { + tracing::error!(error = %e, "Discovery event stream error in watch_and_extract_field"); + // Continue processing other events + } + } + } + + tracing::debug!("watch_and_extract_field task stopped"); + }); + + rx +} + diff --git a/lib/runtime/src/distributed.rs b/lib/runtime/src/distributed.rs index fd2846a0b9..3731fd982d 100644 --- a/lib/runtime/src/distributed.rs +++ b/lib/runtime/src/distributed.rs @@ -92,12 +92,59 @@ impl DistributedRuntime { let nats_client_for_metrics = nats_client.clone(); - // Initialize discovery client with mock implementation - // TODO: Replace MockDiscoveryClient with KeyValueStoreDiscoveryClient or KubeDiscoveryClient - let discovery_client = { - use crate::discovery::{MockDiscoveryClient, SharedMockRegistry}; - let registry = SharedMockRegistry::new(); - Arc::new(MockDiscoveryClient::new(None, registry)) as Arc + // Initialize discovery client based on backend configuration + let discovery_backend = std::env::var("DYN_DISCOVERY_BACKEND") + .unwrap_or_else(|_| "kv_store".to_string()); + + let (discovery_client, discovery_metadata) = match discovery_backend.as_str() { + "kubernetes" => { + tracing::info!("Initializing Kubernetes discovery backend"); + + // Create shared metadata store + let metadata = Arc::new(tokio::sync::RwLock::new( + crate::discovery::DiscoveryMetadata::new() + )); + + // Create Kubernetes discovery client + match crate::discovery::KubeDiscoveryClient::new( + metadata.clone(), + runtime.primary_token(), + ).await { + Ok(client) => { + tracing::info!("Kubernetes discovery client initialized successfully"); + ( + Arc::new(client) as Arc, + Some(metadata), + ) + } + Err(e) => { + tracing::warn!( + "Failed to initialize Kubernetes discovery client: {}. 
Falling back to KV store.", + e + ); + // Fallback to KV store + use crate::discovery::KVStoreDiscoveryClient; + ( + Arc::new(KVStoreDiscoveryClient::new( + store.clone(), + runtime.primary_token(), + )) as Arc, + None, + ) + } + } + } + _ => { + tracing::info!("Initializing KV store discovery backend"); + use crate::discovery::KVStoreDiscoveryClient; + ( + Arc::new(KVStoreDiscoveryClient::new( + store.clone(), + runtime.primary_token(), + )) as Arc, + None, + ) + } }; let distributed_runtime = Self { @@ -108,6 +155,7 @@ impl DistributedRuntime { tcp_server: Arc::new(OnceCell::new()), system_status_server: Arc::new(OnceLock::new()), discovery_client, + discovery_metadata, component_registry: component::Registry::new(), is_static, instance_sources: Arc::new(Mutex::new(HashMap::new())), @@ -151,6 +199,7 @@ impl DistributedRuntime { port, cancel_token, Arc::new(distributed_runtime.clone()), + distributed_runtime.discovery_metadata.clone(), ) .await { @@ -229,7 +278,7 @@ impl DistributedRuntime { } pub fn connection_id(&self) -> u64 { - self.store.connection_id() + self.discovery_client.instance_id() } pub fn shutdown(&self) { @@ -241,9 +290,10 @@ impl DistributedRuntime { Namespace::new(self.clone(), name.into(), self.is_static) } - /// TODO: Return discovery client when KeyValueDiscoveryClient or KubeDiscoveryClient is implemented - pub fn discovery_client(&self) -> Result> { - Err(error!("Discovery client not implemented!")) + /// Returns the discovery client for service registration and discovery + /// Currently uses MockDiscoveryClient, will be replaced with KeyValueDiscoveryClient or KubeDiscoveryClient + pub fn discovery_client(&self) -> Arc { + self.discovery_client.clone() } pub(crate) fn service_client(&self) -> Option { diff --git a/lib/runtime/src/instances.rs b/lib/runtime/src/instances.rs index 8f9ab0f676..7f875c7669 100644 --- a/lib/runtime/src/instances.rs +++ b/lib/runtime/src/instances.rs @@ -9,26 +9,34 @@ use std::sync::Arc; -use crate::component::{INSTANCE_ROOT_PATH, Instance}; -use crate::storage::key_value_store::{KeyValueStore, KeyValueStoreManager}; -use crate::transports::etcd::Client as EtcdClient; +use crate::component::Instance; +use crate::discovery::{DiscoveryClient, DiscoveryKey}; -pub async fn list_all_instances(client: &KeyValueStoreManager) -> anyhow::Result> { - let Some(bucket) = client.get_bucket(INSTANCE_ROOT_PATH).await? else { - return Ok(vec![]); - }; +pub async fn list_all_instances( + discovery_client: Arc, +) -> anyhow::Result> { + let discovery_instances = discovery_client.list(DiscoveryKey::AllEndpoints).await?; - let entries = bucket.entries().await?; - let mut instances = Vec::with_capacity(entries.len()); - for (name, bytes) in entries.into_iter() { - match serde_json::from_slice::(&bytes) { - Ok(instance) => instances.push(instance), - Err(err) => { - tracing::warn!(%err, key = name, "Failed to parse instance from storage"); - } - } - } - instances.sort(); + let mut instances: Vec = discovery_instances + .into_iter() + .filter_map(|di| match di { + crate::discovery::DiscoveryInstance::Endpoint(instance) => Some(instance), + _ => None, // Ignore all other variants (ModelCard, etc.) 
+ }) + .collect(); + instances.sort(); + + // Log all instances found for comparison + let instance_details: Vec<(u64, &str, &str, &str)> = instances + .iter() + .map(|inst| (inst.instance_id, inst.namespace.as_str(), inst.component.as_str(), inst.endpoint.as_str())) + .collect(); + tracing::warn!( + "DISCOVERY_VALIDATION: all_instances_found: count={}, instances={:?}", + instances.len(), + instance_details + ); + Ok(instances) } diff --git a/lib/runtime/src/lib.rs b/lib/runtime/src/lib.rs index 8a39ed32b4..c08fd6002d 100644 --- a/lib/runtime/src/lib.rs +++ b/lib/runtime/src/lib.rs @@ -99,6 +99,10 @@ pub struct DistributedRuntime { // Service discovery client discovery_client: Arc, + // Discovery metadata (only used for Kubernetes backend) + // Shared with system status server to expose via /metadata endpoint + discovery_metadata: Option>>, + // local registry for components // the registry allows us to use share runtime resources across instances of the same component object. // take for example two instances of a client to the same remote component. The registry allows us to use diff --git a/lib/runtime/src/storage/key_value_store.rs b/lib/runtime/src/storage/key_value_store.rs index 7fc122ec40..9946be790c 100644 --- a/lib/runtime/src/storage/key_value_store.rs +++ b/lib/runtime/src/storage/key_value_store.rs @@ -243,41 +243,140 @@ impl KeyValueStoreManager { ) { let bucket_name = bucket_name.to_string(); let (tx, rx) = tokio::sync::mpsc::channel(128); + tracing::debug!("KeyValueStoreManager.watch: Starting watch for bucket={}", bucket_name); let watch_task = tokio::spawn(async move { - // Start listening for changes but don't poll this yet + tracing::debug!("KeyValueStoreManager.watch: Watch task started for bucket={}", bucket_name); + // Get or create the bucket let bucket = self .0 .get_or_create_bucket(&bucket_name, bucket_ttl) .await?; + tracing::debug!("KeyValueStoreManager.watch: Got bucket for bucket={}", bucket_name); + + // CRITICAL: Get existing entries BEFORE starting the watch to avoid missing entries. + // This handles the race condition where entries might be added between these calls. + // We'll use deduplication to handle any overlap. + let existing_entries = bucket.entries().await?; + let existing_count = existing_entries.len(); + tracing::debug!( + "KeyValueStoreManager.watch: Found {} existing entries in bucket={}", + existing_count, + bucket_name + ); + + // Now start the watch stream for future changes let mut stream = bucket.watch().await?; - - // Send all the existing keys - for (key, bytes) in bucket.entries().await? 
{ + tracing::debug!("KeyValueStoreManager.watch: Got watch stream for bucket={}", bucket_name); + + // Track keys we've sent to deduplicate between existing entries and watch stream + let mut seen_keys = std::collections::HashSet::new(); + + // First, send all existing entries as Put events + for (key, bytes) in existing_entries { + tracing::debug!( + "KeyValueStoreManager.watch: Sending existing entry key={}, size={} bytes for bucket={}", + key, + bytes.len(), + bucket_name + ); + seen_keys.insert(key.clone()); if let Err(err) = tx .send_timeout( - WatchEvent::Put(KeyValue::new(key, bytes)), + WatchEvent::Put(KeyValue::new(key.clone(), bytes)), WATCH_SEND_TIMEOUT, ) .await { - tracing::error!(bucket_name, %err, "KeyValueStoreManager.watch failed adding existing key to channel"); + tracing::error!(bucket_name, %err, key, "KeyValueStoreManager.watch failed sending existing key to channel"); + } else { + tracing::debug!( + "KeyValueStoreManager.watch: Successfully sent existing entry key={} for bucket={}", + key, + bucket_name + ); } } - - // Now block waiting for new entries + tracing::debug!( + "KeyValueStoreManager.watch: Finished sending {} existing entries for bucket={}, now watching for new events", + existing_count, + bucket_name + ); + + // Now forward events from the watch stream with simple deduplication + // Note: The memory backend's watch() already includes existing entries and deduplicates + // internally, so we may receive some duplicates. We'll skip Put events for keys we + // just sent from entries(), but allow Delete events and subsequent updates through. + let mut new_event_count = 0; + let mut dedup_count = 0; loop { let event = tokio::select! { - _ = cancel_token.cancelled() => break, + _ = cancel_token.cancelled() => { + tracing::debug!("KeyValueStoreManager.watch: Cancel token triggered for bucket={}", bucket_name); + break; + } result = stream.next() => match result { - Some(event) => event, - None => break, + Some(event) => { + tracing::debug!( + "KeyValueStoreManager.watch: Received event from stream for bucket={}", + bucket_name + ); + event + }, + None => { + tracing::debug!("KeyValueStoreManager.watch: Stream closed for bucket={}", bucket_name); + break; + } } }; - if let Err(err) = tx.send_timeout(event, WATCH_SEND_TIMEOUT).await { - tracing::error!(bucket_name, %err, "KeyValueStoreManager.watch failed adding new key to channel"); + + // Simple deduplication: For Put events, if we just sent this key from entries(), + // skip it once. For memory backend, this skips the duplicate from its watch stream. + // For etcd backend, this shouldn't trigger since watch only sees new events. + // For subsequent Puts to the same key (updates), we'll send them. 
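                // Worked example: entries() returned key "a" above, so "a" is in
                // seen_keys. If the backend's watch stream then replays Put("a"),
                // seen_keys.remove("a") succeeds and that single event is skipped;
                // a later Put("a") carrying an update no longer finds "a" and is
                // forwarded, and Delete events are always forwarded.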
+ let should_send = match &event { + WatchEvent::Put(kv) => { + let key = kv.key_str(); + if seen_keys.remove(key) { + // We already sent this key from entries(), so skip this one occurrence + dedup_count += 1; + tracing::debug!( + "KeyValueStoreManager.watch: Deduplicating Put for key={} in bucket={} (probably from memory backend's initial yield)", + key, + bucket_name + ); + false + } else { + // Either a new key or an update to a key we've already seen + true + } + } + WatchEvent::Delete(_) => { + // Always send deletes + true + } + }; + + if should_send { + new_event_count += 1; + if let Err(err) = tx.send_timeout(event, WATCH_SEND_TIMEOUT).await { + tracing::error!(bucket_name, %err, "KeyValueStoreManager.watch failed sending new event to channel"); + } else { + tracing::debug!( + "KeyValueStoreManager.watch: Successfully sent new event #{} for bucket={}", + new_event_count, + bucket_name + ); + } } } + tracing::debug!( + "KeyValueStoreManager.watch: Watch task ending for bucket={}, sent {} existing + {} new events (deduplicated {} events)", + bucket_name, + existing_count, + new_event_count, + dedup_count + ); Ok::<(), StoreError>(()) }); (watch_task, rx) diff --git a/lib/runtime/src/system_status_server.rs b/lib/runtime/src/system_status_server.rs index 679d0f9043..ba0840bf9e 100644 --- a/lib/runtime/src/system_status_server.rs +++ b/lib/runtime/src/system_status_server.rs @@ -56,18 +56,33 @@ impl Clone for SystemStatusServerInfo { pub struct SystemStatusState { // global drt registry is for printing out the entire Prometheus format output root_drt: Arc, + // Discovery metadata (only for Kubernetes backend) + discovery_metadata: Option>>, } impl SystemStatusState { /// Create new system status server state with the provided distributed runtime - pub fn new(drt: Arc) -> anyhow::Result { - Ok(Self { root_drt: drt }) + pub fn new( + drt: Arc, + discovery_metadata: Option>>, + ) -> anyhow::Result { + Ok(Self { + root_drt: drt, + discovery_metadata, + }) } /// Get a reference to the distributed runtime pub fn drt(&self) -> &crate::DistributedRuntime { &self.root_drt } + + /// Get a reference to the discovery metadata if available + pub fn discovery_metadata( + &self, + ) -> Option<&Arc>> { + self.discovery_metadata.as_ref() + } } /// Start system status server with metrics support @@ -76,9 +91,10 @@ pub async fn spawn_system_status_server( port: u16, cancel_token: CancellationToken, drt: Arc, + discovery_metadata: Option>>, ) -> anyhow::Result<(std::net::SocketAddr, tokio::task::JoinHandle<()>)> { // Create system status server state with the provided distributed runtime - let server_state = Arc::new(SystemStatusState::new(drt)?); + let server_state = Arc::new(SystemStatusState::new(drt, discovery_metadata)?); let health_path = server_state .drt() .system_health @@ -114,6 +130,13 @@ pub async fn spawn_system_status_server( move || metrics_handler(state) }), ) + .route( + "/metadata", + get({ + let state = Arc::clone(&server_state); + move || metadata_handler(state) + }), + ) .fallback(|| async { tracing::info!("[fallback handler] called"); (StatusCode::NOT_FOUND, "Route not found").into_response() @@ -205,6 +228,43 @@ async fn metrics_handler(state: Arc) -> impl IntoResponse { (StatusCode::OK, response) } +/// Metadata handler for Kubernetes discovery backend +/// Returns the discovery metadata registered by this pod +#[tracing::instrument(skip_all, level = "trace")] +async fn metadata_handler(state: Arc) -> impl IntoResponse { + // Check if discovery metadata is available + let metadata 
= match state.discovery_metadata() { + Some(metadata) => metadata, + None => { + tracing::debug!("Metadata endpoint called but no discovery metadata available"); + return ( + StatusCode::NOT_FOUND, + "Discovery metadata not available (not using Kubernetes backend)".to_string(), + ) + .into_response(); + } + }; + + // Read the metadata + let metadata_guard = metadata.read().await; + + // Serialize to JSON + match serde_json::to_string(&*metadata_guard) { + Ok(json) => { + tracing::trace!("Returning metadata: {} bytes", json.len()); + (StatusCode::OK, json).into_response() + } + Err(e) => { + tracing::error!("Failed to serialize metadata: {}", e); + ( + StatusCode::INTERNAL_SERVER_ERROR, + "Failed to serialize metadata".to_string(), + ) + .into_response() + } + } +} + // Regular tests: cargo test system_status_server --lib #[cfg(test)] mod tests { diff --git a/lib/runtime/tests/kube_client_integration.rs b/lib/runtime/tests/kube_client_integration.rs new file mode 100644 index 0000000000..d28b599c9e --- /dev/null +++ b/lib/runtime/tests/kube_client_integration.rs @@ -0,0 +1,293 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Integration tests for KubeDiscoveryClient +//! +//! These tests require: +//! 1. Access to a Kubernetes cluster (kubectl configured) +//! 2. Test resources deployed (run k8s-test/deploy.sh) +//! +//! Run with: cargo test --test kube_client_integration -- --ignored --nocapture + +use dynamo_runtime::discovery::{ + KubeDiscoveryClient, DiscoveryClient, DiscoveryKey, +}; +use kube::Client; +use futures::StreamExt; + +/// Helper to create a test client with mock metadata +async fn create_test_client() -> Result> { + let kube_client = Client::try_default().await?; + let client = KubeDiscoveryClient::new_for_testing( + kube_client, + "test-pod-123".to_string(), + "discovery".to_string(), + true, // mock_metadata = true (skip HTTP calls, return mock data) + ).await?; + Ok(client) +} + +/// Test basic client creation and instance_id +#[tokio::test] +#[ignore] +async fn test_client_creation() { + println!("🔌 Testing KubeDiscoveryClient creation..."); + + let client = create_test_client().await + .expect("Failed to create test client"); + + let instance_id = client.instance_id(); + println!("✅ Client created with instance_id: {:x}", instance_id); + + assert_ne!(instance_id, 0, "Instance ID should not be zero"); +} + +/// Test listing all endpoints (without label filtering) +#[tokio::test] +#[ignore] +async fn test_list_all_endpoints() { + println!("📋 Testing list all endpoints..."); + println!(" Note: Using mock metadata (no actual HTTP calls to pods)"); + + let client = create_test_client().await + .expect("Failed to create test client"); + + let key = DiscoveryKey::AllEndpoints; + + println!("Calling list() with key={:?}", key); + let result = client.list(key).await; + + match result { + Ok(instances) => { + println!("✅ list() succeeded"); + println!(" Found {} instances", instances.len()); + + for (i, instance) in instances.iter().enumerate() { + println!(" [{}] {:?}", i, instance); + } + } + Err(e) => { + println!("❌ list() failed: {}", e); + } + } + + println!("✅ List test completed"); +} + +/// Test listing endpoints in a specific namespace +#[tokio::test] +#[ignore] +async fn test_list_namespaced_endpoints() { + println!("📋 Testing list namespaced endpoints..."); + + let client = create_test_client().await + .expect("Failed to create test client"); + + let key = 
DiscoveryKey::NamespacedEndpoints {
+        namespace: "test-namespace".to_string(),
+    };
+
+    println!("Calling list() with key={:?}", key);
+    let result = client.list(key).await;
+
+    match result {
+        Ok(instances) => {
+            println!("✅ list() succeeded");
+            println!(" Found {} instances in test-namespace", instances.len());
+        }
+        Err(e) => {
+            println!("⚠️ list() failed: {}", e);
+        }
+    }
+
+    println!("✅ Namespaced list test completed");
+}
+
+/// Test listing endpoints for a specific component
+#[tokio::test]
+#[ignore]
+async fn test_list_component_endpoints() {
+    println!("📋 Testing list component endpoints...");
+
+    let client = create_test_client().await
+        .expect("Failed to create test client");
+
+    let key = DiscoveryKey::ComponentEndpoints {
+        namespace: "test-namespace".to_string(),
+        component: "test-component".to_string(),
+    };
+
+    println!("Calling list() with key={:?}", key);
+    let result = client.list(key).await;
+
+    match result {
+        Ok(instances) => {
+            println!("✅ list() succeeded");
+            println!(" Found {} instances for test-namespace/test-component", instances.len());
+        }
+        Err(e) => {
+            println!("⚠️ list() failed: {}", e);
+        }
+    }
+
+    println!("✅ Component list test completed");
+}
+
+/// Test watching all endpoints
+#[tokio::test]
+#[ignore]
+async fn test_watch_all_endpoints() {
+    println!("👀 Testing watch all endpoints...");
+    println!(" This test will watch for 10 seconds");
+    println!(" Note: Using mock metadata (no actual HTTP calls to pods)");
+
+    let client = create_test_client().await
+        .expect("Failed to create test client");
+
+    let key = DiscoveryKey::AllEndpoints;
+
+    println!("Calling list_and_watch() with key={:?}", key);
+    let stream = client.list_and_watch(key).await
+        .expect("Failed to create watch stream");
+
+    let mut stream = stream;
+    let timeout = tokio::time::Duration::from_secs(10);
+    let deadline = tokio::time::Instant::now() + timeout;
+
+    let mut event_count = 0;
+
+    println!("📡 Watch stream started...");
+
+    loop {
+        tokio::select! {
+            Some(event) = stream.next() => {
+                event_count += 1;
+                match event {
+                    Ok(discovery_event) => {
+                        println!(" [{}] Event: {:?}", event_count, discovery_event);
+                    }
+                    Err(e) => {
+                        println!(" [{}] Error: {}", event_count, e);
+                    }
+                }
+            }
+            _ = tokio::time::sleep_until(deadline) => {
+                println!("⏰ Timeout reached");
+                break;
+            }
+        }
+    }
+
+    println!("✅ Watch test completed ({} events received)", event_count);
+    println!(" With mock metadata, you should see Added events for discovered pods");
+}
+
+/// Test watching namespaced endpoints
+#[tokio::test]
+#[ignore]
+async fn test_watch_namespaced_endpoints() {
+    println!("👀 Testing watch namespaced endpoints...");
+    println!(" This test will watch for 5 seconds");
+
+    let client = create_test_client().await
+        .expect("Failed to create test client");
+
+    let key = DiscoveryKey::NamespacedEndpoints {
+        namespace: "test-namespace".to_string(),
+    };
+
+    println!("Calling list_and_watch() with key={:?}", key);
+    let stream = client.list_and_watch(key).await
+        .expect("Failed to create watch stream");
+
+    let mut stream = stream;
+    let timeout = tokio::time::Duration::from_secs(5);
+    let deadline = tokio::time::Instant::now() + timeout;
+
+    let mut event_count = 0;
+
+    println!("📡 Watch stream started...");
+
+    loop {
+        tokio::select!
{ + Some(event) = stream.next() => { + event_count += 1; + match event { + Ok(discovery_event) => { + println!(" [{}] Event: {:?}", event_count, discovery_event); + } + Err(e) => { + println!(" [{}] Error: {}", event_count, e); + } + } + } + _ = tokio::time::sleep_until(deadline) => { + println!("⏰ Timeout reached"); + break; + } + } + } + + println!("✅ Watch test completed ({} events received)", event_count); +} + +/// Comprehensive test: verify the watch stream receives EndpointSlice events +/// This test verifies that the K8s watcher is working correctly +#[tokio::test] +#[ignore] +async fn test_watch_receives_k8s_events() { + println!("🔍 Testing that watch stream receives Kubernetes events..."); + println!(" This test verifies the K8s watcher layer works correctly"); + println!(" We'll watch for 10 seconds to ensure we get at least Init/InitDone"); + + let client = create_test_client().await + .expect("Failed to create test client"); + + let key = DiscoveryKey::AllEndpoints; + + let stream = client.list_and_watch(key).await + .expect("Failed to create watch stream"); + + let mut stream = stream; + let timeout = tokio::time::Duration::from_secs(10); + let deadline = tokio::time::Instant::now() + timeout; + + let mut received_any_event = false; + + println!("📡 Monitoring watch stream..."); + + loop { + tokio::select! { + Some(event) = stream.next() => { + received_any_event = true; + match event { + Ok(discovery_event) => { + println!(" ✅ Received discovery event: {:?}", discovery_event); + } + Err(e) => { + println!(" ⚠️ Stream error: {}", e); + } + } + // Got at least one event, test passes + break; + } + _ = tokio::time::sleep_until(deadline) => { + println!("⏰ Timeout reached"); + break; + } + } + } + + if received_any_event { + println!("✅ Watch stream is working - received at least one event"); + } else { + println!("⚠️ No events received in 10 seconds"); + println!(" This might be okay if:"); + println!(" - No EndpointSlices exist in the cluster"); + println!(" - Metadata HTTP calls are failing (expected without metadata server)"); + println!(" The K8s watcher itself is still working correctly."); + } + + println!("✅ Test completed"); +} + diff --git a/lib/runtime/tests/kube_discovery_integration.rs b/lib/runtime/tests/kube_discovery_integration.rs new file mode 100644 index 0000000000..a44438fe28 --- /dev/null +++ b/lib/runtime/tests/kube_discovery_integration.rs @@ -0,0 +1,367 @@ +// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Integration tests for Kubernetes discovery client +//! +//! These tests require: +//! 1. Access to a Kubernetes cluster (kubectl configured) +//! 2. Test resources deployed (run k8s-test/deploy.sh) +//! +//! 
Run with: cargo test --test kube_discovery_integration -- --nocapture + +use futures::StreamExt; +use k8s_openapi::api::discovery::v1::EndpointSlice; +use kube::{Api, Client}; +use kube::runtime::{watcher, watcher::Config}; + +/// Test that we can successfully create a Kubernetes client +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_kube_client_connection -- --ignored +async fn test_kube_client_connection() { + println!("🔌 Testing Kubernetes client connection..."); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client - is kubectl configured?"); + + println!("✅ Successfully connected to Kubernetes cluster"); + + // Try to list namespaces as a connectivity test + let namespaces: Api = Api::all(client); + let ns_list = namespaces.list(&Default::default()).await + .expect("Failed to list namespaces"); + + println!("📋 Found {} namespaces", ns_list.items.len()); + println!("✅ Kubernetes API is accessible"); +} + +/// Test listing EndpointSlices +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_list_endpointslices -- --ignored +async fn test_list_endpointslices() { + println!("📋 Testing EndpointSlice listing..."); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client"); + + let endpoint_slices: Api = Api::namespaced(client, "default"); + + // List all EndpointSlices in default namespace + let list_params = kube::api::ListParams::default(); + let slices = endpoint_slices.list(&list_params).await + .expect("Failed to list EndpointSlices"); + + println!("📊 Found {} EndpointSlices in default namespace", slices.items.len()); + + for slice in &slices.items { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let service = slice.metadata.labels.as_ref() + .and_then(|l| l.get("kubernetes.io/service-name")) + .map(|s| s.as_str()) + .unwrap_or(""); + + let endpoint_count = slice.endpoints.len(); + + println!(" • {} (service: {}, endpoints: {})", name, service, endpoint_count); + + // Show endpoint details + for (i, endpoint) in slice.endpoints.iter().enumerate() { + let ready = endpoint.conditions.as_ref() + .and_then(|c| c.ready) + .unwrap_or(false); + let addresses = &endpoint.addresses; + let pod_name = endpoint.target_ref.as_ref() + .and_then(|t| t.name.as_ref()) + .map(|n| n.as_str()) + .unwrap_or(""); + + println!(" [{}] pod={}, ready={}, addresses={:?}", + i, pod_name, ready, addresses); + } + } + + println!("✅ EndpointSlice listing test completed"); +} + +/// Test listing EndpointSlices with label selector (like our discovery client does) +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_list_with_labels -- --ignored +async fn test_list_with_labels() { + println!("🏷️ Testing EndpointSlice listing with label selector..."); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client"); + + let endpoint_slices: Api = Api::all(client); + + // Test the label selector we use in our discovery client + let label_selector = "dynamo.nvidia.com/namespace=test-namespace,dynamo.nvidia.com/component=test-component"; + println!("Using label selector: {}", label_selector); + + let list_params = kube::api::ListParams::default() + .labels(label_selector); + + let slices = endpoint_slices.list(&list_params).await + .expect("Failed to list EndpointSlices with labels"); + + println!("📊 Found {} EndpointSlices matching labels", slices.items.len()); + 
+ if slices.items.is_empty() { + println!("⚠️ No EndpointSlices found with Dynamo labels."); + println!(" Make sure test resources are deployed: ./k8s-test/deploy.sh"); + println!(" Note: Kubernetes creates EndpointSlices automatically,"); + println!(" but pod labels don't flow to EndpointSlices by default."); + } + + for slice in &slices.items { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let endpoint_count = slice.endpoints.len(); + println!(" • {} (endpoints: {})", name, endpoint_count); + } + + println!("✅ Label selector test completed"); +} + +/// Test watching EndpointSlices for changes +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_watch_endpointslices -- --ignored +async fn test_watch_endpointslices() { + println!("👀 Testing EndpointSlice watching..."); + println!(" This test will watch for 10 seconds or 5 events, whichever comes first"); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client"); + + let endpoint_slices: Api = Api::namespaced(client, "default"); + + // Create watcher + let watch_config = Config::default(); + let mut watch_stream = Box::pin(watcher(endpoint_slices, watch_config)); + + println!("📡 Watch stream started..."); + + let mut event_count = 0; + let max_events = 5; + let timeout = tokio::time::Duration::from_secs(10); + let deadline = tokio::time::Instant::now() + timeout; + + loop { + tokio::select! { + Some(event) = watch_stream.next() => { + event_count += 1; + match event { + Ok(watcher::Event::Apply(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let endpoint_count = slice.endpoints.len(); + println!(" [{}] ✅ Apply: {} (endpoints: {})", event_count, name, endpoint_count); + } + Ok(watcher::Event::InitApply(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let endpoint_count = slice.endpoints.len(); + println!(" [{}] 🔄 InitApply: {} (endpoints: {})", event_count, name, endpoint_count); + } + Ok(watcher::Event::Delete(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + println!(" [{}] ❌ Delete: {}", event_count, name); + } + Ok(watcher::Event::Init) => { + println!(" [{}] 🚀 Init - watch stream starting", event_count); + } + Ok(watcher::Event::InitDone) => { + println!(" [{}] ✅ InitDone - initial list complete", event_count); + } + Err(e) => { + println!(" [{}] ⚠️ Error: {}", event_count, e); + } + } + + if event_count >= max_events { + println!("📊 Reached max events ({}), stopping watch", max_events); + break; + } + } + _ = tokio::time::sleep_until(deadline) => { + println!("⏰ Timeout reached ({}s), stopping watch", timeout.as_secs()); + break; + } + } + } + + println!("✅ Watch test completed ({} events received)", event_count); +} + +/// Test watching EndpointSlices with label selector +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_watch_with_labels -- --ignored +async fn test_watch_with_labels() { + println!("👀 Testing EndpointSlice watching with label selector..."); + println!(" This test will watch for 5 seconds or until InitDone"); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client"); + + let endpoint_slices: Api = Api::all(client); + + // Watch with our discovery labels + let label_selector = "kubernetes.io/service-name=dynamo-test-service"; + println!("Using label selector: {}", label_selector); + + let watch_config = Config::default() + .labels(label_selector); + let mut 
watch_stream = Box::pin(watcher(endpoint_slices, watch_config)); + + println!("📡 Watch stream started..."); + + let mut event_count = 0; + let timeout = tokio::time::Duration::from_secs(5); + let deadline = tokio::time::Instant::now() + timeout; + let mut init_done = false; + + loop { + tokio::select! { + Some(event) = watch_stream.next() => { + event_count += 1; + match event { + Ok(watcher::Event::Apply(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let endpoint_count = slice.endpoints.len(); + println!(" [{}] ✅ Apply: {} (endpoints: {})", event_count, name, endpoint_count); + } + Ok(watcher::Event::InitApply(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + let endpoint_count = slice.endpoints.len(); + println!(" [{}] 🔄 InitApply: {} (endpoints: {})", event_count, name, endpoint_count); + } + Ok(watcher::Event::Delete(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + println!(" [{}] ❌ Delete: {}", event_count, name); + } + Ok(watcher::Event::Init) => { + println!(" [{}] 🚀 Init - watch stream starting", event_count); + } + Ok(watcher::Event::InitDone) => { + println!(" [{}] ✅ InitDone - initial list complete", event_count); + init_done = true; + } + Err(e) => { + println!(" [{}] ⚠️ Error: {}", event_count, e); + } + } + + if init_done { + println!("📊 InitDone received, stopping watch"); + break; + } + } + _ = tokio::time::sleep_until(deadline) => { + println!("⏰ Timeout reached ({}s), stopping watch", timeout.as_secs()); + break; + } + } + } + + println!("✅ Watch with labels test completed ({} events received)", event_count); +} + +/// Comprehensive test that simulates our discovery client behavior +#[tokio::test] +#[ignore] // Run manually with: cargo test --test kube_discovery_integration test_discovery_simulation -- --ignored +async fn test_discovery_simulation() { + println!("🔍 Testing discovery client simulation..."); + println!(" This simulates how our KubeDiscoveryClient list_and_watch works"); + + let client = Client::try_default() + .await + .expect("Failed to create Kubernetes client"); + + let endpoint_slices: Api = Api::all(client); + + // Use service name label (EndpointSlices automatically get this label) + let label_selector = "kubernetes.io/service-name=dynamo-test-service"; + println!("Label selector: {}", label_selector); + + let watch_config = Config::default() + .labels(label_selector); + let mut watch_stream = Box::pin(watcher(endpoint_slices, watch_config)); + + println!("📡 Starting watch stream..."); + + let mut seen_endpoints = std::collections::HashSet::new(); + let timeout = tokio::time::Duration::from_secs(10); + let deadline = tokio::time::Instant::now() + timeout; + + loop { + tokio::select! 
{ + Some(event) = watch_stream.next() => { + match event { + Ok(watcher::Event::Apply(slice)) | Ok(watcher::Event::InitApply(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + println!(" 📦 Processing EndpointSlice: {}", name); + + // Extract endpoints (simulate our discovery logic) + for endpoint in &slice.endpoints { + let ready = endpoint.conditions.as_ref() + .and_then(|c| c.ready) + .unwrap_or(false); + + if !ready { + continue; + } + + let pod_name = endpoint.target_ref.as_ref() + .and_then(|t| t.name.as_ref()) + .map(|n| n.as_str()) + .unwrap_or_default(); + + if pod_name.is_empty() { + continue; + } + + // Hash the pod name (simulate instance_id generation) + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + let mut hasher = DefaultHasher::new(); + pod_name.hash(&mut hasher); + let instance_id = hasher.finish(); + + if seen_endpoints.insert(instance_id) { + let addresses = &endpoint.addresses; + println!(" ✅ New endpoint: pod={}, instance_id={:x}, addresses={:?}", + pod_name, instance_id, addresses); + } + } + } + Ok(watcher::Event::Delete(slice)) => { + let name = slice.metadata.name.as_deref().unwrap_or(""); + println!(" ❌ EndpointSlice deleted: {}", name); + } + Ok(watcher::Event::Init) => { + println!(" 🚀 Watch stream initialized"); + } + Ok(watcher::Event::InitDone) => { + println!(" ✅ Initial sync complete"); + println!(" 📊 Discovered {} unique endpoints", seen_endpoints.len()); + break; + } + Err(e) => { + eprintln!(" ⚠️ Watch error: {}", e); + } + } + } + _ = tokio::time::sleep_until(deadline) => { + println!("⏰ Timeout reached"); + break; + } + } + } + + println!("✅ Discovery simulation completed"); + println!("📊 Total unique endpoints discovered: {}", seen_endpoints.len()); + + assert!(seen_endpoints.len() > 0, "Should have discovered at least one endpoint"); +} +
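
For reference, a minimal sketch of building the label selector strings exercised by test_list_with_labels. The dynamo.nvidia.com/namespace and dynamo.nvidia.com/component label keys are taken verbatim from that test; the helper name and signature are illustrative only and are not an API introduced by this patch.

// Sketch: assemble the selector strings used in the label-selector tests above.
// `None, None` yields an empty selector (match everything), loosely mirroring
// DiscoveryKey::AllEndpoints vs. the namespaced/component variants.
fn label_selector(namespace: Option<&str>, component: Option<&str>) -> String {
    let mut parts = Vec::new();
    if let Some(ns) = namespace {
        parts.push(format!("dynamo.nvidia.com/namespace={ns}"));
    }
    if let Some(comp) = component {
        parts.push(format!("dynamo.nvidia.com/component={comp}"));
    }
    parts.join(",")
}

For example, label_selector(Some("test-namespace"), Some("test-component")) produces the exact string passed to ListParams::labels in test_list_with_labels.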
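A possible caller for the new /metadata route is sketched below. It is illustrative only: the peer address, the use of reqwest (with its json feature) plus anyhow, and decoding into serde_json::Value rather than the concrete discovery metadata type are assumptions, not part of this patch. The only behavior taken from the handler above is that it returns 404 when no Kubernetes discovery metadata is registered and a JSON body otherwise.

// Sketch: fetch the discovery metadata a peer exposes on its system status server.
use std::time::Duration;

async fn fetch_peer_metadata(addr: &str) -> anyhow::Result<Option<serde_json::Value>> {
    let url = format!("http://{addr}/metadata");
    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(2))
        .build()?;
    let resp = client.get(&url).send().await?;
    if resp.status() == reqwest::StatusCode::NOT_FOUND {
        // The handler returns 404 when the pod is not using the Kubernetes backend.
        return Ok(None);
    }
    let resp = resp.error_for_status()?;
    Ok(Some(resp.json::<serde_json::Value>().await?))
}

With a system status server listening on, say, 10.0.0.12:9090 (an example address), fetch_peer_metadata("10.0.0.12:9090") returns Ok(None) for pods not using the Kubernetes backend and the registered metadata as JSON otherwise.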