Skip to content

Commit cdc3670

Browse files
committed
fix(kv-indexer): drop the extra native bin
Signed-off-by: PeaBrane <yanrpei@gmail.com>
1 parent a584cd1 commit cdc3670

File tree

20 files changed

+1179
-585
lines changed

20 files changed

+1179
-585
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from dynamo.indexer.main import main
5+
6+
# Script entry: run the indexer CLI and use the integer returned by main()
# as the process exit status.
if __name__ == "__main__":
7+
raise SystemExit(main())
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import os
5+
import sys
6+
from collections.abc import Sequence
7+
8+
# Must run before the `dynamo.llm` import below. The native `_core` module
# init skips its own logging initialization when this variable is present
# (presumably `dynamo.llm` loads `_core` on import -- TODO confirm).
# `setdefault` leaves any value already exported by the caller untouched.
os.environ.setdefault("DYNAMO_SKIP_PYTHON_LOG_INIT", "1")
9+
10+
from dynamo.llm import run_kv_indexer
11+
12+
13+
# Run the KV indexer CLI through the native `run_kv_indexer` binding.
#
# argv: arguments to forward (without the program name); when None, the
#       process arguments `sys.argv[1:]` are used.
# Returns 0 when the binding returns normally, or when it raised while a
# help flag was present. Any other exception propagates to the caller.
def main(argv: Sequence[str] | None = None) -> int:
14+
# Copy into a list so any Sequence is accepted and the `in` checks below work.
args = list(sys.argv[1:] if argv is None else argv)
15+
try:
16+
run_kv_indexer(args)
17+
except Exception as exc:
18+
# NOTE(review): presumably the native argument parser reports `--help`
# as an error carrying the usage text -- confirm. In that case print the
# text and exit successfully instead of surfacing a traceback.
if "-h" in args or "--help" in args:
19+
print(exc)
20+
return 0
21+
raise
22+
return 0

container/templates/wheel_builder.Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -439,9 +439,9 @@ RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
439439
uv build --wheel --out-dir /opt/dynamo/dist && \
440440
cd /opt/dynamo/lib/bindings/python && \
441441
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
442-
maturin build --release --bindings bin --features "media-ffmpeg,kv-indexer" --out /opt/dynamo/dist; \
442+
maturin build --release --features "media-ffmpeg,kv-indexer" --out /opt/dynamo/dist; \
443443
else \
444-
maturin build --release --bindings bin --features "kv-indexer" --out /opt/dynamo/dist; \
444+
maturin build --release --features "kv-indexer" --out /opt/dynamo/dist; \
445445
fi && \
446446
/tmp/use-sccache.sh show-stats "Dynamo Runtime"
447447

docs/components/router/standalone-indexer.md

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ subtitle: Run the KV cache indexer as an independent HTTP service for querying b
77

88
## Overview
99

10-
The standalone KV indexer (`dynamo-kv-indexer`) is a lightweight binary that maintains a radix tree of cached blocks and exposes HTTP endpoints for querying and managing workers. It supports two operational modes:
10+
The standalone KV indexer (`python -m dynamo.indexer`) is a lightweight service that maintains a radix tree of cached blocks and exposes HTTP endpoints for querying and managing workers. It supports two operational modes:
1111

1212
- **Standalone mode** (default): subscribes to ZMQ KV event streams directly from workers. No Dynamo runtime dependencies required.
1313
- **Dynamo runtime mode** (`--dynamo-runtime`): integrates with the Dynamo runtime for automatic worker discovery via MDC, KV event ingestion via the event plane (NATS or ZMQ), and overlap queries over the request plane for remote frontends.
@@ -56,11 +56,11 @@ If no peers are reachable, the indexer starts with an empty state.
5656

5757
```bash
5858
# Replica A (first instance, no peers)
59-
dynamo-kv-indexer --port 8090 --block-size 16 \
59+
python -m dynamo.indexer --port 8090 --block-size 16 \
6060
--workers "1=tcp://worker1:5557,2=tcp://worker2:5558"
6161

6262
# Replica B (recovers from A on startup)
63-
dynamo-kv-indexer --port 8091 --block-size 16 \
63+
python -m dynamo.indexer --port 8091 --block-size 16 \
6464
--workers "1=tcp://worker1:5557,2=tcp://worker2:5558" \
6565
--peers "http://localhost:8090"
6666
```
@@ -81,7 +81,7 @@ Peers can be registered at startup via `--peers` or dynamically via the HTTP API
8181

8282
## Building
8383

84-
The binary is built via maturin as part of the Python bindings. Feature flags control which capabilities are compiled in:
84+
The service is exposed through the Python package after building the bindings with maturin. Feature flags control which capabilities are compiled in:
8585

8686
| Feature | Description |
8787
|---------|-------------|
@@ -95,7 +95,7 @@ The binary is built via maturin as part of the Python bindings. Feature flags co
9595
cd lib/bindings/python && VIRTUAL_ENV=../../.venv ../../.venv/bin/maturin develop --uv --features kv-indexer
9696
```
9797

98-
This installs `dynamo-kv-indexer` into the virtualenv.
98+
After installation, launch the service with `python -m dynamo.indexer`.
9999

100100
### Standalone build with metrics
101101

@@ -118,13 +118,13 @@ This enables the `--dynamo-runtime` CLI flag for MDC discovery, event-plane subs
118118
### Standalone mode (default)
119119

120120
```bash
121-
dynamo-kv-indexer --port 8090 [--threads 4] [--block-size 16 --model-name my-model --tenant-id default --workers "1=tcp://host:5557,2:1=tcp://host:5558"] [--peers "http://peer1:8090,http://peer2:8091"]
121+
python -m dynamo.indexer --port 8090 [--threads 4] [--block-size 16 --model-name my-model --tenant-id default --workers "1=tcp://host:5557,2:1=tcp://host:5558"] [--peers "http://peer1:8090,http://peer2:8091"]
122122
```
123123

124124
### Dynamo runtime mode
125125

126126
```bash
127-
dynamo-kv-indexer --dynamo-runtime --namespace default --component-name kv-indexer --worker-component backend --port 8090 [--threads 4]
127+
python -m dynamo.indexer --dynamo-runtime --namespace default --component-name kv-indexer --worker-component backend --port 8090 [--threads 4]
128128
```
129129

130130
In runtime mode, workers are discovered automatically via MDC. The `--workers` flag can still be used to register additional static workers alongside discovered ones.
@@ -168,10 +168,12 @@ curl http://localhost:8090/metrics
168168
| `dynamo_kvindexer_errors_total` | Counter | `endpoint`, `status_class` | HTTP error responses (4xx/5xx) |
169169
| `dynamo_kvindexer_models` | Gauge || Number of active model+tenant indexers |
170170
| `dynamo_kvindexer_workers` | Gauge || Number of registered worker instances |
171+
| `dynamo_kvindexer_listeners` | Gauge | `status` | Number of ZMQ listeners by status (`pending`, `active`, `paused`, `failed`) |
171172

172173
### `POST /register` — Register an endpoint
173174

174175
Register a ZMQ endpoint for an instance. Each call creates or reuses the indexer for the given `(model_name, tenant_id)` pair.
176+
Registration is non-blocking: if the worker is not up yet, the listener is accepted in `pending` state and transitions to `active` once the initial ZMQ connection succeeds.
175177

176178
```bash
177179
# Single model, default tenant
@@ -243,9 +245,38 @@ curl http://localhost:8090/workers
243245

244246
Returns:
245247
```json
246-
[{"instance_id": 1, "endpoints": {"0": "tcp://127.0.0.1:5557", "1": "tcp://127.0.0.1:5558"}}]
248+
[
249+
{
250+
"instance_id": 1,
251+
"source": "zmq",
252+
"status": "active",
253+
"endpoints": {
254+
"0": "tcp://127.0.0.1:5557",
255+
"1": "tcp://127.0.0.1:5558"
256+
},
257+
"listeners": {
258+
"0": {
259+
"endpoint": "tcp://127.0.0.1:5557",
260+
"status": "active"
261+
},
262+
"1": {
263+
"endpoint": "tcp://127.0.0.1:5558",
264+
"status": "active"
265+
}
266+
}
267+
},
268+
{
269+
"instance_id": 2,
270+
"source": "discovery",
271+
"status": "active",
272+
"endpoints": {},
273+
"listeners": {}
274+
}
275+
]
247276
```
248277

278+
For ZMQ-managed workers, `status` is aggregated across listeners with priority `failed > pending > active > paused`. Each listener entry may also expose a `last_error` field when the most recent startup or recv-loop attempt failed.
279+
249280
### `POST /query` — Query overlap for token IDs
250281

251282
Given raw token IDs, compute block hashes and return per-instance overlap scores (in matched tokens):
@@ -379,7 +410,7 @@ The indexer registers a query endpoint on the Dynamo request plane, allowing fro
379410

380411
```bash
381412
# Start the indexer with runtime integration
382-
dynamo-kv-indexer --dynamo-runtime \
413+
python -m dynamo.indexer --dynamo-runtime \
383414
--namespace my-namespace \
384415
--component-name kv-indexer \
385416
--worker-component backend \

lib/bindings/python/Cargo.toml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,4 @@ dynamo-llm = { path = "../../llm" }
7878
[target.'cfg(not(target_os = "linux"))'.dependencies]
7979
dynamo-llm = { path = "../../llm", default-features = false }
8080

81-
[[bin]]
82-
name = "dynamo-kv-indexer"
83-
path = "rust/bin/kv_indexer.rs"
84-
required-features = ["kv-indexer"]
85-
8681
[dev-dependencies]

lib/bindings/python/rust/bin/kv_indexer.rs

Lines changed: 0 additions & 107 deletions
This file was deleted.

lib/bindings/python/rust/lib.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ type JsonServerStreamingIngress =
7777
static INIT: OnceCell<()> = OnceCell::new();
7878

7979
const DEFAULT_ANNOTATED_SETTING: Option<bool> = Some(true);
80+
// Name of the environment variable that, when present, makes `_core` skip
// `rs::logging::init()` during module initialization. Only presence is
// checked (`var_os(..).is_none()`), so any value -- including "0" or an
// empty string -- disables the eager logging init.
const SKIP_PYTHON_LOG_INIT_ENV: &str = "DYNAMO_SKIP_PYTHON_LOG_INIT";
8081

8182
// Helper to get appropriate span for instrumentation - always emit spans
8283
fn get_span_for_context(context: &context::Context, operation: &str) -> tracing::Span {
@@ -135,7 +136,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
135136
eprintln!(
136137
"Warning: OTEL_EXPORT_ENABLED detected. Logging initialization deferred until runtime is available. Early logs may be dropped."
137138
);
138-
} else {
139+
} else if std::env::var_os(SKIP_PYTHON_LOG_INIT_ENV).is_none() {
139140
rs::logging::init();
140141
}
141142

@@ -145,6 +146,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
145146
m.add_function(wrap_pyfunction!(register_model, m)?)?;
146147
m.add_function(wrap_pyfunction!(unregister_model, m)?)?;
147148
m.add_function(wrap_pyfunction!(fetch_model, m)?)?;
149+
m.add_function(wrap_pyfunction!(run_kv_indexer, m)?)?;
148150
m.add_function(wrap_pyfunction!(llm::entrypoint::make_engine, m)?)?;
149151
m.add_function(wrap_pyfunction!(llm::entrypoint::run_input, m)?)?;
150152

@@ -200,6 +202,14 @@ where
200202
PyException::new_err(format!("{}", err))
201203
}
202204

205+
/// Run the KV indexer CLI with the given arguments.
///
/// `argv` is forwarded to `llm::kv::run_kv_indexer_cli`; when omitted
/// (`None`) an empty argument list is used.
///
/// # Errors
///
/// Any error returned by the CLI is converted into a Python exception via
/// `to_pyerr`.
#[pyfunction(name = "run_kv_indexer")]
206+
#[pyo3(signature = (argv=None))]
207+
fn run_kv_indexer(py: Python<'_>, argv: Option<Vec<String>>) -> PyResult<()> {
208+
let argv = argv.unwrap_or_default();
209+
// Release the GIL for the duration of the call so other Python threads
// are not blocked while the CLI runs.
py.allow_threads(move || llm::kv::run_kv_indexer_cli(argv))
210+
.map_err(to_pyerr)
211+
}
212+
203213
/// Log a message from Python with file and line info
204214
#[pyfunction]
205215
#[pyo3(text_signature = "(level, message, module, file, line)")]

0 commit comments

Comments
 (0)