Skip to content

Commit 5aa2f53

Browse files
authored
feat(kv-router): add Prometheus metrics to standalone indexer (#7339)
Signed-off-by: PeaBrane <yanrpei@gmail.com>
1 parent c8f7ce9 commit 5aa2f53

File tree

9 files changed

+346
-71
lines changed

9 files changed

+346
-71
lines changed

Cargo.lock

Lines changed: 32 additions & 41 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/components/router/standalone-indexer.md

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,30 @@ dynamo-kv-indexer --port 8090 [--threads 4] [--block-size 16 --model-name my-mod
9999

100100
## HTTP API
101101

102+
### `GET /health` — Liveness check
103+
104+
Returns `200 OK` unconditionally.
105+
106+
```bash
107+
curl http://localhost:8090/health
108+
```
109+
110+
### `GET /metrics` — Prometheus metrics
111+
112+
Returns metrics in Prometheus text exposition format. Available when the binary is built with the `metrics` feature (enabled by default via `standalone-indexer`).
113+
114+
```bash
115+
curl http://localhost:8090/metrics
116+
```
117+
118+
| Metric | Type | Labels | Description |
119+
|--------|------|--------|-------------|
120+
| `dynamo_kvindexer_request_duration_seconds` | Histogram | `endpoint` | HTTP request latency |
121+
| `dynamo_kvindexer_requests_total` | Counter | `endpoint`, `method` | Total HTTP requests |
122+
| `dynamo_kvindexer_errors_total` | Counter | `endpoint`, `status_class` | HTTP error responses (4xx/5xx) |
123+
| `dynamo_kvindexer_models` | Gauge | — | Number of active model+tenant indexers |
124+
| `dynamo_kvindexer_workers` | Gauge | — | Number of registered worker instances |
125+
102126
### `POST /register` — Register an endpoint
103127

104128
Register a ZMQ endpoint for an instance. Each call creates or reuses the indexer for the given `(model_name, tenant_id)` pair.
@@ -307,7 +331,7 @@ graph TD
307331
REG[Worker Registry]
308332
ZMQ[ZMQ SUB Listeners]
309333
IDX["Indexer Map<br/>(model, tenant) → Radix Tree"]
310-
HTTP[HTTP API<br/>/query /dump /register]
334+
HTTP[HTTP API<br/>/query /dump /register /metrics /health]
311335
end
312336
313337
CLIENT[External Client]

lib/bindings/python/src/dynamo/prometheus_names.py

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,13 @@ class frontend_service:
132132
OPERATION_LABEL = "operation"
133133

134134

135+
class kv_publisher:
136+
"""KV Publisher metrics"""
137+
138+
# Total number of raw events dropped by engines before reaching publisher (detected via event_id gaps)
139+
ENGINES_DROPPED_EVENTS_TOTAL = "kv_publisher_engines_dropped_events_total"
140+
141+
135142
class kvbm:
136143
"""KVBM"""
137144

@@ -167,6 +174,21 @@ class kvbm:
167174
OBJECT_WRITE_FAILURES = "object_write_failures"
168175

169176

177+
class kvindexer:
178+
"""Standalone KV indexer HTTP service metrics"""
179+
180+
# HTTP request latency
181+
REQUEST_DURATION_SECONDS = "request_duration_seconds"
182+
# Total HTTP requests
183+
REQUESTS_TOTAL = "requests_total"
184+
# HTTP error responses (4xx/5xx)
185+
ERRORS_TOTAL = "errors_total"
186+
# Number of active model+tenant indexers
187+
MODELS = "models"
188+
# Number of registered worker instances
189+
WORKERS = "workers"
190+
191+
170192
class kvrouter:
171193
# Number of KV cache events applied to the index (including status)
172194
KV_CACHE_EVENTS_APPLIED = "kv_cache_events_applied"
@@ -225,6 +247,8 @@ class name_prefix:
225247
ROUTER = "dynamo_router"
226248
# Prefix for tokio runtime metrics
227249
TOKIO = "dynamo_tokio"
250+
# Prefix for standalone KV indexer metrics
251+
KVINDEXER = "dynamo_kvindexer"
228252

229253

230254
class router:
@@ -265,25 +289,6 @@ class routing_overhead:
265289
TOTAL_MS = "overhead_total_ms"
266290

267291

268-
class trtllm_additional:
269-
"""Additional TRT-LLM worker metrics beyond what the engine natively provides."""
270-
271-
# Total number of aborted/cancelled requests
272-
NUM_ABORTED_REQUESTS_TOTAL = "trtllm_num_aborted_requests_total"
273-
# Total number of requests containing image content
274-
REQUEST_TYPE_IMAGE_TOTAL = "trtllm_request_type_image_total"
275-
# Total number of requests using guided/structured decoding
276-
REQUEST_TYPE_STRUCTURED_OUTPUT_TOTAL = "trtllm_request_type_structured_output_total"
277-
# Total number of successful KV cache transfers
278-
KV_TRANSFER_SUCCESS_TOTAL = "trtllm_kv_transfer_success_total"
279-
# KV cache transfer latency per request in seconds
280-
KV_TRANSFER_LATENCY_SECONDS = "trtllm_kv_transfer_latency_seconds"
281-
# KV cache transfer size per request in bytes
282-
KV_TRANSFER_BYTES = "trtllm_kv_transfer_bytes"
283-
# KV cache transfer speed per request in GB/s
284-
KV_TRANSFER_SPEED_GB_S = "trtllm_kv_transfer_speed_gb_s"
285-
286-
287292
class task_tracker:
288293
"""Task tracker Prometheus metric name suffixes"""
289294

@@ -318,6 +323,25 @@ class tokio_perf:
318323
ALIVE_TASKS = "alive_tasks"
319324

320325

326+
class trtllm_additional:
327+
"""Additional TRT-LLM worker metrics beyond what the engine natively provides."""
328+
329+
# Total number of aborted/cancelled requests
330+
NUM_ABORTED_REQUESTS_TOTAL = "trtllm_num_aborted_requests_total"
331+
# Total number of requests containing image content
332+
REQUEST_TYPE_IMAGE_TOTAL = "trtllm_request_type_image_total"
333+
# Total number of requests using guided/structured decoding
334+
REQUEST_TYPE_STRUCTURED_OUTPUT_TOTAL = "trtllm_request_type_structured_output_total"
335+
# Total number of successful KV cache transfers
336+
KV_TRANSFER_SUCCESS_TOTAL = "trtllm_kv_transfer_success_total"
337+
# KV cache transfer latency per request in seconds
338+
KV_TRANSFER_LATENCY_SECONDS = "trtllm_kv_transfer_latency_seconds"
339+
# KV cache transfer size per request in bytes
340+
KV_TRANSFER_BYTES = "trtllm_kv_transfer_bytes"
341+
# KV cache transfer speed per request in GB/s
342+
KV_TRANSFER_SPEED_GB_S = "trtllm_kv_transfer_speed_gb_s"
343+
344+
321345
class work_handler:
322346
"""Work handler Prometheus metric names"""
323347

lib/kv-router/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@ repository.workspace = true
1212

1313
[features]
1414
default = []
15-
metrics = []
15+
metrics = ["dep:dynamo-runtime"]
1616
bench = ["dep:clap", "dep:indicatif", "dep:serde_json", "dep:plotters"]
1717
standalone-indexer = ["metrics", "dep:axum", "dep:bytes", "dep:zeromq", "dep:serde_json", "dep:reqwest"]
1818
indexer-bin = ["standalone-indexer", "dep:clap", "dep:tracing-subscriber"]
1919
test-endpoints = ["indexer-bin"]
2020

2121
[dependencies]
2222
# repo
23-
dynamo-runtime = { workspace = true }
23+
dynamo-runtime = { workspace = true, optional = true }
2424
dynamo-tokens = { workspace = true }
2525

2626
# workspace

0 commit comments

Comments
 (0)