Skip to content

Commit f5fb694

Browse files
metrics: Add request-level token histograms
Signed-off-by: Jintao Zhang <[email protected]>
1 parent c101bf0 commit f5fb694

File tree

3 files changed

+52
-12
lines changed

3 files changed

+52
-12
lines changed

deploy/llm-router-dashboard.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@
9494
},
9595
"disableTextWrap": false,
9696
"editorMode": "builder",
97-
"expr": "sum by(category) (llm_category_classifications_total)",
97+
"expr": "sum by(category) (llm_category_classifications_count)",
9898
"fullMetaSearch": false,
9999
"includeNullMetadata": true,
100100
"instant": false,
@@ -440,4 +440,4 @@
440440
"uid": "llm-router-metrics",
441441
"version": 12,
442442
"weekStart": ""
443-
}
443+
}

src/semantic-router/pkg/extproc/metrics_integration_test.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,10 @@ var _ = Describe("Metrics recording", func() {
8181
StartTime: time.Now().Add(-1 * time.Second),
8282
}
8383

84-
before := getHistogramSampleCount("llm_model_tpot_seconds", ctx.RequestModel)
84+
beforeTPOT := getHistogramSampleCount("llm_model_tpot_seconds", ctx.RequestModel)
85+
86+
beforePrompt := getHistogramSampleCount("llm_prompt_tokens_per_request", ctx.RequestModel)
87+
beforeCompletion := getHistogramSampleCount("llm_completion_tokens_per_request", ctx.RequestModel)
8588

8689
openAIResponse := map[string]interface{}{
8790
"id": "chatcmpl-xyz",
@@ -111,7 +114,13 @@ var _ = Describe("Metrics recording", func() {
111114
Expect(err).NotTo(HaveOccurred())
112115
Expect(response.GetResponseBody()).NotTo(BeNil())
113116

114-
after := getHistogramSampleCount("llm_model_tpot_seconds", ctx.RequestModel)
115-
Expect(after).To(BeNumerically(">", before))
117+
afterTPOT := getHistogramSampleCount("llm_model_tpot_seconds", ctx.RequestModel)
118+
Expect(afterTPOT).To(BeNumerically(">", beforeTPOT))
119+
120+
// New per-request token histograms should also be recorded
121+
afterPrompt := getHistogramSampleCount("llm_prompt_tokens_per_request", ctx.RequestModel)
122+
afterCompletion := getHistogramSampleCount("llm_completion_tokens_per_request", ctx.RequestModel)
123+
Expect(afterPrompt).To(BeNumerically(">", beforePrompt))
124+
Expect(afterCompletion).To(BeNumerically(">", beforeCompletion))
116125
})
117126
})

src/semantic-router/pkg/metrics/metrics.go

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,26 @@ var (
147147
[]string{"model"},
148148
)
149149

150+
// PromptTokensPerRequest tracks the distribution of prompt tokens per request by model
151+
PromptTokensPerRequest = promauto.NewHistogramVec(
152+
prometheus.HistogramOpts{
153+
Name: "llm_prompt_tokens_per_request",
154+
Help: "Distribution of prompt tokens per request by model",
155+
Buckets: []float64{0, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384},
156+
},
157+
[]string{"model"},
158+
)
159+
160+
// CompletionTokensPerRequest tracks the distribution of completion tokens per request by model
161+
CompletionTokensPerRequest = promauto.NewHistogramVec(
162+
prometheus.HistogramOpts{
163+
Name: "llm_completion_tokens_per_request",
164+
Help: "Distribution of completion tokens per request by model",
165+
Buckets: []float64{0, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384},
166+
},
167+
[]string{"model"},
168+
)
169+
150170
// ModelRoutingModifications tracks when a model is changed from one to another
151171
ModelRoutingModifications = promauto.NewCounterVec(
152172
prometheus.CounterOpts{
@@ -258,11 +278,12 @@ var (
258278
[]string{"backend"},
259279
)
260280

261-
// CategoryClassifications tracks the number of times each category is classified
262-
CategoryClassifications = promauto.NewGaugeVec(
263-
prometheus.GaugeOpts{
264-
Name: "llm_category_classifications_total",
265-
Help: "The total number of times each category is classified",
281+
// CategoryClassificationsCount is an alias with a name preferred by the issue request.
282+
// It mirrors CategoryClassifications and is incremented alongside it for compatibility.
283+
CategoryClassificationsCount = promauto.NewCounterVec(
284+
prometheus.CounterOpts{
285+
Name: "llm_category_classifications_count",
286+
Help: "The total number of times each category is classified (alias metric)",
266287
},
267288
[]string{"category"},
268289
)
@@ -429,6 +450,13 @@ func RecordModelTokensDetailed(model string, promptTokens, completionTokens floa
429450
ModelTokens.WithLabelValues(model).Add(totalTokens)
430451
ModelPromptTokens.WithLabelValues(model).Add(promptTokens)
431452
ModelCompletionTokens.WithLabelValues(model).Add(completionTokens)
453+
454+
// Also record per-request histograms for visibility into distribution
455+
if model == "" {
456+
model = "unknown"
457+
}
458+
PromptTokensPerRequest.WithLabelValues(model).Observe(promptTokens)
459+
CompletionTokensPerRequest.WithLabelValues(model).Observe(completionTokens)
432460
}
433461

434462
// RecordModelCompletionLatency records the latency of a model completion
@@ -484,9 +512,12 @@ func UpdateCacheEntries(backend string, count int) {
484512
CacheEntriesTotal.WithLabelValues(backend).Set(float64(count))
485513
}
486514

487-
// RecordCategoryClassification increments the gauge for a specific category classification
515+
// RecordCategoryClassification increments the counter for a specific category classification
488516
func RecordCategoryClassification(category string) {
489-
CategoryClassifications.WithLabelValues(category).Inc()
517+
if category == "" {
518+
category = "unknown"
519+
}
520+
CategoryClassificationsCount.WithLabelValues(category).Inc()
490521
}
491522

492523
// RecordPIIViolation records a PII policy violation for a specific model and PII data type

0 commit comments

Comments (0)