Skip to content

Commit 9f41254

Browse files
metrics: Add request-level token histograms (#157)
* metrics: Add request-level token histograms

  Signed-off-by: Jintao Zhang <[email protected]>

* add unknown const

  Signed-off-by: Jintao Zhang <[email protected]>

---------

Signed-off-by: Jintao Zhang <[email protected]>
1 parent c101bf0 commit 9f41254

File tree

5 files changed

+77
-30
lines changed

5 files changed

+77
-30
lines changed

deploy/llm-router-dashboard.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@
9494
},
9595
"disableTextWrap": false,
9696
"editorMode": "builder",
97-
"expr": "sum by(category) (llm_category_classifications_total)",
97+
"expr": "sum by(category) (llm_category_classifications_count)",
9898
"fullMetaSearch": false,
9999
"includeNullMetadata": true,
100100
"instant": false,
@@ -440,4 +440,4 @@
440440
"uid": "llm-router-metrics",
441441
"version": 12,
442442
"weekStart": ""
443-
}
443+
}
src/semantic-router/pkg/consts/ (new file; file name not shown in this capture)

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
package consts
2+
3+
// UnknownLabel is a canonical fallback label value used across the codebase
4+
// when a more specific value (e.g., model, category, reason) is not available.
5+
const UnknownLabel = "unknown"

src/semantic-router/pkg/extproc/metrics_integration_test.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,10 @@ var _ = Describe("Metrics recording", func() {
8181
StartTime: time.Now().Add(-1 * time.Second),
8282
}
8383

84-
before := getHistogramSampleCount("llm_model_tpot_seconds", ctx.RequestModel)
84+
beforeTPOT := getHistogramSampleCount("llm_model_tpot_seconds", ctx.RequestModel)
85+
86+
beforePrompt := getHistogramSampleCount("llm_prompt_tokens_per_request", ctx.RequestModel)
87+
beforeCompletion := getHistogramSampleCount("llm_completion_tokens_per_request", ctx.RequestModel)
8588

8689
openAIResponse := map[string]interface{}{
8790
"id": "chatcmpl-xyz",
@@ -111,7 +114,13 @@ var _ = Describe("Metrics recording", func() {
111114
Expect(err).NotTo(HaveOccurred())
112115
Expect(response.GetResponseBody()).NotTo(BeNil())
113116

114-
after := getHistogramSampleCount("llm_model_tpot_seconds", ctx.RequestModel)
115-
Expect(after).To(BeNumerically(">", before))
117+
afterTPOT := getHistogramSampleCount("llm_model_tpot_seconds", ctx.RequestModel)
118+
Expect(afterTPOT).To(BeNumerically(">", beforeTPOT))
119+
120+
// New per-request token histograms should also be recorded
121+
afterPrompt := getHistogramSampleCount("llm_prompt_tokens_per_request", ctx.RequestModel)
122+
afterCompletion := getHistogramSampleCount("llm_completion_tokens_per_request", ctx.RequestModel)
123+
Expect(afterPrompt).To(BeNumerically(">", beforePrompt))
124+
Expect(afterCompletion).To(BeNumerically(">", beforeCompletion))
116125
})
117126
})

src/semantic-router/pkg/extproc/reason_mode_selector.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"strings"
77

88
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
9+
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/consts"
910
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
1011
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability"
1112
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/entropy"
@@ -132,7 +133,7 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled
132133
}
133134

134135
// Determine model for kwargs and logging
135-
model := "unknown"
136+
model := consts.UnknownLabel
136137
if modelValue, ok := requestMap["model"]; ok {
137138
if modelStr, ok := modelValue.(string); ok {
138139
model = modelStr
@@ -191,7 +192,7 @@ func (r *OpenAIRouter) setReasoningModeToRequestBody(requestBody []byte, enabled
191192
// Record metrics for template usage and effort when enabled
192193
if enabled {
193194
familyConfig := r.getModelReasoningFamily(model)
194-
modelFamily := "unknown"
195+
modelFamily := consts.UnknownLabel
195196
templateParam := "reasoning_effort" // default fallback
196197

197198
if familyConfig != nil {

src/semantic-router/pkg/metrics/metrics.go

Lines changed: 55 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"github.com/prometheus/client_golang/prometheus/promauto"
1010

1111
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
12+
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/consts"
1213
)
1314

1415
// Minimal fallback bucket configurations - used only when configuration is completely missing
@@ -147,6 +148,26 @@ var (
147148
[]string{"model"},
148149
)
149150

151+
// PromptTokensPerRequest tracks the distribution of prompt tokens per request by model
152+
PromptTokensPerRequest = promauto.NewHistogramVec(
153+
prometheus.HistogramOpts{
154+
Name: "llm_prompt_tokens_per_request",
155+
Help: "Distribution of prompt tokens per request by model",
156+
Buckets: []float64{0, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384},
157+
},
158+
[]string{"model"},
159+
)
160+
161+
// CompletionTokensPerRequest tracks the distribution of completion tokens per request by model
162+
CompletionTokensPerRequest = promauto.NewHistogramVec(
163+
prometheus.HistogramOpts{
164+
Name: "llm_completion_tokens_per_request",
165+
Help: "Distribution of completion tokens per request by model",
166+
Buckets: []float64{0, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384},
167+
},
168+
[]string{"model"},
169+
)
170+
150171
// ModelRoutingModifications tracks when a model is changed from one to another
151172
ModelRoutingModifications = promauto.NewCounterVec(
152173
prometheus.CounterOpts{
@@ -258,11 +279,12 @@ var (
258279
[]string{"backend"},
259280
)
260281

261-
// CategoryClassifications tracks the number of times each category is classified
262-
CategoryClassifications = promauto.NewGaugeVec(
263-
prometheus.GaugeOpts{
264-
Name: "llm_category_classifications_total",
265-
Help: "The total number of times each category is classified",
282+
// CategoryClassificationsCount tracks the number of times each category is classified.
283+
// It is a counter that replaces the former llm_category_classifications_total gauge.
284+
CategoryClassificationsCount = promauto.NewCounterVec(
285+
prometheus.CounterOpts{
286+
Name: "llm_category_classifications_count",
287+
Help: "The total number of times each category is classified (alias metric)",
266288
},
267289
[]string{"category"},
268290
)
@@ -363,18 +385,18 @@ var (
363385
// RecordModelRequest increments the counter for requests to a specific model
364386
func RecordModelRequest(model string) {
365387
if model == "" {
366-
model = "unknown"
388+
model = consts.UnknownLabel
367389
}
368390
ModelRequests.WithLabelValues(model).Inc()
369391
}
370392

371393
// RecordRequestError increments request error counters labeled by model and normalized reason
372394
func RecordRequestError(model, reason string) {
373395
if model == "" {
374-
model = "unknown"
396+
model = consts.UnknownLabel
375397
}
376398
if reason == "" {
377-
reason = "unknown"
399+
reason = consts.UnknownLabel
378400
}
379401
// Normalize a few common variants to canonical reasons
380402
switch reason {
@@ -414,10 +436,10 @@ func RecordModelCost(model string, currency string, amount float64) {
414436
// RecordRoutingReasonCode increments the counter for a routing decision reason code and model
415437
func RecordRoutingReasonCode(reasonCode, model string) {
416438
if reasonCode == "" {
417-
reasonCode = "unknown"
439+
reasonCode = consts.UnknownLabel
418440
}
419441
if model == "" {
420-
model = "unknown"
442+
model = consts.UnknownLabel
421443
}
422444
RoutingReasonCodes.WithLabelValues(reasonCode, model).Inc()
423445
}
@@ -429,6 +451,13 @@ func RecordModelTokensDetailed(model string, promptTokens, completionTokens floa
429451
ModelTokens.WithLabelValues(model).Add(totalTokens)
430452
ModelPromptTokens.WithLabelValues(model).Add(promptTokens)
431453
ModelCompletionTokens.WithLabelValues(model).Add(completionTokens)
454+
455+
// Also record per-request histograms for visibility into distribution
456+
if model == "" {
457+
model = consts.UnknownLabel
458+
}
459+
PromptTokensPerRequest.WithLabelValues(model).Observe(promptTokens)
460+
CompletionTokensPerRequest.WithLabelValues(model).Observe(completionTokens)
432461
}
433462

434463
// RecordModelCompletionLatency records the latency of a model completion
@@ -442,7 +471,7 @@ func RecordModelTTFT(model string, seconds float64) {
442471
return
443472
}
444473
if model == "" {
445-
model = "unknown"
474+
model = consts.UnknownLabel
446475
}
447476
ModelTTFT.WithLabelValues(model).Observe(seconds)
448477
}
@@ -453,7 +482,7 @@ func RecordModelTPOT(model string, secondsPerToken float64) {
453482
return
454483
}
455484
if model == "" {
456-
model = "unknown"
485+
model = consts.UnknownLabel
457486
}
458487
ModelTPOT.WithLabelValues(model).Observe(secondsPerToken)
459488
}
@@ -484,9 +513,12 @@ func UpdateCacheEntries(backend string, count int) {
484513
CacheEntriesTotal.WithLabelValues(backend).Set(float64(count))
485514
}
486515

487-
// RecordCategoryClassification increments the gauge for a specific category classification
516+
// RecordCategoryClassification increments the counter for a specific category classification
488517
func RecordCategoryClassification(category string) {
489-
CategoryClassifications.WithLabelValues(category).Inc()
518+
if category == "" {
519+
category = consts.UnknownLabel
520+
}
521+
CategoryClassificationsCount.WithLabelValues(category).Inc()
490522
}
491523

492524
// RecordPIIViolation records a PII policy violation for a specific model and PII data type
@@ -544,7 +576,7 @@ func GetBatchSizeRange(size int) string {
544576
}
545577

546578
// Fallback for unexpected cases
547-
return "unknown"
579+
return consts.UnknownLabel
548580
}
549581

550582
// GetBatchSizeRangeFromBuckets generates range labels based on size buckets
@@ -725,7 +757,7 @@ func RecordReasoningDecision(category, model string, enabled bool, effort string
725757
// RecordReasoningTemplateUsage records usage of a model-family-specific template parameter
726758
func RecordReasoningTemplateUsage(family, param string) {
727759
if family == "" {
728-
family = "unknown"
760+
family = consts.UnknownLabel
729761
}
730762
if param == "" {
731763
param = "none"
@@ -736,7 +768,7 @@ func RecordReasoningTemplateUsage(family, param string) {
736768
// RecordReasoningEffortUsage records the effort usage by model family
737769
func RecordReasoningEffortUsage(family, effort string) {
738770
if family == "" {
739-
family = "unknown"
771+
family = consts.UnknownLabel
740772
}
741773
if effort == "" {
742774
effort = "unspecified"
@@ -747,7 +779,7 @@ func RecordReasoningEffortUsage(family, effort string) {
747779
// RecordEntropyClassificationDecision records an entropy-based classification decision
748780
func RecordEntropyClassificationDecision(uncertaintyLevel string, reasoningEnabled bool, decisionReason string, topCategory string) {
749781
if uncertaintyLevel == "" {
750-
uncertaintyLevel = "unknown"
782+
uncertaintyLevel = consts.UnknownLabel
751783
}
752784
if decisionReason == "" {
753785
decisionReason = "unspecified"
@@ -767,7 +799,7 @@ func RecordEntropyClassificationDecision(uncertaintyLevel string, reasoningEnabl
767799
// RecordEntropyValue records the entropy value for a classification
768800
func RecordEntropyValue(category string, classificationType string, entropyValue float64) {
769801
if category == "" {
770-
category = "unknown"
802+
category = consts.UnknownLabel
771803
}
772804
if classificationType == "" {
773805
classificationType = "standard"
@@ -779,7 +811,7 @@ func RecordEntropyValue(category string, classificationType string, entropyValue
779811
// RecordClassificationConfidence records the confidence score from classification
780812
func RecordClassificationConfidence(category string, classificationMethod string, confidence float64) {
781813
if category == "" {
782-
category = "unknown"
814+
category = consts.UnknownLabel
783815
}
784816
if classificationMethod == "" {
785817
classificationMethod = "traditional"
@@ -796,10 +828,10 @@ func RecordEntropyClassificationLatency(seconds float64) {
796828
// RecordProbabilityDistributionQuality records quality checks for probability distributions
797829
func RecordProbabilityDistributionQuality(qualityCheck string, status string) {
798830
if qualityCheck == "" {
799-
qualityCheck = "unknown"
831+
qualityCheck = consts.UnknownLabel
800832
}
801833
if status == "" {
802-
status = "unknown"
834+
status = consts.UnknownLabel
803835
}
804836

805837
ProbabilityDistributionQuality.WithLabelValues(qualityCheck, status).Inc()
@@ -808,7 +840,7 @@ func RecordProbabilityDistributionQuality(qualityCheck string, status string) {
808840
// RecordEntropyFallback records when entropy-based routing falls back to traditional methods
809841
func RecordEntropyFallback(fallbackReason string, fallbackStrategy string) {
810842
if fallbackReason == "" {
811-
fallbackReason = "unknown"
843+
fallbackReason = consts.UnknownLabel
812844
}
813845
if fallbackStrategy == "" {
814846
fallbackStrategy = "unspecified"

0 commit comments

Comments
 (0)