9
9
"github.com/prometheus/client_golang/prometheus/promauto"
10
10
11
11
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
12
+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/consts"
12
13
)
13
14
14
15
// Minimal fallback bucket configurations - used only when configuration is completely missing
@@ -147,6 +148,26 @@ var (
147
148
[]string {"model" },
148
149
)
149
150
151
+ // PromptTokensPerRequest is a histogram of prompt tokens per request, labeled by model; bucket bounds are 0, then powers of two from 8 to 16384.
152
+ PromptTokensPerRequest = promauto .NewHistogramVec (
153
+ prometheus.HistogramOpts {
154
+ Name : "llm_prompt_tokens_per_request" ,
155
+ Help : "Distribution of prompt tokens per request by model" ,
156
+ Buckets : []float64 {0 , 8 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 , 4096 , 8192 , 16384 },
157
+ },
158
+ []string {"model" },
159
+ )
160
+
161
+ // CompletionTokensPerRequest is a histogram of completion tokens per request, labeled by model; bucket bounds are 0, then powers of two from 8 to 16384.
162
+ CompletionTokensPerRequest = promauto .NewHistogramVec (
163
+ prometheus.HistogramOpts {
164
+ Name : "llm_completion_tokens_per_request" ,
165
+ Help : "Distribution of completion tokens per request by model" ,
166
+ Buckets : []float64 {0 , 8 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 , 4096 , 8192 , 16384 },
167
+ },
168
+ []string {"model" },
169
+ )
170
+
150
171
// ModelRoutingModifications tracks when a model is changed from one to another
151
172
ModelRoutingModifications = promauto .NewCounterVec (
152
173
prometheus.CounterOpts {
@@ -258,11 +279,12 @@ var (
258
279
[]string {"backend" },
259
280
)
260
281
261
- // CategoryClassifications tracks the number of times each category is classified
262
- CategoryClassifications = promauto .NewGaugeVec (
263
- prometheus.GaugeOpts {
264
- Name : "llm_category_classifications_total" ,
265
- Help : "The total number of times each category is classified" ,
282
+ // CategoryClassificationsCount counts classifications per category; in this change it takes the place of the CategoryClassifications gauge.
283
+ // NOTE(review): Prometheus naming guidance uses a _total suffix for counters and _count for histogram/summary series - confirm this name is intended.
284
+ CategoryClassificationsCount = promauto .NewCounterVec (
285
+ prometheus.CounterOpts {
286
+ Name : "llm_category_classifications_count" ,
287
+ Help : "The total number of times each category is classified (alias metric)" ,
266
288
},
267
289
[]string {"category" },
268
290
)
@@ -363,18 +385,18 @@ var (
363
385
// RecordModelRequest increments the ModelRequests counter for the given model.
// An empty model name is replaced by consts.UnknownLabel (previously the
// literal "unknown") before the labeled counter is looked up.
364
386
func RecordModelRequest (model string ) {
365
387
if model == "" {
366
- model = "unknown"
388
+ model = consts . UnknownLabel
367
389
}
368
390
ModelRequests .WithLabelValues (model ).Inc ()
369
391
}
370
392
371
393
// RecordRequestError increments request error counters labeled by model and normalized reason
372
394
func RecordRequestError (model , reason string ) {
373
395
if model == "" {
374
- model = "unknown"
396
+ model = consts . UnknownLabel
375
397
}
376
398
if reason == "" {
377
- reason = "unknown"
399
+ reason = consts . UnknownLabel
378
400
}
379
401
// Normalize a few common variants to canonical reasons
380
402
switch reason {
@@ -414,10 +436,10 @@ func RecordModelCost(model string, currency string, amount float64) {
414
436
// RecordRoutingReasonCode increments the RoutingReasonCodes counter for a
// routing decision reason code and model. An empty reasonCode or model is
// replaced by consts.UnknownLabel (previously the literal "unknown"); label
// values are passed in the order (reasonCode, model).
415
437
func RecordRoutingReasonCode (reasonCode , model string ) {
416
438
if reasonCode == "" {
417
- reasonCode = "unknown"
439
+ reasonCode = consts . UnknownLabel
418
440
}
419
441
if model == "" {
420
- model = "unknown"
442
+ model = consts . UnknownLabel
421
443
}
422
444
RoutingReasonCodes .WithLabelValues (reasonCode , model ).Inc ()
423
445
}
@@ -429,6 +451,13 @@ func RecordModelTokensDetailed(model string, promptTokens, completionTokens floa
429
451
ModelTokens .WithLabelValues (model ).Add (totalTokens )
430
452
ModelPromptTokens .WithLabelValues (model ).Add (promptTokens )
431
453
ModelCompletionTokens .WithLabelValues (model ).Add (completionTokens )
454
+
455
+ // Also record per-request histograms for visibility into distribution
456
+ if model == "" {
457
+ model = consts .UnknownLabel
458
+ }
459
+ PromptTokensPerRequest .WithLabelValues (model ).Observe (promptTokens )
460
+ CompletionTokensPerRequest .WithLabelValues (model ).Observe (completionTokens )
432
461
}
433
462
434
463
// RecordModelCompletionLatency records the latency of a model completion
@@ -442,7 +471,7 @@ func RecordModelTTFT(model string, seconds float64) {
442
471
return
443
472
}
444
473
if model == "" {
445
- model = "unknown"
474
+ model = consts . UnknownLabel
446
475
}
447
476
ModelTTFT .WithLabelValues (model ).Observe (seconds )
448
477
}
@@ -453,7 +482,7 @@ func RecordModelTPOT(model string, secondsPerToken float64) {
453
482
return
454
483
}
455
484
if model == "" {
456
- model = "unknown"
485
+ model = consts . UnknownLabel
457
486
}
458
487
ModelTPOT .WithLabelValues (model ).Observe (secondsPerToken )
459
488
}
@@ -484,9 +513,12 @@ func UpdateCacheEntries(backend string, count int) {
484
513
CacheEntriesTotal .WithLabelValues (backend ).Set (float64 (count ))
485
514
}
486
515
487
- // RecordCategoryClassification increments the gauge for a specific category classification
516
+ // RecordCategoryClassification increments the CategoryClassificationsCount counter for a category; an empty category is recorded under consts.UnknownLabel.
488
517
func RecordCategoryClassification (category string ) {
489
- CategoryClassifications .WithLabelValues (category ).Inc ()
518
+ if category == "" {
519
+ category = consts .UnknownLabel
520
+ }
521
+ CategoryClassificationsCount .WithLabelValues (category ).Inc ()
490
522
}
491
523
492
524
// RecordPIIViolation records a PII policy violation for a specific model and PII data type
@@ -544,7 +576,7 @@ func GetBatchSizeRange(size int) string {
544
576
}
545
577
546
578
// Fallback for unexpected cases
547
- return "unknown"
579
+ return consts . UnknownLabel
548
580
}
549
581
550
582
// GetBatchSizeRangeFromBuckets generates range labels based on size buckets
@@ -725,7 +757,7 @@ func RecordReasoningDecision(category, model string, enabled bool, effort string
725
757
// RecordReasoningTemplateUsage records usage of a model-family-specific template parameter
726
758
func RecordReasoningTemplateUsage (family , param string ) {
727
759
if family == "" {
728
- family = "unknown"
760
+ family = consts . UnknownLabel
729
761
}
730
762
if param == "" {
731
763
param = "none"
@@ -736,7 +768,7 @@ func RecordReasoningTemplateUsage(family, param string) {
736
768
// RecordReasoningEffortUsage records the effort usage by model family
737
769
func RecordReasoningEffortUsage (family , effort string ) {
738
770
if family == "" {
739
- family = "unknown"
771
+ family = consts . UnknownLabel
740
772
}
741
773
if effort == "" {
742
774
effort = "unspecified"
@@ -747,7 +779,7 @@ func RecordReasoningEffortUsage(family, effort string) {
747
779
// RecordEntropyClassificationDecision records an entropy-based classification decision
748
780
func RecordEntropyClassificationDecision (uncertaintyLevel string , reasoningEnabled bool , decisionReason string , topCategory string ) {
749
781
if uncertaintyLevel == "" {
750
- uncertaintyLevel = "unknown"
782
+ uncertaintyLevel = consts . UnknownLabel
751
783
}
752
784
if decisionReason == "" {
753
785
decisionReason = "unspecified"
@@ -767,7 +799,7 @@ func RecordEntropyClassificationDecision(uncertaintyLevel string, reasoningEnabl
767
799
// RecordEntropyValue records the entropy value for a classification
768
800
func RecordEntropyValue (category string , classificationType string , entropyValue float64 ) {
769
801
if category == "" {
770
- category = "unknown"
802
+ category = consts . UnknownLabel
771
803
}
772
804
if classificationType == "" {
773
805
classificationType = "standard"
@@ -779,7 +811,7 @@ func RecordEntropyValue(category string, classificationType string, entropyValue
779
811
// RecordClassificationConfidence records the confidence score from classification
780
812
func RecordClassificationConfidence (category string , classificationMethod string , confidence float64 ) {
781
813
if category == "" {
782
- category = "unknown"
814
+ category = consts . UnknownLabel
783
815
}
784
816
if classificationMethod == "" {
785
817
classificationMethod = "traditional"
@@ -796,10 +828,10 @@ func RecordEntropyClassificationLatency(seconds float64) {
796
828
// RecordProbabilityDistributionQuality records quality checks for probability distributions
797
829
func RecordProbabilityDistributionQuality (qualityCheck string , status string ) {
798
830
if qualityCheck == "" {
799
- qualityCheck = "unknown"
831
+ qualityCheck = consts . UnknownLabel
800
832
}
801
833
if status == "" {
802
- status = "unknown"
834
+ status = consts . UnknownLabel
803
835
}
804
836
805
837
ProbabilityDistributionQuality .WithLabelValues (qualityCheck , status ).Inc ()
@@ -808,7 +840,7 @@ func RecordProbabilityDistributionQuality(qualityCheck string, status string) {
808
840
// RecordEntropyFallback records when entropy-based routing falls back to traditional methods
809
841
func RecordEntropyFallback (fallbackReason string , fallbackStrategy string ) {
810
842
if fallbackReason == "" {
811
- fallbackReason = "unknown"
843
+ fallbackReason = consts . UnknownLabel
812
844
}
813
845
if fallbackStrategy == "" {
814
846
fallbackStrategy = "unspecified"
0 commit comments