99 "github.com/prometheus/client_golang/prometheus/promauto"
1010
1111 "github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
12+ "github.com/vllm-project/semantic-router/src/semantic-router/pkg/consts"
1213)
1314
1415// Minimal fallback bucket configurations - used only when configuration is completely missing
@@ -147,6 +148,26 @@ var (
147148 []string {"model" },
148149 )
149150
151+ // PromptTokensPerRequest tracks the distribution of prompt tokens per request by model
152+ PromptTokensPerRequest = promauto .NewHistogramVec (
153+ prometheus.HistogramOpts {
154+ Name : "llm_prompt_tokens_per_request" ,
155+ Help : "Distribution of prompt tokens per request by model" ,
156+ Buckets : []float64 {0 , 8 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 , 4096 , 8192 , 16384 },
157+ },
158+ []string {"model" },
159+ )
160+
161+ // CompletionTokensPerRequest tracks the distribution of completion tokens per request by model
162+ CompletionTokensPerRequest = promauto .NewHistogramVec (
163+ prometheus.HistogramOpts {
164+ Name : "llm_completion_tokens_per_request" ,
165+ Help : "Distribution of completion tokens per request by model" ,
166+ Buckets : []float64 {0 , 8 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 , 4096 , 8192 , 16384 },
167+ },
168+ []string {"model" },
169+ )
170+
150171 // ModelRoutingModifications tracks when a model is changed from one to another
151172 ModelRoutingModifications = promauto .NewCounterVec (
152173 prometheus.CounterOpts {
@@ -258,11 +279,12 @@ var (
258279 []string {"backend" },
259280 )
260281
261- // CategoryClassifications tracks the number of times each category is classified
262- CategoryClassifications = promauto .NewGaugeVec (
263- prometheus.GaugeOpts {
264- Name : "llm_category_classifications_total" ,
265- Help : "The total number of times each category is classified" ,
282+ // CategoryClassificationsCount is an alias with a name preferred by the issue request.
283+ // It mirrors CategoryClassifications and is incremented alongside it for compatibility.
284+ CategoryClassificationsCount = promauto .NewCounterVec (
285+ prometheus.CounterOpts {
286+ Name : "llm_category_classifications_count" ,
287+ Help : "The total number of times each category is classified (alias metric)" ,
266288 },
267289 []string {"category" },
268290 )
@@ -363,18 +385,18 @@ var (
363385// RecordModelRequest increments the counter for requests to a specific model
364386func RecordModelRequest (model string ) {
365387 if model == "" {
366- model = "unknown"
388+ model = consts . UnknownLabel
367389 }
368390 ModelRequests .WithLabelValues (model ).Inc ()
369391}
370392
371393// RecordRequestError increments request error counters labeled by model and normalized reason
372394func RecordRequestError (model , reason string ) {
373395 if model == "" {
374- model = "unknown"
396+ model = consts . UnknownLabel
375397 }
376398 if reason == "" {
377- reason = "unknown"
399+ reason = consts . UnknownLabel
378400 }
379401 // Normalize a few common variants to canonical reasons
380402 switch reason {
@@ -414,10 +436,10 @@ func RecordModelCost(model string, currency string, amount float64) {
414436// RecordRoutingReasonCode increments the counter for a routing decision reason code and model
415437func RecordRoutingReasonCode (reasonCode , model string ) {
416438 if reasonCode == "" {
417- reasonCode = "unknown"
439+ reasonCode = consts . UnknownLabel
418440 }
419441 if model == "" {
420- model = "unknown"
442+ model = consts . UnknownLabel
421443 }
422444 RoutingReasonCodes .WithLabelValues (reasonCode , model ).Inc ()
423445}
@@ -429,6 +451,13 @@ func RecordModelTokensDetailed(model string, promptTokens, completionTokens floa
429451 ModelTokens .WithLabelValues (model ).Add (totalTokens )
430452 ModelPromptTokens .WithLabelValues (model ).Add (promptTokens )
431453 ModelCompletionTokens .WithLabelValues (model ).Add (completionTokens )
454+
455+ // Also record per-request histograms for visibility into distribution
456+ if model == "" {
457+ model = consts .UnknownLabel
458+ }
459+ PromptTokensPerRequest .WithLabelValues (model ).Observe (promptTokens )
460+ CompletionTokensPerRequest .WithLabelValues (model ).Observe (completionTokens )
432461}
433462
434463// RecordModelCompletionLatency records the latency of a model completion
@@ -442,7 +471,7 @@ func RecordModelTTFT(model string, seconds float64) {
442471 return
443472 }
444473 if model == "" {
445- model = "unknown"
474+ model = consts . UnknownLabel
446475 }
447476 ModelTTFT .WithLabelValues (model ).Observe (seconds )
448477}
@@ -453,7 +482,7 @@ func RecordModelTPOT(model string, secondsPerToken float64) {
453482 return
454483 }
455484 if model == "" {
456- model = "unknown"
485+ model = consts . UnknownLabel
457486 }
458487 ModelTPOT .WithLabelValues (model ).Observe (secondsPerToken )
459488}
@@ -484,9 +513,12 @@ func UpdateCacheEntries(backend string, count int) {
484513 CacheEntriesTotal .WithLabelValues (backend ).Set (float64 (count ))
485514}
486515
487- // RecordCategoryClassification increments the gauge for a specific category classification
516+ // RecordCategoryClassification increments the counter for a specific category classification
488517func RecordCategoryClassification (category string ) {
489- CategoryClassifications .WithLabelValues (category ).Inc ()
518+ if category == "" {
519+ category = consts .UnknownLabel
520+ }
521+ CategoryClassificationsCount .WithLabelValues (category ).Inc ()
490522}
491523
492524// RecordPIIViolation records a PII policy violation for a specific model and PII data type
@@ -544,7 +576,7 @@ func GetBatchSizeRange(size int) string {
544576 }
545577
546578 // Fallback for unexpected cases
547- return "unknown"
579+ return consts . UnknownLabel
548580}
549581
550582// GetBatchSizeRangeFromBuckets generates range labels based on size buckets
@@ -725,7 +757,7 @@ func RecordReasoningDecision(category, model string, enabled bool, effort string
725757// RecordReasoningTemplateUsage records usage of a model-family-specific template parameter
726758func RecordReasoningTemplateUsage (family , param string ) {
727759 if family == "" {
728- family = "unknown"
760+ family = consts . UnknownLabel
729761 }
730762 if param == "" {
731763 param = "none"
@@ -736,7 +768,7 @@ func RecordReasoningTemplateUsage(family, param string) {
736768// RecordReasoningEffortUsage records the effort usage by model family
737769func RecordReasoningEffortUsage (family , effort string ) {
738770 if family == "" {
739- family = "unknown"
771+ family = consts . UnknownLabel
740772 }
741773 if effort == "" {
742774 effort = "unspecified"
@@ -747,7 +779,7 @@ func RecordReasoningEffortUsage(family, effort string) {
747779// RecordEntropyClassificationDecision records an entropy-based classification decision
748780func RecordEntropyClassificationDecision (uncertaintyLevel string , reasoningEnabled bool , decisionReason string , topCategory string ) {
749781 if uncertaintyLevel == "" {
750- uncertaintyLevel = "unknown"
782+ uncertaintyLevel = consts . UnknownLabel
751783 }
752784 if decisionReason == "" {
753785 decisionReason = "unspecified"
@@ -767,7 +799,7 @@ func RecordEntropyClassificationDecision(uncertaintyLevel string, reasoningEnabl
767799// RecordEntropyValue records the entropy value for a classification
768800func RecordEntropyValue (category string , classificationType string , entropyValue float64 ) {
769801 if category == "" {
770- category = "unknown"
802+ category = consts . UnknownLabel
771803 }
772804 if classificationType == "" {
773805 classificationType = "standard"
@@ -779,7 +811,7 @@ func RecordEntropyValue(category string, classificationType string, entropyValue
779811// RecordClassificationConfidence records the confidence score from classification
780812func RecordClassificationConfidence (category string , classificationMethod string , confidence float64 ) {
781813 if category == "" {
782- category = "unknown"
814+ category = consts . UnknownLabel
783815 }
784816 if classificationMethod == "" {
785817 classificationMethod = "traditional"
@@ -796,10 +828,10 @@ func RecordEntropyClassificationLatency(seconds float64) {
796828// RecordProbabilityDistributionQuality records quality checks for probability distributions
797829func RecordProbabilityDistributionQuality (qualityCheck string , status string ) {
798830 if qualityCheck == "" {
799- qualityCheck = "unknown"
831+ qualityCheck = consts . UnknownLabel
800832 }
801833 if status == "" {
802- status = "unknown"
834+ status = consts . UnknownLabel
803835 }
804836
805837 ProbabilityDistributionQuality .WithLabelValues (qualityCheck , status ).Inc ()
@@ -808,7 +840,7 @@ func RecordProbabilityDistributionQuality(qualityCheck string, status string) {
808840// RecordEntropyFallback records when entropy-based routing falls back to traditional methods
809841func RecordEntropyFallback (fallbackReason string , fallbackStrategy string ) {
810842 if fallbackReason == "" {
811- fallbackReason = "unknown"
843+ fallbackReason = consts . UnknownLabel
812844 }
813845 if fallbackStrategy == "" {
814846 fallbackStrategy = "unspecified"
0 commit comments