99	"github.com/prometheus/client_golang/prometheus/promauto" 
1010
1111	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config" 
12+ 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/consts" 
1213)
1314
1415// Minimal fallback bucket configurations - used only when configuration is completely missing 
@@ -147,6 +148,26 @@ var (
147148		[]string {"model" },
148149	)
149150
151+ 	// PromptTokensPerRequest tracks the distribution of prompt tokens per request by model 
152+ 	PromptTokensPerRequest  =  promauto .NewHistogramVec (
153+ 		prometheus.HistogramOpts {
154+ 			Name :    "llm_prompt_tokens_per_request" ,
155+ 			Help :    "Distribution of prompt tokens per request by model" ,
156+ 			Buckets : []float64 {0 , 8 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 , 4096 , 8192 , 16384 },
157+ 		},
158+ 		[]string {"model" },
159+ 	)
160+ 
161+ 	// CompletionTokensPerRequest tracks the distribution of completion tokens per request by model 
162+ 	CompletionTokensPerRequest  =  promauto .NewHistogramVec (
163+ 		prometheus.HistogramOpts {
164+ 			Name :    "llm_completion_tokens_per_request" ,
165+ 			Help :    "Distribution of completion tokens per request by model" ,
166+ 			Buckets : []float64 {0 , 8 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 , 4096 , 8192 , 16384 },
167+ 		},
168+ 		[]string {"model" },
169+ 	)
170+ 
150171	// ModelRoutingModifications tracks when a model is changed from one to another 
151172	ModelRoutingModifications  =  promauto .NewCounterVec (
152173		prometheus.CounterOpts {
@@ -258,11 +279,12 @@ var (
258279		[]string {"backend" },
259280	)
260281
261- 	// CategoryClassifications tracks the number of times each category is classified 
262- 	CategoryClassifications  =  promauto .NewGaugeVec (
263- 		prometheus.GaugeOpts {
264- 			Name : "llm_category_classifications_total" ,
265- 			Help : "The total number of times each category is classified" ,
282+ 	// CategoryClassificationsCount counts the number of times each category is classified. 
283+ 	// It replaces the removed CategoryClassifications gauge and is the only metric incremented by RecordCategoryClassification. 
284+ 	CategoryClassificationsCount  =  promauto .NewCounterVec (
285+ 		prometheus.CounterOpts {
286+ 			Name : "llm_category_classifications_count" ,
287+ 			Help : "The total number of times each category is classified (alias metric)" ,
266288		},
267289		[]string {"category" },
268290	)
@@ -363,18 +385,18 @@ var (
363385// RecordModelRequest increments the counter for requests to a specific model 
364386func  RecordModelRequest (model  string ) {
365387	if  model  ==  ""  {
366- 		model  =  "unknown" 
388+ 		model  =  consts . UnknownLabel 
367389	}
368390	ModelRequests .WithLabelValues (model ).Inc ()
369391}
370392
371393// RecordRequestError increments request error counters labeled by model and normalized reason 
372394func  RecordRequestError (model , reason  string ) {
373395	if  model  ==  ""  {
374- 		model  =  "unknown" 
396+ 		model  =  consts . UnknownLabel 
375397	}
376398	if  reason  ==  ""  {
377- 		reason  =  "unknown" 
399+ 		reason  =  consts . UnknownLabel 
378400	}
379401	// Normalize a few common variants to canonical reasons 
380402	switch  reason  {
@@ -414,10 +436,10 @@ func RecordModelCost(model string, currency string, amount float64) {
414436// RecordRoutingReasonCode increments the counter for a routing decision reason code and model 
415437func  RecordRoutingReasonCode (reasonCode , model  string ) {
416438	if  reasonCode  ==  ""  {
417- 		reasonCode  =  "unknown" 
439+ 		reasonCode  =  consts . UnknownLabel 
418440	}
419441	if  model  ==  ""  {
420- 		model  =  "unknown" 
442+ 		model  =  consts . UnknownLabel 
421443	}
422444	RoutingReasonCodes .WithLabelValues (reasonCode , model ).Inc ()
423445}
@@ -429,6 +451,13 @@ func RecordModelTokensDetailed(model string, promptTokens, completionTokens floa
429451	ModelTokens .WithLabelValues (model ).Add (totalTokens )
430452	ModelPromptTokens .WithLabelValues (model ).Add (promptTokens )
431453	ModelCompletionTokens .WithLabelValues (model ).Add (completionTokens )
454+ 
455+ 	// Also record per-request histograms for visibility into distribution 
456+ 	if  model  ==  ""  {
457+ 		model  =  consts .UnknownLabel 
458+ 	}
459+ 	PromptTokensPerRequest .WithLabelValues (model ).Observe (promptTokens )
460+ 	CompletionTokensPerRequest .WithLabelValues (model ).Observe (completionTokens )
432461}
433462
434463// RecordModelCompletionLatency records the latency of a model completion 
@@ -442,7 +471,7 @@ func RecordModelTTFT(model string, seconds float64) {
442471		return 
443472	}
444473	if  model  ==  ""  {
445- 		model  =  "unknown" 
474+ 		model  =  consts . UnknownLabel 
446475	}
447476	ModelTTFT .WithLabelValues (model ).Observe (seconds )
448477}
@@ -453,7 +482,7 @@ func RecordModelTPOT(model string, secondsPerToken float64) {
453482		return 
454483	}
455484	if  model  ==  ""  {
456- 		model  =  "unknown" 
485+ 		model  =  consts . UnknownLabel 
457486	}
458487	ModelTPOT .WithLabelValues (model ).Observe (secondsPerToken )
459488}
@@ -484,9 +513,12 @@ func UpdateCacheEntries(backend string, count int) {
484513	CacheEntriesTotal .WithLabelValues (backend ).Set (float64 (count ))
485514}
486515
487- // RecordCategoryClassification increments the gauge  for a specific category classification 
516+ // RecordCategoryClassification increments the counter  for a specific category classification 
488517func  RecordCategoryClassification (category  string ) {
489- 	CategoryClassifications .WithLabelValues (category ).Inc ()
518+ 	if  category  ==  ""  {
519+ 		category  =  consts .UnknownLabel 
520+ 	}
521+ 	CategoryClassificationsCount .WithLabelValues (category ).Inc ()
490522}
491523
492524// RecordPIIViolation records a PII policy violation for a specific model and PII data type 
@@ -544,7 +576,7 @@ func GetBatchSizeRange(size int) string {
544576	}
545577
546578	// Fallback for unexpected cases 
547- 	return  "unknown" 
579+ 	return  consts . UnknownLabel 
548580}
549581
550582// GetBatchSizeRangeFromBuckets generates range labels based on size buckets 
@@ -725,7 +757,7 @@ func RecordReasoningDecision(category, model string, enabled bool, effort string
725757// RecordReasoningTemplateUsage records usage of a model-family-specific template parameter 
726758func  RecordReasoningTemplateUsage (family , param  string ) {
727759	if  family  ==  ""  {
728- 		family  =  "unknown" 
760+ 		family  =  consts . UnknownLabel 
729761	}
730762	if  param  ==  ""  {
731763		param  =  "none" 
@@ -736,7 +768,7 @@ func RecordReasoningTemplateUsage(family, param string) {
736768// RecordReasoningEffortUsage records the effort usage by model family 
737769func  RecordReasoningEffortUsage (family , effort  string ) {
738770	if  family  ==  ""  {
739- 		family  =  "unknown" 
771+ 		family  =  consts . UnknownLabel 
740772	}
741773	if  effort  ==  ""  {
742774		effort  =  "unspecified" 
@@ -747,7 +779,7 @@ func RecordReasoningEffortUsage(family, effort string) {
747779// RecordEntropyClassificationDecision records an entropy-based classification decision 
748780func  RecordEntropyClassificationDecision (uncertaintyLevel  string , reasoningEnabled  bool , decisionReason  string , topCategory  string ) {
749781	if  uncertaintyLevel  ==  ""  {
750- 		uncertaintyLevel  =  "unknown" 
782+ 		uncertaintyLevel  =  consts . UnknownLabel 
751783	}
752784	if  decisionReason  ==  ""  {
753785		decisionReason  =  "unspecified" 
@@ -767,7 +799,7 @@ func RecordEntropyClassificationDecision(uncertaintyLevel string, reasoningEnabl
767799// RecordEntropyValue records the entropy value for a classification 
768800func  RecordEntropyValue (category  string , classificationType  string , entropyValue  float64 ) {
769801	if  category  ==  ""  {
770- 		category  =  "unknown" 
802+ 		category  =  consts . UnknownLabel 
771803	}
772804	if  classificationType  ==  ""  {
773805		classificationType  =  "standard" 
@@ -779,7 +811,7 @@ func RecordEntropyValue(category string, classificationType string, entropyValue
779811// RecordClassificationConfidence records the confidence score from classification 
780812func  RecordClassificationConfidence (category  string , classificationMethod  string , confidence  float64 ) {
781813	if  category  ==  ""  {
782- 		category  =  "unknown" 
814+ 		category  =  consts . UnknownLabel 
783815	}
784816	if  classificationMethod  ==  ""  {
785817		classificationMethod  =  "traditional" 
@@ -796,10 +828,10 @@ func RecordEntropyClassificationLatency(seconds float64) {
796828// RecordProbabilityDistributionQuality records quality checks for probability distributions 
797829func  RecordProbabilityDistributionQuality (qualityCheck  string , status  string ) {
798830	if  qualityCheck  ==  ""  {
799- 		qualityCheck  =  "unknown" 
831+ 		qualityCheck  =  consts . UnknownLabel 
800832	}
801833	if  status  ==  ""  {
802- 		status  =  "unknown" 
834+ 		status  =  consts . UnknownLabel 
803835	}
804836
805837	ProbabilityDistributionQuality .WithLabelValues (qualityCheck , status ).Inc ()
@@ -808,7 +840,7 @@ func RecordProbabilityDistributionQuality(qualityCheck string, status string) {
808840// RecordEntropyFallback records when entropy-based routing falls back to traditional methods 
809841func  RecordEntropyFallback (fallbackReason  string , fallbackStrategy  string ) {
810842	if  fallbackReason  ==  ""  {
811- 		fallbackReason  =  "unknown" 
843+ 		fallbackReason  =  consts . UnknownLabel 
812844	}
813845	if  fallbackStrategy  ==  ""  {
814846		fallbackStrategy  =  "unspecified" 
0 commit comments