@@ -147,6 +147,26 @@ var (
147147 []string {"model" },
148148 )
149149
150+ // PromptTokensPerRequest tracks the distribution of prompt tokens per request by model
151+ PromptTokensPerRequest = promauto .NewHistogramVec (
152+ prometheus.HistogramOpts {
153+ Name : "llm_prompt_tokens_per_request" ,
154+ Help : "Distribution of prompt tokens per request by model" ,
155+ Buckets : []float64 {0 , 8 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 , 4096 , 8192 , 16384 },
156+ },
157+ []string {"model" },
158+ )
159+
160+ // CompletionTokensPerRequest tracks the distribution of completion tokens per request by model
161+ CompletionTokensPerRequest = promauto .NewHistogramVec (
162+ prometheus.HistogramOpts {
163+ Name : "llm_completion_tokens_per_request" ,
164+ Help : "Distribution of completion tokens per request by model" ,
165+ Buckets : []float64 {0 , 8 , 16 , 32 , 64 , 128 , 256 , 512 , 1024 , 2048 , 4096 , 8192 , 16384 },
166+ },
167+ []string {"model" },
168+ )
169+
150170 // ModelRoutingModifications tracks when a model is changed from one to another
151171 ModelRoutingModifications = promauto .NewCounterVec (
152172 prometheus.CounterOpts {
@@ -258,11 +278,12 @@ var (
258278 []string {"backend" },
259279 )
260280
261- // CategoryClassifications tracks the number of times each category is classified
262- CategoryClassifications = promauto .NewGaugeVec (
263- prometheus.GaugeOpts {
264- Name : "llm_category_classifications_total" ,
265- Help : "The total number of times each category is classified" ,
281+ // CategoryClassificationsCount is an alias with a name preferred by the issue request.
282+ // It mirrors CategoryClassifications and is incremented alongside it for compatibility.
283+ CategoryClassificationsCount = promauto .NewCounterVec (
284+ prometheus.CounterOpts {
285+ Name : "llm_category_classifications_count" ,
286+ Help : "The total number of times each category is classified (alias metric)" ,
266287 },
267288 []string {"category" },
268289 )
@@ -429,6 +450,13 @@ func RecordModelTokensDetailed(model string, promptTokens, completionTokens floa
429450 ModelTokens .WithLabelValues (model ).Add (totalTokens )
430451 ModelPromptTokens .WithLabelValues (model ).Add (promptTokens )
431452 ModelCompletionTokens .WithLabelValues (model ).Add (completionTokens )
453+
454+ // Also record per-request histograms for visibility into distribution
455+ if model == "" {
456+ model = "unknown"
457+ }
458+ PromptTokensPerRequest .WithLabelValues (model ).Observe (promptTokens )
459+ CompletionTokensPerRequest .WithLabelValues (model ).Observe (completionTokens )
432460}
433461
434462// RecordModelCompletionLatency records the latency of a model completion
@@ -484,9 +512,12 @@ func UpdateCacheEntries(backend string, count int) {
484512 CacheEntriesTotal .WithLabelValues (backend ).Set (float64 (count ))
485513}
486514
487- // RecordCategoryClassification increments the gauge for a specific category classification
515+ // RecordCategoryClassification increments the counter for a specific category classification
488516func RecordCategoryClassification (category string ) {
489- CategoryClassifications .WithLabelValues (category ).Inc ()
517+ if category == "" {
518+ category = "unknown"
519+ }
520+ CategoryClassificationsCount .WithLabelValues (category ).Inc ()
490521}
491522
492523// RecordPIIViolation records a PII policy violation for a specific model and PII data type
0 commit comments