@@ -94,6 +94,61 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error {
9494		return  err 
9595	}
9696
97+ 	s .requestPromptTokens  =  prometheus .NewHistogramVec (
98+ 		prometheus.HistogramOpts {
99+ 			Subsystem : "" ,
100+ 			Name :      "vllm:request_prompt_tokens" ,
101+ 			Help :      "Number of input prompt tokens in the request." ,
102+ 			Buckets :   []float64 {1 , 2 , 5 , 10 , 20 , 50 , 100 , 200 , 500 , 1000 , 2000 , 5000 , 10000 },
103+ 		},
104+ 		[]string {vllmapi .PromLabelModelName },
105+ 	)
106+ 	if  err  :=  s .registry .Register (s .requestPromptTokens ); err  !=  nil  {
107+ 		s .logger .Error (err , "Prometheus request_prompt_tokens histogram register failed" )
108+ 		return  err 
109+ 	}
110+ 
111+ 	s .requestGenerationTokens  =  prometheus .NewHistogramVec (
112+ 		prometheus.HistogramOpts {
113+ 			Subsystem : "" ,
114+ 			Name :      "vllm:request_generation_tokens" ,
115+ 			Help :      "Number of generated tokens so far in the request." ,
116+ 			Buckets :   []float64 {1 , 2 , 5 , 10 , 20 , 50 , 100 , 200 , 500 , 1000 , 2000 , 5000 , 10000 },
117+ 		},
118+ 		[]string {vllmapi .PromLabelModelName },
119+ 	)
120+ 	if  err  :=  s .registry .Register (s .requestGenerationTokens ); err  !=  nil  {
121+ 		s .logger .Error (err , "Prometheus request_generation_tokens histogram register failed" )
122+ 		return  err 
123+ 	}
124+ 
125+ 	s .requestParamsMaxTokens  =  prometheus .NewHistogramVec (
126+ 		prometheus.HistogramOpts {
127+ 			Subsystem : "" ,
128+ 			Name :      "vllm:request_params_max_tokens" ,
129+ 			Help :      "The 'max_tokens' parameter from the request." ,
130+ 			Buckets :   []float64 {1 , 2 , 5 , 10 , 20 , 50 , 100 , 200 , 500 , 1000 , 2000 , 5000 , 10000 },
131+ 		},
132+ 		[]string {vllmapi .PromLabelModelName },
133+ 	)
134+ 	if  err  :=  s .registry .Register (s .requestParamsMaxTokens ); err  !=  nil  {
135+ 		s .logger .Error (err , "Prometheus request_params_max_tokens histogram register failed" )
136+ 		return  err 
137+ 	}
138+ 
139+ 	s .requestSuccessTotal  =  prometheus .NewCounterVec (
140+ 		prometheus.CounterOpts {
141+ 			Subsystem : "" ,
142+ 			Name :      "vllm:request_success_total" ,
143+ 			Help :      "Total number of successful inference requests." ,
144+ 		},
145+ 		[]string {vllmapi .PromLabelModelName , vllmapi .PromLabelFinishReason },
146+ 	)
147+ 	if  err  :=  s .registry .Register (s .requestSuccessTotal ); err  !=  nil  {
148+ 		s .logger .Error (err , "Prometheus request_success_total counter register failed" )
149+ 		return  err 
150+ 	}
151+ 
97152	s .setInitialPrometheusMetrics ()
98153
99154	return  nil 
@@ -282,3 +337,15 @@ func (s *VllmSimulator) decrementLoraRefCount(lora string, theMap *sync.Map) {
282337		s .logger .Error (nil , "Zero model reference" , "model" , lora )
283338	}
284339}
340+ 
341+ // recordRequestMetricsOnSuccess records metrics for a successfully completed request 
342+ func  (s  * VllmSimulator ) recordRequestMetricsOnSuccess (promptTokens ,
343+ 	generationTokens  int , maxTokens  * int64 , finishReason  string ) {
344+ 	modelName  :=  s .getDisplayedModelName (s .config .Model )
345+ 	s .requestPromptTokens .WithLabelValues (modelName ).Observe (float64 (promptTokens ))
346+ 	s .requestGenerationTokens .WithLabelValues (modelName ).Observe (float64 (generationTokens ))
347+ 	if  maxTokens  !=  nil  {
348+ 		s .requestParamsMaxTokens .WithLabelValues (modelName ).Observe (float64 (* maxTokens ))
349+ 	}
350+ 	s .requestSuccessTotal .WithLabelValues (modelName , finishReason ).Inc ()
351+ }
0 commit comments