@@ -38,6 +38,11 @@ const (
3838 reqInferenceTimeMetricName = "vllm:request_inference_time_seconds"
3939 prefillTimeMetricName = "vllm:request_prefill_time_seconds"
4040 decodeTimeMetricName = "vllm:request_decode_time_seconds"
41+ ttftMetricName = "vllm:time_to_first_token_seconds"
42+ tpotMetricName = "vllm:time_per_output_token_seconds"
43+ generationTokensMetricName = "vllm:request_generation_tokens"
44+ paramMaxTokensMetricName = "vllm:request_params_max_tokens"
45+ promptTokensMetricName = "vllm:request_prompt_tokens"
4146)
4247
4348// createAndRegisterPrometheus creates and registers the Prometheus metrics used by the vLLM simulator.
@@ -92,7 +97,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error {
9297 s .metrics .ttft = prometheus .NewHistogramVec (
9398 prometheus.HistogramOpts {
9499 Subsystem : "" ,
95- Name : "vllm:time_to_first_token_seconds" ,
100+ Name : ttftMetricName ,
96101 Help : "Histogram of time to first token in seconds." ,
97102 Buckets : common .TTFTBucketsBoundaries ,
98103 },
@@ -107,7 +112,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error {
107112 s .metrics .tpot = prometheus .NewHistogramVec (
108113 prometheus.HistogramOpts {
109114 Subsystem : "" ,
110- Name : "vllm:time_per_output_token_seconds" ,
115+ Name : tpotMetricName ,
111116 Help : "Histogram of time per output token in seconds." ,
112117 Buckets : common .TPOTBucketsBoundaries ,
113118 },
@@ -211,7 +216,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error {
211216 s .metrics .requestPromptTokens = prometheus .NewHistogramVec (
212217 prometheus.HistogramOpts {
213218 Subsystem : "" ,
214- Name : "vllm:request_prompt_tokens" ,
219+ Name : promptTokensMetricName ,
215220 Help : "Number of prefill tokens processed." ,
216221 Buckets : build125Buckets (s .config .MaxModelLen ),
217222 },
@@ -225,7 +230,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error {
225230 s .metrics .requestGenerationTokens = prometheus .NewHistogramVec (
226231 prometheus.HistogramOpts {
227232 Subsystem : "" ,
228- Name : "vllm:request_generation_tokens" ,
233+ Name : generationTokensMetricName ,
229234 Help : "Number of generation tokens processed." ,
230235 Buckets : build125Buckets (s .config .MaxModelLen ),
231236 },
@@ -239,7 +244,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error {
239244 s .metrics .requestParamsMaxTokens = prometheus .NewHistogramVec (
240245 prometheus.HistogramOpts {
241246 Subsystem : "" ,
242- Name : "vllm:request_params_max_tokens" ,
247+ Name : paramMaxTokensMetricName ,
243248 Help : "Histogram of the max_tokens request parameter." ,
244249 Buckets : build125Buckets (s .config .MaxModelLen ),
245250 },
0 commit comments