@@ -118,7 +118,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
118118
119119	It ("Should record correct prompt and generation token counts" , func () {
120120		modelName  :=  "testmodel" 
121- 		prompt  :=  strings .Repeat ("hello " , 10 )
121+ 		prompt  :=  strings .Repeat ("hello " , 25 )
122122		maxTokens  :=  25 
123123
124124		ctx  :=  context .TODO ()
@@ -153,10 +153,38 @@ var _ = Describe("Simulator metrics", Ordered, func() {
153153		data , err  :=  io .ReadAll (metricsResp .Body )
154154		Expect (err ).NotTo (HaveOccurred ())
155155		metrics  :=  string (data )
156+ 		// request_prompt_tokens_bucket 
157+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="1"} 0` ))
158+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="2"} 0` ))
159+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="5"} 0` ))
160+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="10"} 0` ))
161+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="20"} 0` ))
156162		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="50"} 1` ))
163+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="100"} 1` ))
164+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="200"} 1` ))
165+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="500"} 1` ))
166+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="1000"} 1` ))
167+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="+Inf"} 1` ))
168+ 		// request_params_max_tokens_bucket 
169+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="1"} 0` ))
170+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="2"} 0` ))
171+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="5"} 0` ))
172+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="10"} 0` ))
173+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="20"} 0` ))
157174		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="50"} 1` ))
175+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="100"} 1` ))
176+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="200"} 1` ))
177+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="500"} 1` ))
178+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="1000"} 1` ))
179+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="+Inf"} 1` ))
180+ 		// request_generation_tokens 
181+ 		// We do not verify the distribution of the number of tokens generated per request, 
182+ 		// as the number of generated tokens is unpredictable in this test. 
183+ 		// Therefore, we only verify the number of requests and the total number of generated tokens, 
184+ 		// and skip the bucket distribution. 
158185		Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_count{model_name="testmodel"} 1` ))
159- 		Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="stop",model_name="testmodel"} 1` ))
186+ 		// request_success_total 
187+ 		Expect (metrics ).To (MatchRegexp (`vllm:request_success_total{finish_reason="(stop|length)",model_name="testmodel"} 1` ))
160188	})
161189
162190	It ("Should send correct lora metrics" , func () {
@@ -518,7 +546,32 @@ var _ = Describe("Simulator metrics", Ordered, func() {
518546			ctx  :=  context .TODO ()
519547			args  :=  []string {"cmd" , "--model" , model , "--mode" , common .ModeRandom ,
520548				"--fake-metrics" ,
521- 				"{\" running-requests\" :10,\" waiting-requests\" :30,\" kv-cache-usage\" :0.4,\" loras\" :[{\" running\" :\" lora4,lora2\" ,\" waiting\" :\" lora3\" ,\" timestamp\" :1257894567},{\" running\" :\" lora4,lora3\" ,\" waiting\" :\" \" ,\" timestamp\" :1257894569}]}" ,
549+ 				`{`  + 
550+ 					`"running-requests":10,`  + 
551+ 					`"waiting-requests":30,`  + 
552+ 					`"kv-cache-usage":0.4,`  + 
553+ 					`"request-success-total":{`  + 
554+ 					`"stop":20,`  + 
555+ 					`"length":0,`  + 
556+ 					`"tool_calls":0,`  + 
557+ 					`"remote_decode":0`  + 
558+ 					`},`  + 
559+ 					`"request-prompt-tokens":[10,20,30],`  + 
560+ 					`"request-generation-tokens":[10,20,30],`  + 
561+ 					`"request-params-max-tokens":[10,20,30],`  + 
562+ 					`"loras":[`  + 
563+ 					`{`  + 
564+ 					`"running":"lora4,lora2",`  + 
565+ 					`"waiting":"lora3",`  + 
566+ 					`"timestamp":1257894567`  + 
567+ 					`},`  + 
568+ 					`{`  + 
569+ 					`"running":"lora4,lora3",`  + 
570+ 					`"waiting":"",`  + 
571+ 					`"timestamp":1257894569`  + 
572+ 					`}`  + 
573+ 					`]`  + 
574+ 					`}` ,
522575			}
523576
524577			client , err  :=  startServerWithArgs (ctx , common .ModeRandom , args , nil )
@@ -536,6 +589,48 @@ var _ = Describe("Simulator metrics", Ordered, func() {
536589			Expect (metrics ).To (ContainSubstring ("vllm:gpu_cache_usage_perc{model_name=\" my_model\" } 0.4" ))
537590			Expect (metrics ).To (ContainSubstring ("vllm:lora_requests_info{max_lora=\" 1\" ,running_lora_adapters=\" lora4,lora2\" ,waiting_lora_adapters=\" lora3\" } 1.257894567e+09" ))
538591			Expect (metrics ).To (ContainSubstring ("vllm:lora_requests_info{max_lora=\" 1\" ,running_lora_adapters=\" lora4,lora3\" ,waiting_lora_adapters=\" \" } 1.257894569e+09" ))
592+ 
593+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="1"} 10` ))
594+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="2"} 30` ))
595+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="5"} 60` ))
596+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="10"} 60` ))
597+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="20"} 60` ))
598+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="50"} 60` ))
599+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="100"} 60` ))
600+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="200"} 60` ))
601+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="500"} 60` ))
602+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="1000"} 60` ))
603+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="+Inf"} 60` ))
604+ 
605+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="1"} 10` ))
606+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="2"} 30` ))
607+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="5"} 60` ))
608+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="10"} 60` ))
609+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="20"} 60` ))
610+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="50"} 60` ))
611+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="100"} 60` ))
612+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="200"} 60` ))
613+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="500"} 60` ))
614+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="1000"} 60` ))
615+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="+Inf"} 60` ))
616+ 
617+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="1"} 10` ))
618+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="2"} 30` ))
619+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="5"} 60` ))
620+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="10"} 60` ))
621+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="20"} 60` ))
622+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="50"} 60` ))
623+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="100"} 60` ))
624+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="200"} 60` ))
625+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="500"} 60` ))
626+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="1000"} 60` ))
627+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="+Inf"} 60` ))
628+ 
629+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="length",model_name="my_model"} 0` ))
630+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="remote_decode",model_name="my_model"} 0` ))
631+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="stop",model_name="my_model"} 20` ))
632+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="tool_calls",model_name="my_model"} 0` ))
633+ 
539634		})
540635	})
541636})
@@ -691,3 +786,95 @@ func TestBuild125Buckets(t *testing.T) {
691786		})
692787	}
693788}
789+ 
// validateSamplesInBuckets checks that samples are distributed over the
// histogram buckets described by boundaries/counts. Bucket i covers the
// half-open interval (boundaries[i-1], boundaries[i]]; the first bucket's
// lower bound is 0, exclusive. It reports a test error for every bucket
// whose observed sample count differs from counts[i], and for any mismatch
// between len(samples) and the sum of counts.
func validateSamplesInBuckets(t *testing.T, samples []float64, boundaries []float64, counts []float64) {
	if len(boundaries) != len(counts) {
		t.Fatalf("boundaries and counts length mismatch: %d vs %d", len(boundaries), len(counts))
	}

	prev := 0.0
	for i, count := range counts {
		lower, upper := prev, boundaries[i]

		// Count samples in (lower, upper]. Zero-count buckets are verified
		// too, so a stray sample is reported against the bucket it actually
		// landed in instead of only surfacing as a shortfall elsewhere.
		valueInBucket := 0
		for _, v := range samples {
			if v > lower && v <= upper {
				valueInBucket++
			}
		}
		if valueInBucket != int(count) {
			t.Errorf("bucket[%d] (%.3f, %.3f]: want %d samples, got %d",
				i, lower, upper, int(count), valueInBucket)
		}
		prev = upper
	}

	totalExpected := 0
	for _, c := range counts {
		totalExpected += int(c)
	}
	if len(samples) != totalExpected {
		t.Errorf("total samples: want %d, got %d", totalExpected, len(samples))
	}
}
825+ 
826+ func  TestGenerateSamplesFromBuckets (t  * testing.T ) {
827+ 	tests  :=  []struct  {
828+ 		name             string 
829+ 		boundaries       []float64 
830+ 		counts           []float64 
831+ 		expectedSamples  int 
832+ 	}{
833+ 		{
834+ 			name :            "normal 4 case" ,
835+ 			boundaries :      []float64 {1.0 , 2.0 , 5.0 , 10.0 },
836+ 			counts :          []float64 {10 , 20 , 30 , 15 },
837+ 			expectedSamples : 75 ,
838+ 		},
839+ 		{
840+ 			name :            "zero count in middle" ,
841+ 			boundaries :      []float64 {1.0 , 2.0 , 5.0 },
842+ 			counts :          []float64 {5 , 0 , 10 },
843+ 			expectedSamples : 15 ,
844+ 		},
845+ 		{
846+ 			name :            "single bucket" ,
847+ 			boundaries :      []float64 {10.0 },
848+ 			counts :          []float64 {5 },
849+ 			expectedSamples : 5 ,
850+ 		},
851+ 		{
852+ 			name :            "all zeros" ,
853+ 			boundaries :      []float64 {1 , 2 , 5 },
854+ 			counts :          []float64 {0 , 0 , 0 },
855+ 			expectedSamples : 0 ,
856+ 		},
857+ 		{
858+ 			name :            "large numbers" ,
859+ 			boundaries :      []float64 {100 , 1000 , 10000 },
860+ 			counts :          []float64 {1000 , 2000 , 3000 },
861+ 			expectedSamples : 6000 ,
862+ 		},
863+ 		{
864+ 			name :            "empty inputs" ,
865+ 			boundaries :      []float64 {},
866+ 			counts :          []float64 {},
867+ 			expectedSamples : 0 ,
868+ 		},
869+ 	}
870+ 
871+ 	for  _ , tt  :=  range  tests  {
872+ 		t .Run (tt .name , func (t  * testing.T ) {
873+ 			samples  :=  generateSamplesFromBuckets (tt .boundaries , tt .counts )
874+ 			if  len (samples ) !=  tt .expectedSamples  {
875+ 				t .Fatalf ("sample count mismatch: want %d, got %d" , tt .expectedSamples , len (samples ))
876+ 			}
877+ 			validateSamplesInBuckets (t , samples , tt .boundaries , tt .counts )
878+ 		})
879+ 	}
880+ }
0 commit comments