@@ -19,7 +19,9 @@ package llmdinferencesim
1919import  (
2020	"context" 
2121	"errors" 
22+ 	"fmt" 
2223	"io" 
24+ 	"math" 
2325	"net/http" 
2426	"os" 
2527	"reflect" 
@@ -109,7 +111,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
109111
110112	It ("Should record correct prompt and generation token counts" , func () {
111113		modelName  :=  "testmodel" 
112- 		prompt  :=  strings .Repeat ("hello " , 10 )
114+ 		prompt  :=  strings .Repeat ("hello " , 25 )
113115		maxTokens  :=  25 
114116
115117		ctx  :=  context .TODO ()
@@ -144,10 +146,38 @@ var _ = Describe("Simulator metrics", Ordered, func() {
144146		data , err  :=  io .ReadAll (metricsResp .Body )
145147		Expect (err ).NotTo (HaveOccurred ())
146148		metrics  :=  string (data )
149+ 		// request_prompt_tokens_bucket 
150+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="1"} 0` ))
151+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="2"} 0` ))
152+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="5"} 0` ))
153+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="10"} 0` ))
154+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="20"} 0` ))
147155		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="50"} 1` ))
156+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="100"} 1` ))
157+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="200"} 1` ))
158+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="500"} 1` ))
		Expect(metrics).To(ContainSubstring(`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="1000"} 1`))
160+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="testmodel",le="+Inf"} 1` ))
161+ 		// request_params_max_tokens_bucket 
162+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="1"} 0` ))
163+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="2"} 0` ))
164+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="5"} 0` ))
165+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="10"} 0` ))
166+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="20"} 0` ))
148167		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="50"} 1` ))
168+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="100"} 1` ))
169+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="200"} 1` ))
170+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="500"} 1` ))
171+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="1000"} 1` ))
172+ 		Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="testmodel",le="+Inf"} 1` ))
173+ 		// request_generation_tokens 
174+ 		// We do not verify the distribution of the number of tokens generated per request, 
175+ 		// as the number of generated tokens is unpredictable in this test. 
176+ 		// Therefore, we only verify the number of requests and the total number of generated tokens, 
177+ 		// and skip the bucket distribution. 
149178		Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_count{model_name="testmodel"} 1` ))
150- 		Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="stop",model_name="testmodel"} 1` ))
179+ 		// request_success_total 
180+ 		Expect (metrics ).To (MatchRegexp (`vllm:request_success_total{finish_reason="(stop|length)",model_name="testmodel"} 1` ))
151181	})
152182
153183	It ("Should send correct lora metrics" , func () {
@@ -632,7 +662,36 @@ var _ = Describe("Simulator metrics", Ordered, func() {
632662			ctx  :=  context .TODO ()
633663			args  :=  []string {"cmd" , "--model" , model , "--mode" , common .ModeRandom ,
634664				"--fake-metrics" ,
				`{` +
					`"running-requests":10,` +
					`"waiting-requests":30,` +
					`"kv-cache-usage":0.4,` +
					`"request-success-total":{` +
					`"stop":20,` +
					`"length":0,` +
					`"tool_calls":0,` +
					`"remote_decode":0` +
					`},` +
					`"request-prompt-tokens":[10,20,30],` +
					`"request-generation-tokens":[10,20,30],` +
					`"request-params-max-tokens":[10,20,30],` +
					`"ttft-buckets-values":[1,2,3],` +
					`"tpot-buckets-values":[0,0,1,2,3],` +
					`"loras":[` +
					`{` +
					`"running":"lora4,lora2",` +
					`"waiting":"lora3",` +
					`"timestamp":1257894567` +
					`},` +
					`{` +
					`"running":"lora4,lora3",` +
					`"waiting":"",` +
					`"timestamp":1257894569` +
					`}` +
					`]` +
					`}`,
636695			}
637696
638697			client , err  :=  startServerWithArgs (ctx , common .ModeRandom , args , nil )
@@ -651,6 +710,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
651710			Expect (metrics ).To (ContainSubstring ("vllm:lora_requests_info{max_lora=\" 1\" ,running_lora_adapters=\" lora4,lora2\" ,waiting_lora_adapters=\" lora3\" } 1.257894567e+09" ))
652711			Expect (metrics ).To (ContainSubstring ("vllm:lora_requests_info{max_lora=\" 1\" ,running_lora_adapters=\" lora4,lora3\" ,waiting_lora_adapters=\" \" } 1.257894569e+09" ))
653712
654714			Expect (metrics ).To (ContainSubstring ("vllm:time_to_first_token_seconds_bucket{model_name=\" my_model\" ,le=\" 0.001\" } 1" ))
655715			Expect (metrics ).To (ContainSubstring ("vllm:time_to_first_token_seconds_bucket{model_name=\" my_model\" ,le=\" 0.005\" } 3" ))
656716			Expect (metrics ).To (ContainSubstring ("vllm:time_to_first_token_seconds_bucket{model_name=\" my_model\" ,le=\" 0.01\" } 6" ))
@@ -707,6 +767,49 @@ var _ = Describe("Simulator metrics", Ordered, func() {
707767			Expect (metrics ).To (ContainSubstring ("vllm:time_to_first_token_seconds_bucket{model_name=\" my_model\" ,le=\" 640\" } 0" ))
708768			Expect (metrics ).To (ContainSubstring ("vllm:time_to_first_token_seconds_bucket{model_name=\" my_model\" ,le=\" 2560\" } 0" ))
709769			Expect (metrics ).To (ContainSubstring ("vllm:time_to_first_token_seconds_bucket{model_name=\" my_model\" ,le=\" +Inf\" } 1" ))
771+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="1"} 10` ))
772+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="2"} 30` ))
773+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="5"} 60` ))
774+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="10"} 60` ))
775+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="20"} 60` ))
776+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="50"} 60` ))
777+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="100"} 60` ))
778+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="200"} 60` ))
779+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="500"} 60` ))
780+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="1000"} 60` ))
781+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_generation_tokens_bucket{model_name="my_model",le="+Inf"} 60` ))
782+ 
783+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="1"} 10` ))
784+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="2"} 30` ))
785+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="5"} 60` ))
786+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="10"} 60` ))
787+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="20"} 60` ))
788+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="50"} 60` ))
789+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="100"} 60` ))
790+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="200"} 60` ))
791+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="500"} 60` ))
792+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="1000"} 60` ))
793+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_prompt_tokens_bucket{model_name="my_model",le="+Inf"} 60` ))
794+ 
795+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="1"} 10` ))
796+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="2"} 30` ))
797+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="5"} 60` ))
798+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="10"} 60` ))
799+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="20"} 60` ))
800+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="50"} 60` ))
801+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="100"} 60` ))
802+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="200"} 60` ))
803+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="500"} 60` ))
804+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="1000"} 60` ))
805+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_params_max_tokens_bucket{model_name="my_model",le="+Inf"} 60` ))
806+ 
807+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="length",model_name="my_model"} 0` ))
808+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="remote_decode",model_name="my_model"} 0` ))
809+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="stop",model_name="my_model"} 20` ))
810+ 			Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="tool_calls",model_name="my_model"} 0` ))
811+ 
710813		})
711814	})
712815})
@@ -890,3 +993,104 @@ func TestBuild125Buckets(t *testing.T) {
890993		})
891994	}
892995}
996+ 
997+ func  validateSamplesInBuckets (t  * testing.T , samples  []float64 , boundaries  []float64 , counts  []float64 ) {
998+ 	fullCounts  :=  padCountsToFull (boundaries , counts )
999+ 	// Now validate using fullCounts 
1000+ 	sortedSamples  :=  make ([]float64 , len (samples ))
1001+ 	copy (sortedSamples , samples )
1002+ 	sort .Float64s (sortedSamples )
1003+ 
1004+ 	actualCounts  :=  make ([]int , len (fullCounts ))
1005+ 	sampleIndex  :=  0 
1006+ 
1007+ 	for  bucketIndex  :=  range  fullCounts  {
1008+ 		var  upperBound  float64 
1009+ 		if  bucketIndex  ==  len (boundaries ) {
1010+ 			upperBound  =  math .Inf (+ 1 )
1011+ 		} else  {
1012+ 			upperBound  =  boundaries [bucketIndex ]
1013+ 		}
1014+ 
1015+ 		for  sampleIndex  <  len (sortedSamples ) &&  sortedSamples [sampleIndex ] <=  upperBound  {
1016+ 			actualCounts [bucketIndex ]++ 
1017+ 			sampleIndex ++ 
1018+ 		}
1019+ 	}
1020+ 
1021+ 	// Verify each bucket 
1022+ 	for  i , want  :=  range  fullCounts  {
1023+ 		if  actualCounts [i ] !=  int (want ) {
1024+ 			var  lowerStr , upperStr  string 
1025+ 			if  i  ==  0  {
1026+ 				lowerStr  =  "-Inf" 
1027+ 			} else  {
1028+ 				lowerStr  =  fmt .Sprintf ("%.3f" , boundaries [i - 1 ])
1029+ 			}
1030+ 			if  i  ==  len (boundaries ) {
1031+ 				upperStr  =  "+Inf" 
1032+ 			} else  {
1033+ 				upperStr  =  fmt .Sprintf ("%.3f" , boundaries [i ])
1034+ 			}
1035+ 			t .Errorf ("bucket[%d] (%s, %s]: want %d, got %d" ,
1036+ 				i , lowerStr , upperStr , int (want ), actualCounts [i ])
1037+ 		}
1038+ 	}
1039+ 
1040+ 	totalExpected  :=  0 
1041+ 	for  _ , c  :=  range  fullCounts  {
1042+ 		totalExpected  +=  int (c )
1043+ 	}
1044+ 	if  len (samples ) !=  totalExpected  {
1045+ 		t .Errorf ("total samples: want %d, got %d" , totalExpected , len (samples ))
1046+ 	}
1047+ }
1048+ 
1049+ func  TestGenerateSamplesFromBuckets (t  * testing.T ) {
1050+ 	tests  :=  []struct  {
1051+ 		name             string 
1052+ 		boundaries       []float64 
1053+ 		counts           []float64 
1054+ 		expectedSamples  int 
1055+ 	}{
1056+ 		{
1057+ 			name :            "short counts with non-zero in middle" ,
1058+ 			boundaries :      []float64 {1 , 2 , 5 , 10 , 20 , 50 , 100 , 200 , 500 , 1000 },
1059+ 			counts :          []float64 {0 , 0 , 0 , 5 , 0 , 5 },
1060+ 			expectedSamples : 10 ,
1061+ 		},
1062+ 		{
1063+ 			name :            "empty boundaries → 1 bucket" ,
1064+ 			boundaries :      []float64 {},
1065+ 			counts :          []float64 {7 },
1066+ 			expectedSamples : 7 ,
1067+ 		},
1068+ 		{
1069+ 			name :            "single boundary → 2 buckets, short counts" ,
1070+ 			boundaries :      []float64 {10.0 },
1071+ 			counts :          []float64 {3 },
1072+ 			expectedSamples : 3 ,
1073+ 		},
1074+ 		{
1075+ 			name :            "full counts provided" ,
1076+ 			boundaries :      []float64 {1 , 2 , 5 },
1077+ 			counts :          []float64 {1 , 0 , 2 , 1 },
1078+ 			expectedSamples : 4 ,
1079+ 		},
1080+ 		{
1081+ 			name :            "all zeros (short)" ,
1082+ 			boundaries :      []float64 {1 , 2 , 5 },
1083+ 			counts :          []float64 {},
1084+ 			expectedSamples : 0 ,
1085+ 		},
1086+ 	}
1087+ 	for  _ , tt  :=  range  tests  {
1088+ 		t .Run (tt .name , func (t  * testing.T ) {
1089+ 			samples  :=  generateSamplesFromBuckets (tt .boundaries , tt .counts )
1090+ 			if  len (samples ) !=  tt .expectedSamples  {
1091+ 				t .Fatalf ("sample count mismatch: want %d, got %d" , tt .expectedSamples , len (samples ))
1092+ 			}
1093+ 			validateSamplesInBuckets (t , samples , tt .boundaries , tt .counts )
1094+ 		})
1095+ 	}
1096+ }
0 commit comments