@@ -664,26 +664,26 @@ var _ = Describe("Simulator metrics", Ordered, func() {
664664			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , tpotMetricName , 0.15 , 6 )))
665665
666666			buckets  :=  build125Buckets (1024 )
667+ 			var  expectedCount  int 
667668
668- 			for  _ , boudary  :=  range  buckets  {
669- 				switch  boudary  {
669+ 			for  _ , boundary  :=  range  buckets  {
670+ 				switch  boundary  {
670671				case  1.0 :
671- 					Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , generationTokensMetricName , 1 , 10 )))
672- 					Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , promptTokensMetricName , 1 , 10 )))
673- 					Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , paramMaxTokensMetricName , 1 , 10 )))
672+ 					expectedCount  =  10 
674673				case  2.0 :
675- 					Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , generationTokensMetricName , 2 , 30 )))
676- 					Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , promptTokensMetricName , 2 , 30 )))
677- 					Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , paramMaxTokensMetricName , 2 , 30 )))
674+ 					expectedCount  =  30 
678675				default :
679- 					Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , generationTokensMetricName , boudary , 60 )))
680- 					Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , promptTokensMetricName , boudary , 60 )))
681- 					Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , paramMaxTokensMetricName , boudary , 60 )))
676+ 					expectedCount  =  60 
682677				}
678+ 
679+ 				Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , generationTokensMetricName , boundary , expectedCount )))
680+ 				Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , promptTokensMetricName , boundary , expectedCount )))
681+ 				Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , paramMaxTokensMetricName , boundary , expectedCount )))
682+ 
683683			}
684- 			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , generationTokensMetricName , math .Inf (1 ), 60 )))
685- 			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , promptTokensMetricName , math .Inf (1 ), 60 )))
686- 			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , paramMaxTokensMetricName , math .Inf (1 ), 60 )))
684+ 			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , generationTokensMetricName , math .Inf (1 ), expectedCount )))
685+ 			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , promptTokensMetricName , math .Inf (1 ), expectedCount )))
686+ 			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , paramMaxTokensMetricName , math .Inf (1 ), expectedCount )))
687687
688688			Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="length",model_name="testmodel"} 0` ))
689689			Expect (metrics ).To (ContainSubstring (`vllm:request_success_total{finish_reason="remote_decode",model_name="testmodel"} 0` ))
@@ -718,6 +718,58 @@ var _ = Describe("Simulator metrics", Ordered, func() {
718718		})
719719	})
720720
721+ 	Context ("fake latency metrics" , func () {
722+ 		It ("should respond with valid fake latency metrics to /metrics" , func () {
723+ 			ctx  :=  context .TODO ()
724+ 			args  :=  []string {"cmd" , "--model" , testModel , "--mode" , common .ModeEcho ,
725+ 				"--fake-metrics" ,
726+ 				`{`  + 
727+ 					`"e2erl-buckets-values":[0, 1, 2],`  + 
728+ 					`"queue-time-buckets-values":[0, 1, 2],`  + 
729+ 					`"inf-time-buckets-values":[0, 1, 2],`  + 
730+ 					`"prefill-time-buckets-values":[0, 1, 2],`  + 
731+ 					`"decode-time-buckets-values":[0, 1, 2]`  + 
732+ 					`}` ,
733+ 			}
734+ 
735+ 			client , err  :=  startServerWithArgs (ctx , args )
736+ 			Expect (err ).NotTo (HaveOccurred ())
737+ 
738+ 			resp , err  :=  client .Get (metricsUrl )
739+ 			Expect (err ).NotTo (HaveOccurred ())
740+ 			Expect (resp .StatusCode ).To (Equal (http .StatusOK ))
741+ 
742+ 			data , err  :=  io .ReadAll (resp .Body )
743+ 			Expect (err ).NotTo (HaveOccurred ())
744+ 			metrics  :=  string (data )
745+ 
746+ 			// buckets counts should be 0, 1, 3, 3, 3, ... 
747+ 			var  expectedCount  int 
748+ 
749+ 			for  i , boundary  :=  range  common .RequestLatencyBucketsBoundaries  {
750+ 				switch  i  {
751+ 				case  0 :
752+ 					expectedCount  =  0 
753+ 				case  1 :
754+ 					expectedCount  =  1 
755+ 				default :
756+ 					expectedCount  =  3 
757+ 				}
758+ 
759+ 				Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , e2eReqLatencyMetricName , boundary , expectedCount )))
760+ 				Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , reqInferenceTimeMetricName , boundary , expectedCount )))
761+ 				Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , reqQueueTimeMetricName , boundary , expectedCount )))
762+ 				Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , prefillTimeMetricName , boundary , expectedCount )))
763+ 				Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , decodeTimeMetricName , boundary , expectedCount )))
764+ 			}
765+ 			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , e2eReqLatencyMetricName , math .Inf (1 ), 3 )))
766+ 			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , reqInferenceTimeMetricName , math .Inf (1 ), 3 )))
767+ 			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , reqQueueTimeMetricName , math .Inf (1 ), 3 )))
768+ 			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , prefillTimeMetricName , math .Inf (1 ), 3 )))
769+ 			Expect (metrics ).To (ContainSubstring (getFloatBucketMetricLine (testModel , decodeTimeMetricName , math .Inf (1 ), 3 )))
770+ 		})
771+ 	})
772+ 
721773	Context ("single request latency metrics" , func () {
722774		numOfTokens  :=  len (common .Tokenize (testUserMessage ))
723775
0 commit comments