Skip to content

Commit e50ff44

Browse files
committed
Set fake latency metrics if defined in configuration; add tests for fake latency metrics
Signed-off-by: Maya Barnea <[email protected]>
1 parent 5a6d9f1 commit e50ff44

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

pkg/llm-d-inference-sim/metrics.go

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,7 @@ func (s *VllmSimulator) createAndRegisterPrometheus() error {
170170
)
171171

172172
if err := s.metrics.registry.Register(s.metrics.reqInferenceTime); err != nil {
173-
s.logger.Error(err, "Prometheus request inerence time histogram register failed")
173+
s.logger.Error(err, "Prometheus request inference time histogram register failed")
174174
return err
175175
}
176176

@@ -310,7 +310,23 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {
310310
}
311311

312312
if s.config.FakeMetrics.E2ERequestLatencyBucketValues != nil {
313-
s.initFakeHistogram(s.metrics.tpot, common.RequestLatencyBucketsBoundaries, s.config.FakeMetrics.E2ERequestLatencyBucketValues)
313+
s.initFakeHistogram(s.metrics.e2eReqLatency, common.RequestLatencyBucketsBoundaries, s.config.FakeMetrics.E2ERequestLatencyBucketValues)
314+
}
315+
316+
if s.config.FakeMetrics.ReqQueueTimeBucketValues != nil {
317+
s.initFakeHistogram(s.metrics.reqQueueTime, common.RequestLatencyBucketsBoundaries, s.config.FakeMetrics.ReqQueueTimeBucketValues)
318+
}
319+
320+
if s.config.FakeMetrics.ReqInfTimeBucketValues != nil {
321+
s.initFakeHistogram(s.metrics.reqInferenceTime, common.RequestLatencyBucketsBoundaries, s.config.FakeMetrics.ReqInfTimeBucketValues)
322+
}
323+
324+
if s.config.FakeMetrics.ReqPrefillTimeBucketValues != nil {
325+
s.initFakeHistogram(s.metrics.reqPrefillTime, common.RequestLatencyBucketsBoundaries, s.config.FakeMetrics.ReqPrefillTimeBucketValues)
326+
}
327+
328+
if s.config.FakeMetrics.ReqDecodeTimeBucketValues != nil {
329+
s.initFakeHistogram(s.metrics.reqDecodeTime, common.RequestLatencyBucketsBoundaries, s.config.FakeMetrics.ReqDecodeTimeBucketValues)
314330
}
315331
}
316332

0 commit comments

Comments
 (0)