Skip to content

Commit 94be0aa

Browse files
committed
add fake latency metrics test
Signed-off-by: Maya Barnea <[email protected]>
1 parent 3e85d38 commit 94be0aa

File tree

1 file changed

+66
-14
lines changed

1 file changed

+66
-14
lines changed

pkg/llm-d-inference-sim/metrics_test.go

Lines changed: 66 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -664,26 +664,26 @@ var _ = Describe("Simulator metrics", Ordered, func() {
664664
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, tpotMetricName, 0.15, 6)))
665665

666666
buckets := build125Buckets(1024)
667+
var expectedCount int
667668

668-
for _, boudary := range buckets {
669-
switch boudary {
669+
for _, boundary := range buckets {
670+
switch boundary {
670671
case 1.0:
671-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, generationTokensMetricName, 1, 10)))
672-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, promptTokensMetricName, 1, 10)))
673-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, paramMaxTokensMetricName, 1, 10)))
672+
expectedCount = 10
674673
case 2.0:
675-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, generationTokensMetricName, 2, 30)))
676-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, promptTokensMetricName, 2, 30)))
677-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, paramMaxTokensMetricName, 2, 30)))
674+
expectedCount = 30
678675
default:
679-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, generationTokensMetricName, boudary, 60)))
680-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, promptTokensMetricName, boudary, 60)))
681-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, paramMaxTokensMetricName, boudary, 60)))
676+
expectedCount = 60
682677
}
678+
679+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, generationTokensMetricName, boundary, expectedCount)))
680+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, promptTokensMetricName, boundary, expectedCount)))
681+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, paramMaxTokensMetricName, boundary, expectedCount)))
682+
683683
}
684-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, generationTokensMetricName, math.Inf(1), 60)))
685-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, promptTokensMetricName, math.Inf(1), 60)))
686-
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, paramMaxTokensMetricName, math.Inf(1), 60)))
684+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, generationTokensMetricName, math.Inf(1), expectedCount)))
685+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, promptTokensMetricName, math.Inf(1), expectedCount)))
686+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, paramMaxTokensMetricName, math.Inf(1), expectedCount)))
687687

688688
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="length",model_name="testmodel"} 0`))
689689
Expect(metrics).To(ContainSubstring(`vllm:request_success_total{finish_reason="remote_decode",model_name="testmodel"} 0`))
@@ -718,6 +718,58 @@ var _ = Describe("Simulator metrics", Ordered, func() {
718718
})
719719
})
720720

721+
Context("fake latency metrics", func() {
722+
It("should respond with valid fake latency metrics to /metrics", func() {
723+
ctx := context.TODO()
724+
args := []string{"cmd", "--model", testModel, "--mode", common.ModeEcho,
725+
"--fake-metrics",
726+
`{` +
727+
`"e2erl-buckets-values":[0, 1, 2],` +
728+
`"queue-time-buckets-values":[0, 1, 2],` +
729+
`"inf-time-buckets-values":[0, 1, 2],` +
730+
`"prefill-time-buckets-values":[0, 1, 2],` +
731+
`"decode-time-buckets-values":[0, 1, 2]` +
732+
`}`,
733+
}
734+
735+
client, err := startServerWithArgs(ctx, args)
736+
Expect(err).NotTo(HaveOccurred())
737+
738+
resp, err := client.Get(metricsUrl)
739+
Expect(err).NotTo(HaveOccurred())
740+
Expect(resp.StatusCode).To(Equal(http.StatusOK))
741+
742+
data, err := io.ReadAll(resp.Body)
743+
Expect(err).NotTo(HaveOccurred())
744+
metrics := string(data)
745+
746+
// bucket counts are cumulative: fake per-bucket values 0, 1, 2 give 0, 1, 3, 3, 3, ...
747+
var expectedCount int
748+
749+
for i, boundary := range common.RequestLatencyBucketsBoundaries {
750+
switch i {
751+
case 0:
752+
expectedCount = 0
753+
case 1:
754+
expectedCount = 1
755+
default:
756+
expectedCount = 3
757+
}
758+
759+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, e2eReqLatencyMetricName, boundary, expectedCount)))
760+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, reqInferenceTimeMetricName, boundary, expectedCount)))
761+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, reqQueueTimeMetricName, boundary, expectedCount)))
762+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, prefillTimeMetricName, boundary, expectedCount)))
763+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, decodeTimeMetricName, boundary, expectedCount)))
764+
}
765+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, e2eReqLatencyMetricName, math.Inf(1), 3)))
766+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, reqInferenceTimeMetricName, math.Inf(1), 3)))
767+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, reqQueueTimeMetricName, math.Inf(1), 3)))
768+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, prefillTimeMetricName, math.Inf(1), 3)))
769+
Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, decodeTimeMetricName, math.Inf(1), 3)))
770+
})
771+
})
772+
721773
Context("single request latency metrics", func() {
722774
numOfTokens := len(common.Tokenize(testUserMessage))
723775

0 commit comments

Comments
 (0)