Commit 031e461

Add test for vllm:request_queue_time_seconds and vllm:request_inference_time_seconds
Signed-off-by: Maya Barnea <[email protected]>
1 parent 99bdec4 commit 031e461

File tree

1 file changed: +52 -1 lines changed

pkg/llm-d-inference-sim/metrics_test.go

Lines changed: 52 additions & 1 deletion
@@ -20,6 +20,7 @@ import (
 	"context"
 	"fmt"
 	"io"
+	"math"
 	"net/http"
 	"os"
 	"strings"
@@ -808,7 +809,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 		})
 	})
 
-	Context("latency metrics", func() {
+	Context("single request latency metrics", func() {
 		numOfTokens := len(common.Tokenize(testUserMessage))
 
 		DescribeTable("should calculate all latency related metrics correctly for a single request",
@@ -831,6 +832,56 @@ var _ = Describe("Simulator metrics", Ordered, func() {
 			Entry(nil, "prefill per token + inter token time", 0, 100, 100),
 		)
 	})
+
+	Context("multiple requests latency metrics", func() {
+		It("should calculate waiting and inference time correctly", func() {
+			ctx := context.TODO()
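+			// Run the simulator with a 1s time-to-first-token and a single
+			// sequence slot (--max-num-seqs=1), so the two concurrent
+			// requests below are served one at a time and the second one
+			// has to wait in the queue.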
+			args := []string{"cmd", "--model", testModel, "--mode", common.ModeEcho,
+				"--time-to-first-token", "1000", "--max-num-seqs", "1",
+			}
+
+			client, err := startServerWithArgs(ctx, args)
+			Expect(err).NotTo(HaveOccurred())
+
+			openaiclient, params := getOpenAIClientAndChatParams(client, testModel, testUserMessage, false)
+
+			var reqWg sync.WaitGroup
+			reqWg.Add(2)
+
+			// send two requests
+			for range 2 {
+				go func() {
+					defer reqWg.Done()
+					defer GinkgoRecover()
+
+					_, err := openaiclient.Chat.Completions.New(ctx, params)
+					Expect(err).NotTo(HaveOccurred())
+				}()
+			}
+
+			reqWg.Wait()
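+			// Give the simulator a moment to record the metrics for the
+			// completed requests before scraping /metrics.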
+			time.Sleep(300 * time.Millisecond)
+			metricsResp, err := client.Get(metricsUrl)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(metricsResp.StatusCode).To(Equal(http.StatusOK))
+
+			data, err := io.ReadAll(metricsResp.Body)
+			Expect(err).NotTo(HaveOccurred())
+			metrics := string(data)
+
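+			// Each request takes just over 1s of inference, and the queued
+			// request waits roughly as long, so every histogram bucket with
+			// an upper bound below 1.5s should be empty, while the larger
+			// buckets should contain both inference samples and the single
+			// queue sample.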
+			for _, boundary := range common.RequestLatencyBucketsBoundaries {
+				if boundary < 1.5 {
+					Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, reqInferenceTimeMetricName, boundary, 0)))
+					Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, reqQueueTimeMetricName, boundary, 0)))
+				} else {
+					Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, reqInferenceTimeMetricName, boundary, 2)))
+					Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, reqQueueTimeMetricName, boundary, 1)))
+				}
+			}
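+			// The +Inf bucket holds the total number of observations:
+			// two inference times and one queue time.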
+			Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, reqInferenceTimeMetricName, math.Inf(1), 2)))
+			Expect(metrics).To(ContainSubstring(getFloatBucketMetricLine(testModel, reqQueueTimeMetricName, math.Inf(1), 1)))
+		})
+	})
 })
 
 var _ = Describe("build125Buckets", Ordered, func() {
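For reference, the assertions above rely on the test helper getFloatBucketMetricLine, which is defined elsewhere in metrics_test.go. A minimal sketch of what such a helper might look like, assuming it renders a Prometheus exposition-format histogram bucket line (the function body, formatting choices, and label set here are assumptions, not the actual implementation):

package main

import (
	"fmt"
	"math"
	"strconv"
)

// getFloatBucketMetricLine renders the exposition line for a single
// histogram bucket, e.g.
//   vllm:request_queue_time_seconds_bucket{model_name="m",le="+Inf"} 1
// Sketch only; the real helper lives elsewhere in metrics_test.go.
func getFloatBucketMetricLine(model, metric string, boundary float64, count int) string {
	le := "+Inf"
	if !math.IsInf(boundary, 1) {
		le = strconv.FormatFloat(boundary, 'g', -1, 64)
	}
	return fmt.Sprintf("%s_bucket{model_name=%q,le=%q} %d", metric, model, le, count)
}

func main() {
	// Prints: vllm:request_queue_time_seconds_bucket{model_name="test-model",le="+Inf"} 1
	fmt.Println(getFloatBucketMetricLine("test-model", "vllm:request_queue_time_seconds", math.Inf(1), 1))
}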
