Skip to content

Commit 7e913bc

Browse files
authored
fix occasional ttft and tpot metrics test failures (#233)
Signed-off-by: Maya Barnea <[email protected]>
1 parent d60782a commit 7e913bc

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

pkg/llm-d-inference-sim/metrics_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -393,14 +393,14 @@ var _ = Describe("Simulator metrics", Ordered, func() {
393393
modelName := "my_model"
394394
// Send one request, check that ttft and tpot are as defined in the simulator command line params
395395
ctx := context.TODO()
396-
args := []string{"cmd", "--model", modelName, "--mode", common.ModeRandom,
396+
// use echo mode so that the response contains more than one token - this ensures that tpot is reported to Prometheus
397+
args := []string{"cmd", "--model", modelName, "--mode", common.ModeEcho,
397398
"--time-to-first-token", "200", "--inter-token-latency", "100"}
398399

399400
client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil)
400401
Expect(err).NotTo(HaveOccurred())
401402

402403
openaiclient, params := getOpenAIClientAndChatParams(client, modelName, userMessage, false)
403-
params.MaxTokens = openai.Int(5)
404404

405405
var reqWg, metricsWg sync.WaitGroup
406406
metricsWg.Add(1)
@@ -451,7 +451,7 @@ var _ = Describe("Simulator metrics", Ordered, func() {
451451
Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"640\"} 1"))
452452
Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"2560\"} 1"))
453453
Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"+Inf\"} 1"))
454-
// tpot
454+
// check tpot - it is reported only when the response contains more than one token (a single-token response produces no tpot samples)
455455
Expect(metrics).To(ContainSubstring("vllm:time_per_output_token_seconds_bucket{model_name=\"my_model\",le=\"0.01\"} 0"))
456456
Expect(metrics).To(ContainSubstring("vllm:time_per_output_token_seconds_bucket{model_name=\"my_model\",le=\"0.025\"} 0"))
457457
Expect(metrics).To(ContainSubstring("vllm:time_per_output_token_seconds_bucket{model_name=\"my_model\",le=\"0.05\"} 0"))

0 commit comments

Comments
 (0)