From 141a22b039b0f5a7a077ca9002c2fe636da5cb67 Mon Sep 17 00:00:00 2001 From: Maya Barnea Date: Mon, 27 Oct 2025 14:27:13 +0200 Subject: [PATCH] fix occasional ttft and tpot metrics test failures Signed-off-by: Maya Barnea --- pkg/llm-d-inference-sim/metrics_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/llm-d-inference-sim/metrics_test.go b/pkg/llm-d-inference-sim/metrics_test.go index 9f5b98f2..52d3aecc 100644 --- a/pkg/llm-d-inference-sim/metrics_test.go +++ b/pkg/llm-d-inference-sim/metrics_test.go @@ -393,14 +393,14 @@ var _ = Describe("Simulator metrics", Ordered, func() { modelName := "my_model" // Send one request, check that ttft and tpot are as defined in the simulator command line params ctx := context.TODO() - args := []string{"cmd", "--model", modelName, "--mode", common.ModeRandom, + // use mode echo to be sure that the response contains more than one token - this makes sure that tpot is reported to prometheus + args := []string{"cmd", "--model", modelName, "--mode", common.ModeEcho, "--time-to-first-token", "200", "--inter-token-latency", "100"} client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil) Expect(err).NotTo(HaveOccurred()) openaiclient, params := getOpenAIClientAndChatParams(client, modelName, userMessage, false) - params.MaxTokens = openai.Int(5) var reqWg, metricsWg sync.WaitGroup metricsWg.Add(1) @@ -451,7 +451,7 @@ var _ = Describe("Simulator metrics", Ordered, func() { Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"640\"} 1")) Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"2560\"} 1")) Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"+Inf\"} 1")) - // tpot + // check tpot only if it exists in metrics; when a single token is returned in the response, tpot is not reported 
Expect(metrics).To(ContainSubstring("vllm:time_per_output_token_seconds_bucket{model_name=\"my_model\",le=\"0.01\"} 0")) Expect(metrics).To(ContainSubstring("vllm:time_per_output_token_seconds_bucket{model_name=\"my_model\",le=\"0.025\"} 0")) Expect(metrics).To(ContainSubstring("vllm:time_per_output_token_seconds_bucket{model_name=\"my_model\",le=\"0.05\"} 0"))