From 141a22b039b0f5a7a077ca9002c2fe636da5cb67 Mon Sep 17 00:00:00 2001 From: Maya Barnea Date: Mon, 27 Oct 2025 14:27:13 +0200 Subject: [PATCH] fix occasional ttft and tpot metrics test failures Signed-off-by: Maya Barnea --- pkg/llm-d-inference-sim/metrics_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/llm-d-inference-sim/metrics_test.go b/pkg/llm-d-inference-sim/metrics_test.go index 9f5b98f2..52d3aecc 100644 --- a/pkg/llm-d-inference-sim/metrics_test.go +++ b/pkg/llm-d-inference-sim/metrics_test.go @@ -393,14 +393,14 @@ var _ = Describe("Simulator metrics", Ordered, func() { modelName := "my_model" // Send one request, check that ttft and tpot are as defined in the simulator command line params ctx := context.TODO() - args := []string{"cmd", "--model", modelName, "--mode", common.ModeRandom, + // use mode echo to be sure that the response contains more than one token - this makes sure that tpot is reported to prometheus + args := []string{"cmd", "--model", modelName, "--mode", common.ModeEcho, "--time-to-first-token", "200", "--inter-token-latency", "100"} client, err := startServerWithArgs(ctx, common.ModeRandom, args, nil) Expect(err).NotTo(HaveOccurred()) openaiclient, params := getOpenAIClientAndChatParams(client, modelName, userMessage, false) - params.MaxTokens = openai.Int(5) var reqWg, metricsWg sync.WaitGroup metricsWg.Add(1) @@ -451,7 +451,7 @@ var _ = Describe("Simulator metrics", Ordered, func() { Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"640\"} 1")) Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"2560\"} 1")) Expect(metrics).To(ContainSubstring("vllm:time_to_first_token_seconds_bucket{model_name=\"my_model\",le=\"+Inf\"} 1")) - // tpot + // check tpot only if it exists in metrics; when a single token is returned in the response, tpot is not reported 
Expect(metrics).To(ContainSubstring("vllm:time_per_output_token_seconds_bucket{model_name=\"my_model\",le=\"0.01\"} 0")) Expect(metrics).To(ContainSubstring("vllm:time_per_output_token_seconds_bucket{model_name=\"my_model\",le=\"0.025\"} 0")) Expect(metrics).To(ContainSubstring("vllm:time_per_output_token_seconds_bucket{model_name=\"my_model\",le=\"0.05\"} 0"))