From 25ba2145df76ed9619cc075a87adda521b340da1 Mon Sep 17 00:00:00 2001 From: Qifan Deng Date: Sun, 24 Aug 2025 18:13:35 +1000 Subject: [PATCH 1/2] Cast bounds type in tests to func def: latency, interToken, and timeToFirst (to int) Signed-off-by: Qifan Deng --- pkg/llm-d-inference-sim/simulator_test.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/llm-d-inference-sim/simulator_test.go b/pkg/llm-d-inference-sim/simulator_test.go index a86eb3f5..bcfb041e 100644 --- a/pkg/llm-d-inference-sim/simulator_test.go +++ b/pkg/llm-d-inference-sim/simulator_test.go @@ -761,8 +761,8 @@ var _ = Describe("Simulator", func() { simulator.config.InterTokenLatency = interTokenLatency simulator.config.InterTokenLatencyStdDev = stddev interToken := simulator.getInterTokenLatency() - Expect(interToken).To(BeNumerically(">=", float32(interTokenLatency)*0.3)) - Expect(interToken).To(BeNumerically("<=", float32(interTokenLatency)*1.7)) + Expect(interToken).To(BeNumerically(">=", int(float64(interTokenLatency)*0.3))) + Expect(interToken).To(BeNumerically("<=", int(float64(interTokenLatency)*1.7))) }, func(interTokenLatency int, stddev int) string { return fmt.Sprintf("interTokenLatency: %d stddev: %d", interTokenLatency, stddev) @@ -778,8 +778,8 @@ var _ = Describe("Simulator", func() { simulator.config.InterTokenLatency = interTokenLatency simulator.config.InterTokenLatencyStdDev = stddev latency := simulator.getTotalInterTokenLatency(numberOfTokens) - Expect(latency).To(BeNumerically(">=", float32(interTokenLatency)*0.3*float32(numberOfTokens))) - Expect(latency).To(BeNumerically("<=", float32(interTokenLatency)*1.7*float32(numberOfTokens))) + Expect(latency).To(BeNumerically(">=", int(float32(interTokenLatency)*0.3*float32(numberOfTokens)))) + Expect(latency).To(BeNumerically("<=", int(float32(interTokenLatency)*1.7*float32(numberOfTokens)))) }, func(interTokenLatency int, stddev int, numberOfTokens int) string { return fmt.Sprintf("interTokenLatency: %d stddev: %d, numberOfTokens: %d", interTokenLatency, @@ -800,11 +800,11 @@ var _ = Describe("Simulator", func() { simulator.config.KVCacheTransferLatencyStdDev = kvCacheLatencyStdDev timeToFirst := simulator.getTimeToFirstToken(doREmotePrefill) if doREmotePrefill { - Expect(timeToFirst).To(BeNumerically(">=", float32(kvCacheLatency)*0.3)) - Expect(timeToFirst).To(BeNumerically("<=", float32(kvCacheLatency)*1.7)) + Expect(timeToFirst).To(BeNumerically(">=", int(float32(kvCacheLatency)*0.3))) + Expect(timeToFirst).To(BeNumerically("<=", int(float32(kvCacheLatency)*1.7))) } else { - Expect(timeToFirst).To(BeNumerically(">=", float32(timeToFirstToken)*0.3)) - Expect(timeToFirst).To(BeNumerically("<=", float32(timeToFirstToken)*1.7)) + Expect(timeToFirst).To(BeNumerically(">=", int(float32(timeToFirstToken)*0.3))) + Expect(timeToFirst).To(BeNumerically("<=", int(float32(timeToFirstToken)*1.7))) } }, func(timeToFirstToken int, timeToFirstTokenStdDev int, From 22aa3e626236747522f2504a49773f52799cc35d Mon Sep 17 00:00:00 2001 From: Qifan Deng Date: Sun, 24 Aug 2025 18:24:42 +1000 Subject: [PATCH 2/2] Use float 32 Signed-off-by: Qifan Deng --- pkg/llm-d-inference-sim/simulator_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/llm-d-inference-sim/simulator_test.go b/pkg/llm-d-inference-sim/simulator_test.go index bcfb041e..cf9fd468 100644 --- a/pkg/llm-d-inference-sim/simulator_test.go +++ b/pkg/llm-d-inference-sim/simulator_test.go @@ -761,8 +761,8 @@ var _ = Describe("Simulator", func() { simulator.config.InterTokenLatency = interTokenLatency simulator.config.InterTokenLatencyStdDev = stddev interToken := simulator.getInterTokenLatency() - Expect(interToken).To(BeNumerically(">=", int(float64(interTokenLatency)*0.3))) - Expect(interToken).To(BeNumerically("<=", int(float64(interTokenLatency)*1.7))) + Expect(interToken).To(BeNumerically(">=", int(float32(interTokenLatency)*0.3))) + Expect(interToken).To(BeNumerically("<=", int(float32(interTokenLatency)*1.7))) }, func(interTokenLatency int, stddev int) string { return fmt.Sprintf("interTokenLatency: %d stddev: %d", interTokenLatency, stddev)