Skip to content

Commit 9ccbe95

Browse files
committed
Test TTFT when partially loaded
Signed-off-by: Qifan Deng <[email protected]>
1 parent 04542f2 commit 9ccbe95

File tree

1 file changed

+32
-1
lines changed

1 file changed

+32
-1
lines changed

pkg/llm-d-inference-sim/simulator_test.go

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -981,7 +981,7 @@ var _ = Describe("Simulator", func() {
981981
for len(simulator.runReqChan) > 0 {
982982
<-simulator.runReqChan
983983
}
984-
for i := 0; i < maxNumOfReq; i++ {
984+
for range maxNumOfReq {
985985
simulator.runReqChan <- 1
986986
}
987987

@@ -1000,5 +1000,36 @@ var _ = Describe("Simulator", func() {
10001000
Entry("factor: 20000.0", 20000.0, 310),
10011001
)
10021002

1003+
DescribeTable("when time-factor-under-load is > 1, and the sim is partially loaded, the time to first token should be linear interpolation between time-to-first-token and time-factor-under-load * time-to-first-token",
1004+
func(timeFactorUnderLoad float64, maxNumOfReq int, nCurrNumOfReq int) {
1005+
simulator.config.TimeToFirstToken = 42
1006+
simulator.config.TimeToFirstTokenStdDev = 0
1007+
simulator.config.TimeFactorUnderLoad = timeFactorUnderLoad
1008+
simulator.config.MaxNumSeqs = maxNumOfReq
1009+
1010+
for len(simulator.runReqChan) > 0 {
1011+
<-simulator.runReqChan
1012+
}
1013+
for range nCurrNumOfReq {
1014+
simulator.runReqChan <- 1
1015+
}
1016+
1017+
ttft := simulator.getTimeToFirstToken(128, 0, false, &simulator.runReqChan)
1018+
max := timeFactorUnderLoad * float64(42)
1019+
Expect(ttft).To(BeNumerically(">=", 42))
1020+
Expect(ttft).To(BeNumerically("<=", max))
1021+
1022+
},
1023+
func(timeFactorUnderLoad float64, maxNumOfReq int, nCurrNumOfReq int) string {
1024+
return fmt.Sprintf("timeFactorUnderLoad: %f maxNumOfReq: %d nCurrNumOfReq: %d",
1025+
timeFactorUnderLoad, maxNumOfReq, nCurrNumOfReq)
1026+
},
1027+
1028+
Entry("factor: 1.5", 1.5, 70, 35),
1029+
Entry("factor: 2.0", 2.0, 2, 1),
1030+
Entry("factor: 100.0", 100.0, 150, 75),
1031+
Entry("factor: 20000.0", 20000.0, 310, 155),
1032+
)
1033+
10031034
})
10041035
})

0 commit comments

Comments
 (0)