@@ -798,7 +798,7 @@ var _ = Describe("Simulator", func() {
798798 simulator .config .TimeToFirstTokenStdDev = timeToFirstTokenStdDev
799799 simulator .config .KVCacheTransferLatency = kvCacheLatency
800800 simulator .config .KVCacheTransferLatencyStdDev = kvCacheLatencyStdDev
801- timeToFirst := simulator .getTimeToFirstToken (1 , 0 , doREmotePrefill )
801+ timeToFirst := simulator .getTimeToFirstToken (1 , 0 , doREmotePrefill , & simulator . runReqChan )
802802 if doREmotePrefill {
803803 Expect (timeToFirst ).To (BeNumerically (">=" , int (float32 (kvCacheLatency )* 0.3 )))
804804 Expect (timeToFirst ).To (BeNumerically ("<=" , int (float32 (kvCacheLatency )* 1.7 )))
@@ -829,7 +829,7 @@ var _ = Describe("Simulator", func() {
829829 simulator .config .PrefillTimePerToken = 200
830830 simulator .config .PrefillTimeStdDev = 80
831831
832- ttft := simulator .getTimeToFirstToken (128 , 0 , false )
832+ ttft := simulator .getTimeToFirstToken (128 , 0 , false , & simulator . runReqChan )
833833
834834 Expect (ttft ).To (BeNumerically ("==" , timeToFirstToken ))
835835 })
@@ -842,7 +842,7 @@ var _ = Describe("Simulator", func() {
842842 simulator .config .PrefillTimePerToken = 200
843843 simulator .config .PrefillTimeStdDev = 80
844844
845- ttft := simulator .getTimeToFirstToken (128 , 0 , false )
845+ ttft := simulator .getTimeToFirstToken (128 , 0 , false , & simulator . runReqChan )
846846 Expect (ttft ).NotTo (BeNumerically ("==" , 0 ))
847847 })
848848
@@ -853,7 +853,7 @@ var _ = Describe("Simulator", func() {
853853 simulator .config .PrefillTimePerToken = prefillTimePerToken
854854 simulator .config .PrefillTimeStdDev = stdDev
855855
856- ttft := simulator .getTimeToFirstToken (nTokens , nCachedTokens , false )
856+ ttft := simulator .getTimeToFirstToken (nTokens , nCachedTokens , false , & simulator . runReqChan )
857857
858858 expectedTTFT := prefillOverhead + prefillTimePerToken * (nTokens - nCachedTokens )
859859 Expect (ttft ).To (BeNumerically (">=" , int (float64 (expectedTTFT )* 0.3 )))
@@ -881,7 +881,7 @@ var _ = Describe("Simulator", func() {
881881 simulator .config .PrefillTimePerToken = prefillTimePerToken
882882 simulator .config .PrefillTimeStdDev = 0
883883
884- ttft := simulator .getTimeToFirstToken (nTokens , nCachedTokens , false )
884+ ttft := simulator .getTimeToFirstToken (nTokens , nCachedTokens , false , & simulator . runReqChan )
885885 expectedTTFT := prefillOverhead + prefillTimePerToken * (nTokens - nCachedTokens )
886886 Expect (ttft ).To (Equal (expectedTTFT ))
887887 },
@@ -905,7 +905,7 @@ var _ = Describe("Simulator", func() {
905905 simulator .config .KVCacheTransferTimePerToken = 100
906906 simulator .config .KVCacheTransferTimeStdDev = 0
907907
908- ttft := simulator .getTimeToFirstToken (128 , 0 , true )
908+ ttft := simulator .getTimeToFirstToken (128 , 0 , true , & simulator . runReqChan )
909909 Expect (ttft ).To (BeNumerically ("==" , 200 ))
910910 })
911911
@@ -916,7 +916,7 @@ var _ = Describe("Simulator", func() {
916916 simulator .config .KVCacheTransferTimePerToken = 100
917917 simulator .config .KVCacheTransferTimeStdDev = 0
918918
919- ttft := simulator .getTimeToFirstToken (128 , 0 , true )
919+ ttft := simulator .getTimeToFirstToken (128 , 0 , true , & simulator . runReqChan )
920920 Expect (ttft ).To (BeNumerically ("==" , 12800 ))
921921 })
922922
@@ -927,7 +927,7 @@ var _ = Describe("Simulator", func() {
927927 simulator .config .KVCacheTransferTimePerToken = kvCacheTransTPT
928928 simulator .config .KVCacheTransferTimeStdDev = stddev
929929
930- ttft := simulator .getTimeToFirstToken (nTokens , 0 , true )
930+ ttft := simulator .getTimeToFirstToken (nTokens , 0 , true , & simulator . runReqChan )
931931
932932 expectedTTFT := kvCacheTransTPT * nTokens
933933 Expect (ttft ).To (BeNumerically (">=" , int (float64 (expectedTTFT )* 0.3 )))
@@ -945,5 +945,60 @@ var _ = Describe("Simulator", func() {
945945 Entry ("very long prompt" , 150 , 100 , 20000 ),
946946 )
947947
948+ It ("when time-factor-under-load is 1, the time to first token should be equal to time-to-first-token" , func () {
949+ simulator .config .TimeToFirstToken = 42
950+ simulator .config .TimeToFirstTokenStdDev = 0
951+ simulator .config .TimeFactorUnderLoad = 1.0
952+
953+ simulator .runReqChan <- 100
954+
955+ ttft := simulator .getTimeToFirstToken (128 , 0 , false , & simulator .runReqChan )
956+ Expect (ttft ).To (Equal (42 ))
957+ })
958+
959+ It ("when time-factor-under-load is > 1, but max-num-seqs is 1, the factor will not take effect" , func () {
960+ simulator .config .TimeToFirstToken = 42
961+ simulator .config .TimeToFirstTokenStdDev = 0
962+ simulator .config .TimeFactorUnderLoad = 100.0
963+ simulator .config .MaxNumSeqs = 1
964+
965+ for len (simulator .runReqChan ) > 0 {
966+ <- simulator .runReqChan
967+ }
968+
969+ simulator .runReqChan <- 1
970+
971+ ttft := simulator .getTimeToFirstToken (128 , 0 , false , & simulator .runReqChan )
972+ Expect (ttft ).To (Equal (42 ))
973+ })
974+
975+ DescribeTable ("when time-factor-under-load is > 1, and the sim is fully loaded, the time to first token should be time-factor-under-load * time-to-first-token" ,
976+ func (timeFactorUnderLoad float64 , maxNumOfReq int ) {
977+ simulator .config .TimeToFirstToken = 42
978+ simulator .config .TimeToFirstTokenStdDev = 0
979+ simulator .config .TimeFactorUnderLoad = timeFactorUnderLoad
980+ simulator .config .MaxNumSeqs = maxNumOfReq
981+ for len (simulator .runReqChan ) > 0 {
982+ <- simulator .runReqChan
983+ }
984+ for i := 0 ; i < maxNumOfReq ; i ++ {
985+ simulator .runReqChan <- 1
986+ }
987+
988+ ttft := simulator .getTimeToFirstToken (128 , 0 , false , & simulator .runReqChan )
989+ Expect (ttft ).To (Equal (int (float64 (42 ) * timeFactorUnderLoad )))
990+
991+ },
992+ func (timeFactorUnderLoad float64 , maxNumOfReq int64 ) string {
993+ return fmt .Sprintf ("timeFactorUnderLoad: %f maxNumOfReq: %d" ,
994+ timeFactorUnderLoad , maxNumOfReq )
995+ },
996+
997+ Entry ("factor: 1.5" , 1.5 , 70 ),
998+ Entry ("factor: 2.0" , 2.0 , 2 ),
999+ Entry ("factor: 100.0" , 100.0 , 150 ),
1000+ Entry ("factor: 20000.0" , 20000.0 , 310 ),
1001+ )
1002+
9481003 })
9491004})
0 commit comments