diff --git a/pkg/common/utils.go b/pkg/common/utils.go
index a04692dc..dba9829b 100644
--- a/pkg/common/utils.go
+++ b/pkg/common/utils.go
@@ -339,22 +339,25 @@ func RandomFloat(min float64, max float64) float64 {
 	return randomGenerator.Float64()*(max-min) + min
 }
 
-// Returns a normally distributed float64
-// If the generated value differs by more than 70% from mean, the returned
-// value will be 70% of mean
-func RandomNorm(mean float64, stddev float64) float64 {
+// RandomNorm returns a normally distributed int with the given mean and
+// standard deviation. The sample is clamped to [0.3*mean, 1.7*mean] (these
+// bounds assume mean >= 0) and then truncated toward zero.
+// If stddev is 0, mean is returned unchanged.
+func RandomNorm(mean int, stddev int) int {
 	if stddev == 0 {
 		return mean
 	}
 	randMutex.Lock()
 	defer randMutex.Unlock()
-	value := randomGenerator.NormFloat64()*stddev + mean
-	if value < 0.3*mean {
-		value = 0.3 * mean
-	} else if value > 1.7*mean {
-		value = 1.7 * mean
+	meanF := float64(mean)
+	stddevF := float64(stddev)
+	value := randomGenerator.NormFloat64()*stddevF + meanF
+	if value < 0.3*meanF {
+		value = 0.3 * meanF
+	} else if value > 1.7*meanF {
+		value = 1.7 * meanF
 	}
-	return value
+	return int(value)
 }
 
 // GenerateUUIDString generates a UUID string under a lock
diff --git a/pkg/llm-d-inference-sim/simulator.go b/pkg/llm-d-inference-sim/simulator.go
index 026a55c4..9f5ce113 100644
--- a/pkg/llm-d-inference-sim/simulator.go
+++ b/pkg/llm-d-inference-sim/simulator.go
@@ -667,25 +667,23 @@ func (s *VllmSimulator) getTimeToFirstToken(nPromptTokens int, nCachedPromptToke
 		if s.config.KVCacheTransferLatency == 0 && s.config.KVCacheTransferLatencyStdDev == 0 {
 			// is disaggregated PD and ttft is calculated using number of prompt tokens
 			kvCacheTransT := s.config.KVCacheTransferTimePerToken * nPromptTokens
-			return int(common.RandomNorm(float64(kvCacheTransT), float64(s.config.KVCacheTransferTimeStdDev)))
+			return common.RandomNorm(kvCacheTransT, s.config.KVCacheTransferTimeStdDev)
 		}
 		// is disaggregated PD and *not* using number of prompt tokens
-		return int(common.RandomNorm(float64(s.config.KVCacheTransferLatency), float64(s.config.KVCacheTransferLatencyStdDev)))
+		return common.RandomNorm(s.config.KVCacheTransferLatency, s.config.KVCacheTransferLatencyStdDev)
 	}
 	if s.config.TimeToFirstToken == 0 && s.config.TimeToFirstTokenStdDev == 0 {
 		// is aggregated PD and ttft is calculated using number of prompt tokens that are not in kv cache
 		prefillTime := s.config.PrefillOverhead + (nPromptTokens-nCachedPromptTokens)*s.config.PrefillTimePerToken
-		return int(common.RandomNorm(float64(prefillTime), float64(s.config.PrefillTimeStdDev)))
+		return common.RandomNorm(prefillTime, s.config.PrefillTimeStdDev)
 	}
 	// is aggregated PD and *not* using number of prompt tokens
-	return int(common.RandomNorm(float64(s.config.TimeToFirstToken), float64(s.config.TimeToFirstTokenStdDev)))
+	return common.RandomNorm(s.config.TimeToFirstToken, s.config.TimeToFirstTokenStdDev)
 }
 
 // returns inter token latency
 func (s *VllmSimulator) getInterTokenLatency() int {
-	mean := float64(s.config.InterTokenLatency)
-	stddev := float64(s.config.InterTokenLatencyStdDev)
-	return int(common.RandomNorm(mean, stddev))
+	return common.RandomNorm(s.config.InterTokenLatency, s.config.InterTokenLatencyStdDev)
 }
 
 // returns total inter token latency for the given number of tokens