Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions pkg/common/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,22 +339,24 @@ func RandomFloat(min float64, max float64) float64 {
return randomGenerator.Float64()*(max-min) + min
}

// Returns a normally distributed float64
// Returns a normally distributed int
// If the generated value is below 30% of mean or above 170% of mean, the
// returned value is clamped to that bound (0.3*mean or 1.7*mean)
func RandomNorm(mean float64, stddev float64) float64 {
func RandomNorm(mean int, stddev int) int {
if stddev == 0 {
return mean
}
randMutex.Lock()
defer randMutex.Unlock()
value := randomGenerator.NormFloat64()*stddev + mean
if value < 0.3*mean {
value = 0.3 * mean
} else if value > 1.7*mean {
value = 1.7 * mean
mean_ := float64(mean)
stddev_ := float64(stddev)
value := randomGenerator.NormFloat64()*stddev_ + mean_
if value < 0.3*mean_ {
value = 0.3 * mean_
} else if value > 1.7*mean_ {
value = 1.7 * mean_
}
return value
return int(value)
}

// GenerateUUIDString generates a UUID string under a lock
Expand Down
12 changes: 5 additions & 7 deletions pkg/llm-d-inference-sim/simulator.go
Original file line number Diff line number Diff line change
Expand Up @@ -667,25 +667,23 @@ func (s *VllmSimulator) getTimeToFirstToken(nPromptTokens int, nCachedPromptToke
if s.config.KVCacheTransferLatency == 0 && s.config.KVCacheTransferLatencyStdDev == 0 {
// is disaggregated PD and ttft is calculated using number of prompt tokens
kvCacheTransT := s.config.KVCacheTransferTimePerToken * nPromptTokens
return int(common.RandomNorm(float64(kvCacheTransT), float64(s.config.KVCacheTransferTimeStdDev)))
return common.RandomNorm(kvCacheTransT, s.config.KVCacheTransferTimeStdDev)
}
// is disaggregated PD and *not* using number of prompt tokens
return int(common.RandomNorm(float64(s.config.KVCacheTransferLatency), float64(s.config.KVCacheTransferLatencyStdDev)))
return common.RandomNorm(s.config.KVCacheTransferLatency, s.config.KVCacheTransferLatencyStdDev)
}
if s.config.TimeToFirstToken == 0 && s.config.TimeToFirstTokenStdDev == 0 {
// is aggregated PD and ttft is calculated using number of prompt tokens that are not in kv cache
prefillTime := s.config.PrefillOverhead + (nPromptTokens-nCachedPromptTokens)*s.config.PrefillTimePerToken
return int(common.RandomNorm(float64(prefillTime), float64(s.config.PrefillTimeStdDev)))
return common.RandomNorm(prefillTime, s.config.PrefillTimeStdDev)
}
// is aggregated PD and *not* using number of prompt tokens
return int(common.RandomNorm(float64(s.config.TimeToFirstToken), float64(s.config.TimeToFirstTokenStdDev)))
return common.RandomNorm(s.config.TimeToFirstToken, s.config.TimeToFirstTokenStdDev)
}

// returns inter token latency
func (s *VllmSimulator) getInterTokenLatency() int {
mean := float64(s.config.InterTokenLatency)
stddev := float64(s.config.InterTokenLatencyStdDev)
return int(common.RandomNorm(mean, stddev))
return common.RandomNorm(s.config.InterTokenLatency, s.config.InterTokenLatencyStdDev)
}

// returns total inter token latency for the given number of tokens
Expand Down