Skip to content

Commit 1d30ea0

Browse files
committed
Add param time-factor-under-load
Signed-off-by: Qifan Deng <[email protected]>
1 parent abdb2fa commit 1d30ea0

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

pkg/common/config.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,13 @@ type Configuration struct {
104104
// KVCacheTransferTimeStdDev similar to TimeToFirstTokenStdDev
105105
KVCacheTransferTimeStdDev int `yaml:"kv-cache-transfer-time-std-dev" json:"kv-cache-transfer-time-std-dev"`
106106

107+
// TimeFactorUnderLoad is a multiplicative factor that affects the overall time taken for requests
108+
// when parallel requests are being processed. The value of this factor must be >= 1.0, with a default of 1.0.
109+
// If this factor is 1.0, no extra time is added. When the factor is x (where x > 1.0) and there are MaxNumSeqs
110+
// requests, the total time will be multiplied by x.
111+
// When the number of requests is less than MaxNumSeqs, the applied factor decreases multiplicatively toward 1.0.
112+
TimeFactorUnderLoad float64 `yaml:"time-factor-under-load" json:"time-factor-under-load"`
113+
107114
// Mode defines the simulator response generation mode, valid values: echo, random
108115
Mode string `yaml:"mode" json:"mode"`
109116
// Seed defines random seed for operations
@@ -259,6 +266,7 @@ func newConfig() *Configuration {
259266
MaxModelLen: 1024,
260267
Mode: ModeRandom,
261268
Seed: time.Now().UnixNano(),
269+
TimeFactorUnderLoad: 1.0,
262270
MaxToolCallIntegerParam: 100,
263271
MaxToolCallNumberParam: 100,
264272
MaxToolCallArrayParamLength: 5,
@@ -362,6 +370,10 @@ func (c *Configuration) validate() error {
362370
return errors.New("kv-cache tranfer standard deviation cannot be more than 30% of kv-cache tranfer")
363371
}
364372

373+
if c.TimeFactorUnderLoad < 1.0 {
374+
return errors.New("time factor under load cannot be less than 1.0")
375+
}
376+
365377
if c.MaxLoras < 1 {
366378
return errors.New("max LoRAs cannot be less than 1")
367379
}
@@ -509,6 +521,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
509521
f.IntVar(&config.TimeToFirstTokenStdDev, "time-to-first-token-std-dev", config.TimeToFirstTokenStdDev, "Standard deviation for time before the first token will be returned (in milliseconds)")
510522
f.IntVar(&config.KVCacheTransferLatencyStdDev, "kv-cache-transfer-latency-std-dev", config.KVCacheTransferLatencyStdDev, "Standard deviation for time for KV-cache transfer from a remote vLLM (in milliseconds)")
511523
f.Int64Var(&config.Seed, "seed", config.Seed, "Random seed for operations (if not set, current Unix time in nanoseconds is used)")
524+
f.Float64Var(&config.TimeFactorUnderLoad, "time-factor-under-load", config.TimeFactorUnderLoad, "Time factor under load (must be >= 1.0)")
512525

513526
f.IntVar(&config.MaxToolCallIntegerParam, "max-tool-call-integer-param", config.MaxToolCallIntegerParam, "Maximum possible value of integer parameters in a tool call")
514527
f.IntVar(&config.MinToolCallIntegerParam, "min-tool-call-integer-param", config.MinToolCallIntegerParam, "Minimum possible value of integer parameters in a tool call")

pkg/common/config_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,16 @@ var _ = Describe("Simulator configuration", func() {
441441
args: []string{"cmd", "--max-num-seqs", "-1",
442442
"--config", "../../manifests/config.yaml"},
443443
},
444+
{
445+
name: "invalid time-factor-under-load",
446+
args: []string{"cmd", "--time-factor-under-load", "0",
447+
"--config", "../../manifests/config.yaml"},
448+
},
449+
{
450+
name: "invalid time-factor-under-load",
451+
args: []string{"cmd", "--time-factor-under-load", "-1",
452+
"--config", "../../manifests/config.yaml"},
453+
},
444454
}
445455

446456
for _, test := range invalidTests {

0 commit comments

Comments
 (0)