Skip to content

Commit 251813e

Browse files
committed
Clarify failure injection rate documentation
Removed redundant lines and updated comments and help text to clarify that 'failure-injection-rate' is the probability of injecting failures, not specifically tied to failure mode. Signed-off-by: Sergey Marunich <[email protected]>
1 parent 458bca2 commit 251813e

File tree

2 files changed

+60
-6
lines changed

2 files changed

+60
-6
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ The simulator supports two modes of operation:
3333
- `echo` mode: the response contains the same text that was received in the request. For `/v1/chat/completions` the last message for the role=`user` is used.
3434
- `random` mode: the response is randomly chosen from a set of pre-defined sentences.
3535

36+
Additionally, the simulator can inject OpenAI API-compatible error responses to test error handling; the injection probability is set with the `failure-injection-rate` parameter.
37+
3638
Timing of the response is defined by the `time-to-first-token` and `inter-token-latency` parameters. In case P/D is enabled for a request, `kv-cache-transfer-latency` will be used instead of `time-to-first-token`.
3739

3840
For a request with `stream=true`: `time-to-first-token` or `kv-cache-transfer-latency` defines the delay before the first token is returned, `inter-token-latency` defines the delay between subsequent tokens in the stream.
@@ -116,13 +118,14 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
116118
- `min-tool-call-array-param-length`: the minimum possible length of array parameters in a tool call, optional, defaults to 1
117119
- `tool-call-not-required-param-probability`: the probability to add a parameter, that is not required, in a tool call, optional, defaults to 50
118120
- `object-tool-call-not-required-field-probability`: the probability to add a field, that is not required, in an object in a tool call, optional, defaults to 50
119-
<!--
120121
- `enable-kvcache`: if true, the KV cache support will be enabled in the simulator. In this case, the KV cache will be simulated, and ZMQ events will be published when a KV cache block is added or evicted.
121122
- `kv-cache-size`: the maximum number of token blocks in kv cache
122123
- `block-size`: token block size for contiguous chunks of tokens, possible values: 8,16,32,64,128
123124
- `tokenizers-cache-dir`: the directory for caching tokenizers
124125
- `hash-seed`: seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)
125126
- `zmq-endpoint`: ZMQ address to publish events
127+
- `failure-injection-rate`: probability (0-100) of injecting failures, optional, defaults to 10
128+
- `failure-types`: list of specific failure types to inject (rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found), optional, if empty all types are used
126129
- `event-batch-size`: the maximum number of kv-cache events to be sent together, defaults to 16
127130
-->
128131
In addition, as we are using klog, the following parameters are available:

pkg/common/config.go

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ const (
3434
vLLMDefaultPort = 8000
3535
ModeRandom = "random"
3636
ModeEcho = "echo"
37+
38+
// Failure type constants
39+
FailureTypeRateLimit = "rate_limit"
40+
FailureTypeInvalidAPIKey = "invalid_api_key"
41+
FailureTypeContextLength = "context_length"
42+
FailureTypeServerError = "server_error"
43+
FailureTypeInvalidRequest = "invalid_request"
44+
FailureTypeModelNotFound = "model_not_found"
3745
)
3846

3947
type Configuration struct {
@@ -127,6 +135,11 @@ type Configuration struct {
127135
ZMQEndpoint string `yaml:"zmq-endpoint"`
128136
// EventBatchSize is the maximum number of kv-cache events to be sent together, defaults to 16
129137
EventBatchSize int `yaml:"event-batch-size"`
138+
139+
// FailureInjectionRate is the probability (0-100) of injecting failures
140+
FailureInjectionRate int `yaml:"failure-injection-rate"`
141+
// FailureTypes is a list of specific failure types to inject (empty means all types)
142+
FailureTypes []string `yaml:"failure-types"`
130143
}
131144

132145
type LoraModule struct {
@@ -182,10 +195,12 @@ func newConfig() *Configuration {
182195
MinToolCallArrayParamLength: 1,
183196
ToolCallNotRequiredParamProbability: 50,
184197
ObjectToolCallNotRequiredParamProbability: 50,
185-
KVCacheSize: 1024,
186-
TokenBlockSize: 16,
187-
ZMQEndpoint: "tcp://localhost:5557",
188-
EventBatchSize: 16,
198+
KVCacheSize: 1024,
199+
TokenBlockSize: 16,
200+
ZMQEndpoint: "tcp://localhost:5557",
201+
EventBatchSize: 16,
202+
FailureInjectionRate: 10,
203+
FailureTypes: []string{},
189204
}
190205
}
191206

@@ -299,6 +314,25 @@ func (c *Configuration) validate() error {
299314
if c.EventBatchSize < 1 {
300315
return errors.New("event batch size cannot less than 1")
301316
}
317+
318+
if c.FailureInjectionRate < 0 || c.FailureInjectionRate > 100 {
319+
return errors.New("failure injection rate should be between 0 and 100")
320+
}
321+
322+
validFailureTypes := map[string]bool{
323+
FailureTypeRateLimit: true,
324+
FailureTypeInvalidAPIKey: true,
325+
FailureTypeContextLength: true,
326+
FailureTypeServerError: true,
327+
FailureTypeInvalidRequest: true,
328+
FailureTypeModelNotFound: true,
329+
}
330+
for _, failureType := range c.FailureTypes {
331+
if !validFailureTypes[failureType] {
332+
return fmt.Errorf("invalid failure type '%s', valid types are: rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found", failureType)
333+
}
334+
}
335+
302336
return nil
303337
}
304338

@@ -326,7 +360,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
326360
f.IntVar(&config.MaxCPULoras, "max-cpu-loras", config.MaxCPULoras, "Maximum number of LoRAs to store in CPU memory")
327361
f.IntVar(&config.MaxModelLen, "max-model-len", config.MaxModelLen, "Model's context window, maximum number of tokens in a single request including input and output")
328362

329-
f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode, echo - returns the same text that was sent in the request, for chat completion returns the last message, random - returns random sentence from a bank of pre-defined sentences")
363+
f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode: echo - returns the same text that was sent in the request, for chat completion returns the last message; random - returns random sentence from a bank of pre-defined sentences")
330364
f.IntVar(&config.InterTokenLatency, "inter-token-latency", config.InterTokenLatency, "Time to generate one token (in milliseconds)")
331365
f.IntVar(&config.TimeToFirstToken, "time-to-first-token", config.TimeToFirstToken, "Time to first token (in milliseconds)")
332366
f.IntVar(&config.KVCacheTransferLatency, "kv-cache-transfer-latency", config.KVCacheTransferLatency, "Time for KV-cache transfer from a remote vLLM (in milliseconds)")
@@ -351,6 +385,13 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
351385
f.StringVar(&config.HashSeed, "hash-seed", config.HashSeed, "Seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)")
352386
f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")
353387
f.IntVar(&config.EventBatchSize, "event-batch-size", config.EventBatchSize, "Maximum number of kv-cache events to be sent together")
388+
389+
f.IntVar(&config.FailureInjectionRate, "failure-injection-rate", config.FailureInjectionRate, "Probability (0-100) of injecting failures")
390+
391+
failureTypes := getParamValueFromArgs("failure-types")
392+
var dummyFailureTypes multiString
393+
f.Var(&dummyFailureTypes, "failure-types", "List of specific failure types to inject (rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found)")
394+
f.Lookup("failure-types").NoOptDefVal = "dummy"
354395

355396
// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
356397
var dummyString string
@@ -384,6 +425,16 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
384425
if servedModelNames != nil {
385426
config.ServedModelNames = servedModelNames
386427
}
428+
if failureTypes != nil {
429+
config.FailureTypes = failureTypes
430+
}
431+
432+
if config.HashSeed == "" {
433+
hashSeed := os.Getenv("PYTHONHASHSEED")
434+
if hashSeed != "" {
435+
config.HashSeed = hashSeed
436+
}
437+
}
387438

388439
if config.HashSeed == "" {
389440
hashSeed := os.Getenv("PYTHONHASHSEED")

0 commit comments

Comments
 (0)