Skip to content

Commit e18f3b1

Browse files
committed
Refactor failure injection and update simulator error handling
Failure injection is now controlled by a dedicated 'failure-injection-rate' parameter instead of a separate 'failure' mode. Failure type constants are centralized, and error handling in the simulator is refactored to use a unified method for sending error responses. Documentation and tests are updated to reflect these changes, and the OpenAI error response format now includes an 'object' field.

Signed-off-by: Sergey Marunich <[email protected]>
1 parent dcad055 commit e18f3b1

File tree

7 files changed: 82 additions (+82) and 314 deletions (-314).

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,11 @@ In addition, it supports a subset of vLLM's Prometheus metrics. These metrics ar
2929

3030
The simulated inference has no connection with the model and LoRA adapters specified in the command line parameters or via the /v1/load_lora_adapter HTTP REST endpoint. The /v1/models endpoint returns simulated results based on those same command line parameters and those loaded via the /v1/load_lora_adapter HTTP REST endpoint.
3131

32-
The simulator supports three modes of operation:
32+
The simulator supports two modes of operation:
3333
- `echo` mode: the response contains the same text that was received in the request. For `/v1/chat/completions`, the last message with role=`user` is used.
3434
- `random` mode: the response is randomly chosen from a set of pre-defined sentences.
35-
- `failure` mode: randomly injects OpenAI API compatible error responses for testing error handling.
35+
36+
Additionally, the simulator can inject OpenAI API compatible error responses for testing error handling using the `failure-injection-rate` parameter.
3637

3738
Additionally, the simulator can inject OpenAI API compatible error responses for testing error handling using the `failure-injection-rate` parameter.
3839

@@ -104,7 +105,6 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
104105
- `mode`: the simulator mode, optional, by default `random`
105106
- `echo`: returns the same text that was sent in the request
106107
- `random`: returns a sentence chosen at random from a set of pre-defined sentences
107-
- `failure`: randomly injects OpenAI API compatible error responses
108108
- `time-to-first-token`: the time to the first token (in milliseconds), optional, by default zero
109109
- `time-to-first-token-std-dev`: standard deviation of the time before the first token is returned (in milliseconds), optional, by default zero; it cannot be more than 30% of `time-to-first-token`, and it will not cause the actual time to first token to differ from `time-to-first-token` by more than 70%
110110
- `inter-token-latency`: the time to 'generate' each additional token (in milliseconds), optional, by default zero

pkg/common/config.go

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,14 @@ const (
3434
vLLMDefaultPort = 8000
3535
ModeRandom = "random"
3636
ModeEcho = "echo"
37-
ModeFailure = "failure"
37+
38+
// Failure type constants
39+
FailureTypeRateLimit = "rate_limit"
40+
FailureTypeInvalidAPIKey = "invalid_api_key"
41+
FailureTypeContextLength = "context_length"
42+
FailureTypeServerError = "server_error"
43+
FailureTypeInvalidRequest = "invalid_request"
44+
FailureTypeModelNotFound = "model_not_found"
3845
)
3946

4047
type Configuration struct {
@@ -221,8 +228,8 @@ func (c *Configuration) validate() error {
221228
c.ServedModelNames = []string{c.Model}
222229
}
223230

224-
if c.Mode != ModeEcho && c.Mode != ModeRandom && c.Mode != ModeFailure {
225-
return fmt.Errorf("invalid mode '%s', valid values are 'random', 'echo', and 'failure'", c.Mode)
231+
if c.Mode != ModeEcho && c.Mode != ModeRandom {
232+
return fmt.Errorf("invalid mode '%s', valid values are 'random' and 'echo'", c.Mode)
226233
}
227234
if c.Port <= 0 {
228235
return fmt.Errorf("invalid port '%d'", c.Port)
@@ -313,12 +320,12 @@ func (c *Configuration) validate() error {
313320
}
314321

315322
validFailureTypes := map[string]bool{
316-
"rate_limit": true,
317-
"invalid_api_key": true,
318-
"context_length": true,
319-
"server_error": true,
320-
"invalid_request": true,
321-
"model_not_found": true,
323+
FailureTypeRateLimit: true,
324+
FailureTypeInvalidAPIKey: true,
325+
FailureTypeContextLength: true,
326+
FailureTypeServerError: true,
327+
FailureTypeInvalidRequest: true,
328+
FailureTypeModelNotFound: true,
322329
}
323330
for _, failureType := range c.FailureTypes {
324331
if !validFailureTypes[failureType] {
@@ -353,7 +360,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
353360
f.IntVar(&config.MaxCPULoras, "max-cpu-loras", config.MaxCPULoras, "Maximum number of LoRAs to store in CPU memory")
354361
f.IntVar(&config.MaxModelLen, "max-model-len", config.MaxModelLen, "Model's context window, maximum number of tokens in a single request including input and output")
355362

356-
f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode: echo - returns the same text that was sent in the request, for chat completion returns the last message; random - returns random sentence from a bank of pre-defined sentences; failure - randomly injects API errors")
363+
f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode: echo - returns the same text that was sent in the request, for chat completion returns the last message; random - returns random sentence from a bank of pre-defined sentences")
357364
f.IntVar(&config.InterTokenLatency, "inter-token-latency", config.InterTokenLatency, "Time to generate one token (in milliseconds)")
358365
f.IntVar(&config.TimeToFirstToken, "time-to-first-token", config.TimeToFirstToken, "Time to first token (in milliseconds)")
359366
f.IntVar(&config.KVCacheTransferLatency, "kv-cache-transfer-latency", config.KVCacheTransferLatency, "Time for KV-cache transfer from a remote vLLM (in milliseconds)")

pkg/common/failures.go

Lines changed: 0 additions & 122 deletions
This file was deleted.

pkg/common/failures_test.go

Lines changed: 0 additions & 134 deletions
This file was deleted.

0 commit comments

Comments
 (0)