Skip to content

Commit 09ab37b

Browse files
committed
Add failure injection mode to simulator
Introduces a 'failure' mode to the simulator, allowing random injection of OpenAI API-compatible error responses for testing error handling. Adds configuration options for failure injection rate and specific failure types, implements error response logic, and updates documentation and tests to cover the new functionality. Signed-off-by: Sergey Marunich <[email protected]>
1 parent f99a38b commit 09ab37b

File tree

7 files changed

+609
-22
lines changed

7 files changed

+609
-22
lines changed

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,10 @@ In addition, it supports a subset of vLLM's Prometheus metrics. These metrics ar
2929

3030
The simulated inference has no connection with the model and LoRA adapters specified in the command line parameters or via the /v1/load_lora_adapter HTTP REST endpoint. The /v1/models endpoint returns simulated results based on those same command line parameters and those loaded via the /v1/load_lora_adapter HTTP REST endpoint.
3131

32-
The simulator supports two modes of operation:
32+
The simulator supports three modes of operation:
3333
- `echo` mode: the response contains the same text that was received in the request. For `/v1/chat/completions` the last message for the role=`user` is used.
3434
- `random` mode: the response is randomly chosen from a set of pre-defined sentences.
35+
- `failure` mode: randomly injects OpenAI API-compatible error responses for testing error handling.
3536

3637
Timing of the response is defined by the `time-to-first-token` and `inter-token-latency` parameters. In case P/D is enabled for a request, `kv-cache-transfer-latency` will be used instead of `time-to-first-token`.
3738

@@ -101,6 +102,7 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
101102
- `mode`: the simulator mode, optional, by default `random`
102103
- `echo`: returns the same text that was sent in the request
103104
- `random`: returns a sentence chosen at random from a set of pre-defined sentences
105+
- `failure`: randomly injects OpenAI API-compatible error responses
104106
- `time-to-first-token`: the time to the first token (in milliseconds), optional, by default zero
105107
- `time-to-first-token-std-dev`: standard deviation for time before the first token will be returned, in milliseconds, optional, default is 0, can't be more than 30% of `time-to-first-token`, will not cause the actual time to first token to differ by more than 70% from `time-to-first-token`
106108
- `inter-token-latency`: the time to 'generate' each additional token (in milliseconds), optional, by default zero
@@ -122,6 +124,8 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
122124
- `tokenizers-cache-dir`: the directory for caching tokenizers
123125
- `hash-seed`: seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)
124126
- `zmq-endpoint`: ZMQ address to publish events
127+
- `failure-injection-rate`: probability (0-100) of injecting failures when in failure mode, optional, default is 10
128+
- `failure-types`: list of specific failure types to inject (rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found), optional, if empty all types are used
125129

126130
In addition, as we are using klog, the following parameters are available:
127131
- `add_dir_header`: if true, adds the file directory to the header of the log messages

pkg/common/config.go

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ const (
3434
vLLMDefaultPort = 8000
3535
ModeRandom = "random"
3636
ModeEcho = "echo"
37+
ModeFailure = "failure"
3738
)
3839

3940
type Configuration struct {
@@ -127,6 +128,11 @@ type Configuration struct {
127128
ZMQEndpoint string `yaml:"zmq-endpoint"`
128129
// EventBatchSize is the maximum number of kv-cache events to be sent together, defaults to 16
129130
EventBatchSize int `yaml:"event-batch-size"`
131+
132+
// FailureInjectionRate is the probability (0-100) of injecting failures when in failure mode
133+
FailureInjectionRate int `yaml:"failure-injection-rate"`
134+
// FailureTypes is a list of specific failure types to inject (empty means all types)
135+
FailureTypes []string `yaml:"failure-types"`
130136
}
131137

132138
type LoraModule struct {
@@ -182,10 +188,12 @@ func newConfig() *Configuration {
182188
MinToolCallArrayParamLength: 1,
183189
ToolCallNotRequiredParamProbability: 50,
184190
ObjectToolCallNotRequiredParamProbability: 50,
185-
KVCacheSize: 1024,
186-
TokenBlockSize: 16,
187-
ZMQEndpoint: "tcp://localhost:5557",
188-
EventBatchSize: 16,
191+
KVCacheSize: 1024,
192+
TokenBlockSize: 16,
193+
ZMQEndpoint: "tcp://localhost:5557",
194+
EventBatchSize: 16,
195+
FailureInjectionRate: 10,
196+
FailureTypes: []string{},
189197
}
190198
}
191199

@@ -213,8 +221,8 @@ func (c *Configuration) validate() error {
213221
c.ServedModelNames = []string{c.Model}
214222
}
215223

216-
if c.Mode != ModeEcho && c.Mode != ModeRandom {
217-
return fmt.Errorf("invalid mode '%s', valid values are 'random' and 'echo'", c.Mode)
224+
if c.Mode != ModeEcho && c.Mode != ModeRandom && c.Mode != ModeFailure {
225+
return fmt.Errorf("invalid mode '%s', valid values are 'random', 'echo', and 'failure'", c.Mode)
218226
}
219227
if c.Port <= 0 {
220228
return fmt.Errorf("invalid port '%d'", c.Port)
@@ -299,6 +307,25 @@ func (c *Configuration) validate() error {
299307
if c.EventBatchSize < 1 {
300308
return errors.New("event batch size cannot less than 1")
301309
}
310+
311+
if c.FailureInjectionRate < 0 || c.FailureInjectionRate > 100 {
312+
return errors.New("failure injection rate should be between 0 and 100")
313+
}
314+
315+
validFailureTypes := map[string]bool{
316+
"rate_limit": true,
317+
"invalid_api_key": true,
318+
"context_length": true,
319+
"server_error": true,
320+
"invalid_request": true,
321+
"model_not_found": true,
322+
}
323+
for _, failureType := range c.FailureTypes {
324+
if !validFailureTypes[failureType] {
325+
return fmt.Errorf("invalid failure type '%s', valid types are: rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found", failureType)
326+
}
327+
}
328+
302329
return nil
303330
}
304331

@@ -326,7 +353,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
326353
f.IntVar(&config.MaxCPULoras, "max-cpu-loras", config.MaxCPULoras, "Maximum number of LoRAs to store in CPU memory")
327354
f.IntVar(&config.MaxModelLen, "max-model-len", config.MaxModelLen, "Model's context window, maximum number of tokens in a single request including input and output")
328355

329-
f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode, echo - returns the same text that was sent in the request, for chat completion returns the last message, random - returns random sentence from a bank of pre-defined sentences")
356+
f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode: echo - returns the same text that was sent in the request, for chat completion returns the last message; random - returns random sentence from a bank of pre-defined sentences; failure - randomly injects API errors")
330357
f.IntVar(&config.InterTokenLatency, "inter-token-latency", config.InterTokenLatency, "Time to generate one token (in milliseconds)")
331358
f.IntVar(&config.TimeToFirstToken, "time-to-first-token", config.TimeToFirstToken, "Time to first token (in milliseconds)")
332359
f.IntVar(&config.KVCacheTransferLatency, "kv-cache-transfer-latency", config.KVCacheTransferLatency, "Time for KV-cache transfer from a remote vLLM (in milliseconds)")
@@ -351,6 +378,13 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
351378
f.StringVar(&config.HashSeed, "hash-seed", config.HashSeed, "Seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)")
352379
f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")
353380
f.IntVar(&config.EventBatchSize, "event-batch-size", config.EventBatchSize, "Maximum number of kv-cache events to be sent together")
381+
382+
f.IntVar(&config.FailureInjectionRate, "failure-injection-rate", config.FailureInjectionRate, "Probability (0-100) of injecting failures when in failure mode")
383+
384+
failureTypes := getParamValueFromArgs("failure-types")
385+
var dummyFailureTypes multiString
386+
f.Var(&dummyFailureTypes, "failure-types", "List of specific failure types to inject (rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found)")
387+
f.Lookup("failure-types").NoOptDefVal = "dummy"
354388

355389
// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
356390
var dummyString string
@@ -384,6 +418,9 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
384418
if servedModelNames != nil {
385419
config.ServedModelNames = servedModelNames
386420
}
421+
if failureTypes != nil {
422+
config.FailureTypes = failureTypes
423+
}
387424

388425
if config.HashSeed == "" {
389426
hashSeed := os.Getenv("PYTHONHASHSEED")

pkg/common/failures.go

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*
2+
Copyright 2025 The llm-d-inference-sim Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package common
18+
19+
import (
20+
"fmt"
21+
"math/rand"
22+
"time"
23+
)
24+
25+
type FailureSpec struct {
26+
StatusCode int
27+
ErrorType string
28+
ErrorCode string
29+
Message string
30+
Param *string
31+
}
32+
33+
var predefinedFailures = map[string]FailureSpec{
34+
"rate_limit": {
35+
StatusCode: 429,
36+
ErrorType: "rate_limit_exceeded",
37+
ErrorCode: "rate_limit_exceeded",
38+
Message: "Rate limit reached for model in organization org-xxx on requests per min (RPM): Limit 3, Used 3, Requested 1.",
39+
Param: nil,
40+
},
41+
"invalid_api_key": {
42+
StatusCode: 401,
43+
ErrorType: "invalid_request_error",
44+
ErrorCode: "invalid_api_key",
45+
Message: "Incorrect API key provided",
46+
Param: nil,
47+
},
48+
"context_length": {
49+
StatusCode: 400,
50+
ErrorType: "invalid_request_error",
51+
ErrorCode: "context_length_exceeded",
52+
Message: "This model's maximum context length is 4096 tokens. However, your messages resulted in 4500 tokens.",
53+
Param: stringPtr("messages"),
54+
},
55+
"server_error": {
56+
StatusCode: 503,
57+
ErrorType: "server_error",
58+
ErrorCode: "server_error",
59+
Message: "The server is overloaded or not ready yet.",
60+
Param: nil,
61+
},
62+
"invalid_request": {
63+
StatusCode: 400,
64+
ErrorType: "invalid_request_error",
65+
ErrorCode: "invalid_request_error",
66+
Message: "Invalid request: missing required parameter 'model'.",
67+
Param: stringPtr("model"),
68+
},
69+
"model_not_found": {
70+
StatusCode: 404,
71+
ErrorType: "invalid_request_error",
72+
ErrorCode: "model_not_found",
73+
Message: "The model 'gpt-nonexistent' does not exist",
74+
Param: stringPtr("model"),
75+
},
76+
}
77+
78+
// ShouldInjectFailure determines whether to inject a failure based on configuration
79+
func ShouldInjectFailure(config *Configuration) bool {
80+
if config.Mode != ModeFailure {
81+
return false
82+
}
83+
84+
rand.Seed(time.Now().UnixNano())
85+
return rand.Intn(100) < config.FailureInjectionRate
86+
}
87+
88+
// GetRandomFailure returns a random failure from configured types or all types if none specified
89+
func GetRandomFailure(config *Configuration) FailureSpec {
90+
rand.Seed(time.Now().UnixNano())
91+
92+
var availableFailures []string
93+
if len(config.FailureTypes) == 0 {
94+
// Use all failure types if none specified
95+
for failureType := range predefinedFailures {
96+
availableFailures = append(availableFailures, failureType)
97+
}
98+
} else {
99+
availableFailures = config.FailureTypes
100+
}
101+
102+
if len(availableFailures) == 0 {
103+
// Fallback to server_error if no valid types
104+
return predefinedFailures["server_error"]
105+
}
106+
107+
randomType := availableFailures[rand.Intn(len(availableFailures))]
108+
109+
// Customize message with current model name
110+
failure := predefinedFailures[randomType]
111+
if randomType == "rate_limit" && config.Model != "" {
112+
failure.Message = fmt.Sprintf("Rate limit reached for %s in organization org-xxx on requests per min (RPM): Limit 3, Used 3, Requested 1.", config.Model)
113+
} else if randomType == "model_not_found" && config.Model != "" {
114+
failure.Message = fmt.Sprintf("The model '%s-nonexistent' does not exist", config.Model)
115+
}
116+
117+
return failure
118+
}
119+
120+
// stringPtr returns a pointer to a copy of s, convenient for populating
// optional *string fields such as FailureSpec.Param.
func stringPtr(s string) *string {
	v := s
	return &v
}

pkg/common/failures_test.go

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
Copyright 2025 The llm-d-inference-sim Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package common_test
18+
19+
import (
20+
"strings"
21+
22+
. "github.com/onsi/ginkgo/v2"
23+
. "github.com/onsi/gomega"
24+
25+
"github.com/llm-d/llm-d-inference-sim/pkg/common"
26+
)
27+
28+
var _ = Describe("Failures", func() {
29+
Describe("ShouldInjectFailure", func() {
30+
It("should not inject failure when not in failure mode", func() {
31+
config := &common.Configuration{
32+
Mode: common.ModeRandom,
33+
FailureInjectionRate: 100,
34+
}
35+
Expect(common.ShouldInjectFailure(config)).To(BeFalse())
36+
})
37+
38+
It("should not inject failure when rate is 0", func() {
39+
config := &common.Configuration{
40+
Mode: common.ModeFailure,
41+
FailureInjectionRate: 0,
42+
}
43+
Expect(common.ShouldInjectFailure(config)).To(BeFalse())
44+
})
45+
46+
It("should inject failure when in failure mode with 100% rate", func() {
47+
config := &common.Configuration{
48+
Mode: common.ModeFailure,
49+
FailureInjectionRate: 100,
50+
}
51+
Expect(common.ShouldInjectFailure(config)).To(BeTrue())
52+
})
53+
})
54+
55+
Describe("GetRandomFailure", func() {
56+
It("should return a failure from all types when none specified", func() {
57+
config := &common.Configuration{
58+
Model: "test-model",
59+
FailureTypes: []string{},
60+
}
61+
failure := common.GetRandomFailure(config)
62+
Expect(failure.StatusCode).To(BeNumerically(">=", 400))
63+
Expect(failure.Message).ToNot(BeEmpty())
64+
Expect(failure.ErrorType).ToNot(BeEmpty())
65+
})
66+
67+
It("should return rate limit failure when specified", func() {
68+
config := &common.Configuration{
69+
Model: "test-model",
70+
FailureTypes: []string{"rate_limit"},
71+
}
72+
failure := common.GetRandomFailure(config)
73+
Expect(failure.StatusCode).To(Equal(429))
74+
Expect(failure.ErrorType).To(Equal("rate_limit_exceeded"))
75+
Expect(failure.ErrorCode).To(Equal("rate_limit_exceeded"))
76+
Expect(strings.Contains(failure.Message, "test-model")).To(BeTrue())
77+
})
78+
79+
It("should return invalid API key failure when specified", func() {
80+
config := &common.Configuration{
81+
FailureTypes: []string{"invalid_api_key"},
82+
}
83+
failure := common.GetRandomFailure(config)
84+
Expect(failure.StatusCode).To(Equal(401))
85+
Expect(failure.ErrorType).To(Equal("invalid_request_error"))
86+
Expect(failure.ErrorCode).To(Equal("invalid_api_key"))
87+
Expect(failure.Message).To(Equal("Incorrect API key provided"))
88+
})
89+
90+
It("should return context length failure when specified", func() {
91+
config := &common.Configuration{
92+
FailureTypes: []string{"context_length"},
93+
}
94+
failure := common.GetRandomFailure(config)
95+
Expect(failure.StatusCode).To(Equal(400))
96+
Expect(failure.ErrorType).To(Equal("invalid_request_error"))
97+
Expect(failure.ErrorCode).To(Equal("context_length_exceeded"))
98+
Expect(failure.Param).ToNot(BeNil())
99+
Expect(*failure.Param).To(Equal("messages"))
100+
})
101+
102+
It("should return server error when specified", func() {
103+
config := &common.Configuration{
104+
FailureTypes: []string{"server_error"},
105+
}
106+
failure := common.GetRandomFailure(config)
107+
Expect(failure.StatusCode).To(Equal(503))
108+
Expect(failure.ErrorType).To(Equal("server_error"))
109+
Expect(failure.ErrorCode).To(Equal("server_error"))
110+
})
111+
112+
It("should return model not found failure when specified", func() {
113+
config := &common.Configuration{
114+
Model: "test-model",
115+
FailureTypes: []string{"model_not_found"},
116+
}
117+
failure := common.GetRandomFailure(config)
118+
Expect(failure.StatusCode).To(Equal(404))
119+
Expect(failure.ErrorType).To(Equal("invalid_request_error"))
120+
Expect(failure.ErrorCode).To(Equal("model_not_found"))
121+
Expect(strings.Contains(failure.Message, "test-model-nonexistent")).To(BeTrue())
122+
})
123+
124+
It("should return server error as fallback for empty types", func() {
125+
config := &common.Configuration{
126+
FailureTypes: []string{},
127+
}
128+
// This test is probabilistic since it randomly selects, but we can test structure
129+
failure := common.GetRandomFailure(config)
130+
Expect(failure.StatusCode).To(BeNumerically(">=", 400))
131+
Expect(failure.ErrorType).ToNot(BeEmpty())
132+
})
133+
})
134+
})

0 commit comments

Comments
 (0)