
Commit dcad055

Add failure injection mode to simulator
Introduces a 'failure' mode to the simulator, allowing random injection of OpenAI API-compatible error responses for testing error handling. Adds configuration options for failure injection rate and specific failure types, implements error response logic, and updates documentation and tests to cover the new functionality.

Signed-off-by: Sergey Marunich <[email protected]>
1 parent f1f18d3 commit dcad055

7 files changed: +314, -80 lines


README.md

Lines changed: 3 additions & 1 deletion
@@ -29,9 +29,10 @@ In addition, it supports a subset of vLLM's Prometheus metrics. These metrics ar
 
 The simulated inference has no connection with the model and LoRA adapters specified in the command line parameters or via the /v1/load_lora_adapter HTTP REST endpoint. The /v1/models endpoint returns simulated results based on those same command line parameters and those loaded via the /v1/load_lora_adapter HTTP REST endpoint.
 
-The simulator supports two modes of operation:
+The simulator supports three modes of operation:
 - `echo` mode: the response contains the same text that was received in the request. For `/v1/chat/completions` the last message for the role=`user` is used.
 - `random` mode: the response is randomly chosen from a set of pre-defined sentences.
+- `failure` mode: randomly injects OpenAI API compatible error responses for testing error handling.
 
 Additionally, the simulator can inject OpenAI API compatible error responses for testing error handling using the `failure-injection-rate` parameter.
 
@@ -103,6 +104,7 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
 - `mode`: the simulator mode, optional, by default `random`
   - `echo`: returns the same text that was sent in the request
   - `random`: returns a sentence chosen at random from a set of pre-defined sentences
+  - `failure`: randomly injects OpenAI API compatible error responses
 - `time-to-first-token`: the time to the first token (in milliseconds), optional, by default zero
 - `time-to-first-token-std-dev`: standard deviation for time before the first token will be returned, in milliseconds, optional, default is 0, can't be more than 30% of `time-to-first-token`, will not cause the actual time to first token to differ by more than 70% from `time-to-first-token`
 - `inter-token-latency`: the time to 'generate' each additional token (in milliseconds), optional, by default zero
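
For orientation, an injected failure surfaces to clients as a standard OpenAI-style error envelope served with the matching HTTP status code. The sketch below only illustrates that wire shape; it is not code from this commit, and the simulator's actual response serialization lives elsewhere in the repository.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Illustrative only: the OpenAI-compatible error envelope the README refers to.
type openAIError struct {
	Message string  `json:"message"`
	Type    string  `json:"type"`
	Param   *string `json:"param"`
	Code    string  `json:"code"`
}

type errorResponse struct {
	Error openAIError `json:"error"`
}

func main() {
	resp := errorResponse{Error: openAIError{
		Message: "The server is overloaded or not ready yet.",
		Type:    "server_error",
		Code:    "server_error",
	}}
	b, _ := json.MarshalIndent(resp, "", "  ")
	fmt.Println(string(b)) // a body like this would accompany HTTP status 503
}
```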

pkg/common/config.go

Lines changed: 10 additions & 17 deletions
@@ -34,14 +34,7 @@ const (
 	vLLMDefaultPort = 8000
 	ModeRandom      = "random"
 	ModeEcho        = "echo"
-
-	// Failure type constants
-	FailureTypeRateLimit      = "rate_limit"
-	FailureTypeInvalidAPIKey  = "invalid_api_key"
-	FailureTypeContextLength  = "context_length"
-	FailureTypeServerError    = "server_error"
-	FailureTypeInvalidRequest = "invalid_request"
-	FailureTypeModelNotFound  = "model_not_found"
+	ModeFailure = "failure"
 )
 
 type Configuration struct {
@@ -228,8 +221,8 @@ func (c *Configuration) validate() error {
 		c.ServedModelNames = []string{c.Model}
 	}
 
-	if c.Mode != ModeEcho && c.Mode != ModeRandom {
-		return fmt.Errorf("invalid mode '%s', valid values are 'random' and 'echo'", c.Mode)
+	if c.Mode != ModeEcho && c.Mode != ModeRandom && c.Mode != ModeFailure {
+		return fmt.Errorf("invalid mode '%s', valid values are 'random', 'echo', and 'failure'", c.Mode)
 	}
 	if c.Port <= 0 {
 		return fmt.Errorf("invalid port '%d'", c.Port)
@@ -320,12 +313,12 @@ func (c *Configuration) validate() error {
 	}
 
 	validFailureTypes := map[string]bool{
-		FailureTypeRateLimit:      true,
-		FailureTypeInvalidAPIKey:  true,
-		FailureTypeContextLength:  true,
-		FailureTypeServerError:    true,
-		FailureTypeInvalidRequest: true,
-		FailureTypeModelNotFound:  true,
+		"rate_limit":      true,
+		"invalid_api_key": true,
+		"context_length":  true,
+		"server_error":    true,
+		"invalid_request": true,
+		"model_not_found": true,
 	}
 	for _, failureType := range c.FailureTypes {
 		if !validFailureTypes[failureType] {
@@ -360,7 +353,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
 	f.IntVar(&config.MaxCPULoras, "max-cpu-loras", config.MaxCPULoras, "Maximum number of LoRAs to store in CPU memory")
 	f.IntVar(&config.MaxModelLen, "max-model-len", config.MaxModelLen, "Model's context window, maximum number of tokens in a single request including input and output")
 
-	f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode: echo - returns the same text that was sent in the request, for chat completion returns the last message; random - returns random sentence from a bank of pre-defined sentences")
+	f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode: echo - returns the same text that was sent in the request, for chat completion returns the last message; random - returns random sentence from a bank of pre-defined sentences; failure - randomly injects API errors")
 	f.IntVar(&config.InterTokenLatency, "inter-token-latency", config.InterTokenLatency, "Time to generate one token (in milliseconds)")
 	f.IntVar(&config.TimeToFirstToken, "time-to-first-token", config.TimeToFirstToken, "Time to first token (in milliseconds)")
 	f.IntVar(&config.KVCacheTransferLatency, "kv-cache-transfer-latency", config.KVCacheTransferLatency, "Time for KV-cache transfer from a remote vLLM (in milliseconds)")
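
Taken together with the validation above, a failure-mode configuration looks roughly like the following. This is a sketch, not code from the commit: it assumes the remaining required fields (Model, Port, and so on) are populated elsewhere, and the field names mirror the Configuration literals used in the new tests.

```go
package common_test

import "github.com/llm-d/llm-d-inference-sim/pkg/common"

// Sketch only: a failure-mode configuration accepted by the validation above.
// Other required fields (Model, Port, ...) are assumed to be set elsewhere.
func exampleFailureConfig() *common.Configuration {
	return &common.Configuration{
		Mode:                 common.ModeFailure,
		FailureInjectionRate: 30, // inject an error on roughly 30% of requests
		FailureTypes:         []string{"rate_limit", "server_error"},
	}
}
```

On the command line the same setup corresponds to passing `--mode failure` together with the `failure-injection-rate` parameter mentioned in the README.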

pkg/common/failures.go

Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
/*
Copyright 2025 The llm-d-inference-sim Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package common

import (
	"fmt"
	"math/rand"
	"time"
)

type FailureSpec struct {
	StatusCode int
	ErrorType  string
	ErrorCode  string
	Message    string
	Param      *string
}

var predefinedFailures = map[string]FailureSpec{
	"rate_limit": {
		StatusCode: 429,
		ErrorType:  "rate_limit_exceeded",
		ErrorCode:  "rate_limit_exceeded",
		Message:    "Rate limit reached for model in organization org-xxx on requests per min (RPM): Limit 3, Used 3, Requested 1.",
		Param:      nil,
	},
	"invalid_api_key": {
		StatusCode: 401,
		ErrorType:  "invalid_request_error",
		ErrorCode:  "invalid_api_key",
		Message:    "Incorrect API key provided",
		Param:      nil,
	},
	"context_length": {
		StatusCode: 400,
		ErrorType:  "invalid_request_error",
		ErrorCode:  "context_length_exceeded",
		Message:    "This model's maximum context length is 4096 tokens. However, your messages resulted in 4500 tokens.",
		Param:      stringPtr("messages"),
	},
	"server_error": {
		StatusCode: 503,
		ErrorType:  "server_error",
		ErrorCode:  "server_error",
		Message:    "The server is overloaded or not ready yet.",
		Param:      nil,
	},
	"invalid_request": {
		StatusCode: 400,
		ErrorType:  "invalid_request_error",
		ErrorCode:  "invalid_request_error",
		Message:    "Invalid request: missing required parameter 'model'.",
		Param:      stringPtr("model"),
	},
	"model_not_found": {
		StatusCode: 404,
		ErrorType:  "invalid_request_error",
		ErrorCode:  "model_not_found",
		Message:    "The model 'gpt-nonexistent' does not exist",
		Param:      stringPtr("model"),
	},
}

// ShouldInjectFailure determines whether to inject a failure based on configuration
func ShouldInjectFailure(config *Configuration) bool {
	if config.Mode != ModeFailure {
		return false
	}

	rand.Seed(time.Now().UnixNano())
	return rand.Intn(100) < config.FailureInjectionRate
}

// GetRandomFailure returns a random failure from configured types or all types if none specified
func GetRandomFailure(config *Configuration) FailureSpec {
	rand.Seed(time.Now().UnixNano())

	var availableFailures []string
	if len(config.FailureTypes) == 0 {
		// Use all failure types if none specified
		for failureType := range predefinedFailures {
			availableFailures = append(availableFailures, failureType)
		}
	} else {
		availableFailures = config.FailureTypes
	}

	if len(availableFailures) == 0 {
		// Fallback to server_error if no valid types
		return predefinedFailures["server_error"]
	}

	randomType := availableFailures[rand.Intn(len(availableFailures))]

	// Customize message with current model name
	failure := predefinedFailures[randomType]
	if randomType == "rate_limit" && config.Model != "" {
		failure.Message = fmt.Sprintf("Rate limit reached for %s in organization org-xxx on requests per min (RPM): Limit 3, Used 3, Requested 1.", config.Model)
	} else if randomType == "model_not_found" && config.Model != "" {
		failure.Message = fmt.Sprintf("The model '%s-nonexistent' does not exist", config.Model)
	}

	return failure
}

func stringPtr(s string) *string {
	return &s
}
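
ShouldInjectFailure and GetRandomFailure are the package's exported surface; how they are called from the request path is not part of this diff. A hypothetical sketch of that wiring, with the handler wrapper and envelope mapping invented purely for illustration:

```go
package example

import (
	"encoding/json"
	"net/http"

	"github.com/llm-d/llm-d-inference-sim/pkg/common"
)

// Hypothetical wrapper, for illustration only: the simulator's real request
// handlers are not shown in this commit.
func withFailureInjection(cfg *common.Configuration, next http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if common.ShouldInjectFailure(cfg) {
			failure := common.GetRandomFailure(cfg)
			w.Header().Set("Content-Type", "application/json")
			w.WriteHeader(failure.StatusCode)
			// Map the FailureSpec onto an OpenAI-style error envelope.
			_ = json.NewEncoder(w).Encode(map[string]any{
				"error": map[string]any{
					"message": failure.Message,
					"type":    failure.ErrorType,
					"param":   failure.Param,
					"code":    failure.ErrorCode,
				},
			})
			return
		}
		next(w, r)
	}
}
```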

pkg/common/failures_test.go

Lines changed: 134 additions & 0 deletions
@@ -0,0 +1,134 @@
/*
Copyright 2025 The llm-d-inference-sim Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package common_test

import (
	"strings"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	"github.com/llm-d/llm-d-inference-sim/pkg/common"
)

var _ = Describe("Failures", func() {
	Describe("ShouldInjectFailure", func() {
		It("should not inject failure when not in failure mode", func() {
			config := &common.Configuration{
				Mode:                 common.ModeRandom,
				FailureInjectionRate: 100,
			}
			Expect(common.ShouldInjectFailure(config)).To(BeFalse())
		})

		It("should not inject failure when rate is 0", func() {
			config := &common.Configuration{
				Mode:                 common.ModeFailure,
				FailureInjectionRate: 0,
			}
			Expect(common.ShouldInjectFailure(config)).To(BeFalse())
		})

		It("should inject failure when in failure mode with 100% rate", func() {
			config := &common.Configuration{
				Mode:                 common.ModeFailure,
				FailureInjectionRate: 100,
			}
			Expect(common.ShouldInjectFailure(config)).To(BeTrue())
		})
	})

	Describe("GetRandomFailure", func() {
		It("should return a failure from all types when none specified", func() {
			config := &common.Configuration{
				Model:        "test-model",
				FailureTypes: []string{},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(BeNumerically(">=", 400))
			Expect(failure.Message).ToNot(BeEmpty())
			Expect(failure.ErrorType).ToNot(BeEmpty())
		})

		It("should return rate limit failure when specified", func() {
			config := &common.Configuration{
				Model:        "test-model",
				FailureTypes: []string{"rate_limit"},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(Equal(429))
			Expect(failure.ErrorType).To(Equal("rate_limit_exceeded"))
			Expect(failure.ErrorCode).To(Equal("rate_limit_exceeded"))
			Expect(strings.Contains(failure.Message, "test-model")).To(BeTrue())
		})

		It("should return invalid API key failure when specified", func() {
			config := &common.Configuration{
				FailureTypes: []string{"invalid_api_key"},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(Equal(401))
			Expect(failure.ErrorType).To(Equal("invalid_request_error"))
			Expect(failure.ErrorCode).To(Equal("invalid_api_key"))
			Expect(failure.Message).To(Equal("Incorrect API key provided"))
		})

		It("should return context length failure when specified", func() {
			config := &common.Configuration{
				FailureTypes: []string{"context_length"},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(Equal(400))
			Expect(failure.ErrorType).To(Equal("invalid_request_error"))
			Expect(failure.ErrorCode).To(Equal("context_length_exceeded"))
			Expect(failure.Param).ToNot(BeNil())
			Expect(*failure.Param).To(Equal("messages"))
		})

		It("should return server error when specified", func() {
			config := &common.Configuration{
				FailureTypes: []string{"server_error"},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(Equal(503))
			Expect(failure.ErrorType).To(Equal("server_error"))
			Expect(failure.ErrorCode).To(Equal("server_error"))
		})

		It("should return model not found failure when specified", func() {
			config := &common.Configuration{
				Model:        "test-model",
				FailureTypes: []string{"model_not_found"},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(Equal(404))
			Expect(failure.ErrorType).To(Equal("invalid_request_error"))
			Expect(failure.ErrorCode).To(Equal("model_not_found"))
			Expect(strings.Contains(failure.Message, "test-model-nonexistent")).To(BeTrue())
		})

		It("should return server error as fallback for empty types", func() {
			config := &common.Configuration{
				FailureTypes: []string{},
			}
			// This test is probabilistic since it randomly selects, but we can test structure
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(BeNumerically(">=", 400))
			Expect(failure.ErrorType).ToNot(BeEmpty())
		})
	})
})
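
These specs use Ginkgo/Gomega, so they only execute if the package registers a suite bootstrap; that bootstrap is not part of this diff and presumably already exists in pkg/common. For completeness, a minimal version would look like this (the file and suite names are assumptions):

```go
package common_test

import (
	"testing"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

// Ginkgo suite bootstrap; shown here only so the specs above read as a
// complete picture.
func TestCommon(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "Common Suite")
}
```

With that in place, the specs run under a plain `go test ./pkg/common/...`.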
