Merged (29 commits):
- `638d0f7` Add definition of new action input (#123) (shmuelk, Aug 6, 2025)
- `9ffe957` KV cache and tokenization related configuration (#125) (irar2, Aug 7, 2025)
- `a5a7d81` Another attempt at adding a latest tag only on release builds (#124) (shmuelk, Aug 7, 2025)
- `951f4a3` Publish kv-cache events (#126) (irar2, Aug 12, 2025)
- `6930192` Add failure injection mode to simulator (smarunich, Aug 13, 2025)
- `5ec92b8` Refactor failure injection and update simulator error handling (smarunich, Aug 14, 2025)
- `8e0eefa` Make tokenizer version configurable from Dockerfile (smarunich, Aug 14, 2025)
- `75dcb72` Add failure injection mode to simulator (smarunich, Aug 13, 2025)
- `d7bb175` Refactor failure injection and update simulator error handling (smarunich, Aug 14, 2025)
- `c35dbca` KV cache and tokenization related configuration (#125) (irar2, Aug 7, 2025)
- `2eca8e6` Publish kv-cache events (#126) (irar2, Aug 12, 2025)
- `28fb65b` Use same version of tokenizer in both Dockerfile and Makefile (#132) (mayabar, Aug 14, 2025)
- `3ae7113` Clarify failure injection rate documentation (smarunich, Aug 14, 2025)
- `f5ae85b` Set default failure injection rate to 0 (smarunich, Aug 14, 2025)
- `9dbb689` rebase duplicates (smarunich, Aug 14, 2025)
- `106e276` re-base the changes (irar2, Aug 7, 2025)
- `5162226` Update option constructors in simulator tests (smarunich, Aug 14, 2025)
- `7bd69e8` Merge branch 'main' into failure-mode (smarunich, Aug 14, 2025)
- `5182187` Document failure injection options in README (smarunich, Aug 14, 2025)
- `b68115f` Set FailureInjectionRate default to 0 in config (smarunich, Aug 14, 2025)
- `bfa02ff` Refactor failure type usage and error response format (smarunich, Aug 25, 2025)
- `700e36f` Refactor failure type flag handling and code formatting (smarunich, Aug 25, 2025)
- `14860b3` Merge branch 'main' into failure-mode (smarunich, Aug 25, 2025)
- `8f6d56c` Fix config validation and simulator test argument handling (smarunich, Aug 25, 2025)
- `e0183b7` remove duplicate (smarunich, Aug 26, 2025)
- `178a594` Refactor failure handling to use CompletionError struct (smarunich, Aug 26, 2025)
- `72dde24` Use one type for all errors. Map code to type (irar2, Aug 27, 2025)
- `13492fc` Merge branch 'main' into failure-mode (irar2, Aug 27, 2025)
- `7994048` Review comments (irar2, Aug 27, 2025)
2 changes: 2 additions & 0 deletions README.md
@@ -124,6 +124,8 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
- `hash-seed`: seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)
- `zmq-endpoint`: ZMQ address to publish events
- `event-batch-size`: the maximum number of kv-cache events to be sent together, defaults to 16
- `failure-injection-rate`: probability (0-100) of injecting failures, optional, default is 0
- `failure-types`: list of specific failure types to inject (rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found), optional, if empty all types are used
-->
In addition, as we are using klog, the following parameters are available:
- `add_dir_header`: if true, adds the file directory to the header of the log messages
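For illustration, here is how the two new options could look in a YAML config file. This is a sketch, not part of the diff: the keys mirror the `yaml` struct tags on `Configuration` in `pkg/common/config.go`, and the values are arbitrary examples.

```yaml
# Hypothetical config snippet; keys taken from the yaml struct tags.
failure-injection-rate: 25   # inject an error response on ~25% of requests
failure-types:               # restrict injection to these types (empty = all)
  - rate_limit
  - server_error
```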
70 changes: 64 additions & 6 deletions pkg/common/config.go
@@ -34,6 +34,14 @@
vLLMDefaultPort = 8000
ModeRandom = "random"
ModeEcho = "echo"

[CI check failure, line 37 in pkg/common/config.go (GitHub Actions / lint-and-test): File is not properly formatted (gofmt)]
// Failure type constants
FailureTypeRateLimit = "rate_limit"
FailureTypeInvalidAPIKey = "invalid_api_key"
FailureTypeContextLength = "context_length"
FailureTypeServerError = "server_error"
FailureTypeInvalidRequest = "invalid_request"
FailureTypeModelNotFound = "model_not_found"
)

type Configuration struct {
@@ -127,6 +135,11 @@
ZMQEndpoint string `yaml:"zmq-endpoint"`
// EventBatchSize is the maximum number of kv-cache events to be sent together, defaults to 16
EventBatchSize int `yaml:"event-batch-size"`

// FailureInjectionRate is the probability (0-100) of injecting failures
FailureInjectionRate int `yaml:"failure-injection-rate"`
[Review comment (Collaborator): please add json annotation. A sketch follows the struct below.]

// FailureTypes is a list of specific failure types to inject (empty means all types)
FailureTypes []string `yaml:"failure-types"`
}
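A minimal sketch of what the review comment asks for; the `json` tag names are an assumption, mirrored from the existing `yaml` tags:

```go
// Sketch only: the two new fields with both yaml and json annotations.
type failureConfigSketch struct {
	// FailureInjectionRate is the probability (0-100) of injecting failures
	FailureInjectionRate int `yaml:"failure-injection-rate" json:"failure-injection-rate"`
	// FailureTypes is a list of specific failure types to inject (empty means all types)
	FailureTypes []string `yaml:"failure-types" json:"failure-types"`
}
```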

type LoraModule struct {
@@ -182,10 +195,12 @@
MinToolCallArrayParamLength: 1,
ToolCallNotRequiredParamProbability: 50,
ObjectToolCallNotRequiredParamProbability: 50,
KVCacheSize: 1024,
TokenBlockSize: 16,
ZMQEndpoint: "tcp://localhost:5557",
EventBatchSize: 16,
FailureInjectionRate: 0,
FailureTypes: []string{},
}
}

@@ -299,6 +314,24 @@
if c.EventBatchSize < 1 {
return errors.New("event batch size cannot be less than 1")
}

if c.FailureInjectionRate < 0 || c.FailureInjectionRate > 100 {
return errors.New("failure injection rate should be between 0 and 100")
}

validFailureTypes := map[string]bool{
FailureTypeRateLimit: true,
FailureTypeInvalidAPIKey: true,
FailureTypeContextLength: true,
FailureTypeServerError: true,
FailureTypeInvalidRequest: true,
FailureTypeModelNotFound: true,
}
for _, failureType := range c.FailureTypes {
if !validFailureTypes[failureType] {
return fmt.Errorf("invalid failure type '%s', valid types are: rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found", failureType)
}
}
return nil
}

@@ -326,7 +359,7 @@
f.IntVar(&config.MaxCPULoras, "max-cpu-loras", config.MaxCPULoras, "Maximum number of LoRAs to store in CPU memory")
f.IntVar(&config.MaxModelLen, "max-model-len", config.MaxModelLen, "Model's context window, maximum number of tokens in a single request including input and output")

f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode: echo - returns the same text that was sent in the request, for chat completion returns the last message; random - returns random sentence from a bank of pre-defined sentences")
f.IntVar(&config.InterTokenLatency, "inter-token-latency", config.InterTokenLatency, "Time to generate one token (in milliseconds)")
f.IntVar(&config.TimeToFirstToken, "time-to-first-token", config.TimeToFirstToken, "Time to first token (in milliseconds)")
f.IntVar(&config.KVCacheTransferLatency, "kv-cache-transfer-latency", config.KVCacheTransferLatency, "Time for KV-cache transfer from a remote vLLM (in milliseconds)")
@@ -351,6 +384,14 @@
f.StringVar(&config.HashSeed, "hash-seed", config.HashSeed, "Seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)")
f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")
f.IntVar(&config.EventBatchSize, "event-batch-size", config.EventBatchSize, "Maximum number of kv-cache events to be sent together")

f.IntVar(&config.FailureInjectionRate, "failure-injection-rate", config.FailureInjectionRate, "Probability (0-100) of injecting failures")

failureTypes := getParamValueFromArgs("failure-types")
var dummyFailureTypes multiString
f.Var(&dummyFailureTypes, "failure-types", "List of specific failure types to inject (rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found)")
f.Lookup("failure-types").NoOptDefVal = "dummy"

[CI check failure, line 393 in pkg/common/config.go (GitHub Actions / lint-and-test): string `dummy` has 3 occurrences, make it a constant (goconst)]
// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
var dummyString string
@@ -384,6 +425,23 @@
if servedModelNames != nil {
config.ServedModelNames = servedModelNames
}
if failureTypes != nil {
config.FailureTypes = failureTypes
}

if config.HashSeed == "" {
hashSeed := os.Getenv("PYTHONHASHSEED")
if hashSeed != "" {
config.HashSeed = hashSeed
}
}

@@ -422,4 +480,4 @@
}
}
return values
}
126 changes: 126 additions & 0 deletions pkg/llm-d-inference-sim/failures.go
@@ -0,0 +1,126 @@
/*
Copyright 2025 The llm-d-inference-sim Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package llmdinferencesim

import (
"fmt"

"github.com/llm-d/llm-d-inference-sim/pkg/common"
)

const (
// Error message templates
RateLimitMessageTemplate = "Rate limit reached for %s in organization org-xxx on requests per min (RPM): Limit 3, Used 3, Requested 1."

[CI check failure, line 27 in pkg/llm-d-inference-sim/failures.go (GitHub Actions / lint-and-test): File is not properly formatted (gofmt)]
ModelNotFoundMessageTemplate = "The model '%s-nonexistent' does not exist"
)

type FailureSpec struct {
StatusCode int
ErrorType string
ErrorCode string
Message string
Param *string
}

var predefinedFailures = map[string]FailureSpec{
common.FailureTypeRateLimit: {
StatusCode: 429,
ErrorType: "rate_limit_exceeded",
ErrorCode: "rate_limit_exceeded",
Message: "Rate limit reached for model in organization org-xxx on requests per min (RPM): Limit 3, Used 3, Requested 1.",
[Review comment (Collaborator): Please use RateLimitMessageTemplate. A sketch follows this map.]
Param: nil,
},
common.FailureTypeInvalidAPIKey: {
StatusCode: 401,
ErrorType: "invalid_request_error",
ErrorCode: "invalid_api_key",
Message: "Incorrect API key provided",
Param: nil,
},
common.FailureTypeContextLength: {
StatusCode: 400,
ErrorType: "invalid_request_error",
ErrorCode: "context_length_exceeded",
Message: "This model's maximum context length is 4096 tokens. However, your messages resulted in 4500 tokens.",
Param: stringPtr("messages"),
},
common.FailureTypeServerError: {
StatusCode: 503,
ErrorType: "server_error",
ErrorCode: "server_error",
Message: "The server is overloaded or not ready yet.",
Param: nil,
},
common.FailureTypeInvalidRequest: {
StatusCode: 400,
ErrorType: "invalid_request_error",
ErrorCode: "invalid_request_error",
Message: "Invalid request: missing required parameter 'model'.",
Param: stringPtr("model"),
},
common.FailureTypeModelNotFound: {
StatusCode: 404,
ErrorType: "invalid_request_error",
ErrorCode: "model_not_found",
Message: "The model 'gpt-nonexistent' does not exist",
[Review comment (Collaborator): Please use ModelNotFoundMessageTemplate. A sketch follows this map.]
Param: stringPtr("model"),
},
}
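A sketch of the change the two review comments above ask for: seed the default messages from the templates instead of duplicating the literals. The placeholder arguments (`"model"`, `"gpt"`) reproduce the current hard-coded strings; `GetRandomFailure` substitutes the configured model at request time anyway.

```go
// Sketch only: map entries derived from the message templates.
var sketchedFailures = map[string]FailureSpec{
	common.FailureTypeRateLimit: {
		StatusCode: 429,
		ErrorType:  "rate_limit_exceeded",
		ErrorCode:  "rate_limit_exceeded",
		// "model" is a placeholder; GetRandomFailure replaces it with config.Model.
		Message: fmt.Sprintf(RateLimitMessageTemplate, "model"),
	},
	common.FailureTypeModelNotFound: {
		StatusCode: 404,
		ErrorType:  "invalid_request_error",
		ErrorCode:  "model_not_found",
		// "gpt" reproduces the current literal "The model 'gpt-nonexistent' does not exist".
		Message: fmt.Sprintf(ModelNotFoundMessageTemplate, "gpt"),
		Param:   stringPtr("model"),
	},
}
```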

// ShouldInjectFailure determines whether to inject a failure based on configuration
func ShouldInjectFailure(config *common.Configuration) bool {
[Review comment (Collaborator): Can be private. A sketch follows this function.]
if config.FailureInjectionRate == 0 {
return false
}

return common.RandomInt(1, 100) <= config.FailureInjectionRate
}
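The reviewer's "can be private" suggestion would look like the sketch below. Note that the external test package (`llmdinferencesim_test`) currently calls the exported name, so the tests would need an in-package shim (e.g. an `export_test.go`) or to move in-package.

```go
// Sketch only: unexported variant of the helper above.
// RandomInt(1, 100) yields 100 equally likely values, so the comparison
// succeeds with probability FailureInjectionRate/100.
func shouldInjectFailure(config *common.Configuration) bool {
	if config.FailureInjectionRate == 0 {
		return false
	}
	return common.RandomInt(1, 100) <= config.FailureInjectionRate
}
```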

// GetRandomFailure returns a random failure from configured types or all types if none specified
func GetRandomFailure(config *common.Configuration) FailureSpec {
var availableFailures []string
if len(config.FailureTypes) == 0 {
// Use all failure types if none specified
for failureType := range predefinedFailures {
availableFailures = append(availableFailures, failureType)
}
} else {
availableFailures = config.FailureTypes
}

if len(availableFailures) == 0 {
// Fallback to server_error if no valid types
return predefinedFailures[common.FailureTypeServerError]
}

randomIndex := common.RandomInt(0, len(availableFailures)-1)
randomType := availableFailures[randomIndex]

// Customize message with current model name
failure := predefinedFailures[randomType]
if randomType == common.FailureTypeRateLimit && config.Model != "" {
failure.Message = fmt.Sprintf(RateLimitMessageTemplate, config.Model)
} else if randomType == common.FailureTypeModelNotFound && config.Model != "" {
failure.Message = fmt.Sprintf(ModelNotFoundMessageTemplate, config.Model)
}

return failure
}

func stringPtr(s string) *string {
return &s
}
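To show how the two helpers compose, here is a self-contained sketch of a request path. The `net/http` server and the JSON error envelope are illustrative assumptions; the simulator's real server wiring is not part of this diff.

```go
package main

import (
	"encoding/json"
	"net/http"

	"github.com/llm-d/llm-d-inference-sim/pkg/common"
	sim "github.com/llm-d/llm-d-inference-sim/pkg/llm-d-inference-sim"
)

func main() {
	cfg := &common.Configuration{
		Model:                "test-model",
		FailureInjectionRate: 50, // fail roughly half of the requests
		FailureTypes:         []string{common.FailureTypeRateLimit},
	}
	http.HandleFunc("/v1/completions", func(w http.ResponseWriter, r *http.Request) {
		if sim.ShouldInjectFailure(cfg) {
			f := sim.GetRandomFailure(cfg)
			w.Header().Set("Content-Type", "application/json")
			w.WriteHeader(f.StatusCode)
			// OpenAI-style error envelope (shape assumed for illustration).
			_ = json.NewEncoder(w).Encode(map[string]any{"error": map[string]any{
				"message": f.Message,
				"type":    f.ErrorType,
				"code":    f.ErrorCode,
				"param":   f.Param,
			}})
			return
		}
		_, _ = w.Write([]byte(`{"object":"text_completion"}`)) // placeholder success path
	})
	_ = http.ListenAndServe(":8000", nil)
}
```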
128 changes: 128 additions & 0 deletions pkg/llm-d-inference-sim/failures_test.go
@@ -0,0 +1,128 @@
/*
Copyright 2025 The llm-d-inference-sim Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package llmdinferencesim_test

import (
"strings"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

"github.com/llm-d/llm-d-inference-sim/pkg/common"
llmdinferencesim "github.com/llm-d/llm-d-inference-sim/pkg/llm-d-inference-sim"
)

var _ = Describe("Failures", func() {
Describe("ShouldInjectFailure", func() {
It("should not inject failure when injection rate is 0", func() {
config := &common.Configuration{
Mode: common.ModeRandom,
FailureInjectionRate: 0,
}
Expect(llmdinferencesim.ShouldInjectFailure(config)).To(BeFalse())
})

It("should inject failure when injection rate is 100", func() {
config := &common.Configuration{
Mode: common.ModeRandom,
FailureInjectionRate: 100,
}
Expect(llmdinferencesim.ShouldInjectFailure(config)).To(BeTrue())
})

})

Describe("GetRandomFailure", func() {
It("should return a failure from all types when none specified", func() {
config := &common.Configuration{
Model: "test-model",
FailureTypes: []string{},
}
failure := llmdinferencesim.GetRandomFailure(config)
Expect(failure.StatusCode).To(BeNumerically(">=", 400))
Expect(failure.Message).ToNot(BeEmpty())
Expect(failure.ErrorType).ToNot(BeEmpty())
})

It("should return rate limit failure when specified", func() {
config := &common.Configuration{
Model: "test-model",
FailureTypes: []string{common.FailureTypeRateLimit},
}
failure := llmdinferencesim.GetRandomFailure(config)
Expect(failure.StatusCode).To(Equal(429))
Expect(failure.ErrorType).To(Equal("rate_limit_exceeded"))
Expect(failure.ErrorCode).To(Equal("rate_limit_exceeded"))
Expect(strings.Contains(failure.Message, "test-model")).To(BeTrue())
})

It("should return invalid API key failure when specified", func() {
config := &common.Configuration{
FailureTypes: []string{common.FailureTypeInvalidAPIKey},
}
failure := llmdinferencesim.GetRandomFailure(config)
Expect(failure.StatusCode).To(Equal(401))
Expect(failure.ErrorType).To(Equal("invalid_request_error"))
Expect(failure.ErrorCode).To(Equal("invalid_api_key"))
Expect(failure.Message).To(Equal("Incorrect API key provided"))
})

It("should return context length failure when specified", func() {
config := &common.Configuration{
FailureTypes: []string{common.FailureTypeContextLength},
}
failure := llmdinferencesim.GetRandomFailure(config)
Expect(failure.StatusCode).To(Equal(400))
Expect(failure.ErrorType).To(Equal("invalid_request_error"))
Expect(failure.ErrorCode).To(Equal("context_length_exceeded"))
Expect(failure.Param).ToNot(BeNil())
Expect(*failure.Param).To(Equal("messages"))
})

It("should return server error when specified", func() {
config := &common.Configuration{
FailureTypes: []string{common.FailureTypeServerError},
}
failure := llmdinferencesim.GetRandomFailure(config)
Expect(failure.StatusCode).To(Equal(503))
Expect(failure.ErrorType).To(Equal("server_error"))
Expect(failure.ErrorCode).To(Equal("server_error"))
})

It("should return model not found failure when specified", func() {
config := &common.Configuration{
Model: "test-model",
FailureTypes: []string{common.FailureTypeModelNotFound},
}
failure := llmdinferencesim.GetRandomFailure(config)
Expect(failure.StatusCode).To(Equal(404))
Expect(failure.ErrorType).To(Equal("invalid_request_error"))
Expect(failure.ErrorCode).To(Equal("model_not_found"))
Expect(strings.Contains(failure.Message, "test-model-nonexistent")).To(BeTrue())
})

It("should return server error as fallback for empty types", func() {
config := &common.Configuration{
FailureTypes: []string{},
}
// This test is probabilistic since it randomly selects, but we can test structure
failure := llmdinferencesim.GetRandomFailure(config)
Expect(failure.StatusCode).To(BeNumerically(">=", 400))
Expect(failure.ErrorType).ToNot(BeEmpty())
})
})
})
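The tests above pin only the endpoints (rates 0 and 100). A statistical check for an intermediate rate is sketched below; it would live inside the `ShouldInjectFailure` Describe block, and the loose bounds keep it deterministic in practice.

```go
It("should inject roughly half the time at rate 50", func() {
	config := &common.Configuration{
		Mode:                 common.ModeRandom,
		FailureInjectionRate: 50,
	}
	injected := 0
	const trials = 1000
	for i := 0; i < trials; i++ {
		if llmdinferencesim.ShouldInjectFailure(config) {
			injected++
		}
	}
	// With p=0.5 and n=1000, landing outside [400, 600] is vanishingly unlikely.
	Expect(injected).To(BeNumerically(">", 400))
	Expect(injected).To(BeNumerically("<", 600))
})
```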