
Commit dcad055

Add failure injection mode to simulator
Introduces a 'failure' mode to the simulator, allowing random injection of OpenAI API-compatible error responses for testing error handling. Adds configuration options for failure injection rate and specific failure types, implements error response logic, and updates documentation and tests to cover the new functionality.

Signed-off-by: Sergey Marunich <[email protected]>
1 parent f1f18d3 commit dcad055

7 files changed: +314, -80 lines


README.md

Lines changed: 3 additions & 1 deletion
@@ -29,9 +29,10 @@ In addition, it supports a subset of vLLM's Prometheus metrics. These metrics ar
 
 The simulated inference has no connection with the model and LoRA adapters specified in the command line parameters or via the /v1/load_lora_adapter HTTP REST endpoint. The /v1/models endpoint returns simulated results based on those same command line parameters and those loaded via the /v1/load_lora_adapter HTTP REST endpoint.
 
-The simulator supports two modes of operation:
+The simulator supports three modes of operation:
 - `echo` mode: the response contains the same text that was received in the request. For `/v1/chat/completions` the last message for the role=`user` is used.
 - `random` mode: the response is randomly chosen from a set of pre-defined sentences.
+- `failure` mode: randomly injects OpenAI API compatible error responses for testing error handling.
 
 Additionally, the simulator can inject OpenAI API compatible error responses for testing error handling using the `failure-injection-rate` parameter.
 
@@ -103,6 +104,7 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
 - `mode`: the simulator mode, optional, by default `random`
   - `echo`: returns the same text that was sent in the request
   - `random`: returns a sentence chosen at random from a set of pre-defined sentences
+  - `failure`: randomly injects OpenAI API compatible error responses
 - `time-to-first-token`: the time to the first token (in milliseconds), optional, by default zero
 - `time-to-first-token-std-dev`: standard deviation for time before the first token will be returned, in milliseconds, optional, default is 0, can't be more than 30% of `time-to-first-token`, will not cause the actual time to first token to differ by more than 70% from `time-to-first-token`
 - `inter-token-latency`: the time to 'generate' each additional token (in milliseconds), optional, by default zero
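
For orientation, an injected failure surfaces to clients as a standard OpenAI-style error envelope served with the matching HTTP status code. The sketch below only illustrates that wire shape; it is not code from this commit, and the simulator's actual response serialization lives elsewhere in the repository.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Illustrative only: the OpenAI-compatible error envelope the README refers to.
type openAIError struct {
	Message string  `json:"message"`
	Type    string  `json:"type"`
	Param   *string `json:"param"`
	Code    string  `json:"code"`
}

type errorResponse struct {
	Error openAIError `json:"error"`
}

func main() {
	resp := errorResponse{Error: openAIError{
		Message: "The server is overloaded or not ready yet.",
		Type:    "server_error",
		Code:    "server_error",
	}}
	b, _ := json.MarshalIndent(resp, "", "  ")
	fmt.Println(string(b)) // a body like this would accompany HTTP status 503
}
```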

pkg/common/config.go

Lines changed: 10 additions & 17 deletions
@@ -34,14 +34,7 @@ const (
 	vLLMDefaultPort = 8000
 	ModeRandom      = "random"
 	ModeEcho        = "echo"
-
-	// Failure type constants
-	FailureTypeRateLimit      = "rate_limit"
-	FailureTypeInvalidAPIKey  = "invalid_api_key"
-	FailureTypeContextLength  = "context_length"
-	FailureTypeServerError    = "server_error"
-	FailureTypeInvalidRequest = "invalid_request"
-	FailureTypeModelNotFound  = "model_not_found"
+	ModeFailure = "failure"
 )
 
 type Configuration struct {
@@ -228,8 +221,8 @@ func (c *Configuration) validate() error {
 		c.ServedModelNames = []string{c.Model}
 	}
 
-	if c.Mode != ModeEcho && c.Mode != ModeRandom {
-		return fmt.Errorf("invalid mode '%s', valid values are 'random' and 'echo'", c.Mode)
+	if c.Mode != ModeEcho && c.Mode != ModeRandom && c.Mode != ModeFailure {
+		return fmt.Errorf("invalid mode '%s', valid values are 'random', 'echo', and 'failure'", c.Mode)
 	}
 	if c.Port <= 0 {
 		return fmt.Errorf("invalid port '%d'", c.Port)
@@ -320,12 +313,12 @@ func (c *Configuration) validate() error {
 	}
 
 	validFailureTypes := map[string]bool{
-		FailureTypeRateLimit:      true,
-		FailureTypeInvalidAPIKey:  true,
-		FailureTypeContextLength:  true,
-		FailureTypeServerError:    true,
-		FailureTypeInvalidRequest: true,
-		FailureTypeModelNotFound:  true,
+		"rate_limit":      true,
+		"invalid_api_key": true,
+		"context_length":  true,
+		"server_error":    true,
+		"invalid_request": true,
+		"model_not_found": true,
 	}
 	for _, failureType := range c.FailureTypes {
 		if !validFailureTypes[failureType] {
@@ -360,7 +353,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
 	f.IntVar(&config.MaxCPULoras, "max-cpu-loras", config.MaxCPULoras, "Maximum number of LoRAs to store in CPU memory")
 	f.IntVar(&config.MaxModelLen, "max-model-len", config.MaxModelLen, "Model's context window, maximum number of tokens in a single request including input and output")
 
-	f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode: echo - returns the same text that was sent in the request, for chat completion returns the last message; random - returns random sentence from a bank of pre-defined sentences")
+	f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode: echo - returns the same text that was sent in the request, for chat completion returns the last message; random - returns random sentence from a bank of pre-defined sentences; failure - randomly injects API errors")
 	f.IntVar(&config.InterTokenLatency, "inter-token-latency", config.InterTokenLatency, "Time to generate one token (in milliseconds)")
 	f.IntVar(&config.TimeToFirstToken, "time-to-first-token", config.TimeToFirstToken, "Time to first token (in milliseconds)")
 	f.IntVar(&config.KVCacheTransferLatency, "kv-cache-transfer-latency", config.KVCacheTransferLatency, "Time for KV-cache transfer from a remote vLLM (in milliseconds)")
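
Taken together with the validation above, a failure-mode configuration looks roughly like the following. This is a sketch, not code from the commit: it assumes the remaining required fields (Model, Port, and so on) are populated elsewhere, and the field names mirror the Configuration literals used in the new tests.

```go
package common_test

import "github.com/llm-d/llm-d-inference-sim/pkg/common"

// Sketch only: a failure-mode configuration accepted by the validation above.
// Other required fields (Model, Port, ...) are assumed to be set elsewhere.
func exampleFailureConfig() *common.Configuration {
	return &common.Configuration{
		Mode:                 common.ModeFailure,
		FailureInjectionRate: 30, // inject an error on roughly 30% of requests
		FailureTypes:         []string{"rate_limit", "server_error"},
	}
}
```

On the command line the same setup corresponds to passing `--mode failure` together with the `failure-injection-rate` parameter mentioned in the README.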

pkg/common/failures.go

Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
/*
Copyright 2025 The llm-d-inference-sim Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package common

import (
	"fmt"
	"math/rand"
	"time"
)

type FailureSpec struct {
	StatusCode int
	ErrorType  string
	ErrorCode  string
	Message    string
	Param      *string
}

var predefinedFailures = map[string]FailureSpec{
	"rate_limit": {
		StatusCode: 429,
		ErrorType:  "rate_limit_exceeded",
		ErrorCode:  "rate_limit_exceeded",
		Message:    "Rate limit reached for model in organization org-xxx on requests per min (RPM): Limit 3, Used 3, Requested 1.",
		Param:      nil,
	},
	"invalid_api_key": {
		StatusCode: 401,
		ErrorType:  "invalid_request_error",
		ErrorCode:  "invalid_api_key",
		Message:    "Incorrect API key provided",
		Param:      nil,
	},
	"context_length": {
		StatusCode: 400,
		ErrorType:  "invalid_request_error",
		ErrorCode:  "context_length_exceeded",
		Message:    "This model's maximum context length is 4096 tokens. However, your messages resulted in 4500 tokens.",
		Param:      stringPtr("messages"),
	},
	"server_error": {
		StatusCode: 503,
		ErrorType:  "server_error",
		ErrorCode:  "server_error",
		Message:    "The server is overloaded or not ready yet.",
		Param:      nil,
	},
	"invalid_request": {
		StatusCode: 400,
		ErrorType:  "invalid_request_error",
		ErrorCode:  "invalid_request_error",
		Message:    "Invalid request: missing required parameter 'model'.",
		Param:      stringPtr("model"),
	},
	"model_not_found": {
		StatusCode: 404,
		ErrorType:  "invalid_request_error",
		ErrorCode:  "model_not_found",
		Message:    "The model 'gpt-nonexistent' does not exist",
		Param:      stringPtr("model"),
	},
}

// ShouldInjectFailure determines whether to inject a failure based on configuration
func ShouldInjectFailure(config *Configuration) bool {
	if config.Mode != ModeFailure {
		return false
	}

	rand.Seed(time.Now().UnixNano())
	return rand.Intn(100) < config.FailureInjectionRate
}

// GetRandomFailure returns a random failure from configured types or all types if none specified
func GetRandomFailure(config *Configuration) FailureSpec {
	rand.Seed(time.Now().UnixNano())

	var availableFailures []string
	if len(config.FailureTypes) == 0 {
		// Use all failure types if none specified
		for failureType := range predefinedFailures {
			availableFailures = append(availableFailures, failureType)
		}
	} else {
		availableFailures = config.FailureTypes
	}

	if len(availableFailures) == 0 {
		// Fallback to server_error if no valid types
		return predefinedFailures["server_error"]
	}

	randomType := availableFailures[rand.Intn(len(availableFailures))]

	// Customize message with current model name
	failure := predefinedFailures[randomType]
	if randomType == "rate_limit" && config.Model != "" {
		failure.Message = fmt.Sprintf("Rate limit reached for %s in organization org-xxx on requests per min (RPM): Limit 3, Used 3, Requested 1.", config.Model)
	} else if randomType == "model_not_found" && config.Model != "" {
		failure.Message = fmt.Sprintf("The model '%s-nonexistent' does not exist", config.Model)
	}

	return failure
}

func stringPtr(s string) *string {
	return &s
}
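
ShouldInjectFailure and GetRandomFailure are the package's exported surface; how they are called from the request path is not part of this diff. A hypothetical sketch of that wiring, with the handler wrapper and envelope mapping invented purely for illustration:

```go
package example

import (
	"encoding/json"
	"net/http"

	"github.com/llm-d/llm-d-inference-sim/pkg/common"
)

// Hypothetical wrapper, for illustration only: the simulator's real request
// handlers are not shown in this commit.
func withFailureInjection(cfg *common.Configuration, next http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if common.ShouldInjectFailure(cfg) {
			failure := common.GetRandomFailure(cfg)
			w.Header().Set("Content-Type", "application/json")
			w.WriteHeader(failure.StatusCode)
			// Map the FailureSpec onto an OpenAI-style error envelope.
			_ = json.NewEncoder(w).Encode(map[string]any{
				"error": map[string]any{
					"message": failure.Message,
					"type":    failure.ErrorType,
					"param":   failure.Param,
					"code":    failure.ErrorCode,
				},
			})
			return
		}
		next(w, r)
	}
}
```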

pkg/common/failures_test.go

Lines changed: 134 additions & 0 deletions
@@ -0,0 +1,134 @@
/*
Copyright 2025 The llm-d-inference-sim Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package common_test

import (
	"strings"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"

	"github.com/llm-d/llm-d-inference-sim/pkg/common"
)

var _ = Describe("Failures", func() {
	Describe("ShouldInjectFailure", func() {
		It("should not inject failure when not in failure mode", func() {
			config := &common.Configuration{
				Mode:                 common.ModeRandom,
				FailureInjectionRate: 100,
			}
			Expect(common.ShouldInjectFailure(config)).To(BeFalse())
		})

		It("should not inject failure when rate is 0", func() {
			config := &common.Configuration{
				Mode:                 common.ModeFailure,
				FailureInjectionRate: 0,
			}
			Expect(common.ShouldInjectFailure(config)).To(BeFalse())
		})

		It("should inject failure when in failure mode with 100% rate", func() {
			config := &common.Configuration{
				Mode:                 common.ModeFailure,
				FailureInjectionRate: 100,
			}
			Expect(common.ShouldInjectFailure(config)).To(BeTrue())
		})
	})

	Describe("GetRandomFailure", func() {
		It("should return a failure from all types when none specified", func() {
			config := &common.Configuration{
				Model:        "test-model",
				FailureTypes: []string{},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(BeNumerically(">=", 400))
			Expect(failure.Message).ToNot(BeEmpty())
			Expect(failure.ErrorType).ToNot(BeEmpty())
		})

		It("should return rate limit failure when specified", func() {
			config := &common.Configuration{
				Model:        "test-model",
				FailureTypes: []string{"rate_limit"},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(Equal(429))
			Expect(failure.ErrorType).To(Equal("rate_limit_exceeded"))
			Expect(failure.ErrorCode).To(Equal("rate_limit_exceeded"))
			Expect(strings.Contains(failure.Message, "test-model")).To(BeTrue())
		})

		It("should return invalid API key failure when specified", func() {
			config := &common.Configuration{
				FailureTypes: []string{"invalid_api_key"},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(Equal(401))
			Expect(failure.ErrorType).To(Equal("invalid_request_error"))
			Expect(failure.ErrorCode).To(Equal("invalid_api_key"))
			Expect(failure.Message).To(Equal("Incorrect API key provided"))
		})

		It("should return context length failure when specified", func() {
			config := &common.Configuration{
				FailureTypes: []string{"context_length"},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(Equal(400))
			Expect(failure.ErrorType).To(Equal("invalid_request_error"))
			Expect(failure.ErrorCode).To(Equal("context_length_exceeded"))
			Expect(failure.Param).ToNot(BeNil())
			Expect(*failure.Param).To(Equal("messages"))
		})

		It("should return server error when specified", func() {
			config := &common.Configuration{
				FailureTypes: []string{"server_error"},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(Equal(503))
			Expect(failure.ErrorType).To(Equal("server_error"))
			Expect(failure.ErrorCode).To(Equal("server_error"))
		})

		It("should return model not found failure when specified", func() {
			config := &common.Configuration{
				Model:        "test-model",
				FailureTypes: []string{"model_not_found"},
			}
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(Equal(404))
			Expect(failure.ErrorType).To(Equal("invalid_request_error"))
			Expect(failure.ErrorCode).To(Equal("model_not_found"))
			Expect(strings.Contains(failure.Message, "test-model-nonexistent")).To(BeTrue())
		})

		It("should return server error as fallback for empty types", func() {
			config := &common.Configuration{
				FailureTypes: []string{},
			}
			// This test is probabilistic since it randomly selects, but we can test structure
			failure := common.GetRandomFailure(config)
			Expect(failure.StatusCode).To(BeNumerically(">=", 400))
			Expect(failure.ErrorType).ToNot(BeEmpty())
		})
	})
})
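
These specs use Ginkgo/Gomega, so they only execute if the package registers a suite bootstrap; that bootstrap is not part of this diff and presumably already exists in pkg/common. For completeness, a minimal version would look like this (the file and suite names are assumptions):

```go
package common_test

import (
	"testing"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
)

// Ginkgo suite bootstrap; shown here only so the specs above read as a
// complete picture.
func TestCommon(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "Common Suite")
}
```

With that in place, the specs run under a plain `go test ./pkg/common/...`.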
