Clarify failure injection rate documentation

smarunich · smarunich · commit e83e1a88d71f · 2025-08-14T13:01:47.000-04:00
Removed a duplicated paragraph from the README and updated the README parameter description, the code comment, and the flag help text to clarify that 'failure-injection-rate' is the probability of injecting failures and is not tied to a specific failure mode.
diff --git a/README.md b/README.md
@@ -35,8 +35,6 @@ The simulator supports two modes of operation:
 
 Additionally, the simulator can inject OpenAI API compatible error responses for testing error handling using the `failure-injection-rate` parameter.
 
-Additionally, the simulator can inject OpenAI API compatible error responses for testing error handling using the `failure-injection-rate` parameter.
-
 Timing of the response is defined by the `time-to-first-token` and `inter-token-latency` parameters. In case P/D is enabled for a request, `kv-cache-transfer-latency` will be used instead of `time-to-first-token`.
 
 For a request with `stream=true`: `time-to-first-token` or `kv-cache-transfer-latency` defines the delay before the first token is returned, `inter-token-latency` defines the delay between subsequent tokens in the stream. 
@@ -126,7 +124,7 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
 - `tokenizers-cache-dir`: the directory for caching tokenizers
 - `hash-seed`: seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)
 - `zmq-endpoint`: ZMQ address to publish events
-- `failure-injection-rate`: probability (0-100) of injecting failures when in failure mode, optional, default is 10
+- `failure-injection-rate`: probability (0-100) of injecting failures, optional, default is 10
 - `failure-types`: list of specific failure types to inject (rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found), optional, if empty all types are used
 - `event-batch-size`: the maximum number of kv-cache events to be sent together, defaults to 16
 -->
diff --git a/pkg/common/config.go b/pkg/common/config.go
@@ -136,7 +136,7 @@ type Configuration struct {
 	// EventBatchSize is the maximum number of kv-cache events to be sent together, defaults to 16
 	EventBatchSize int `yaml:"event-batch-size"`
 
-	// FailureInjectionRate is the probability (0-100) of injecting failures when in failure mode
+	// FailureInjectionRate is the probability (0-100) of injecting failures
 	FailureInjectionRate int `yaml:"failure-injection-rate"`
 	// FailureTypes is a list of specific failure types to inject (empty means all types)
 	FailureTypes []string `yaml:"failure-types"`
@@ -386,7 +386,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
 	f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")
 	f.IntVar(&config.EventBatchSize, "event-batch-size", config.EventBatchSize, "Maximum number of kv-cache events to be sent together")
 	
-  f.IntVar(&config.FailureInjectionRate, "failure-injection-rate", config.FailureInjectionRate, "Probability (0-100) of injecting failures when in failure mode")
+  f.IntVar(&config.FailureInjectionRate, "failure-injection-rate", config.FailureInjectionRate, "Probability (0-100) of injecting failures")
 
 	failureTypes := getParamValueFromArgs("failure-types")
 	var dummyFailureTypes multiString