Merge branch 'main' into failure-mode

smarunich · web-flow · commit 972092ae660f · 2025-08-14T12:05:27.000-04:00
Signed-off-by: Sergey Marunich <marunich.s@gmail.com>
diff --git a/Dockerfile b/Dockerfile
@@ -23,7 +23,9 @@ COPY . .
 
 # HuggingFace tokenizer bindings
 RUN mkdir -p lib
-RUN curl -L https://github.com/daulet/tokenizers/releases/download/v1.22.1/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
+# Ensure that the TOKENIZER_VERSION matches the one used in the imported llm-d-kv-cache-manager version
+ARG TOKENIZER_VERSION=v1.22.1
+RUN curl -L https://github.com/daulet/tokenizers/releases/download/${TOKENIZER_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
 RUN ranlib lib/*.a
 
 # Build
diff --git a/README.md b/README.md
@@ -120,15 +120,16 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
 - `min-tool-call-array-param-length`: the minimum possible length of array parameters in a tool call, optional, defaults to 1
 - `tool-call-not-required-param-probability`: the probability to add a parameter, that is not required, in a tool call, optional, defaults to 50
 - `object-tool-call-not-required-field-probability`: the probability to add a field, that is not required, in an object in a tool call, optional, defaults to 50
-- `enable-kvcache`: if true, the KV cache support will be enabled in the simulator. In this case, the KV cache will be simulated, and ZQM events will be published when a KV cache block is added or evicted.
+- `enable-kvcache`: if true, the KV cache support will be enabled in the simulator. In this case, the KV cache will be simulated, and ZMQ events will be published when a KV cache block is added or evicted.
 - `kv-cache-size`: the maximum number of token blocks in kv cache
 - `block-size`: token block size for contiguous chunks of tokens, possible values: 8,16,32,64,128
 - `tokenizers-cache-dir`: the directory for caching tokenizers
 - `hash-seed`: seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)
 - `zmq-endpoint`: ZMQ address to publish events
 - `failure-injection-rate`: probability (0-100) of injecting failures when in failure mode, optional, default is 10
 - `failure-types`: list of specific failure types to inject (rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found), optional, if empty all types are used
-
+- `event-batch-size`: the maximum number of kv-cache events to be sent together, defaults to 16
+
 In addition, as we are using klog, the following parameters are available:
 - `add_dir_header`: if true, adds the file directory to the header of the log messages
 - `alsologtostderr`: log to standard error as well as files (no effect when -logtostderr=true)
diff --git a/pkg/common/config.go b/pkg/common/config.go
@@ -314,8 +314,8 @@ func (c *Configuration) validate() error {
 	if c.EventBatchSize < 1 {
 		return errors.New("event batch size cannot less than 1")
 	}
-
-	if c.FailureInjectionRate < 0 || c.FailureInjectionRate > 100 {
+  
+  if c.FailureInjectionRate < 0 || c.FailureInjectionRate > 100 {
 		return errors.New("failure injection rate should be between 0 and 100")
 	}
 
@@ -386,7 +386,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
 	f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")
 	f.IntVar(&config.EventBatchSize, "event-batch-size", config.EventBatchSize, "Maximum number of kv-cache events to be sent together")
 	
-	f.IntVar(&config.FailureInjectionRate, "failure-injection-rate", config.FailureInjectionRate, "Probability (0-100) of injecting failures when in failure mode")
+  f.IntVar(&config.FailureInjectionRate, "failure-injection-rate", config.FailureInjectionRate, "Probability (0-100) of injecting failures when in failure mode")
 
 	failureTypes := getParamValueFromArgs("failure-types")
 	var dummyFailureTypes multiString
@@ -436,6 +436,13 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
 		}
 	}
 
+	if config.HashSeed == "" {
+		hashSeed := os.Getenv("PYTHONHASHSEED")
+		if hashSeed != "" {
+			config.HashSeed = hashSeed
+		}
+	}
+
 	if err := config.validate(); err != nil {
 		return nil, err
 	}

Original file line number	Diff line number	Diff line change
`@@ -314,8 +314,8 @@ func (c *Configuration) validate() error {`
`314`	`314`	`if c.EventBatchSize < 1 {`
`315`	`315`	`return errors.New("event batch size cannot less than 1")`
`316`	`316`	`}`
`317`		`-`
`318`		`- if c.FailureInjectionRate < 0 \|\| c.FailureInjectionRate > 100 {`
	`317`	`+`
	`318`	`+ if c.FailureInjectionRate < 0 \|\| c.FailureInjectionRate > 100 {`
`319`	`319`	`return errors.New("failure injection rate should be between 0 and 100")`
`320`	`320`	`}`
`321`	`321`
`@@ -386,7 +386,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {`
`386`	`386`	`f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")`
`387`	`387`	`f.IntVar(&config.EventBatchSize, "event-batch-size", config.EventBatchSize, "Maximum number of kv-cache events to be sent together")`
`388`	`388`
`389`		`- f.IntVar(&config.FailureInjectionRate, "failure-injection-rate", config.FailureInjectionRate, "Probability (0-100) of injecting failures when in failure mode")`
	`389`	`+ f.IntVar(&config.FailureInjectionRate, "failure-injection-rate", config.FailureInjectionRate, "Probability (0-100) of injecting failures when in failure mode")`
`390`	`390`
`391`	`391`	`failureTypes := getParamValueFromArgs("failure-types")`
`392`	`392`	`var dummyFailureTypes multiString`
`@@ -436,6 +436,13 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {`
`436`	`436`	`}`
`437`	`437`	`}`
`438`	`438`
	`439`	`+ if config.HashSeed == "" {`
	`440`	`+ hashSeed := os.Getenv("PYTHONHASHSEED")`
	`441`	`+ if hashSeed != "" {`
	`442`	`+ config.HashSeed = hashSeed`
	`443`	`+ }`
	`444`	`+ }`
	`445`	`+`
`439`	`446`	`if err := config.validate(); err != nil {`
`440`	`447`	`return nil, err`
`441`	`448`	`}`