Skip to content

Commit 972092a

Browse files
authored
Merge branch 'main' into failure-mode
Signed-off-by: Sergey Marunich <[email protected]>
2 parents e18f3b1 + c1ba592 commit 972092a

File tree

3 files changed

+16
-6
lines changed

3 files changed

+16
-6
lines changed

Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ COPY . .
2323

2424
# HuggingFace tokenizer bindings
2525
RUN mkdir -p lib
26-
RUN curl -L https://github.com/daulet/tokenizers/releases/download/v1.22.1/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
26+
# Ensure that the TOKENIZER_VERSION matches the one used in the imported llm-d-kv-cache-manager version
27+
ARG TOKENIZER_VERSION=v1.22.1
28+
RUN curl -L https://github.com/daulet/tokenizers/releases/download/${TOKENIZER_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
2729
RUN ranlib lib/*.a
2830

2931
# Build

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,15 +120,16 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
120120
- `min-tool-call-array-param-length`: the minimum possible length of array parameters in a tool call, optional, defaults to 1
121121
- `tool-call-not-required-param-probability`: the probability to add a parameter, that is not required, in a tool call, optional, defaults to 50
122122
- `object-tool-call-not-required-field-probability`: the probability to add a field, that is not required, in an object in a tool call, optional, defaults to 50
123-
- `enable-kvcache`: if true, the KV cache support will be enabled in the simulator. In this case, the KV cache will be simulated, and ZQM events will be published when a KV cache block is added or evicted.
123+
- `enable-kvcache`: if true, the KV cache support will be enabled in the simulator. In this case, the KV cache will be simulated, and ZMQ events will be published when a KV cache block is added or evicted.
124124
- `kv-cache-size`: the maximum number of token blocks in kv cache
125125
- `block-size`: token block size for contiguous chunks of tokens, possible values: 8,16,32,64,128
126126
- `tokenizers-cache-dir`: the directory for caching tokenizers
127127
- `hash-seed`: seed for hash generation (if not set, it is read from the PYTHONHASHSEED environment variable)
128128
- `zmq-endpoint`: ZMQ address to publish events
129129
- `failure-injection-rate`: probability (0-100) of injecting failures when in failure mode, optional, default is 10
130130
- `failure-types`: list of specific failure types to inject (rate_limit, invalid_api_key, context_length, server_error, invalid_request, model_not_found), optional, if empty all types are used
131-
131+
- `event-batch-size`: the maximum number of kv-cache events to be sent together, defaults to 16
132+
-->
132133
In addition, as we are using klog, the following parameters are available:
133134
- `add_dir_header`: if true, adds the file directory to the header of the log messages
134135
- `alsologtostderr`: log to standard error as well as files (no effect when -logtostderr=true)

pkg/common/config.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,8 @@ func (c *Configuration) validate() error {
314314
if c.EventBatchSize < 1 {
315315
return errors.New("event batch size cannot less than 1")
316316
}
317-
318-
if c.FailureInjectionRate < 0 || c.FailureInjectionRate > 100 {
317+
318+
if c.FailureInjectionRate < 0 || c.FailureInjectionRate > 100 {
319319
return errors.New("failure injection rate should be between 0 and 100")
320320
}
321321

@@ -386,7 +386,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
386386
f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")
387387
f.IntVar(&config.EventBatchSize, "event-batch-size", config.EventBatchSize, "Maximum number of kv-cache events to be sent together")
388388

389-
f.IntVar(&config.FailureInjectionRate, "failure-injection-rate", config.FailureInjectionRate, "Probability (0-100) of injecting failures when in failure mode")
389+
f.IntVar(&config.FailureInjectionRate, "failure-injection-rate", config.FailureInjectionRate, "Probability (0-100) of injecting failures when in failure mode")
390390

391391
failureTypes := getParamValueFromArgs("failure-types")
392392
var dummyFailureTypes multiString
@@ -436,6 +436,13 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
436436
}
437437
}
438438

439+
if config.HashSeed == "" {
440+
hashSeed := os.Getenv("PYTHONHASHSEED")
441+
if hashSeed != "" {
442+
config.HashSeed = hashSeed
443+
}
444+
}
445+
439446
if err := config.validate(); err != nil {
440447
return nil, err
441448
}

0 commit comments

Comments
 (0)