Use same version of tokenizer in both Dockerfile and Makefile (#132)

mayabar · smarunich · commit 458bca2f6b27 · 2025-08-14T17:13:00.000-04:00
* - Use same version of tokenizer in both Dockerfile and Makefile
- Fixes in readme file

Signed-off-by: Maya Barnea <mayab@il.ibm.com>

* Updates according to the PR review

Signed-off-by: Maya Barnea <mayab@il.ibm.com>

---------

Signed-off-by: Maya Barnea <mayab@il.ibm.com>
Signed-off-by: Sergey Marunich <marunich.s@gmail.com>
diff --git a/Dockerfile b/Dockerfile
@@ -23,7 +23,9 @@ COPY . .
 
 # HuggingFace tokenizer bindings
 RUN mkdir -p lib
-RUN curl -L https://github.com/daulet/tokenizers/releases/download/v1.22.1/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
+# Ensure that the TOKENIZER_VERSION matches the one used in the imported llm-d-kv-cache-manager version
+ARG TOKENIZER_VERSION=v1.22.1
+RUN curl -L https://github.com/daulet/tokenizers/releases/download/${TOKENIZER_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
 RUN ranlib lib/*.a
 
 # Build
diff --git a/Makefile b/Makefile
@@ -39,14 +39,16 @@ help: ## Print help
 LDFLAGS ?= -extldflags '-L$(shell pwd)/lib'
 CGO_ENABLED=1
 TOKENIZER_LIB = lib/libtokenizers.a
+# Extract TOKENIZER_VERSION from Dockerfile
+TOKENIZER_VERSION := $(shell grep '^ARG TOKENIZER_VERSION=' Dockerfile | cut -d'=' -f2)
 
 .PHONY: download-tokenizer
 download-tokenizer: $(TOKENIZER_LIB)
 $(TOKENIZER_LIB):
 	## Download the HuggingFace tokenizer bindings.
-	@echo "Downloading HuggingFace tokenizer bindings..."
+	@echo "Downloading HuggingFace tokenizer bindings for version $(TOKENIZER_VERSION)..."
 	mkdir -p lib
-	curl -L https://github.com/daulet/tokenizers/releases/download/v1.22.1/libtokenizers.$(TARGETOS)-$(TARGETARCH).tar.gz | tar -xz -C lib
+	curl -L https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/libtokenizers.$(TARGETOS)-$(TARGETARCH).tar.gz | tar -xz -C lib
 	ranlib lib/*.a
 
 ##@ Development
diff --git a/README.md b/README.md
@@ -116,13 +116,15 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
 - `min-tool-call-array-param-length`: the minimum possible length of array parameters in a tool call, optional, defaults to 1
 - `tool-call-not-required-param-probability`: the probability to add a parameter, that is not required, in a tool call, optional, defaults to 50
 - `object-tool-call-not-required-field-probability`: the probability to add a field, that is not required, in an object in a tool call, optional, defaults to 50
-- `enable-kvcache`: if true, the KV cache support will be enabled in the simulator. In this case, the KV cache will be simulated, and ZQM events will be published when a KV cache block is added or evicted.
+<!-- 
+- `enable-kvcache`: if true, the KV cache support will be enabled in the simulator. In this case, the KV cache will be simulated, and ZMQ events will be published when a KV cache block is added or evicted.
 - `kv-cache-size`: the maximum number of token blocks in kv cache
 - `block-size`: token block size for contiguous chunks of tokens, possible values: 8,16,32,64,128
 - `tokenizers-cache-dir`: the directory for caching tokenizers
 - `hash-seed`: seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)
 - `zmq-endpoint`: ZMQ address to publish events
-
+- `event-batch-size`: the maximum number of kv-cache events to be sent together, defaults to 16
+-->
 In addition, as we are using klog, the following parameters are available:
 - `add_dir_header`: if true, adds the file directory to the header of the log messages
 - `alsologtostderr`: log to standard error as well as files (no effect when -logtostderr=true)