Skip to content

Commit 2d83929

Browse files
authored
Matched Default TokenProcessorConfig.BlockSize with vLLM's (#52)
* - update vLLM deployment in chart
  - update default tokens hashing block-size

  Signed-off-by: Maroon Ayoub <[email protected]>

* updated default kvevents::Config::ZMQEndpoint

  Signed-off-by: Maroon Ayoub <[email protected]>

* clarify defaultBlockSize

  Signed-off-by: Maroon Ayoub <[email protected]>

---------

Signed-off-by: Maroon Ayoub <[email protected]>
1 parent d85bdbb commit 2d83929

File tree

5 files changed

+16
-12
lines changed

5 files changed

+16
-12
lines changed

examples/kv_events/online/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ func getKVCacheIndexerConfig() *kvcache.Config {
6666

6767
blockSize, err := strconv.Atoi(os.Getenv(blockSizeEnvVar))
6868
if err == nil || blockSize >= 0 {
69-
config.TokenProcessorConfig.ChunkSize = blockSize
69+
config.TokenProcessorConfig.BlockSize = blockSize
7070
}
7171

7272
return config

pkg/kvcache/kvblock/token_processor.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,13 @@ import (
2727
"github.com/llm-d/llm-d-kv-cache-manager/pkg/utils"
2828
)
2929

30-
const defaultChunkSize = 256
30+
// defaultBlockSize is the default number of tokens per block.
31+
// 16 is the default value used by vLLM.
32+
const defaultBlockSize = 16
3133

3234
// TokenProcessorConfig holds the configuration for the token processor.
3335
type TokenProcessorConfig struct {
34-
ChunkSize int
36+
BlockSize int
3537
// HashSeed is used to prefix initial hash chunks, similarly to vLLM's NONE_HASH.
3638
// This should be aligned with vLLM's `PYTHONHASHSEED` environment variable.
3739
// The system's deployer is responsible for aligning the vLLM deployments
@@ -44,7 +46,7 @@ type TokenProcessorConfig struct {
4446
// DefaultTokenProcessorConfig returns the default configuration for the token processor.
4547
func DefaultTokenProcessorConfig() *TokenProcessorConfig {
4648
return &TokenProcessorConfig{
47-
ChunkSize: defaultChunkSize,
49+
BlockSize: defaultBlockSize,
4850
HashSeed: "",
4951
}
5052
}
@@ -134,8 +136,8 @@ func (db *ChunkedTokenDatabase) prefixHashes(parentHash uint64, tokenChunks [][]
134136
// chunkTokens splits the input slice of tokens into chunks of size BlockSize.
135137
func (db *ChunkedTokenDatabase) chunkTokens(tokens []uint32) [][]uint32 {
136138
var chunks [][]uint32
137-
for i := 0; i < len(tokens); i += db.ChunkSize {
138-
end := i + db.ChunkSize
139+
for i := 0; i < len(tokens); i += db.BlockSize {
140+
end := i + db.BlockSize
139141
if end > len(tokens) {
140142
break // no partial blocks
141143
}

pkg/kvcache/kvevents/pool.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ type Config struct {
2727
// DefaultConfig returns a default configuration for the event processing pool.
2828
func DefaultConfig() *Config {
2929
return &Config{
30-
ZMQEndpoint: "tcp://0.0.0.0:5557",
30+
ZMQEndpoint: "tcp://*:5557",
3131
TopicFilter: "kv@",
3232
Concurrency: 4,
3333
}

tests/e2e/redis_mock/e2e_suite_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ func (s *KVCacheSuite) SetupTest() {
6060

6161
s.config = kvcache.NewDefaultConfig()
6262
s.config.PrefixStoreConfig.BlockSize = 4
63-
s.config.TokenProcessorConfig.ChunkSize = 4
63+
s.config.TokenProcessorConfig.BlockSize = 4
6464

6565
s.tokenizer, err = tokenization.NewCachedHFTokenizer(s.config.TokenizersPoolConfig.HFTokenizerConfig)
6666
s.Require().NoError(err)

vllm-setup-helm/templates/deployment.yaml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,16 @@ spec:
5252
args:
5353
- |
5454
git config --global --add safe.directory /workspace/vllm && \
55-
git remote add maroon https://github.com/vMaroon/vllm.git && \
56-
git fetch maroon && \
57-
git reset --hard 786715333f822ff9274700d343d8628ef29ec525 && \
55+
git remote add vllm https://github.com/vllm-project/vllm.git && \
56+
git fetch vllm && \
57+
git reset --hard 697ef765ee91d1a47b49ae7e43951cfd116b6052 && \
5858
VLLM_COMMIT=$(git merge-base HEAD origin/main) && \
5959
VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/${VLLM_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl && \
6060
VLLM_USE_PRECOMPILED=1 uv pip install --editable . && \
6161
pip install -r requirements/common.txt && \
62+
{{- if .Values.lmcache.enabled }}
6263
export LMCACHE_DISTRIBUTED_URL=${POD_IP} && \
64+
{{- end }}
6365
vllm serve {{ .Values.vllm.model.name }} \
6466
--host 0.0.0.0 \
6567
--port 8000 \
@@ -83,7 +85,7 @@ spec:
8385
--block-size {{ .Values.vllm.blockSize }} \
8486
{{- if .Values.kvCacheManager.enabled }}
8587
--kv-events-config "{\"enable_kv_cache_events\":{{ .Values.kvCacheManager.enabled }},\"publisher\":\"zmq\",\"endpoint\":\"{{ include "chart.kvCacheManagerServiceUrl" . }}\",\"topic\":\"kv@${POD_IP}@{{ .Values.vllm.model.name }}\"}" \
86-
--prefix-caching-hash-algo sha256_cbor \
88+
--prefix-caching-hash-algo sha256_cbor_64bit \
8789
{{- end }}
8890
ports:
8991
- name: http

0 commit comments

Comments
 (0)