Skip to content

Commit 8912ddd

Browse files
irar2 authored and smarunich committed
Publish kv-cache events (#126)
* Publish kv-cache events Signed-off-by: Ira <[email protected]> * Fix lint errors Signed-off-by: Ira <[email protected]> * Review fixes Signed-off-by: Ira <[email protected]> * Sleep to allow previous sub to close Signed-off-by: Ira <[email protected]> --------- Signed-off-by: Ira <[email protected]> Signed-off-by: Sergey Marunich <[email protected]>
1 parent 9187884 commit 8912ddd

File tree

5 files changed

+26
-12
lines changed

5 files changed

+26
-12
lines changed

Makefile

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,14 @@ help: ## Print help
3939
LDFLAGS ?= -extldflags '-L$(shell pwd)/lib'
4040
CGO_ENABLED=1
4141
TOKENIZER_LIB = lib/libtokenizers.a
42-
# Extract TOKENIZER_VERSION from Dockerfile
43-
TOKENIZER_VERSION := $(shell grep '^ARG TOKENIZER_VERSION=' Dockerfile | cut -d'=' -f2)
4442

4543
.PHONY: download-tokenizer
4644
download-tokenizer: $(TOKENIZER_LIB)
4745
$(TOKENIZER_LIB):
4846
## Download the HuggingFace tokenizer bindings.
49-
@echo "Downloading HuggingFace tokenizer bindings for version $(TOKENIZER_VERSION)..."
47+
@echo "Downloading HuggingFace tokenizer bindings..."
5048
mkdir -p lib
51-
curl -L https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/libtokenizers.$(TARGETOS)-$(TARGETARCH).tar.gz | tar -xz -C lib
49+
curl -L https://github.com/daulet/tokenizers/releases/download/v1.22.1/libtokenizers.$(TARGETOS)-$(TARGETARCH).tar.gz | tar -xz -C lib
5250
ranlib lib/*.a
5351

5452
##@ Development
@@ -226,4 +224,4 @@ download-zmq: ## Install ZMQ dependencies based on OS/ARCH
226224
exit 1; \
227225
fi; \
228226
echo "✅ ZMQ dependencies installed."; \
229-
fi
227+
fi

go.sum

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
github.com/alicebob/miniredis/v2 v2.35.0 h1:QwLphYqCEAo1eu1TqPRN2jgVMPBweeQcR21jeqDCONI=
2-
github.com/alicebob/miniredis/v2 v2.35.0/go.mod h1:TcL7YfarKPGDAthEtl5NBeHZfeUQj6OXMm/+iu5cLMM=
31
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
42
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
53
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -13,6 +11,8 @@ github.com/buaazp/fasthttprouter v0.1.1/go.mod h1:h/Ap5oRVLeItGKTVBb+heQPks+HdIU
1311
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
1412
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
1513
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
14+
github.com/daulet/tokenizers v1.20.2 h1:tlq/vIOiBTKDPets3596aFvmJYLn3XI6LFKq4q9LKhQ=
15+
github.com/daulet/tokenizers v1.20.2/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs=
1616
github.com/daulet/tokenizers v1.22.1 h1:3wzAFIxfgRuqGKka8xdkeTbctDmmqOOs12GofqdorpM=
1717
github.com/daulet/tokenizers v1.22.1/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs=
1818
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -68,6 +68,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
6868
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
6969
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
7070
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
71+
github.com/llm-d/llm-d-kv-cache-manager v0.2.0 h1:7MXFPjy3P8nZ7HbB1LWhhVLHvNTLbZglkD/ZcT7UU1k=
72+
github.com/llm-d/llm-d-kv-cache-manager v0.2.0/go.mod h1:ZTqwsnIVC6R5YuTUrYofPIUnCeZ9RvXn1UQAdxLYl1Y=
7173
github.com/llm-d/llm-d-kv-cache-manager v0.2.2-0.20250810103202-0adf0940f60a h1:PXR37HLgYYfolzWQA2uQOEiJlj3IV9YSvgaEFqCRSa8=
7274
github.com/llm-d/llm-d-kv-cache-manager v0.2.2-0.20250810103202-0adf0940f60a/go.mod h1:g2UlYKNJ4S860SAQ/QoRnytAFfnp8f1luW4IuZSMwCE=
7375
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
@@ -145,8 +147,6 @@ github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZ
145147
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
146148
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
147149
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
148-
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
149-
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
150150
go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
151151
go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
152152
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=

pkg/common/config.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ type Configuration struct {
125125

126126
// ZMQEndpoint is the ZMQ address to publish events, the default value is tcp://localhost:5557
127127
ZMQEndpoint string `yaml:"zmq-endpoint"`
128+
// EventBatchSize is the maximum number of kv-cache events to be sent together, defaults to 16
129+
EventBatchSize int `yaml:"event-batch-size"`
128130
}
129131

130132
type LoraModule struct {
@@ -183,6 +185,7 @@ func newConfig() *Configuration {
183185
KVCacheSize: 1024,
184186
TokenBlockSize: 16,
185187
ZMQEndpoint: "tcp://localhost:5557",
188+
EventBatchSize: 16,
186189
}
187190
}
188191

@@ -293,6 +296,9 @@ func (c *Configuration) validate() error {
293296
if c.KVCacheSize < 0 {
294297
return errors.New("KV cache size cannot be negative")
295298
}
299+
if c.EventBatchSize < 1 {
300+
return errors.New("event batch size cannot less than 1")
301+
}
296302
return nil
297303
}
298304

@@ -344,6 +350,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
344350
f.StringVar(&config.TokenizersCacheDir, "tokenizers-cache-dir", config.TokenizersCacheDir, "Directory for caching tokenizers")
345351
f.StringVar(&config.HashSeed, "hash-seed", config.HashSeed, "Seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)")
346352
f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")
353+
f.IntVar(&config.EventBatchSize, "event-batch-size", config.EventBatchSize, "Maximum number of kv-cache events to be sent together")
347354

348355
// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
349356
var dummyString string

pkg/common/config_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,13 @@ var _ = Describe("Simulator configuration", func() {
103103
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
104104
"{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}",
105105
}
106+
c.EventBatchSize = 5
106107
test = testCase{
107108
name: "config file with command line args",
108109
args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002",
109110
"--served-model-name", "alias1", "alias2", "--seed", "100",
110111
"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}", "{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}",
112+
"--event-batch-size", "5",
111113
},
112114
expectedConfig: c,
113115
}
@@ -291,6 +293,11 @@ var _ = Describe("Simulator configuration", func() {
291293
args: []string{"cmd", "--block-size", "35",
292294
"--config", "../../manifests/config.yaml"},
293295
},
296+
{
297+
name: "invalid (negative) event-batch-size",
298+
args: []string{"cmd", "--event-batch-size", "-35",
299+
"--config", "../../manifests/config.yaml"},
300+
},
294301
}
295302

296303
for _, test := range invalidTests {

pkg/kv-cache/kv_cache.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,17 @@ func NewKVCacheHelper(config *common.Configuration, logger logr.Logger) (*KVCach
4747
tokenizationConfig.TokenizersCacheDir = config.TokenizersCacheDir
4848
}
4949
tokenizer, err := tokenization.NewCachedHFTokenizer(tokenizationConfig.HFTokenizerConfig)
50-
5150
if err != nil {
5251
return nil, fmt.Errorf("failed to create tokenizer: %w", err)
5352
}
54-
53+
blockCache, err := newBlockCache(config, logger)
54+
if err != nil {
55+
return nil, fmt.Errorf("failed to create block cache: %w", err)
56+
}
5557
return &KVCacheHelper{
5658
tokenizer: tokenizer,
5759
tokensProcessor: tokensProcessor,
58-
blockCache: newBlockCache(config.KVCacheSize, logger),
60+
blockCache: blockCache,
5961
logger: logger,
6062
}, nil
6163
}

0 commit comments

Comments
 (0)