Skip to content

Commit e2a7c97

Browse files
irar2smarunich
authored and committed
Publish kv-cache events (#126)
* Publish kv-cache events Signed-off-by: Ira <[email protected]> * Fix lint errors Signed-off-by: Ira <[email protected]> * Review fixes Signed-off-by: Ira <[email protected]> * Sleep to allow previous sub to close Signed-off-by: Ira <[email protected]> --------- Signed-off-by: Ira <[email protected]> Signed-off-by: Sergey Marunich <[email protected]>
1 parent ffea4bd commit e2a7c97

File tree

2 files changed

+9
-4
lines changed

2 files changed

+9
-4
lines changed

Makefile

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -39,16 +39,14 @@ help: ## Print help
3939
LDFLAGS ?= -extldflags '-L$(shell pwd)/lib'
4040
CGO_ENABLED=1
4141
TOKENIZER_LIB = lib/libtokenizers.a
42-
# Extract TOKENIZER_VERSION from Dockerfile
43-
TOKENIZER_VERSION := $(shell grep '^ARG TOKENIZER_VERSION=' Dockerfile | cut -d'=' -f2)
4442

4543
.PHONY: download-tokenizer
4644
download-tokenizer: $(TOKENIZER_LIB)
4745
$(TOKENIZER_LIB):
4846
## Download the HuggingFace tokenizer bindings.
49-
@echo "Downloading HuggingFace tokenizer bindings for version $(TOKENIZER_VERSION)..."
47+
@echo "Downloading HuggingFace tokenizer bindings..."
5048
mkdir -p lib
51-
curl -L https://github.com/daulet/tokenizers/releases/download/$(TOKENIZER_VERSION)/libtokenizers.$(TARGETOS)-$(TARGETARCH).tar.gz | tar -xz -C lib
49+
curl -L https://github.com/daulet/tokenizers/releases/download/v1.22.1/libtokenizers.$(TARGETOS)-$(TARGETARCH).tar.gz | tar -xz -C lib
5250
ranlib lib/*.a
5351

5452
##@ Development

pkg/common/config.go

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,8 @@ type Configuration struct {
125125

126126
// ZMQEndpoint is the ZMQ address to publish events, the default value is tcp://localhost:5557
127127
ZMQEndpoint string `yaml:"zmq-endpoint"`
128+
// EventBatchSize is the maximum number of kv-cache events to be sent together, defaults to 16
129+
EventBatchSize int `yaml:"event-batch-size"`
128130
}
129131

130132
type LoraModule struct {
@@ -183,6 +185,7 @@ func newConfig() *Configuration {
183185
KVCacheSize: 1024,
184186
TokenBlockSize: 16,
185187
ZMQEndpoint: "tcp://localhost:5557",
188+
EventBatchSize: 16,
186189
}
187190
}
188191

@@ -293,6 +296,9 @@ func (c *Configuration) validate() error {
293296
if c.KVCacheSize < 0 {
294297
return errors.New("KV cache size cannot be negative")
295298
}
299+
if c.EventBatchSize < 1 {
300+
return errors.New("event batch size cannot less than 1")
301+
}
296302
return nil
297303
}
298304

@@ -344,6 +350,7 @@ func ParseCommandParamsAndLoadConfig() (*Configuration, error) {
344350
f.StringVar(&config.TokenizersCacheDir, "tokenizers-cache-dir", config.TokenizersCacheDir, "Directory for caching tokenizers")
345351
f.StringVar(&config.HashSeed, "hash-seed", config.HashSeed, "Seed for hash generation (if not set, is read from PYTHONHASHSEED environment variable)")
346352
f.StringVar(&config.ZMQEndpoint, "zmq-endpoint", config.ZMQEndpoint, "ZMQ address to publish events")
353+
f.IntVar(&config.EventBatchSize, "event-batch-size", config.EventBatchSize, "Maximum number of kv-cache events to be sent together")
347354

348355
// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
349356
var dummyString string

0 commit comments

Comments
 (0)