Skip to content

Commit 463291f

Browse files
authored
Kv cache support without KV events (#107)
* first steps of kv-cache support: update build process, add KVCacheHelper Signed-off-by: Maya Barnea <[email protected]> * fix build and lint problems after merge Signed-off-by: Maya Barnea <[email protected]> * Add command line and yaml config file parameter 'enable-kvcache' Signed-off-by: Maya Barnea <[email protected]> * - continue kv-cache support implementation - add block cache and initial version of test for it Signed-off-by: Maya Barnea <[email protected]> * move kv cache related code to a separate package Signed-off-by: Maya Barnea <[email protected]> * update kv cache test to be more modular Signed-off-by: Maya Barnea <[email protected]> * fixes in kv-cache-manager + thread save test Signed-off-by: Maya Barnea <[email protected]> * fix make test Signed-off-by: Maya Barnea <[email protected]> * fix typo Signed-off-by: Maya Barnea <[email protected]> * remove redundant field initialization Signed-off-by: Maya Barnea <[email protected]> * rename cache + fixes according to PR comments Signed-off-by: Maya Barnea <[email protected]> * fixes according to PR comments Signed-off-by: Maya Barnea <[email protected]> * fixes by PR comments Signed-off-by: Maya Barnea <[email protected]> --------- Signed-off-by: Maya Barnea <[email protected]>
1 parent 91549f5 commit 463291f

File tree

14 files changed

+928
-28
lines changed

14 files changed

+928
-28
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
bin
2+
lib
3+
vendor
24
.vscode
35
# MacOSX
46
.DS_Store

Dockerfile

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@ FROM quay.io/projectquay/golang:1.24 AS builder
33
ARG TARGETOS
44
ARG TARGETARCH
55

6+
# Install build tools
7+
# The builder is based on UBI8, so we need epel-release-8.
8+
RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm' && \
9+
dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig && \
10+
dnf clean all
11+
612
WORKDIR /workspace
713
# Copy the Go Modules manifests
814
COPY go.mod go.mod
@@ -15,18 +21,29 @@ RUN go mod download
1521
COPY cmd/llm-d-inference-sim/main.go cmd/cmd.go
1622
COPY . .
1723

24+
# HuggingFace tokenizer bindings
25+
RUN mkdir -p lib
26+
RUN curl -L https://github.com/daulet/tokenizers/releases/download/v1.20.2/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
27+
RUN ranlib lib/*.a
28+
1829
# Build
1930
# GOARCH has no default value, which allows the binary to be built according to the host where the command
2031
# was called. For example, if we call make image-build in a local env which has the Apple Silicon M1 OS
2132
# the docker BUILDPLATFORM arg will be linux/arm64, whereas for Apple x86 it will be linux/amd64. Therefore,
2233
# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform.
23-
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o bin/llm-d-inference-sim cmd/cmd.go
34+
ENV CGO_ENABLED=1
35+
ENV GOOS=${TARGETOS:-linux}
36+
ENV GOARCH=${TARGETARCH}
37+
RUN go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go
38+
39+
# Use ubi9 as a minimal base image to package the manager binary
40+
# Refer to https://catalog.redhat.com/software/containers/ubi9/ubi-minimal/615bd9b4075b022acc111bf5 for more details
41+
FROM registry.access.redhat.com/ubi9/ubi-minimal:latest
2442

25-
# Use ubi9 as a micro base image to package the manager binary
26-
# Refer to https://catalog.redhat.com/software/containers/ubi9/ubi-micro/615bdf943f6014fa45ae1b58 for more details
27-
FROM registry.access.redhat.com/ubi9/ubi-micro:latest
2843
WORKDIR /
2944
COPY --from=builder /workspace/bin/llm-d-inference-sim /app/llm-d-inference-sim
30-
USER 65532:65532
45+
46+
# USER 65532:65532
47+
USER root
3148

3249
ENTRYPOINT ["/app/llm-d-inference-sim"]

Makefile

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,17 @@
1414

1515
# Makefile for the llm-d-inference-sim project
1616

17-
CONTAINER_RUNTIME ?= docker
18-
1917
SHELL := /usr/bin/env bash
2018

2119
# Defaults
20+
TARGETOS ?= $(shell go env GOOS)
21+
TARGETARCH ?= $(shell go env GOARCH)
2222
PROJECT_NAME ?= llm-d-inference-sim
23-
REGISTRY ?= ghcr.io/llm-d
24-
IMAGE_TAG_BASE ?= $(REGISTRY)/$(PROJECT_NAME)
23+
IMAGE_REGISTRY ?= ghcr.io/llm-d
24+
IMAGE_TAG_BASE ?= $(IMAGE_REGISTRY)/$(PROJECT_NAME)
2525
SIM_TAG ?= dev
2626
IMG = $(IMAGE_TAG_BASE):$(SIM_TAG)
27+
2728
CONTAINER_TOOL := $(shell { command -v docker >/dev/null 2>&1 && echo docker; } || { command -v podman >/dev/null 2>&1 && echo podman; } || echo "")
2829
BUILDER := $(shell command -v buildah >/dev/null 2>&1 && echo buildah || echo $(CONTAINER_TOOL))
2930
PLATFORMS ?= linux/amd64 # linux/arm64 # linux/s390x,linux/ppc64le
@@ -35,17 +36,36 @@ SRC = $(shell find . -type f -name '*.go')
3536
help: ## Print help
3637
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
3738

39+
LDFLAGS ?= -extldflags '-L$(shell pwd)/lib'
40+
CGO_ENABLED=1
41+
TOKENIZER_LIB = lib/libtokenizers.a
42+
43+
.PHONY: download-tokenizer
44+
download-tokenizer: $(TOKENIZER_LIB)
45+
$(TOKENIZER_LIB):
46+
## Download the HuggingFace tokenizer bindings.
47+
@echo "Downloading HuggingFace tokenizer bindings..."
48+
mkdir -p lib
49+
curl -L https://github.com/daulet/tokenizers/releases/download/v1.20.2/libtokenizers.$(TARGETOS)-$(TARGETARCH).tar.gz | tar -xz -C lib
50+
ranlib lib/*.a
51+
3852
##@ Development
3953

54+
.PHONY: clean
55+
clean:
56+
go clean -testcache -cache
57+
rm -f $(TOKENIZER_LIB)
58+
rmdir lib
59+
4060
.PHONY: format
4161
format: ## Format Go source files
4262
@printf "\033[33;1m==== Running gofmt ====\033[0m\n"
4363
@gofmt -l -w $(SRC)
4464

4565
.PHONY: test
46-
test: check-ginkgo ## Run tests
66+
test: check-ginkgo download-tokenizer ## Run tests
4767
@printf "\033[33;1m==== Running tests ====\033[0m\n"
48-
ginkgo -r -v
68+
CGO_ENABLED=1 ginkgo -ldflags="$(LDFLAGS)" -v -r
4969

5070
.PHONY: post-deploy-test
5171
post-deploy-test: ## Run post deployment tests
@@ -60,25 +80,29 @@ lint: check-golangci-lint ## Run lint
6080
##@ Build
6181

6282
.PHONY: build
63-
build: check-go ##
83+
build: check-go download-tokenizer ##
6484
@printf "\033[33;1m==== Building ====\033[0m\n"
65-
go build -o bin/$(PROJECT_NAME) cmd/$(PROJECT_NAME)/main.go
85+
go build -ldflags="$(LDFLAGS)" -o bin/$(PROJECT_NAME) cmd/$(PROJECT_NAME)/main.go
6686

6787
##@ Container Build/Push
6888

69-
.PHONY: image-build-and-push
70-
image-build-and-push: image-build image-push ## Build and push Docker image $(IMG) to registry
71-
7289
.PHONY: image-build
7390
image-build: check-container-tool ## Build Docker image ## Build Docker image using $(CONTAINER_TOOL)
7491
@printf "\033[33;1m==== Building Docker image $(IMG) ====\033[0m\n"
75-
$(CONTAINER_TOOL) build --build-arg TARGETOS=$(TARGETOS) --build-arg TARGETARCH=$(TARGETARCH) -t $(IMG) .
92+
$(CONTAINER_TOOL) build \
93+
--platform $(TARGETOS)/$(TARGETARCH) \
94+
--build-arg TARGETOS=$(TARGETOS)\
95+
--build-arg TARGETARCH=$(TARGETARCH)\
96+
-t $(IMG) .
7697

7798
.PHONY: image-push
7899
image-push: check-container-tool ## Push Docker image $(IMG) to registry
79100
@printf "\033[33;1m==== Pushing Docker image $(IMG) ====\033[0m\n"
80101
$(CONTAINER_TOOL) push $(IMG)
81102

103+
.PHONY: image-build-and-push
104+
image-build-and-push: image-build image-push ## Build and push Docker image $(IMG) to registry
105+
82106
##@ Install/Uninstall Targets
83107

84108
# Default install/uninstall (Docker)

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
112112
- `min-tool-call-array-param-length`: the minimum possible length of array parameters in a tool call, optional, defaults to 1
113113
- `tool-call-not-required-param-probability`: the probability to add a parameter, that is not required, in a tool call, optional, defaults to 50
114114
- `object-tool-call-not-required-field-probability`: the probability to add a field, that is not required, in an object in a tool call, optional, defaults to 50
115+
- `enable-kvcache`: if true, KV cache support is enabled in the simulator. In this case, the KV cache is simulated, and ZMQ events are published when a KV cache block is added or evicted.
115116

116117
In addition, as we are using klog, the following parameters are available:
117118
- `add_dir_header`: if true, adds the file directory to the header of the log messages

go.mod

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
module github.com/llm-d/llm-d-inference-sim
22

3-
go 1.23.4
3+
go 1.24.1
44

5-
toolchain go1.24.0
5+
toolchain go1.24.2
66

77
require (
88
github.com/buaazp/fasthttprouter v0.1.1
99
github.com/go-logr/logr v1.4.2
1010
github.com/google/uuid v1.6.0
11+
github.com/llm-d/llm-d-kv-cache-manager v0.2.0
1112
github.com/onsi/ginkgo/v2 v2.23.4
1213
github.com/onsi/gomega v1.37.0
1314
github.com/openai/openai-go v0.1.0-beta.10
14-
github.com/prometheus/client_golang v1.21.1
15+
github.com/prometheus/client_golang v1.22.0
1516
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
1617
github.com/spf13/pflag v1.0.6
1718
github.com/valyala/fasthttp v1.59.0
@@ -23,25 +24,57 @@ require (
2324
github.com/andybalholm/brotli v1.1.1 // indirect
2425
github.com/beorn7/perks v1.0.1 // indirect
2526
github.com/cespare/xxhash/v2 v2.3.0 // indirect
27+
github.com/daulet/tokenizers v1.20.2 // indirect
2628
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
29+
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
30+
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
31+
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
32+
github.com/go-openapi/jsonpointer v0.21.0 // indirect
33+
github.com/go-openapi/jsonreference v0.20.2 // indirect
34+
github.com/go-openapi/swag v0.23.0 // indirect
2735
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
36+
github.com/gogo/protobuf v1.3.2 // indirect
37+
github.com/google/gnostic-models v0.6.9 // indirect
2838
github.com/google/go-cmp v0.7.0 // indirect
2939
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
40+
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
41+
github.com/josharian/intern v1.0.0 // indirect
42+
github.com/json-iterator/go v1.1.12 // indirect
3043
github.com/klauspost/compress v1.18.0 // indirect
44+
github.com/mailru/easyjson v0.7.7 // indirect
45+
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
46+
github.com/modern-go/reflect2 v1.0.2 // indirect
3147
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
32-
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
48+
github.com/pkg/errors v0.9.1 // indirect
3349
github.com/prometheus/client_model v0.6.1 // indirect
3450
github.com/prometheus/common v0.62.0 // indirect
3551
github.com/prometheus/procfs v0.15.1 // indirect
52+
github.com/redis/go-redis/v9 v9.7.3 // indirect
3653
github.com/tidwall/gjson v1.18.0 // indirect
3754
github.com/tidwall/match v1.1.1 // indirect
3855
github.com/tidwall/pretty v1.2.1 // indirect
3956
github.com/tidwall/sjson v1.2.5 // indirect
4057
github.com/valyala/bytebufferpool v1.0.0 // indirect
58+
github.com/x448/float16 v0.8.4 // indirect
4159
go.uber.org/automaxprocs v1.6.0 // indirect
4260
golang.org/x/net v0.38.0 // indirect
61+
golang.org/x/oauth2 v0.27.0 // indirect
4362
golang.org/x/sys v0.32.0 // indirect
63+
golang.org/x/term v0.30.0 // indirect
4464
golang.org/x/text v0.23.0 // indirect
65+
golang.org/x/time v0.9.0 // indirect
4566
golang.org/x/tools v0.31.0 // indirect
4667
google.golang.org/protobuf v1.36.5 // indirect
68+
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
69+
gopkg.in/inf.v0 v0.9.1 // indirect
70+
k8s.io/api v0.33.0 // indirect
71+
k8s.io/apimachinery v0.33.0 // indirect
72+
k8s.io/client-go v0.33.0 // indirect
73+
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
74+
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect
75+
sigs.k8s.io/controller-runtime v0.21.0 // indirect
76+
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
77+
sigs.k8s.io/randfill v1.0.0 // indirect
78+
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
79+
sigs.k8s.io/yaml v1.4.0 // indirect
4780
)

0 commit comments

Comments
 (0)