Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 37 additions & 4 deletions .github/workflows/ci-pr-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,55 @@ jobs:
go-version-file: ./go.mod
cache-dependency-path: ./go.sum

- name: Install libzmq dependencies (kvcache/kvevents)
- name: Install libzmq and Python dependencies (kvcache/kvevents)
run: |
sudo apt-get update
sudo apt-get install -y pkg-config python3-dev python3-pip
make download-zmq
pip3 install transformers --break-system-packages

- name: Set PKG_CONFIG_PATH
run: echo "PKG_CONFIG_PATH=/usr/lib/pkgconfig" >> $GITHUB_ENV
- name: Configure CGO for Python
run: |
PYTHON_INCLUDE=$(python3 -c "import sysconfig; print(sysconfig.get_path('include'))")
echo "CPATH=${PYTHON_INCLUDE}:${CPATH}" >> $GITHUB_ENV
echo "CGO_ENABLED=1" >> $GITHUB_ENV
echo "CGO_CFLAGS=$(python3-config --cflags --embed)" >> $GITHUB_ENV
echo "CGO_LDFLAGS=$(python3-config --ldflags --embed)" >> $GITHUB_ENV

- name: Set PKG_CONFIG_PATH and PYTHONPATH
run: |
echo "PKG_CONFIG_PATH=/usr/lib/pkgconfig" >> $GITHUB_ENV
GOMODCACHE=$(go env GOMODCACHE)
# Extract kv-cache-manager version from go.mod
KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager)
KV_CACHE_MGR_PATH="${GOMODCACHE}/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions"
echo "PYTHONPATH=${KV_CACHE_MGR_PATH}:${PYTHONPATH}" >> $GITHUB_ENV

- name: Run lint checks
uses: golangci/golangci-lint-action@v8
with:
version: 'v2.4.0'
args: "--config=./.golangci.yml"
env:
CGO_ENABLED: ${{ env.CGO_ENABLED }}
CGO_CFLAGS: ${{ env.CGO_CFLAGS }}
CGO_LDFLAGS: ${{ env.CGO_LDFLAGS }}
CPATH: ${{ env.CPATH }}
PKG_CONFIG_PATH: ${{ env.PKG_CONFIG_PATH }}

- name: Build Container image
run: |
make image-build SIM_TAG=pr-check

- name: Run go test
shell: bash
shell: bash
run: |
echo "Running tests with Ginkgo..."
make test
env:
CGO_ENABLED: ${{ env.CGO_ENABLED }}
CGO_CFLAGS: ${{ env.CGO_CFLAGS }}
CGO_LDFLAGS: ${{ env.CGO_LDFLAGS }}
CPATH: ${{ env.CPATH }}
PKG_CONFIG_PATH: ${{ env.PKG_CONFIG_PATH }}
PYTHONPATH: ${{ env.PYTHONPATH }}
53 changes: 45 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ ARG TARGETARCH
# Install build tools
# The builder is based on UBI8, so we need epel-release-8.
RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm' && \
dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig && \
dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig python3.12-devel python3.12-pip git && \
dnf clean all
# python3.12-devel is needed for CGO compilation (it provides the Python headers and python3.12-config for linker flags)

WORKDIR /workspace
# Copy the Go Modules manifests
Expand All @@ -28,6 +29,15 @@ ARG TOKENIZER_VERSION=v1.22.1
RUN curl -L https://github.com/daulet/tokenizers/releases/download/${TOKENIZER_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
RUN ranlib lib/*.a

# Copy Python wrapper and requirements from kv-cache-manager dependency
# Extract version dynamically and copy to a known location
RUN KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager) && \
mkdir -p /workspace/kv-cache-manager-wrapper && \
cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions/render_jinja_template_wrapper.py \
/workspace/kv-cache-manager-wrapper/ && \
cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions/requirements.txt \
/workspace/kv-cache-manager-wrapper/

# Build
# GOARCH has no default value, so that the binary is built for the host where the command
# was called. For example, if we call make image-build in a local env which has the Apple Silicon M1 OS
Expand All @@ -36,22 +46,49 @@ RUN ranlib lib/*.a
ENV CGO_ENABLED=1
ENV GOOS=${TARGETOS:-linux}
ENV GOARCH=${TARGETARCH}
RUN go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go
ENV PYTHON=python3.12
ENV PYTHONPATH=/usr/lib64/python3.12/site-packages:/usr/lib/python3.12/site-packages

RUN export CGO_CFLAGS="$(python3.12-config --cflags) -I/workspace/lib" && \
export CGO_LDFLAGS="$(python3.12-config --ldflags --embed) -L/workspace/lib -ltokenizers -ldl -lm" && \
go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go

# Runtime stage
# Use ubi9 as a minimal base image to package the manager binary
# Refer to https://catalog.redhat.com/software/containers/ubi9/ubi-minimal/615bd9b4075b022acc111bf5 for more details
FROM registry.access.redhat.com/ubi9/ubi-minimal:latest

WORKDIR /

# Install zeromq runtime library needed by the manager.
# Install zeromq runtime library and Python runtime needed by the manager.
# The final image is UBI9, so we need epel-release-9.
# Using microdnf for minimal image size
USER root
RUN microdnf install -y dnf && \
dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm' && \
dnf install -y zeromq && \
dnf clean all && \
rm -rf /var/cache/dnf /var/lib/dnf
RUN curl -L -o /tmp/epel-release.rpm https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
rpm -i /tmp/epel-release.rpm && \
rm /tmp/epel-release.rpm && \
microdnf install -y --setopt=install_weak_deps=0 zeromq python3.12 python3.12-libs python3.12-pip && \
microdnf clean all && \
rm -rf /var/cache/yum /var/lib/yum && \
ln -sf /usr/bin/python3.12 /usr/bin/python3 && \
ln -sf /usr/bin/python3.12 /usr/bin/python

# Install wrapper as a module in site-packages
RUN mkdir -p /usr/local/lib/python3.12/site-packages/
COPY --from=builder /workspace/kv-cache-manager-wrapper/render_jinja_template_wrapper.py /usr/local/lib/python3.12/site-packages/

# Python deps (no cache, single target) – filter out torch
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
COPY --from=builder /workspace/kv-cache-manager-wrapper/requirements.txt /tmp/requirements.txt
RUN sed '/^torch\b/d' /tmp/requirements.txt > /tmp/requirements.notorch.txt && \
python3.12 -m pip install --no-cache-dir --upgrade pip setuptools wheel && \
python3.12 -m pip install --no-cache-dir --target /usr/local/lib/python3.12/site-packages -r /tmp/requirements.notorch.txt && \
rm /tmp/requirements.txt /tmp/requirements.notorch.txt && \
rm -rf /root/.cache/pip

# Python env
ENV PYTHONPATH="/usr/local/lib/python3.12/site-packages:/usr/lib/python3.12/site-packages"
ENV PYTHON=python3.12

COPY --from=builder /workspace/bin/llm-d-inference-sim /app/llm-d-inference-sim

Expand Down
12 changes: 9 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ require (
github.com/buaazp/fasthttprouter v0.1.1
github.com/go-logr/logr v1.4.2
github.com/google/uuid v1.6.0
github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1
github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc2
github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/gomega v1.37.0
github.com/openai/openai-go/v3 v3.6.1
Expand All @@ -23,6 +23,12 @@ require (
k8s.io/klog/v2 v2.130.1
)

require (
github.com/dgraph-io/ristretto/v2 v2.3.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
go.uber.org/multierr v1.11.0 // indirect
)

require (
github.com/andybalholm/brotli v1.1.1 // indirect
github.com/beorn7/perks v1.0.1 // indirect
Expand Down Expand Up @@ -64,7 +70,7 @@ require (
go.uber.org/automaxprocs v1.6.0 // indirect
golang.org/x/net v0.38.0 // indirect
golang.org/x/oauth2 v0.27.0 // indirect
golang.org/x/sys v0.32.0 // indirect
golang.org/x/sys v0.35.0 // indirect
golang.org/x/term v0.30.0 // indirect
golang.org/x/text v0.23.0 // indirect
golang.org/x/time v0.9.0 // indirect
Expand All @@ -77,7 +83,7 @@ require (
k8s.io/client-go v0.33.0 // indirect
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect
sigs.k8s.io/controller-runtime v0.21.0 // indirect
sigs.k8s.io/controller-runtime v0.21.0
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
sigs.k8s.io/randfill v1.0.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
Expand Down
16 changes: 12 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,14 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgraph-io/ristretto/v2 v2.3.0 h1:qTQ38m7oIyd4GAed/QkUZyPFNMnvVWyazGXRwvOt5zk=
github.com/dgraph-io/ristretto/v2 v2.3.0/go.mod h1:gpoRV3VzrEY1a9dWAYV6T1U7YzfgttXdd/ZzL1s9OZM=
github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa512G+w+Pxci9hJPB8oMnkcP3iZF38=
github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
Expand Down Expand Up @@ -68,8 +74,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1 h1:SDLiNrcreDcA9m9wfXAumFARDHHXpjOjHTzshTiTGxk=
github.com/llm-d/llm-d-kv-cache-manager v0.3.0-rc1/go.mod h1:tN80/D0Faf6pE2ocwFgTNoCxKPsqdsa2XnjQUqOaZ8Q=
github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc2 h1:l2Sm8W6SRg4TAme4RsndwZ++5+4aQvDI4vnf8TKrhww=
github.com/llm-d/llm-d-kv-cache-manager v0.4.0-rc2/go.mod h1:ZlK7MCuz5D/weLeHyNKEmVF/eJZDyYn3XyRowTihq9o=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-sqlite3 v1.14.32 h1:JD12Ag3oLy1zQA+BNn74xRgaBbdhbNIDYvQUEuuErjs=
Expand Down Expand Up @@ -153,6 +159,8 @@ go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
Expand All @@ -174,8 +182,8 @@ golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand Down
4 changes: 2 additions & 2 deletions pkg/kv-cache/block_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,14 @@ func (bc *blockCache) startRequest(requestID string, blocks []uint64) (int, erro

delete(bc.unusedBlocks, oldestUnusedHash)
common.WriteToChannel(bc.eventChan,
EventData{action: eventActionRemove, hashValues: []uint64{oldestUnusedHash}},
EventData{action: eventActionRemove, hashValues: []any{oldestUnusedHash}},
bc.logger, "block cache eventChan")
}

// Add the new block
bc.usedBlocks[block] = 1
common.WriteToChannel(bc.eventChan,
EventData{action: eventActionStore, hashValues: []uint64{block}},
EventData{action: eventActionStore, hashValues: []any{block}},
bc.logger, "block cache eventChan")
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/kv-cache/kv_cache_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ const (

type EventData struct {
action EventAction
hashValues []uint64
hashValues []any
}

type KVEventSender struct {
Expand Down
8 changes: 4 additions & 4 deletions pkg/kv-cache/kv_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,8 +330,8 @@ var _ = Describe("KV cache", Ordered, func() {
wg.Wait() // wait for goroutine to exit
}()

expectedRemovedBlocks := []uint64{2, 4}
expectedStoredBlocks := []uint64{1, 2, 3, 4, 5, 6}
expectedRemovedBlocks := []any{uint64(2), uint64(4)}
expectedStoredBlocks := []any{uint64(1), uint64(2), uint64(3), uint64(4), uint64(5), uint64(6)}

go func() {
// Make sure that the subscriber listens before the events are published
Expand Down Expand Up @@ -371,8 +371,8 @@ var _ = Describe("KV cache", Ordered, func() {
Expect(alreadyInCache).To(Equal(0))
}()

removedBlocks := make([]uint64, 0)
storedBlocks := make([]uint64, 0)
removedBlocks := make([]any, 0)
storedBlocks := make([]any, 0)
count := uint64(1)
for {
parts, err := sub.RecvMessageBytes(0)
Expand Down
8 changes: 4 additions & 4 deletions pkg/kv-cache/kv_test_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ import (

"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvevents"
"github.com/onsi/ginkgo/v2"
gomega "github.com/onsi/gomega"
"github.com/onsi/gomega"
"github.com/vmihailenco/msgpack/v5"
)

func ParseKVEvent(parts [][]byte, expectedTopic string, expectedSeq uint64) ([]uint64, []uint64, bool) {
func ParseKVEvent(parts [][]byte, expectedTopic string, expectedSeq uint64) ([]any, []any, bool) {
// The message should be [topic, seq, payload]
gomega.Expect(parts).To(gomega.HaveLen(3))

Expand All @@ -34,8 +34,8 @@ func ParseKVEvent(parts [][]byte, expectedTopic string, expectedSeq uint64) ([]u
seq := binary.BigEndian.Uint64(parts[1])
gomega.Expect(seq).To(gomega.Equal(expectedSeq))

removed := make([]uint64, 0)
stored := make([]uint64, 0)
removed := make([]any, 0)
stored := make([]any, 0)
allCleared := false

var eventBatch kvevents.EventBatch
Expand Down
14 changes: 11 additions & 3 deletions pkg/llm-d-inference-sim/simulator.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,13 +333,21 @@ func (s *VllmSimulator) initializeSim(ctx context.Context) error {
return err
}

tokenizationConfig := tokenization.DefaultConfig()
tokenizationConfig, err := tokenization.DefaultConfig()
if err != nil {
return fmt.Errorf("failed to create default tokenization configuration: %w", err)
}

if s.config.TokenizersCacheDir != "" {
tokenizationConfig.TokenizersCacheDir = s.config.TokenizersCacheDir
if tokenizationConfig.HFTokenizerConfig == nil {
tokenizationConfig.HFTokenizerConfig = &tokenization.HFTokenizerConfig{}
}
tokenizationConfig.HFTokenizerConfig.TokenizersCacheDir = s.config.TokenizersCacheDir
}

s.tokenizer, err = tokenization.NewCachedHFTokenizer(tokenizationConfig.HFTokenizerConfig)
if err != nil {
return fmt.Errorf("failed to create tokenizer: %w", err)
return fmt.Errorf("failed to create hf tokenizer: %w", err)
}

if s.config.EnableKVCache {
Expand Down
Loading