@@ -6,7 +6,7 @@ ARG TARGETARCH
66# Install build tools: C/C++ compilers, ZeroMQ headers, pkg-config and the Python development headers.
77# The builder is based on UBI8, so EPEL is enabled through epel-release-8 before the packages are installed.
88RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm' && \
9- dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig && \
9+ dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig python3-devel && \
1010 dnf clean all
1111
1212WORKDIR /workspace
@@ -28,6 +28,13 @@ ARG TOKENIZER_VERSION=v1.22.1
2828RUN curl --location https://github.com/daulet/tokenizers/releases/download/${TOKENIZER_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar --extract --gzip --directory lib
2929RUN ranlib lib/*.a
3030
31+ # Copy Python wrapper from kv-cache-manager dependency
32+ # Ask Go for the module's on-disk directory (no hard-coded module-cache path or version) and copy to a known location
33+ RUN KV_CACHE_MGR_DIR="$(go list -m -f '{{.Dir}}' github.com/llm-d/llm-d-kv-cache-manager)" && \
34+ mkdir -p /workspace/kv-cache-manager-wrapper && \
35+ cp "${KV_CACHE_MGR_DIR}/pkg/preprocessing/chat_completions/render_jinja_template_wrapper.py" \
36+ /workspace/kv-cache-manager-wrapper/
37+
3138# Build
3239# GOARCH has no default value, so that the binary is built for the architecture of the host where the command
3340# was called. For example, if we call make image-build in a local env which has the Apple Silicon M1 OS
@@ -38,20 +45,38 @@ ENV GOOS=${TARGETOS:-linux}
3845ENV GOARCH=${TARGETARCH}
3946RUN go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go
4047
48+ # Runtime stage: the final image that receives the built binary and its runtime dependencies
4149# Use UBI9 minimal as the base image to package the manager binary
4250# Refer to https://catalog.redhat.com/software/containers/ubi9/ubi-minimal/615bd9b4075b022acc111bf5 for more details. NOTE(review): ':latest' is a floating tag; consider pinning a version tag or digest for reproducible builds.
4351FROM registry.access.redhat.com/ubi9/ubi-minimal:latest
4452
4553WORKDIR /
4654
47- # Install zeromq runtime library needed by the manager.
55+ # Install the ZeroMQ runtime library plus the Python interpreter, its libraries and pip needed by the manager.
4856# The final image is UBI9, so EPEL is enabled with epel-release-9 (downloaded with curl and installed with rpm).
57+ # microdnf is invoked directly instead of installing dnf first, to keep the image small. NOTE(review): no later USER instruction is visible in this hunk — confirm the image drops root afterwards.
4958USER root
50- RUN microdnf install -y dnf && \
51- dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm' && \
52- dnf install -y zeromq && \
53- dnf clean all && \
54- rm -rf /var/cache/dnf /var/lib/dnf
59+ RUN curl -L -o /tmp/epel-release.rpm https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
60+ rpm -i /tmp/epel-release.rpm && \
61+ rm /tmp/epel-release.rpm && \
62+ microdnf install -y --setopt=install_weak_deps=0 zeromq python3 python3-libs python3-pip && \
63+ microdnf clean all && \
64+ rm -rf /var/cache/yum /var/lib/yum
65+
66+ # Install the wrapper as an importable module in the python3.9 site-packages directory
67+ # The file is copied from the fixed path where the builder stage staged it, so no version lookup happens here
68+ RUN mkdir -p /usr/local/lib/python3.9/site-packages/
69+ COPY --from=builder /workspace/kv-cache-manager-wrapper/render_jinja_template_wrapper.py /usr/local/lib/python3.9/site-packages/
70+
71+ # Python deps: upgrade pip/setuptools/wheel, then install transformers into /usr/local/lib/python3.9/site-packages via --target, with pip caching disabled. NOTE(review): transformers is unpinned — consider pinning a version for reproducible builds.
72+ ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
73+ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel && \
74+ python3 -m pip install --no-cache-dir --target /usr/local/lib/python3.9/site-packages transformers && \
75+ rm -rf /root/.cache/pip
76+
77+ # Python env: put the pip --target directory and /usr/lib/python3.9/site-packages on the import path; PYTHON presumably tells the binary which interpreter to use — TODO confirm the consumer
78+ ENV PYTHONPATH="/usr/local/lib/python3.9/site-packages:/usr/lib/python3.9/site-packages"
79+ ENV PYTHON=python3
5580
5681COPY --from=builder /workspace/bin/llm-d-inference-sim /app/llm-d-inference-sim
5782
0 commit comments