@@ -6,8 +6,9 @@ ARG TARGETARCH
66# Install build tools
77# The builder is based on UBI8, so we need epel-release-8.
88RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm' && \
9- dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig python3-devel && \
9+ dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig python3.12-devel python3.12-pip git && \
1010 dnf clean all
11+ # python3.12-devel needed for CGO compilation (Python headers and python3.12-config for linker flags)
1112
1213WORKDIR /workspace
1314# Copy the Go Modules manifests
@@ -28,11 +29,13 @@ ARG TOKENIZER_VERSION=v1.22.1
2829RUN curl -L https://github.com/daulet/tokenizers/releases/download/${TOKENIZER_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
2930RUN ranlib lib/*.a
3031
31- # Copy Python wrapper from kv-cache-manager dependency
32+ # Copy Python wrapper and requirements from kv-cache-manager dependency
3233# Extract version dynamically and copy to a known location
3334RUN KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager) && \
3435 mkdir -p /workspace/kv-cache-manager-wrapper && \
3536 cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions/render_jinja_template_wrapper.py \
37+ /workspace/kv-cache-manager-wrapper/ && \
38+ cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions/requirements.txt \
3639 /workspace/kv-cache-manager-wrapper/
3740
3841# Build
@@ -43,7 +46,12 @@ RUN KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-k
4346ENV CGO_ENABLED=1
4447ENV GOOS=${TARGETOS:-linux}
4548ENV GOARCH=${TARGETARCH}
46- RUN go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go
49+ ENV PYTHON=python3.12
50+ ENV PYTHONPATH=/usr/lib64/python3.12/site-packages:/usr/lib/python3.12/site-packages
51+
52+ RUN export CGO_CFLAGS="$(python3.12-config --cflags) -I/workspace/lib" && \
53+ export CGO_LDFLAGS="$(python3.12-config --ldflags --embed) -L/workspace/lib -ltokenizers -ldl -lm" && \
54+ go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go
4755
4856# Runtime stage
4957# Use ubi9 as a minimal base image to package the manager binary
@@ -59,24 +67,28 @@ USER root
5967RUN curl -L -o /tmp/epel-release.rpm https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
6068 rpm -i /tmp/epel-release.rpm && \
6169 rm /tmp/epel-release.rpm && \
62- microdnf install -y --setopt=install_weak_deps=0 zeromq python3 python3-libs python3-pip && \
70+ microdnf install -y --setopt=install_weak_deps=0 zeromq python3.12 python3.12-libs python3.12-pip && \
6371 microdnf clean all && \
64- rm -rf /var/cache/yum /var/lib/yum
72+ rm -rf /var/cache/yum /var/lib/yum && \
73+ ln -sf /usr/bin/python3.12 /usr/bin/python3 && \
74+ ln -sf /usr/bin/python3.12 /usr/bin/python
6575
6676# Install wrapper as a module in site-packages
67- # Extract the kv-cache-manager version dynamically from go.mod in the builder stage
68- RUN mkdir -p /usr/local/lib/python3.9/site-packages/
69- COPY --from=builder /workspace/kv-cache-manager-wrapper/render_jinja_template_wrapper.py /usr/local/lib/python3.9/site-packages/
77+ RUN mkdir -p /usr/local/lib/python3.12/site-packages/
78+ COPY --from=builder /workspace/kv-cache-manager-wrapper/render_jinja_template_wrapper.py /usr/local/lib/python3.12/site-packages/
7079
71- # Python deps (no cache, single target) – install transformers
80+ # Python deps (no cache, single target) – filter out torch
7281ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
73- RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel && \
74- python3 -m pip install --no-cache-dir --target /usr/local/lib/python3.9/site-packages transformers && \
82+ COPY --from=builder /workspace/kv-cache-manager-wrapper/requirements.txt /tmp/requirements.txt
83+ RUN sed '/^torch\b/d' /tmp/requirements.txt > /tmp/requirements.notorch.txt && \
84+ python3.12 -m pip install --no-cache-dir --upgrade pip setuptools wheel && \
85+ python3.12 -m pip install --no-cache-dir --target /usr/local/lib/python3.12/site-packages -r /tmp/requirements.notorch.txt && \
86+ rm /tmp/requirements.txt /tmp/requirements.notorch.txt && \
7587 rm -rf /root/.cache/pip
7688
7789# Python env
78- ENV PYTHONPATH="/usr/local/lib/python3.9/site-packages:/usr/lib/python3.9/site-packages"
79- ENV PYTHON=python3
90+ ENV PYTHONPATH="/usr/local/lib/python3.12/site-packages:/usr/lib/python3.12/site-packages"
91+ ENV PYTHON=python3.12
8092
8193COPY --from=builder /workspace/bin/llm-d-inference-sim /app/llm-d-inference-sim
8294
0 commit comments