@@ -28,11 +28,13 @@ ARG TOKENIZER_VERSION=v1.22.1
# Download the libtokenizers release archive selected by TOKENIZER_VERSION, TARGETOS and
# TARGETARCH, unpack it into lib/, then run ranlib over every static archive found there.
2828RUN curl -L https://github.com/daulet/tokenizers/releases/download/${TOKENIZER_VERSION}/libtokenizers.${TARGETOS}-${TARGETARCH}.tar.gz | tar -xz -C lib
2929RUN ranlib lib/*.a
3030
# Resolve the kv-cache-manager module version with `go list -m`, then copy the Jinja template
# wrapper script and its requirements.txt out of the Go module cache into
# /workspace/kv-cache-manager-wrapper, a fixed path the runtime stage copies from.
31- # Copy Python wrapper from kv-cache-manager dependency
31+ # Copy Python wrapper and requirements from kv-cache-manager dependency
3232# Extract version dynamically and copy to a known location
3333RUN KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-kv-cache-manager) && \
3434 mkdir -p /workspace/kv-cache-manager-wrapper && \
3535 cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions/render_jinja_template_wrapper.py \
36+ /workspace/kv-cache-manager-wrapper/ && \
37+ cp /go/pkg/mod/github.com/llm-d/llm-d-kv-cache-manager@${KV_CACHE_MGR_VERSION}/pkg/preprocessing/chat_completions/requirements.txt \
3638 /workspace/kv-cache-manager-wrapper/
3739
3840# Build
@@ -43,7 +45,10 @@ RUN KV_CACHE_MGR_VERSION=$(go list -m -f '{{.Version}}' github.com/llm-d/llm-d-k
# cgo build of the simulator binary. GOOS falls back to linux when TARGETOS is unset.
# The added lines take compiler and linker flags from python3-config (with --embed for the
# linker flags) and link against libtokenizers from /workspace/lib.
4345ENV CGO_ENABLED=1
4446ENV GOOS=${TARGETOS:-linux}
4547ENV GOARCH=${TARGETARCH}
46- RUN go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go
48+ ENV PYTHON=python3
49+ RUN export CGO_CFLAGS="$(python3-config --cflags) -I/workspace/lib" && \
50+ export CGO_LDFLAGS="$(python3-config --ldflags --embed) -L/workspace/lib -ltokenizers -ldl -lm" && \
51+ go build -a -o bin/llm-d-inference-sim -ldflags="-extldflags '-L$(pwd)/lib'" cmd/cmd.go
4752
4853# Runtime stage
4954# Use ubi9 as a minimal base image to package the manager binary
@@ -59,24 +64,28 @@ USER root
# Install the EPEL release package, then install ZeroMQ plus the Python 3.12 interpreter,
# its libraries and pip (package names python3.12, python3.12-libs, python3.12-pip).
# The package cache is cleaned in the same layer, and python3/python are symlinked to
# python3.12 so unversioned invocations resolve to the same interpreter.
5964RUN curl -L -o /tmp/epel-release.rpm https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
6065 rpm -i /tmp/epel-release.rpm && \
6166 rm /tmp/epel-release.rpm && \
62- microdnf install -y --setopt=install_weak_deps=0 zeromq python3 python3-libs python3-pip && \
67+ microdnf install -y --setopt=install_weak_deps=0 zeromq python3.12 python3.12-libs python3.12-pip && \
6368 microdnf clean all && \
64- rm -rf /var/cache/yum /var/lib/yum
69+ rm -rf /var/cache/yum /var/lib/yum && \
70+ ln -sf /usr/bin/python3.12 /usr/bin/python3 && \
71+ ln -sf /usr/bin/python3.12 /usr/bin/python
6572
# Create the site-packages directory under /usr/local/lib and copy the wrapper script from the
# builder stage into it. The added lines target the python3.12 directory instead of python3.9.
6673# Install wrapper as a module in site-packages
67- # Extract the kv-cache-manager version dynamically from go.mod in the builder stage
68- RUN mkdir -p /usr/local/lib/python3.9/site-packages/
69- COPY --from=builder /workspace/kv-cache-manager-wrapper/render_jinja_template_wrapper.py /usr/local/lib/python3.9/site-packages/
74+ RUN mkdir -p /usr/local/lib/python3.12/site-packages/
75+ COPY --from=builder /workspace/kv-cache-manager-wrapper/render_jinja_template_wrapper.py /usr/local/lib/python3.12/site-packages/
7076
# Install the wrapper's Python dependencies into the Python 3.12 site-packages directory.
# The requirements file comes from the builder stage; lines for the torch package are deleted
# before installing. The sed pattern has no space after \b, so entries written as
# "torch==<version>" or "torch>=<version>" are removed as well as a bare "torch" line.
# Both temporary requirements files and the pip cache are removed in the same layer.
71- # Python deps (no cache, single target) – install transformers
77+ # Python deps (no cache, single target) – filter out torch
7278ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
73- RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel && \
74- python3 -m pip install --no-cache-dir --target /usr/local/lib/python3.9/site-packages transformers && \
79+ COPY --from=builder /workspace/kv-cache-manager-wrapper/requirements.txt /tmp/requirements.txt
80+ RUN sed '/^torch\b/d' /tmp/requirements.txt > /tmp/requirements.notorch.txt && \
81+ python3.12 -m pip install --no-cache-dir --upgrade pip setuptools wheel && \
82+ python3.12 -m pip install --no-cache-dir --target /usr/local/lib/python3.12/site-packages -r /tmp/requirements.notorch.txt && \
83+ rm /tmp/requirements.txt /tmp/requirements.notorch.txt && \
7584 rm -rf /root/.cache/pip
7685
# Runtime Python settings. PYTHONPATH lists the two site-packages directories separated by a
# colon; each entry must be a single path with no embedded space, otherwise it names a
# directory that does not exist. PYTHON holds the interpreter command name.
7786# Python env
78- ENV PYTHONPATH="/usr/local/lib/python3.9/site-packages:/usr/lib/python3.9/site-packages"
79- ENV PYTHON=python3
87+ ENV PYTHONPATH="/usr/local/lib/python3.12/site-packages:/usr/lib/python3.12/site-packages"
88+ ENV PYTHON=python3.12
8089
# Copy the llm-d-inference-sim binary produced in the builder stage into /app.
8190COPY --from=builder /workspace/bin/llm-d-inference-sim /app/llm-d-inference-sim
8291
0 commit comments