Commit 431b391

Build VLLM CUDA from RHEL AI wheels, add audio and video packages (#85)
* Update Dockerfile.ubi to install vllm-cuda using a wheel from the RHEL AI team; the install script lives in payload/run.sh. An args file with the custom build parameters was also added, and is referenced in the Tekton pipeline.
* Update payload/run.sh to use the bot token.
* Add a trap to guarantee deletion of the .netrc file that run.sh writes.
1 parent ae2078e commit 431b391
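The Tekton pipeline that consumes the new argfile is referenced by the commit message but not included in it. As a rough local equivalent only (podman, the bot_pat.txt path, the image tag, and the slash-containing secret id passing through unchanged are all assumptions), the build could be driven like this:

# Hypothetical local build mirroring what the pipeline presumably does:
# expand argfile.konflux into --build-arg flags and mount the bot token
# as a build secret, so it is never baked into an image layer.
podman build \
    $(sed 's/^/--build-arg /' argfile.konflux | tr '\n' ' ') \
    --secret id=rhel-ai-private-index-auth/BOT_PAT,src=bot_pat.txt \
    -f Dockerfile.ubi -t vllm-cuda:dev .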

File tree

3 files changed: +88 -91 lines changed


Dockerfile.ubi

Lines changed: 47 additions & 91 deletions
@@ -1,12 +1,9 @@
-## Global Args #################################################################
-ARG BASE_UBI_IMAGE_TAG=9.5-1741850109
-ARG PYTHON_VERSION=3.12

-ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
-ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
+ARG BASE_UBI_IMAGE_TAG
+ARG PYTHON_VERSION

 ## Base Layer ##################################################################
-FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
+FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base
 ARG PYTHON_VERSION
 ENV PYTHON_VERSION=${PYTHON_VERSION}
 RUN microdnf -y update && microdnf install -y --nodocs \
@@ -19,25 +16,28 @@ ENV LANG=C.UTF-8 \
     LC_ALL=C.UTF-8

 # Some utils for dev purposes - tar required for kubectl cp
+
 RUN microdnf install -y --nodocs \
-    which procps findutils tar vim git\
+    which procps findutils tar vim git \
     && microdnf clean all


 ## Python Installer ############################################################
-FROM base as python-install
+FROM base AS python-install
 ARG PYTHON_VERSION

 ENV VIRTUAL_ENV=/opt/vllm
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 ENV PYTHON_VERSION=${PYTHON_VERSION}
 RUN microdnf install -y --nodocs \
     python${PYTHON_VERSION}-devel && \
-    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all
+    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && \
+    pip install --no-cache -U pip wheel uv && \
+    microdnf clean all


 ## CUDA Base ###################################################################
-FROM python-install as cuda-base
+FROM python-install AS cuda-base

 RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
     https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
@@ -51,88 +51,30 @@ RUN microdnf install -y --nodocs \
     ln -s ${CUDA_HOME}/lib64/stubs/libcuda.so /usr/lib64/


-
 ## Python cuda base #################################################################
 FROM cuda-base AS python-cuda-base

 ENV VIRTUAL_ENV=/opt/vllm
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"

 # install cuda and common dependencies
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
+RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
     --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
     uv pip install \
        -r requirements-cuda.txt


-## Development #################################################################
-FROM python-cuda-base AS dev
-
-# install build and runtime dependencies
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
-    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
-    --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
-    --mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \
-    --mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \
-    uv pip install \
-       -r requirements-cuda.txt \
-       -r requirements-dev.txt
-
-## Builder #####################################################################
-FROM dev AS build
-
-# install build dependencies
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
-    uv pip install -r requirements-build.txt
-
-# install compiler cache to speed up compilation leveraging local or remote caching
-# git is required for the cutlass kernels
-RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y --nodocs git ccache && microdnf clean all
-
-COPY . .
-
-ARG TORCH_CUDA_ARCH_LIST
-ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
-ARG vllm_fa_cmake_gpu_arches
-ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
-
-# max jobs used by Ninja to build extensions
-ARG max_jobs=2
-ENV MAX_JOBS=${max_jobs}
-# number of threads used by nvcc
-ARG nvcc_threads=8
-ENV NVCC_THREADS=$nvcc_threads
-# make sure punica kernels are built (for LoRA)
-ENV VLLM_INSTALL_PUNICA_KERNELS=1
-
-# Make sure the cuda environment is in the PATH
-ENV PATH=/usr/local/cuda/bin:$PATH
-
-ENV CCACHE_DIR=/root/.cache/ccache
-RUN --mount=type=cache,target=/root/.cache/ccache \
-    --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,src=.git,target=/workspace/.git \
-    env CFLAGS="-march=haswell" \
-        CXXFLAGS="$CFLAGS $CXXFLAGS" \
-        CMAKE_BUILD_TYPE=Release \
-        python3 setup.py bdist_wheel --dist-dir=dist

 #################### libsodium Build IMAGE ####################
-FROM base as libsodium-builder
+FROM base AS libsodium-builder

 RUN microdnf install -y --nodocs gcc gzip \
     && microdnf clean all

 WORKDIR /usr/src/libsodium

-ARG LIBSODIUM_VERSION=1.0.20
+ARG LIBSODIUM_VERSION
 RUN curl -LO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM_VERSION}-RELEASE/libsodium-${LIBSODIUM_VERSION}.tar.gz \
     && tar -xzvf libsodium*.tar.gz \
     && rm -f libsodium*.tar.gz \
@@ -156,25 +98,32 @@ ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nv
 ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/nvtx/lib:${LD_LIBRARY_PATH}"

 # Triton needs a CC compiler
+
 RUN microdnf install -y --nodocs gcc \
     rsync \
     && microdnf clean all

-# install vllm wheel first, so that torch etc will be installed
-RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
-    --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    uv pip install "$(echo dist/*.whl)[tensorizer]" --verbose

 # Install libsodium for Tensorizer encryption
 RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
-    cd /usr/src/libsodium \
-    && make install
+    make -C /usr/src/libsodium install

-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    uv pip install \
-        "https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.0.post2/flashinfer_python-0.2.0.post2+cu124torch2.5-cp312-cp312-linux_x86_64.whl"
+COPY LICENSE /licenses/vllm.md
+COPY examples/*.jinja /app/data/template/
+
+# install vllm by running the payload script and then install flashinfer
+
+ARG VLLM_WHEEL_VERSION
+ARG VLLM_WHEEL_INDEX
+ARG FLASHINFER_VERSION
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,src=payload,target=/workspace/payload \
+    --mount=type=secret,id=rhel-ai-private-index-auth/BOT_PAT \
+    env BOT_PAT=$(cat /run/secrets/rhel-ai-private-index-auth/BOT_PAT) \
+        VLLM_WHEEL_VERSION=${VLLM_WHEEL_VERSION} \
+        VLLM_WHEEL_INDEX=${VLLM_WHEEL_INDEX} \
+        ./payload/run.sh && \
+    uv pip install "${FLASHINFER_VERSION}"

 ENV HF_HUB_OFFLINE=1 \
     HOME=/home/vllm \
@@ -199,25 +148,32 @@ ENV HF_HUB_OFFLINE=1 \
 RUN umask 002 && \
     useradd --uid 2000 --gid 0 vllm && \
     mkdir -p /home/vllm && \
-    chmod g+rwx /home/vllm /usr/src /workspace
-
-COPY LICENSE /licenses/vllm.md
-COPY examples/*.jinja /app/data/template/
+    chmod g+rwx /home/vllm

 USER 2000
 WORKDIR /home/vllm

 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]


-FROM vllm-openai as vllm-grpc-adapter
+## TGIS Adapter layer #####################################################################
+FROM vllm-openai AS vllm-grpc-adapter

 USER root

-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
-    HOME=/root uv pip install "$(echo /workspace/dist/*.whl)[tensorizer]" vllm-tgis-adapter==0.6.3
+ARG VLLM_TGIS_ADAPTER_VERSION
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,src=payload,target=/workspace/payload \
+    --mount=type=secret,id=rhel-ai-private-index-auth/BOT_PAT \
+    cd /workspace && \
+    ls && \
+    env HOME=/root \
+        BOT_PAT=$(cat /run/secrets/rhel-ai-private-index-auth/BOT_PAT) \
+        VLLM_WHEEL_VERSION=${VLLM_WHEEL_VERSION} \
+        VLLM_TGIS_ADAPTER_VERSION=${VLLM_TGIS_ADAPTER_VERSION} \
+        VLLM_WHEEL_INDEX=${VLLM_WHEEL_INDEX} \
+        ./payload/run.sh
+

 ENV GRPC_PORT=8033 \
     PORT=8000 \
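Since the image no longer compiles vllm from source in a build stage, a quick smoke test of the resulting image is one way to confirm the wheel-based install worked; the image tag below is an assumption:

# Override the api_server entrypoint and print the installed vllm version.
podman run --rm --entrypoint python3 vllm-cuda:dev \
    -c "import vllm; print(vllm.__version__)"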

argfile.konflux

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+BASE_UBI_IMAGE_TAG=9.5-1739420147
+PYTHON_VERSION=3.11
+LIBSODIUM_VERSION=1.0.20
+VLLM_TGIS_ADAPTER_VERSION=0.6.3
+FLASHINFER_VERSION=https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post1/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl
+VLLM_WHEEL_VERSION=0.7.2
+VLLM_WHEEL_INDEX=https://gitlab.com/api/v4/projects/66664052/packages/pypi/simple
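Because a bad token only surfaces partway into the image build, a pre-flight check against the private index can save a build cycle. A sketch, assuming the token sits in bot_pat.txt and that the GitLab package registry accepts the same login/token pair that run.sh writes to .netrc:

# Fail fast if the bot token cannot reach the private GitLab PyPI index.
curl -fsS -u "rhel-ai-wheels-prefetch-token-rhoai:$(cat bot_pat.txt)" \
    https://gitlab.com/api/v4/projects/66664052/packages/pypi/simple/ \
    >/dev/null && echo "private index reachable"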

payload/run.sh

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+#!/bin/bash
+# required env vars:
+# $BOT_PAT
+# $VLLM_WHEEL_INDEX
+# optional:
+# $VLLM_TGIS_ADAPTER_VERSION
+# $VLLM_WHEEL_VERSION
+set -ex
+
+cat <<EOF > ${HOME}/.netrc
+machine gitlab.com
+login rhel-ai-wheels-prefetch-token-rhoai
+password $BOT_PAT
+EOF
+
+trap "rm ${HOME}/.netrc" EXIT
+
+# https://docs.astral.sh/uv/configuration/indexes/#searching-across-multiple-indexes
+# This will prefer to use the custom index, and fall back to pypi if needed
+export UV_EXTRA_INDEX_URL=${VLLM_WHEEL_INDEX}
+export UV_INDEX_STRATEGY=unsafe-first-match
+
+vllm="vllm[tensorizer,audio,video]"
+
+if [[ -n "$VLLM_TGIS_ADAPTER_VERSION" ]]; then
+    vllm_tgis_adapter="vllm-tgis-adapter==${VLLM_TGIS_ADAPTER_VERSION}"
+fi
+
+if [[ -n "$VLLM_WHEEL_VERSION" ]]; then
+    vllm="${vllm}==${VLLM_WHEEL_VERSION}"
+fi
+
+uv pip install $vllm $vllm_tgis_adapter
+
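run.sh can also be exercised outside the image build, for example in a scratch virtualenv that has uv installed. A usage sketch, with values mirroring argfile.konflux and an assumed bot_pat.txt token file:

# Install the pinned vllm wheel plus the TGIS adapter from the private index.
BOT_PAT=$(cat bot_pat.txt) \
VLLM_WHEEL_INDEX=https://gitlab.com/api/v4/projects/66664052/packages/pypi/simple \
VLLM_WHEEL_VERSION=0.7.2 \
VLLM_TGIS_ADAPTER_VERSION=0.6.3 \
./payload/run.sh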
