
Commit bf36270

add Dockerfile.rocm.ubi

1 parent 8fcb848

1 file changed: Dockerfile.rocm.ubi (+267, -0)

@@ -0,0 +1,267 @@
## Global Args ##################################################################
ARG BASE_UBI_IMAGE_TAG=9.5-1741850109
ARG PYTHON_VERSION=3.12
# Default ROCm ARCHes to build vLLM for.
ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
ARG MAX_JOBS=12


FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base

ARG PYTHON_VERSION

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

RUN --mount=type=cache,target=/root/.cache/pip \
    microdnf -y update && \
    microdnf install -y --setopt=install_weak_deps=0 --nodocs \
        python${PYTHON_VERSION}-devel \
        python${PYTHON_VERSION}-pip \
        python${PYTHON_VERSION}-wheel && \
    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && \
    pip install -U pip wheel setuptools uv && \
    microdnf clean all


FROM base AS rocm_base
ARG ROCM_VERSION=6.3.4
ARG PYTHON_VERSION
ARG BASE_UBI_IMAGE_TAG
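
# Configure the AMD dnf repos for this ROCm release. ${BASE_UBI_IMAGE_TAG/-*/}
# trims the build-date suffix from the UBI tag (e.g. "9.5-1741850109" -> "9.5")
# so the amdgpu baseurl tracks the matching RHEL minor release.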
RUN printf "[amdgpu]\n\
name=amdgpu\n\
baseurl=https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/${BASE_UBI_IMAGE_TAG/-*/}/main/x86_64/\n\
enabled=1\n\
priority=50\n\
gpgcheck=1\n\
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key\n\
[ROCm-${ROCM_VERSION}]\n\
name=ROCm${ROCM_VERSION}\n\
baseurl=https://repo.radeon.com/rocm/rhel9/${ROCM_VERSION}/main\n\
enabled=1\n\
priority=50\n\
gpgcheck=1\n\
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo
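
# Install the matching PyTorch nightly wheels. The awk expression reduces
# ROCM_VERSION to major.minor (e.g. "6.3.4" -> "6.3"), which is how the
# PyTorch wheel index names its rocm variants.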
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    export version="$(awk -F. '{print $1"."$2}' <<< $ROCM_VERSION)" && \
    uv pip install --pre \
        --index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
        torch==2.7.0.dev20250308+rocm${version} \
        torchvision==0.22.0.dev20250308+rocm${version} && \
    # Install libdrm-amdgpu to avoid errors when retrieving device information (amdgpu.ids: No such file or directory)
    microdnf install -y --nodocs libdrm-amdgpu && \
    microdnf clean all


ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/numpy.libs:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/pillow.libs:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/triton/backends/amd/lib:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torch/lib:$LD_LIBRARY_PATH"
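
# Also register these directories with the system dynamic linker, so the bundled
# libraries resolve even for processes that do not inherit LD_LIBRARY_PATH.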
RUN echo $LD_LIBRARY_PATH | tr : \\n >> /etc/ld.so.conf.d/torch-venv.conf && \
    ldconfig

FROM rocm_base AS rocm_devel

ENV CCACHE_DIR=/root/.cache/ccache

RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
    rpm -ql epel-release && \
    microdnf -y update && \
    microdnf --nodocs -y install \
        ccache \
        git \
        # packages required to build vllm
        amd-smi-lib \
        hipblas-devel \
        hipblaslt-devel \
        hipcc \
        hipcub-devel \
        hipfft-devel \
        hiprand-devel \
        hipsolver-devel \
        hipsparse-devel \
        hsa-rocr-devel \
        miopen-hip-devel \
        rccl-devel \
        rocblas-devel \
        rocm-device-libs \
        rocprim-devel \
        rocrand-devel \
        rocthrust-devel \
        # end packages required to build vllm
        wget \
        which && \
    microdnf clean all

WORKDIR /workspace

ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer
ENV PATH=$PATH:/opt/rocm/bin
ENV CPLUS_INCLUDE_PATH=$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torch/include:/opt/rocm/include

FROM rocm_devel AS build_amdsmi

# Build AMD SMI wheel
RUN cd /opt/rocm/share/amd_smi && \
    python3 -m pip wheel . --wheel-dir=/install

##################################################################################################

FROM rocm_devel AS build_flashattention

ARG FA_GFX_ARCHS="gfx90a;gfx942"

# the FA_BRANCH commit belongs to the ROCm/flash-attention fork, `main_perf` branch
ARG FA_BRANCH="3cea2fb"
ARG MAX_JOBS
ENV MAX_JOBS=${MAX_JOBS}
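
# GPU_ARCHS limits the flash-attention build to the CDNA arches in FA_GFX_ARCHS
# (gfx90a/gfx942, i.e. MI200/MI300-class GPUs).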
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/workspace/build \
    mkdir -p /libs && \
    cd /libs && \
    git clone https://github.com/ROCm/flash-attention.git && \
    cd flash-attention && \
    git checkout ${FA_BRANCH} && \
    git submodule update --init && \
    uv pip install cmake ninja packaging && \
    env \
        GPU_ARCHS="${FA_GFX_ARCHS}" \
        python3 setup.py bdist_wheel --dist-dir=/install

##################################################################################################

FROM rocm_devel AS build_vllm
ARG PYTORCH_ROCM_ARCH
ARG PYTHON_VERSION
ARG MAX_JOBS
ENV MAX_JOBS=${MAX_JOBS}
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}

COPY . .

ENV VLLM_TARGET_DEVICE="rocm"
# Make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1
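
# CFLAGS=-march=haswell pins the host-side (CPU) code to a Haswell/AVX2 baseline
# so the resulting wheel also runs on older x86-64 machines.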
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    uv pip install -v -U \
        ninja "setuptools-scm>=8" "cmake>=3.26" packaging && \
    env CFLAGS="-march=haswell" \
        CXXFLAGS="$CFLAGS $CXXFLAGS" \
        CMAKE_BUILD_TYPE=Release \
        python3 setup.py bdist_wheel --dist-dir=dist

#################### libsodium Build IMAGE ####################
FROM rocm_base AS libsodium-builder

RUN microdnf install -y --nodocs gcc gzip tar \
    && microdnf clean all

WORKDIR /usr/src/libsodium

ARG LIBSODIUM_VERSION=1.0.20
RUN curl -LO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM_VERSION}-RELEASE/libsodium-${LIBSODIUM_VERSION}.tar.gz \
    && tar -xzvf libsodium*.tar.gz \
    && rm -f libsodium*.tar.gz \
    && mv libsodium*/* ./
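
# Compile with hardening flags roughly matching the distribution defaults;
# `make check` runs libsodium's test suite before the library is installed
# into the runtime stage below.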
RUN CFLAGS="-O3 -Wall -Werror=format-security -Wno-unused-function -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection" \
    ./configure \
        --prefix="/usr/" \
        --libdir=/usr/lib64 && \
    make -j $(nproc) && \
    make check

##################################################################################################

FROM rocm_base AS vllm-openai
ARG MAX_JOBS
# ARGs do not cross stage boundaries, so redeclare ROCM_VERSION here for the
# wheel-index URL below (it would otherwise expand empty).
ARG ROCM_VERSION=6.3.4

WORKDIR /workspace

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=$VIRTUAL_ENV/bin:$PATH

# Required for triton
RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs gcc rsync && \
    microdnf clean all

# Install libsodium for Tensorizer encryption
RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
    cd /usr/src/libsodium \
    && make install
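
# Pull the wheels produced by the build stages in via bind mounts, so compilers
# and ROCm -devel packages never enter the runtime image.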
RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install/amdsmi/ \
    --mount=type=bind,from=build_flashattention,src=/install,target=/install/flashattention \
    --mount=type=bind,from=build_vllm,src=/workspace/dist,target=/install/vllm/ \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    export version="$(awk -F. '{print $1"."$2}' <<< $ROCM_VERSION)" && \
    uv pip install \
        --index-strategy=unsafe-best-match \
        --extra-index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
        /install/amdsmi/*.whl \
        /install/flashattention/*.whl \
        /install/vllm/*.whl

ENV HF_HUB_OFFLINE=1 \
    HOME=/home/vllm \
    # Allow requested max length to exceed what is extracted from the
    # config.json
    # see: https://github.com/vllm-project/vllm/pull/7080
    VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
    VLLM_USAGE_SOURCE=production-docker-image \
    VLLM_WORKER_MULTIPROC_METHOD=fork \
    VLLM_NO_USAGE_STATS=1 \
    # Silences the HF Tokenizers warning
    TOKENIZERS_PARALLELISM=false \
    RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 \
    VLLM_USE_TRITON_FLASH_ATTN=0 \
    HIP_FORCE_DEV_KERNARG=1 \
    OUTLINES_CACHE_DIR=/tmp/outlines \
    NUMBA_CACHE_DIR=/tmp/numba \
    TRITON_CACHE_DIR=/tmp/triton

# setup non-root user for OpenShift
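# (OpenShift runs containers with an arbitrary UID in group 0, hence the
# group-writable home directory rather than one owned by a fixed user.)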
RUN umask 002 && \
    useradd --uid 2000 --gid 0 vllm && \
    mkdir -p /licenses /home/vllm && \
    chmod g+rwx /home/vllm

COPY LICENSE /licenses/vllm.md
COPY examples/*.jinja /app/data/template/

USER 2000
WORKDIR /home/vllm

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]

FROM vllm-openai AS vllm-grpc-adapter

USER root

RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=build_vllm,src=/workspace/dist,target=/install/vllm/ \
    HOME=/root uv pip install /install/vllm/*.whl vllm-tgis-adapter==0.6.3

ENV GRPC_PORT=8033 \
    PORT=8000 \
    # As an optimization, vLLM disables logprobs when using spec decoding by
    # default, but this would be unexpected to users of a hosted model that
    # happens to have spec decoding
    # see: https://github.com/vllm-project/vllm/pull/6485
    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false

USER 2000
ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
