## Global Args ##################################################################
ARG BASE_UBI_IMAGE_TAG=9.5-1741850109
ARG PYTHON_VERSION=3.12
# Default ROCm ARCHes to build vLLM for.
ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
ARG MAX_JOBS=12
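
# Build-time usage sketch (the image tag is illustrative, not defined here):
# the global args above can be overridden per build, and `vllm-openai` below
# is the main runtime stage:
#   docker build --target vllm-openai \
#     --build-arg MAX_JOBS=8 \
#     --build-arg PYTORCH_ROCM_ARCH="gfx942" \
#     -t vllm-rocm .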

FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base

ARG PYTHON_VERSION

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

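# Install Python and create the virtual env. VIRTUAL_ENV is already first on
# PATH, so the pip/uv invocations below (and in every later stage) install
# into /opt/vllm rather than the system Python.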
RUN --mount=type=cache,target=/root/.cache/pip \
    microdnf -y update && \
    microdnf install -y --setopt=install_weak_deps=0 --nodocs \
        python${PYTHON_VERSION}-devel \
        python${PYTHON_VERSION}-pip \
        python${PYTHON_VERSION}-wheel && \
    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && \
    pip install -U pip wheel setuptools uv && \
    microdnf clean all


FROM base AS rocm_base
ARG ROCM_VERSION=6.3.4
ARG PYTHON_VERSION
ARG BASE_UBI_IMAGE_TAG

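# Write dnf repo definitions for AMDGPU and ROCm. ${BASE_UBI_IMAGE_TAG/-*/}
# strips the build-date suffix from the UBI tag (e.g. 9.5-1741850109 -> 9.5)
# to select the matching RHEL minor release in the repo URL.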
RUN printf "[amdgpu]\n\
name=amdgpu\n\
baseurl=https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/${BASE_UBI_IMAGE_TAG/-*/}/main/x86_64/\n\
enabled=1\n\
priority=50\n\
gpgcheck=1\n\
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key\n\
[ROCm-${ROCM_VERSION}]\n\
name=ROCm${ROCM_VERSION}\n\
baseurl=https://repo.radeon.com/rocm/rhel9/${ROCM_VERSION}/main\n\
enabled=1\n\
priority=50\n\
gpgcheck=1\n\
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo

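# Install nightly PyTorch wheels for ROCm. The index path only carries the
# major.minor ROCm version, so 6.3.4 is truncated to 6.3 by the awk call below.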
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    export version="$(awk -F. '{print $1"."$2}' <<< $ROCM_VERSION)" && \
    uv pip install --pre \
        --index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
        torch==2.7.0.dev20250308+rocm${version} \
        torchvision==0.22.0.dev20250308+rocm${version} && \
    # Install libdrm-amdgpu to avoid errors when retrieving device information (amdgpu.ids: No such file or directory)
    microdnf install -y --nodocs libdrm-amdgpu && \
    microdnf clean all


ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/numpy.libs:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/pillow.libs:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/triton/backends/amd/lib:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torch/lib:$LD_LIBRARY_PATH"

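# Also register these directories with the dynamic linker (one path per line
# in ld.so.conf.d), so the bundled shared libraries resolve even for processes
# that do not inherit LD_LIBRARY_PATH.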
RUN echo $LD_LIBRARY_PATH | tr : \\n >> /etc/ld.so.conf.d/torch-venv.conf && \
    ldconfig

FROM rocm_base AS rocm_devel

ENV CCACHE_DIR=/root/.cache/ccache

RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
    rpm -ql epel-release && \
    microdnf -y update && \
    microdnf --nodocs -y install \
        ccache \
        git \
        # packages required to build vllm
        amd-smi-lib \
        hipblas-devel \
        hipblaslt-devel \
        hipcc \
        hipcub-devel \
        hipfft-devel \
        hiprand-devel \
        hipsolver-devel \
        hipsparse-devel \
        hsa-rocr-devel \
        miopen-hip-devel \
        rccl-devel \
        rocblas-devel \
        rocm-device-libs \
        rocprim-devel \
        rocrand-devel \
        rocthrust-devel \
        # end packages required to build vllm
        wget \
        which && \
    microdnf clean all

WORKDIR /workspace

ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer
ENV PATH=$PATH:/opt/rocm/bin
ENV CPLUS_INCLUDE_PATH=$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torch/include:/opt/rocm/include


FROM rocm_devel AS build_amdsmi

# Build AMD SMI wheel
RUN cd /opt/rocm/share/amd_smi && \
    python3 -m pip wheel . --wheel-dir=/install

##################################################################################################

FROM rocm_devel AS build_flashattention

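# Flash Attention is built only for the CDNA architectures (gfx90a = MI200
# series, gfx942 = MI300 series); the other arches in PYTORCH_ROCM_ARCH are
# not covered by this wheel.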
ARG FA_GFX_ARCHS="gfx90a;gfx942"

# the FA_BRANCH commit belongs to the ROCm/flash-attention fork, `main_perf` branch
ARG FA_BRANCH="3cea2fb"
ARG MAX_JOBS
ENV MAX_JOBS=${MAX_JOBS}

RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/workspace/build \
    mkdir -p /libs && \
    cd /libs && \
    git clone https://github.com/ROCm/flash-attention.git && \
    cd flash-attention && \
    git checkout ${FA_BRANCH} && \
    git submodule update --init && \
    uv pip install cmake ninja packaging && \
    env \
        GPU_ARCHS="${FA_GFX_ARCHS}" \
        python3 setup.py bdist_wheel --dist-dir=/install

##################################################################################################

FROM rocm_devel AS build_vllm
ARG PYTORCH_ROCM_ARCH
ARG PYTHON_VERSION
ARG MAX_JOBS
ENV MAX_JOBS=${MAX_JOBS}
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}

COPY . .

ENV VLLM_TARGET_DEVICE="rocm"
# Make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1

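# Install build-time Python deps, then build the wheel. -march=haswell
# constrains host-side (CPU) code generation to a conservative x86-64 baseline,
# presumably so the wheel runs on older machines; GPU code generation is
# governed by PYTORCH_ROCM_ARCH.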
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    uv pip install -v -U \
        ninja "setuptools-scm>=8" "cmake>=3.26" packaging && \
    env CFLAGS="-march=haswell" \
        CXXFLAGS="$CFLAGS $CXXFLAGS" \
        CMAKE_BUILD_TYPE=Release \
        python3 setup.py bdist_wheel --dist-dir=dist

#################### libsodium Build IMAGE ####################
FROM rocm_base AS libsodium-builder

RUN microdnf install -y --nodocs gcc gzip tar \
    && microdnf clean all

WORKDIR /usr/src/libsodium

ARG LIBSODIUM_VERSION=1.0.20
RUN curl -LO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM_VERSION}-RELEASE/libsodium-${LIBSODIUM_VERSION}.tar.gz \
    && tar -xzvf libsodium*.tar.gz \
    && rm -f libsodium*.tar.gz \
    && mv libsodium*/* ./

# Build and test here; `make install` runs later, in the runtime stage, from a
# bind mount of this directory.
RUN CFLAGS="-O3 -Wall -Werror=format-security -Wno-unused-function -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection" \
    ./configure \
        --prefix="/usr/" \
        --libdir=/usr/lib64 && \
    make -j $(nproc) && \
    make check

##################################################################################################

FROM rocm_base AS vllm-openai
ARG MAX_JOBS
# ARG values do not carry across stages: redeclare ROCM_VERSION here so the
# nightly index URL derived below is not built from an empty string.
ARG ROCM_VERSION=6.3.4

WORKDIR /workspace

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=$VIRTUAL_ENV/bin:$PATH

# Required for triton
RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs gcc rsync && \
    microdnf clean all

# Install libsodium for Tensorizer encryption
RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
    cd /usr/src/libsodium \
    && make install

RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install/amdsmi/ \
    --mount=type=bind,from=build_flashattention,src=/install,target=/install/flashattention \
    --mount=type=bind,from=build_vllm,src=/workspace/dist,target=/install/vllm/ \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    export version="$(awk -F. '{print $1"."$2}' <<< $ROCM_VERSION)" && \
    uv pip install \
        --index-strategy=unsafe-best-match \
        --extra-index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
        /install/amdsmi/*.whl \
        /install/flashattention/*.whl \
        /install/vllm/*.whl

ENV HF_HUB_OFFLINE=1 \
    HOME=/home/vllm \
    # Allow requested max length to exceed what is extracted from the
    # config.json
    # see: https://github.com/vllm-project/vllm/pull/7080
    VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
    VLLM_USAGE_SOURCE=production-docker-image \
    VLLM_WORKER_MULTIPROC_METHOD=fork \
    VLLM_NO_USAGE_STATS=1 \
    # Silences the HF Tokenizers warning
    TOKENIZERS_PARALLELISM=false \
    RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 \
    VLLM_USE_TRITON_FLASH_ATTN=0 \
    HIP_FORCE_DEV_KERNARG=1 \
    OUTLINES_CACHE_DIR=/tmp/outlines \
    NUMBA_CACHE_DIR=/tmp/numba \
    TRITON_CACHE_DIR=/tmp/triton

# setup non-root user for OpenShift
RUN umask 002 && \
    useradd --uid 2000 --gid 0 vllm && \
    mkdir -p /licenses /home/vllm && \
    chmod g+rwx /home/vllm

COPY LICENSE /licenses/vllm.md
COPY examples/*.jinja /app/data/template/

USER 2000
WORKDIR /home/vllm

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
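
# Run-time usage sketch (image tag and model path are illustrative): the ROCm
# device nodes must be passed through, and since HF_HUB_OFFLINE=1 is set the
# model weights must already be available locally (or the variable overridden):
#   docker run --device=/dev/kfd --device=/dev/dri -p 8000:8000 \
#     -v /models/my-model:/models/my-model vllm-rocm \
#     --model /models/my-model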


# vllm-tgis-adapter wraps the OpenAI server and additionally exposes a
# TGIS-compatible gRPC front end (GRPC_PORT, 8033 by default) alongside the
# HTTP port.
FROM vllm-openai AS vllm-grpc-adapter

USER root

RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=build_vllm,src=/workspace/dist,target=/install/vllm/ \
    HOME=/root uv pip install /install/vllm/*.whl vllm-tgis-adapter==0.6.3

ENV GRPC_PORT=8033 \
    PORT=8000 \
    # As an optimization, vLLM disables logprobs when using spec decoding by
    # default, but this would be unexpected to users of a hosted model that
    # happens to have spec decoding
    # see: https://github.com/vllm-project/vllm/pull/6485
    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false

USER 2000
ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]