# Please mirror any changes made here in
# docs/source/dev/dockerfile-ubi/dockerfile-ubi.rst

## Global Args #################################################################
ARG BASE_UBI_IMAGE_TAG=9.4
ARG PYTHON_VERSION=3.11

ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
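# Example (hypothetical invocation): narrow the arch list at build time to
# target a single GPU generation, e.g. only A100:
#   docker build --build-arg TORCH_CUDA_ARCH_LIST="8.0+PTX" .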


## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
ARG PYTHON_VERSION

RUN microdnf install -y \
    python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
    && microdnf clean all

WORKDIR /workspace

ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# Some utils for dev purposes - tar required for kubectl cp
RUN microdnf install -y \
    which procps findutils tar vim git \
    && microdnf clean all


## Python Installer ############################################################
FROM base as python-install

ARG PYTHON_VERSION

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN microdnf install -y \
    python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel && microdnf clean all
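# (Because $VIRTUAL_ENV/bin is prepended to PATH, bare `pip` and `python`
# invocations in this stage and in stages derived from it resolve into the
# /opt/vllm virtualenv.)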


## CUDA Base ###################################################################
FROM python-install as cuda-base

# The Nvidia operator won't allow deploying on CUDA 12.0 hosts if
# this env var is set to 12.2.0, even though it's compatible
#ENV CUDA_VERSION=12.2.0 \
ENV CUDA_VERSION=12.0.0 \
    NV_CUDA_LIB_VERSION=12.2.0-1 \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    NV_CUDA_CUDART_VERSION=12.2.53-1 \
    NV_CUDA_COMPAT_VERSION=535.104.12

RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
    https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo

RUN microdnf install -y \
    cuda-cudart-12-2-${NV_CUDA_CUDART_VERSION} \
    cuda-compat-12-2-${NV_CUDA_COMPAT_VERSION} \
    && microdnf clean all


ARG CUDA_HOME="/usr/local/cuda"
ENV CUDA_HOME=${CUDA_HOME} \
    PATH="${CUDA_HOME}/bin:${PATH}" \
    LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"


## CUDA Development ############################################################
FROM cuda-base as cuda-devel

ENV NV_CUDA_CUDART_DEV_VERSION=12.2.53-1 \
    NV_NVML_DEV_VERSION=12.2.81-1 \
    NV_LIBCUBLAS_DEV_VERSION=12.2.1.16-1 \
    NV_LIBNPP_DEV_VERSION=12.1.1.14-1 \
    NV_LIBNCCL_DEV_PACKAGE_VERSION=2.18.5-1+cuda12.2

RUN microdnf install -y \
    cuda-command-line-tools-12-2-${NV_CUDA_LIB_VERSION} \
    cuda-libraries-devel-12-2-${NV_CUDA_LIB_VERSION} \
    cuda-minimal-build-12-2-${NV_CUDA_LIB_VERSION} \
    cuda-cudart-devel-12-2-${NV_CUDA_CUDART_DEV_VERSION} \
    cuda-nvml-devel-12-2-${NV_NVML_DEV_VERSION} \
    libcublas-devel-12-2-${NV_LIBCUBLAS_DEV_VERSION} \
    libnpp-devel-12-2-${NV_LIBNPP_DEV_VERSION} \
    libnccl-devel-${NV_LIBNCCL_DEV_PACKAGE_VERSION} \
    && microdnf clean all

ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-12.2/compat/
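# (The compat/ directory is installed by the cuda-compat package in cuda-base
# and presumably carries a user-space libcuda; registering it with ldconfig
# lets triton locate libcuda.so even when the host driver is older.)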

## Python CUDA Base ############################################################
FROM cuda-devel AS python-cuda-base

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# install cuda and common dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
    pip install \
        -r requirements-cuda.txt
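# (requirements-common.txt is bind-mounted as well because requirements-cuda.txt
# presumably pulls it in via `-r requirements-common.txt`.)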

## Development #################################################################
FROM python-cuda-base AS dev

# install build and runtime dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
    --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
    pip3 install \
        -r requirements-cuda.txt \
        -r requirements-dev.txt

## Proto Compilation ###########################################################
FROM python-install AS gen-protos

ENV PATH=/opt/vllm/bin/:$PATH

RUN microdnf install -y \
    make \
    findutils \
    && microdnf clean all

RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=Makefile,target=Makefile \
    --mount=type=bind,source=proto,target=proto \
    make gen-protos

## Builder #####################################################################
FROM dev AS build

# install build dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
    pip install -r requirements-build.txt

# install compiler cache to speed up compilation leveraging local or remote caching
RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y ccache && microdnf clean all

# copy input files
COPY csrc csrc
COPY setup.py setup.py
COPY cmake cmake
COPY CMakeLists.txt CMakeLists.txt
COPY requirements-common.txt requirements-common.txt
COPY requirements-cuda.txt requirements-cuda.txt
COPY pyproject.toml pyproject.toml

ARG TORCH_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=$nvcc_threads
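# Example (hypothetical invocation): raise parallelism on a large build host;
# note that each nvcc job can consume several GB of memory:
#   docker build --build-arg max_jobs=8 --build-arg nvcc_threads=4 .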
# make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1

# Make sure the cuda environment is in the PATH
ENV PATH=/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

# Copy the entire directory before building the wheel
COPY vllm vllm

# Copy over the generated *.pb2 files
COPY --from=gen-protos /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb

ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    CMAKE_BUILD_TYPE=Release python3 setup.py bdist_wheel --dist-dir=dist

## Release #####################################################################
# Note from the non-UBI Dockerfile:
# We used the base CUDA image because PyTorch installs its own CUDA libraries.
# However, pynccl depends on the CUDA libraries, so we had to switch to the runtime image.
# In the future it would be nice to get a container with PyTorch and CUDA without duplicating CUDA.
FROM python-install AS vllm-openai

WORKDIR /workspace

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=$VIRTUAL_ENV/bin/:$PATH

# Triton needs a CC compiler
RUN microdnf install -y gcc \
    && microdnf clean all

# install the vllm wheel first, so that torch etc. will be installed
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
    --mount=type=cache,target=/root/.cache/pip \
    pip install dist/*.whl --verbose
# vLLM requires a specific NCCL version, built here from the source distribution
# See https://github.com/NVIDIA/nccl/issues/1234
RUN pip install \
        -v \
        --force-reinstall \
        --no-binary="all" \
        --no-cache-dir \
        "vllm-nccl-cu12==2.18.1.0.4.0" && \
    mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /opt/vllm/lib/ && \
    chmod 0755 /opt/vllm/lib/libnccl.so.2.18.1
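# (The library is relocated to /opt/vllm/lib/ presumably because the final
# image runs as a non-root user with HOME=/home/vllm, so the default install
# location under /root would be inaccessible; VLLM_NCCL_SO_PATH below points
# vLLM at the relocated file.)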


RUN --mount=type=cache,target=/root/.cache/pip \
    pip install \
        # additional dependencies for the TGIS gRPC server
        grpcio-tools==1.63.0 \
        # additional dependencies for openai api_server
        accelerate==0.30.0 \
        # hf_transfer for faster HF hub downloads
        hf_transfer==0.1.6

ENV HF_HUB_OFFLINE=1 \
    PORT=8000 \
    GRPC_PORT=8033 \
    HOME=/home/vllm \
    VLLM_NCCL_SO_PATH=/opt/vllm/lib/libnccl.so.2.18.1 \
    VLLM_USAGE_SOURCE=production-docker-image \
    VLLM_WORKER_MULTIPROC_METHOD=fork
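# (HF_HUB_OFFLINE=1 disables Hugging Face Hub downloads at runtime; to let the
# server fetch models on demand, override it when starting the container, e.g.
# `docker run -e HF_HUB_OFFLINE=0 ...`.)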

# setup non-root user for OpenShift
RUN umask 002 \
    && useradd --uid 2000 --gid 0 vllm \
    && chmod g+rwx $HOME /usr/src /workspace

COPY LICENSE /licenses/vllm.md

USER 2000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
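
# Example (hypothetical) invocation -- serve a pre-downloaded model through the
# OpenAI-compatible API on port 8000:
#   docker run --gpus all -p 8000:8000 -v /models:/models <image> --model /models/my-model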