|
| 1 | +ARG BUILD_STAGE=prod |
| 2 | +# BUILD_STAGE is consumed by the "FROM ${BUILD_STAGE}" line near the end of this file. |
| 3 | +FROM public.ecr.aws/docker/library/ubuntu:22.04 AS base |
| 4 | + |
| 5 | +# Image metadata. |
| 6 | +LABEL maintainer="Amazon AI" dlc_major_version="1" |
| 7 | + |
| 8 | +# Build-time arguments used by the install steps of this stage. |
| 9 | +ARG DEBIAN_FRONTEND=noninteractive |
| 10 | +ARG MAMBA_VERSION=23.1.0-4 |
| 11 | +ARG PIP=pip3 |
| 12 | +ARG PYTHON=python3.10 |
| 13 | +ARG PYTHON_VERSION=3.10.12 |
| 14 | +ARG TORCHSERVE_VERSION=0.11.0 |
| 15 | +# Locale setting; see http://bugs.python.org/issue19846 |
| 16 | +ENV LANG=C.UTF-8 |
| 17 | +# No ":$LD_LIBRARY_PATH" suffix: nothing earlier in this file sets that variable, so the suffix expanded to an empty trailing entry, which the dynamic loader treats as the current directory. |
| 18 | +ENV LD_LIBRARY_PATH=/opt/aws/neuron/lib:/lib/x86_64-linux-gnu:/opt/conda/lib/ |
| 19 | +ENV PATH=/opt/conda/bin:/opt/aws/neuron/bin:$PATH |
| 20 | +ENV VLLM_TARGET_DEVICE=neuron |
| 21 | +# OS packages for the base stage (alphabetical); package caches are removed in the same layer. |
| 22 | +RUN apt-get update \ |
| 23 | + && apt-get upgrade --yes \ |
| 24 | + && apt-get install --yes --no-install-recommends \ |
| 25 | + apt-transport-https \ |
| 26 | + build-essential \ |
| 27 | + ca-certificates \ |
| 28 | + cmake \ |
| 29 | + curl \ |
| 30 | + emacs \ |
| 31 | + ffmpeg \ |
| 32 | + git \ |
| 33 | + gnupg2 \ |
| 34 | + gpg-agent \ |
| 35 | + jq \ |
| 36 | + libcap-dev \ |
| 37 | + libgl1 \ |
| 38 | + libgl1-mesa-glx \ |
| 39 | + libglib2.0-0 \ |
| 40 | + libhwloc-dev \ |
| 41 | + libsm6 \ |
| 42 | + libxext6 \ |
| 43 | + libxrender-dev \ |
| 44 | + openjdk-11-jdk \ |
| 45 | + openssh-client \ |
| 46 | + unzip \ |
| 47 | + vim \ |
| 48 | + wget \ |
| 49 | + zlib1g-dev \ |
| 50 | + && apt-get clean \ |
| 51 | + && rm -rf /tmp/tmp* \ |
| 52 | + && rm -rf /var/lib/apt/lists/* |
| 53 | +# Rewrite the Java CA trust store as a JKS keystore, then re-run the ca-certificates-java post-install step. |
| 54 | +# Background: https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files |
| 55 | +RUN keytool -importkeystore -noprompt -srckeystore /etc/ssl/certs/java/cacerts -srcstorepass changeit -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -deststorepass changeit; \ |
| 56 | + mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \ |
| 57 | + /var/lib/dpkg/info/ca-certificates-java.postinst configure |
| 58 | +# Download and run the Mambaforge installer into /opt/conda, then install the core conda-forge packages. |
| 59 | +RUN curl --location --output ~/mambaforge.sh https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-x86_64.sh \ |
| 60 | + && chmod +x ~/mambaforge.sh \ |
| 61 | + && ~/mambaforge.sh -b -p /opt/conda \ |
| 62 | + && rm ~/mambaforge.sh \ |
| 63 | + && /opt/conda/bin/conda update --yes conda \ |
| 64 | + && /opt/conda/bin/mamba install -y -c conda-forge \ |
| 65 | + python=${PYTHON_VERSION} \ |
| 66 | + cython \ |
| 67 | + mkl \ |
| 68 | + mkl-include \ |
| 69 | + parso \ |
| 70 | + pyopenssl \ |
| 71 | + typing \ |
| 72 | + # The next two ship with the miniconda base but not with mamba, so install them explicitly |
| 73 | + charset-normalizer \ |
| 74 | + conda-content-trust \ |
| 75 | + && /opt/conda/bin/conda clean --all --yes |
| 76 | +# Extra conda-forge and pip packages. -y keeps the mamba step explicitly non-interactive, matching the earlier mamba install. |
| 77 | +RUN /opt/conda/bin/mamba install -c conda-forge -y \ |
| 78 | + scikit-learn \ |
| 79 | + h5py \ |
| 80 | + requests \ |
| 81 | + && conda clean -ya \ |
| 82 | + && pip install --upgrade pip \ |
| 83 | + --trusted-host pypi.org --trusted-host files.pythonhosted.org \ |
| 84 | + && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \ |
| 85 | + && pip install \ |
| 86 | + packaging \ |
| 87 | + enum-compat \ |
| 88 | + ipython \ |
| 89 | + && rm -rf ~/.cache/pip/* |
| 90 | +# Helper scripts, copied into /usr/local/bin with mode 755 |
| 91 | +COPY --chmod=755 deep_learning_container.py neuron-monitor.sh vllm_entrypoint.py /usr/local/bin/ |
| 92 | +# vLLM requirements file, installed by a later "pip install -r" step |
| 93 | +COPY --chmod=755 vllm_requirements.txt /root/ |
| 94 | +### Mount Point ### |
| 95 | +# When launching the container, mount the code directory to /workspace. |
| 96 | +# VOLUME uses the plain-string form: the bracketed form is only read as a JSON array when its items are double-quoted; unquoted, it is split on whitespace and would also declare "[" and "]" as volumes. |
| 97 | +ARG APP_MOUNT=/workspace |
| 98 | +VOLUME ${APP_MOUNT} |
| 99 | +WORKDIR ${APP_MOUNT}/vllm |
| 100 | +# Python tooling, TorchServe (pinned by TORCHSERVE_VERSION), torchvision without its dependencies, and the vLLM requirements file. |
| 101 | +RUN ${PIP} install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \ |
| 102 | + && ${PIP} install --no-cache-dir --upgrade \ |
| 103 | + awscli \ |
| 104 | + boto3 \ |
| 105 | + "cmake>=3.26" \ |
| 106 | + cryptography \ |
| 107 | + jinja2 \ |
| 108 | + ninja \ |
| 109 | + opencv-python \ |
| 110 | + packaging \ |
| 111 | + pandas \ |
| 112 | + pytest \ |
| 113 | + scipy \ |
| 114 | + "setuptools-scm>=8" \ |
| 115 | + wheel \ |
| 116 | + torch-model-archiver==${TORCHSERVE_VERSION} \ |
| 117 | + torchserve==${TORCHSERVE_VERSION} \ |
| 118 | + && ${PIP} install --no-cache-dir --no-deps --upgrade torchvision \ |
| 119 | + && ${PIP} install --no-cache-dir --requirement /root/vllm_requirements.txt \ |
| 120 | + && rm -rf ~/.cache/pip/* |
| 121 | +# Create the model-server user, its tmp directory and the model directory, all owned by that user. |
| 122 | +RUN useradd --create-home model-server \ |
| 123 | + && mkdir --parents /opt/ml/model /home/model-server/tmp \ |
| 124 | + && chown --recursive model-server /opt/ml/model /home/model-server |
| 125 | +COPY config.properties /home/model-server |
| 126 | +# Compliance: download the OSS compliance bundle, install testOSSCompliance and run the generator script. |
| 127 | +# The downloaded bundle is deleted again before this RUN finishes. |
| 128 | +RUN WORK_DIR=/root \ |
| 129 | + && curl --output "${WORK_DIR}/oss_compliance.zip" https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ |
| 130 | + && unzip "${WORK_DIR}/oss_compliance.zip" -d "${WORK_DIR}/" \ |
| 131 | + && cp "${WORK_DIR}/oss_compliance/test/testOSSCompliance" /usr/local/bin/testOSSCompliance \ |
| 132 | + && chmod +x "${WORK_DIR}/oss_compliance/generate_oss_compliance.sh" \ |
| 133 | + && chmod +x /usr/local/bin/testOSSCompliance \ |
| 134 | + && "${WORK_DIR}/oss_compliance/generate_oss_compliance.sh" "${WORK_DIR}" ${PYTHON} \ |
| 135 | + && rm -rf "${WORK_DIR}"/oss_compliance* \ |
| 136 | + # conda leaves an empty /root/.cache/conda/notices.cache file behind that "conda clean -ya" does not remove |
| 137 | + && rm -rf "${WORK_DIR}/.cache/conda" |
| 138 | +# Setting up APT and PIP repo for neuron artifacts. NOTE(review): the apt suite is "focal" although the base image is Ubuntu 22.04 - confirm this is intended. |
| 139 | +# The *_REPO values may be given with or without a leading "https://": the RUN below strips that prefix (${VAR#https://}) before adding its own scheme and the optional "<key>@" credentials, so the defaults no longer expand to "https://https://..." URLs. |
| 140 | +ARG NEURON_APT_REPO=https://apt.repos.neuron.amazonaws.com |
| 141 | +ARG NEURON_APT_REPO_KEY |
| 142 | +ARG NEURON_PIP_REPO=https://pip.repos.neuron.amazonaws.com |
| 143 | +ARG NEURON_PIP_REPO_KEY |
| 144 | +RUN mkdir -p /etc/apt/keyrings \ |
| 145 | + && APT_REPO_PREFIX=$([ -n "${NEURON_APT_REPO_KEY}" ] && echo "${NEURON_APT_REPO_KEY}@" || echo "") \ |
| 146 | + && echo "deb [signed-by=/etc/apt/keyrings/neuron.gpg] https://${APT_REPO_PREFIX}${NEURON_APT_REPO#https://} focal main" > /etc/apt/sources.list.d/neuron.list \ |
| 147 | + && curl $([ -n "${NEURON_APT_REPO_KEY}" ] && echo "-u ${NEURON_APT_REPO_KEY}") -sSL "https://${NEURON_APT_REPO#https://}/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB" | gpg --dearmor > /etc/apt/keyrings/neuron.gpg \ |
| 148 | + && PIP_REPO_URL=$([ -n "${NEURON_PIP_REPO_KEY}" ] && echo "https://${NEURON_PIP_REPO_KEY}@${NEURON_PIP_REPO#https://}" || echo "https://${NEURON_PIP_REPO#https://}") \ |
| 149 | + && ${PIP} config set global.extra-index-url "${PIP_REPO_URL}" |
| 150 | + |
| 151 | +# Neuron artifact location and missing-component policy |
| 152 | +ARG NEURON_ARTIFACT_PATH=/root/neuron_artifacts |
| 153 | +ARG IGNORE_MISSING_NEURON_COMPONENTS=false |
| 154 | +# NOTE: a shell variable assigned in one RUN does not survive into later instructions, so any lower-casing of IGNORE_MISSING_NEURON_COMPONENTS has to happen inside the RUN that reads it; the standalone RUN that only performed that assignment had no effect and was dropped. |
| 155 | +# Neuron SDK component versions: system packages installed with apt-get in the prod stage |
| 156 | +ARG NEURONX_COLLECTIVES_LIB_VERSION=2.27.34.0-ec8cd5e8b |
| 157 | +ARG NEURONX_RUNTIME_LIB_VERSION=2.27.23.0-8deec4dbf |
| 158 | +ARG NEURONX_TOOLS_VERSION=2.25.145.0 |
| 159 | +# Neuron SDK component versions: Python packages installed with pip in the prod stage |
| 160 | +ARG NEURONX_CC_VERSION=2.20.9961.0 |
| 161 | +ARG NEURONX_FRAMEWORK_VERSION=2.7.0.2.9.9357+08e1f40d |
| 162 | +ARG NEURONX_DISTRIBUTED_VERSION=0.14.18461+9ac233f2 |
| 163 | +ARG NEURONX_DISTRIBUTED_INFERENCE_VERSION=0.5.9230+dcf1e2da |
| 164 | +# Stage that checks out the private vLLM fork over SSH into /vllm; the repo stage copies it with COPY --from=vllm-clone. |
| 165 | +FROM base AS vllm-clone |
| 166 | + |
| 167 | +RUN mkdir -p /root/.ssh && \ |
| 168 | + echo "StrictHostKeyChecking no" >> /root/.ssh/config && \ |
| 169 | + ssh-keyscan -t rsa github.com >> /root/.ssh/known_hosts |
| 170 | + |
| 171 | +WORKDIR /vllm |
| 172 | +# The SSH private key is supplied as a BuildKit secret mount (id "ssh_key"). The remote must be the literal SSH address git@github.com:<org>/<repo>.git; an obfuscated "[email protected]" placeholder is not a valid git remote. |
| 173 | +RUN --mount=type=secret,id=ssh_key,target=/root/.ssh/id_ed25519,mode=0600 \ |
| 174 | + git clone -b release-2.25 git@github.com:aws-neuron/private-neuronx-vllm-staging.git . |
| 175 | +# Stage that installs unpinned Neuron packages and builds vLLM from the vllm-clone checkout. |
| 176 | +FROM base AS repo |
| 177 | + |
| 178 | +# Neuron components from the apt and pip repos, without version pins (latest available) |
| 179 | +RUN apt-get update \ |
| 180 | + && apt-get install --yes \ |
| 181 | + aws-neuronx-collectives \ |
| 182 | + aws-neuronx-runtime-lib \ |
| 183 | + aws-neuronx-tools \ |
| 184 | + && apt-get clean \ |
| 185 | + && rm -rf /tmp/tmp* \ |
| 186 | + && rm -rf /var/lib/apt/lists/* |
| 187 | + |
| 188 | +RUN ${PIP} install --no-cache-dir \ |
| 189 | + neuronx-cc \ |
| 190 | + neuronx_distributed \ |
| 191 | + neuronx_distributed_inference \ |
| 192 | + torch-neuronx \ |
| 193 | + && rm -rf ~/.cache/pip/* |
| 194 | + |
| 195 | +# vLLM source tree from the vllm-clone stage, installed in editable mode from /opt/vllm |
| 196 | +COPY --from=vllm-clone /vllm /opt/vllm |
| 197 | +WORKDIR /opt/vllm |
| 198 | + |
| 199 | +RUN ${PIP} install --no-cache-dir --requirement requirements/neuron.txt \ |
| 200 | + && VLLM_TARGET_DEVICE=neuron ${PIP} install --no-cache-dir --editable . |
| 201 | +# Stage that installs the Neuron packages at the versions given by the NEURONX_* build arguments. |
| 202 | +FROM base AS prod |
| 203 | + |
| 204 | +# Neuron system packages, each pinned to its *_VERSION build argument |
| 205 | +RUN apt-get update \ |
| 206 | + && apt-get install --yes \ |
| 207 | + aws-neuronx-collectives=${NEURONX_COLLECTIVES_LIB_VERSION} \ |
| 208 | + aws-neuronx-runtime-lib=${NEURONX_RUNTIME_LIB_VERSION} \ |
| 209 | + aws-neuronx-tools=${NEURONX_TOOLS_VERSION} \ |
| 210 | + && apt-get clean \ |
| 211 | + && rm -rf /tmp/tmp* \ |
| 212 | + && rm -rf /var/lib/apt/lists/* |
| 213 | + |
| 214 | +RUN ${PIP} install --no-cache-dir \ |
| 215 | + neuronx-cc==${NEURONX_CC_VERSION} \ |
| 216 | + neuronx_distributed==${NEURONX_DISTRIBUTED_VERSION} \ |
| 217 | + neuronx_distributed_inference==${NEURONX_DISTRIBUTED_INFERENCE_VERSION} \ |
| 218 | + torch-neuronx==${NEURONX_FRAMEWORK_VERSION} \ |
| 219 | + && rm -rf ~/.cache/pip/* |
| 220 | +# Install vLLM from source in editable mode, using the same /opt/vllm location as the repo stage. |
| 221 | +# An editable install only records a link to the checkout, so the source tree has to stay in the image: |
| 222 | +# cloning into /tmp and deleting the clone after "pip install -e ." leaves the vllm package unimportable. |
| 223 | +RUN git clone -b neuron-2.24-vllm-v0.7.2 https://github.com/aws-neuron/upstreaming-to-vllm.git /opt/vllm \ |
| 224 | + && cd /opt/vllm \ |
| 225 | + && ${PIP} install --no-cache-dir -r requirements/neuron.txt \ |
| 226 | + && SETUPTOOLS_SCM_PRETEND_VERSION="2.24.0.0" VLLM_TARGET_DEVICE="neuron" ${PIP} install --no-cache-dir -e . |
| 227 | + |
| 228 | +# Working directory for the prod image; the "cd" inside the RUN above does not carry over |
| 229 | +# to later instructions, so it is set explicitly here. |
| 230 | +WORKDIR ${APP_MOUNT}/vllm |
| 231 | +# Final image, built on whichever stage BUILD_STAGE names. |
| 232 | +FROM ${BUILD_STAGE} AS final |
| 233 | +EXPOSE 8080 |
| 234 | +EXPOSE 8081 |
| 235 | +# The entrypoint script receives CMD as its arguments; the health check probes /ping on port 8080. |
| 236 | +ENTRYPOINT ["python", "/usr/local/bin/vllm_entrypoint.py"] |
| 237 | +CMD ["/bin/bash"] |
| 238 | +HEALTHCHECK CMD curl -f http://localhost:8080/ping || exit 1 |
0 commit comments