
Commit 1cc5c68

Updated Dockerfiles for 2.24 release (#109)
Authored by neuron-containers-cikaenafwiaws-eddy

Updated Dockerfiles

Co-authored-by: kaenafwi <[email protected]>
Co-authored-by: Eddy Varela <[email protected]>

1 parent: c6d7bfa

File tree: 6 files changed, +1185 -0 lines changed


docker/common/vllm_entrypoint.py

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
import subprocess
import sys

subprocess.check_call(sys.argv[1:])
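The new entrypoint is a thin pass-through: it re-executes whatever command line follows it, and subprocess.check_call makes the wrapper exit non-zero if the wrapped command fails. A minimal usage sketch, assuming the script is installed as /usr/local/bin/vllm_entrypoint.py and that vLLM's OpenAI-compatible server is the workload (the install path, model location, and port are illustrative, not taken from this commit):

    # hypothetical invocation; everything after the script name is executed as-is
    python /usr/local/bin/vllm_entrypoint.py \
        python -m vllm.entrypoints.openai.api_server \
        --model /opt/model --port 8080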
Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
psutil
sentencepiece  # Required for LLaMA tokenizer.
numpy < 2.0.0
requests >= 2.26.0
tqdm
blake3
py-cpuinfo
transformers >= 4.48.2  # Required for Bamba model and Transformers backend.
tokenizers >= 0.19.1  # Required for Llama 3.
protobuf  # Required by LlamaTokenizer.
fastapi >= 0.107.0, < 0.113.0; python_version < '3.9'
fastapi >= 0.107.0, != 0.113.*, != 0.114.0; python_version >= '3.9'
aiohttp
openai >= 1.52.0  # Ensure modern openai package (ensure types module present and max_completion_tokens field support)
uvicorn[standard]
pydantic >= 2.9  # Required for fastapi >= 0.113.0
prometheus_client >= 0.18.0
pillow  # Required for image processing
prometheus-fastapi-instrumentator >= 7.0.0
tiktoken >= 0.6.0  # Required for DBRX tokenizer
lm-format-enforcer >= 0.10.9, < 0.11
outlines == 0.1.11
lark == 1.2.2
xgrammar >= 0.1.6; platform_machine == "x86_64"
typing_extensions >= 4.10
filelock >= 3.16.1  # need to contain https://github.com/tox-dev/filelock/pull/317
partial-json-parser  # used for parsing partial JSON outputs
pyzmq
msgspec
gguf == 0.10.0
importlib_metadata
mistral_common[opencv] >= 1.5.0
pyyaml
six>=1.16.0; python_version > '3.11'  # transitive dependency of pandas that needs to be the latest version for python 3.12
setuptools>=74.1.1; python_version > '3.11'  # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
einops  # Required for Qwen2-VL.
compressed-tensors == 0.9.1  # required for compressed-tensors
depyf==0.18.0  # required for profiling and debugging with compilation config
cloudpickle  # allows pickling lambda functions in model_executor/models/registry.py
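The pins above appear to mirror vLLM's common runtime requirements. A minimal sketch of installing them into the image's Python, assuming the list is saved as requirements.txt (the actual file name and path are not shown in this extract):

    # requirements.txt is a stand-in name for the pinned list above
    python3 -m pip install --no-cache-dir -r requirements.txt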
Lines changed: 276 additions & 0 deletions
@@ -0,0 +1,276 @@
ARG BUILD_STAGE=prod

FROM public.ecr.aws/docker/library/ubuntu:22.04 AS base

LABEL dlc_major_version="1"
LABEL maintainer="Amazon AI"

# This arg is required to stop docker build waiting for region configuration while installing tz data from ubuntu 22
ARG DEBIAN_FRONTEND=noninteractive
ARG PYTHON=python3.10
ARG PYTHON_VERSION=3.10.12
ARG PIP=pip3
ARG OMPI_VERSION=4.1.5

# Python won't try to write .pyc or .pyo files on the import of source modules
# Force stdin, stdout and stderr to be totally unbuffered. Good for logging
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING=UTF-8
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/aws/neuron/lib"
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/amazon/efa/lib"
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/amazon/efa/lib64"
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/amazon/openmpi/lib64"
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib"
ENV PATH="/opt/aws/neuron/bin:${PATH}"

RUN apt-get update \
    && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    emacs \
    git \
    gnupg2 \
    gpg-agent \
    jq \
    libopencv-dev \
    libglib2.0-0 \
    libgl1-mesa-glx \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libssl-dev \
    libsqlite3-dev \
    libgdbm-dev \
    libc6-dev \
    libbz2-dev \
    libncurses-dev \
    libffi-dev \
    libcap-dev \
    libhwloc-dev \
    openjdk-8-jdk-headless \
    openjdk-8-jdk \
    openjdk-8-jre \
    openjdk-11-jdk \
    openssl \
    software-properties-common \
    tk-dev \
    unzip \
    wget \
    vim \
    zlib1g-dev \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/tmp* \
    && apt-get clean

# Install Open MPI
RUN mkdir -p /tmp/openmpi \
    && cd /tmp/openmpi \
    && wget --quiet https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OMPI_VERSION}.tar.gz \
    && tar zxf openmpi-${OMPI_VERSION}.tar.gz \
    && cd openmpi-${OMPI_VERSION} \
    && ./configure --enable-orterun-prefix-by-default \
    && make -j $(nproc) all \
    && make install \
    && ldconfig \
    && rm -rf /tmp/openmpi

# Install packages and configure SSH for MPI operator in k8s
RUN apt-get update \
    && apt-get install -y openmpi-bin openssh-server \
    && mkdir -p /var/run/sshd \
    && echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config \
    && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config \
    && sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/tmp* \
    && apt-get clean

# Install Python
RUN wget -q https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \
    && tar -xzf Python-$PYTHON_VERSION.tgz \
    && cd Python-$PYTHON_VERSION \
    && ./configure --enable-shared --prefix=/usr/local \
    && make -j $(nproc) && make install \
    && cd .. && rm -rf ../Python-$PYTHON_VERSION* \
    && ln -s /usr/local/bin/pip3 /usr/bin/pip \
    && ln -s /usr/local/bin/$PYTHON /usr/local/bin/python \
    && ${PIP} --no-cache-dir install --upgrade \
    "awscli<2" \
    pip \
    requests \
    setuptools \
    && rm -rf ~/.cache/pip/*

# Install EFA
RUN apt-get update \
    && cd $HOME \
    && curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz \
    && wget https://efa-installer.amazonaws.com/aws-efa-installer.key && gpg --import aws-efa-installer.key \
    && cat aws-efa-installer.key | gpg --fingerprint \
    && wget https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz.sig && gpg --verify ./aws-efa-installer-latest.tar.gz.sig \
    && tar -xf aws-efa-installer-latest.tar.gz \
    && cd aws-efa-installer \
    && ./efa_installer.sh -y -g --skip-kmod --skip-limit-conf --no-verify \
    && cd $HOME \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/tmp* \
    && apt-get clean

WORKDIR /

# The ENV variables declared below are changed in the previous section
# Grouping these ENV variables in the first section causes
# ompi_info to fail. This is only observed in CPU containers
ENV PATH="$PATH:/home/.openmpi/bin"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/"
RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value

RUN mkdir -p /etc/pki/tls/certs && cp /etc/ssl/certs/ca-certificates.crt /etc/pki/tls/certs/ca-bundle.crt

# Copy workaround script for incorrect hostname
COPY changehostname.c /
COPY --chmod=755 start_with_right_hostname.sh deep_learning_container.py /usr/local/bin/

RUN HOME_DIR=/root \
    && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
    && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
    && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
    && chmod +x /usr/local/bin/testOSSCompliance \
    && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
    && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
    && rm -rf ${HOME_DIR}/oss_compliance* \
    && rm -rf /tmp/tmp*

# Setting up APT and PIP repo for neuron artifacts
ARG NEURON_APT_REPO=apt.repos.neuron.amazonaws.com
ARG NEURON_APT_REPO_KEY
ARG NEURON_PIP_REPO=pip.repos.neuron.amazonaws.com
ARG NEURON_PIP_REPO_KEY
RUN mkdir -p /etc/apt/keyrings \
    && APT_REPO_PREFIX=$([ -n "${NEURON_APT_REPO_KEY}" ] && echo "${NEURON_APT_REPO_KEY}@" || echo "") \
    && echo "deb [signed-by=/etc/apt/keyrings/neuron.gpg] https://${APT_REPO_PREFIX}${NEURON_APT_REPO} focal main" > /etc/apt/sources.list.d/neuron.list \
    && curl $([ -n "${NEURON_APT_REPO_KEY}" ] && echo "-u ${NEURON_APT_REPO_KEY}") -sSL "https://${NEURON_APT_REPO}/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB" | gpg --dearmor > /etc/apt/keyrings/neuron.gpg \
    && PIP_REPO_URL=$([ -n "${NEURON_PIP_REPO_KEY}" ] && echo "https://${NEURON_PIP_REPO_KEY}@${NEURON_PIP_REPO}" || echo "https://${NEURON_PIP_REPO}") \
    && ${PIP} config set global.extra-index-url "${PIP_REPO_URL}"

# Neuron SDK components version numbers
ARG NEURON_ARTIFACT_PATH=/root/neuron_artifacts
ARG IGNORE_MISSING_NEURON_COMPONENTS=false
RUN IGNORE_MISSING_NEURON_COMPONENTS=$(echo ${IGNORE_MISSING_NEURON_COMPONENTS} | tr '[:upper:]' '[:lower:]')

ARG NEURONX_RUNTIME_LIB_VERSION=2.26.42.0-2ff3b5c7d
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.26.43.0-47cc904ea
ARG NEURONX_TOOLS_VERSION=2.24.54.0

ARG NEURONX_CC_VERSION=2.19.8089.0
ARG NEURONX_JAX_TRAINING_VERSION=0.6.0.1.0.1296+1f770067

FROM base AS dev

RUN --mount=type=bind,source=apt,target=${NEURON_ARTIFACT_PATH}/apt \
    install_apt_package() { \
        pkg_name=$1; \
        version_arg=$2; \
        if [ -f "${NEURON_ARTIFACT_PATH}/apt/${version_arg}" ]; then \
            apt-get install -y ${NEURON_ARTIFACT_PATH}/apt/${version_arg}; \
        elif [ "${IGNORE_MISSING_NEURON_COMPONENTS}" = "false" ]; then \
            apt-get install -y ${pkg_name}=${version_arg}; \
        else \
            echo "Ignoring package ${pkg_name}"; \
        fi; \
    } \
    && apt-get update \
    && install_apt_package "aws-neuronx-collectives" "${NEURONX_COLLECTIVES_LIB_VERSION}" \
    && install_apt_package "aws-neuronx-runtime-lib" "${NEURONX_RUNTIME_LIB_VERSION}" \
    && install_apt_package "aws-neuronx-tools" "${NEURONX_TOOLS_VERSION}" \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/tmp* \
    && apt-get clean

RUN --mount=type=bind,source=pip,target=${NEURON_ARTIFACT_PATH}/pip \
    install_pip_package() { \
        packages=""; \
        flags=""; \
        while [ "$#" -gt 0 ]; do \
            pkg_name=$(echo $1 | cut -d: -f1); \
            version_arg=$(echo $1 | cut -d: -f2); \
            extra_flags=$(echo $1 | cut -d: -f3); \
            if [ -f "${NEURON_ARTIFACT_PATH}/pip/${version_arg}" ]; then \
                packages="${packages} ${NEURON_ARTIFACT_PATH}/pip/${version_arg}"; \
            else \
                if [ "${IGNORE_MISSING_NEURON_COMPONENTS}" = "false" ]; then \
                    packages="${packages} ${pkg_name}==${version_arg}"; \
                else \
                    echo "Ignoring package ${pkg_name}"; \
                fi; \
            fi; \
            # Store unique flags
            if [ ! -z "${extra_flags}" ]; then \
                for flag in $(echo "${extra_flags}" | tr ' ' '\n'); do \
                    case " ${flags} " in \
                        *" ${flag} "*) ;; \
                        *) flags="${flags} ${flag}" ;; \
                    esac \
                done; \
            fi; \
            shift; \
        done; \
        if [ ! -z "${packages}" ]; then \
            echo "Installing packages: ${packages} with flags ${flags}"; \
            ${PIP} install --no-cache-dir --force-reinstall \
                --extra-index-url="file:///${NEURON_ARTIFACT_PATH}/pip" \
                ${packages} ${flags}; \
        fi; \
    } \
    && install_pip_package "neuronx-cc:${NEURONX_CC_VERSION}:" "jax-neuronx:${NEURONX_JAX_TRAINING_VERSION}:" \
    && rm -rf ~/.cache/pip/*

FROM base AS repo

# Install Neuron components from the apt and pip repos
RUN apt-get update \
    && apt-get install -y \
    aws-neuronx-tools \
    aws-neuronx-collectives \
    aws-neuronx-runtime-lib \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/tmp* \
    && apt-get clean \
    && ${PIP} install --no-cache-dir --force-reinstall \
    neuronx-cc \
    jax-neuronx \
    && rm -rf ~/.cache/pip/*


FROM base AS prod

# Install Neuron components
# Install Neuron Driver, Runtime and Tools
RUN apt-get update \
    && apt-get install -y \
    aws-neuronx-tools=$NEURONX_TOOLS_VERSION \
    aws-neuronx-collectives=$NEURONX_COLLECTIVES_LIB_VERSION \
    aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/tmp* \
    && apt-get clean

# Install JAX & Neuron CC
RUN ${PIP} install --no-cache-dir --force-reinstall \
    neuronx-cc==$NEURONX_CC_VERSION \
    jax-neuronx==$NEURONX_JAX_TRAINING_VERSION \
    && rm -rf ~/.cache/pip/*

FROM ${BUILD_STAGE} AS final

# Starts framework
ENTRYPOINT ["bash", "-m", "start_with_right_hostname.sh"]
CMD ["/bin/bash"]

HEALTHCHECK CMD curl --fail http://localhost:8080/ping || exit 1
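Because the final stage is FROM ${BUILD_STAGE}, the same Dockerfile can produce the prod image (version-pinned packages from the public Neuron apt/pip repos), the repo image (latest versions from those repos), or the dev image (locally staged artifacts bind-mounted from apt/ and pip/ directories in the build context). A hedged sketch of how builds might be invoked; the image tags are illustrative, and the dev build assumes BuildKit plus apt/ and pip/ directories and the copied helper scripts in the build context:

    # prod (default stage): pinned Neuron packages from the apt/pip repos
    docker build -t jax-neuronx:prod .

    # dev stage: prefer locally staged artifacts, ignore any that are missing
    DOCKER_BUILDKIT=1 docker build \
        --build-arg BUILD_STAGE=dev \
        --build-arg IGNORE_MISSING_NEURON_COMPONENTS=true \
        -t jax-neuronx:dev .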

Comments (0)