Skip to content

Commit 14b1f0d

Browse files
neuron-containers-ci and kaenafwi authored
Updated Dockerfiles (#150)
Updated Dockerfiles

Co-authored-by: kaenafwi <[email protected]>
1 parent bebd691 commit 14b1f0d

File tree

4 files changed

+42
-14
lines changed

4 files changed

+42
-14
lines changed

jax/training/0.6/Dockerfile.neuronx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,7 @@ RUN mkdir -p /etc/apt/keyrings \
163163
ARG NEURONX_RUNTIME_LIB_VERSION=2.28.23.0-dd5879008
164164
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.28.27.0-bc30ece58
165165
ARG NEURONX_TOOLS_VERSION=2.26.14.0
166-
ARG NEURONX_CC_VERSION=2.21.18209.0+043b1bf7
166+
ARG NEURONX_CC_VERSION=2.21.33363.0+82129205
167167
ARG NEURONX_JAX_TRAINING_VERSION=0.6.2.1.0.6446+d8c0de77
168168

169169
FROM base AS repo

pytorch/inference/2.8.0/Dockerfile.neuronx

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,21 @@ RUN /opt/conda/bin/mamba install -c conda-forge \
8686
ipython \
8787
&& rm -rf ~/.cache/pip/*
8888

89+
# Install EFA
90+
RUN apt-get update \
91+
&& cd $HOME \
92+
&& curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz \
93+
&& wget https://efa-installer.amazonaws.com/aws-efa-installer.key && gpg --import aws-efa-installer.key \
94+
&& cat aws-efa-installer.key | gpg --fingerprint \
95+
&& wget https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz.sig && gpg --verify ./aws-efa-installer-latest.tar.gz.sig \
96+
&& tar -xf aws-efa-installer-latest.tar.gz \
97+
&& cd aws-efa-installer \
98+
&& ./efa_installer.sh -y -g --skip-kmod --skip-limit-conf --no-verify \
99+
&& cd $HOME \
100+
&& rm -rf /var/lib/apt/lists/* \
101+
&& rm -rf /tmp/tmp* \
102+
&& apt-get clean
103+
89104
RUN ${PIP} install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
90105
&& ${PIP} install --no-cache-dir -U \
91106
opencv-python>=4.8.1.78 \
@@ -151,8 +166,8 @@ ARG NEURONX_COLLECTIVES_LIB_VERSION=2.28.27.0-bc30ece58
151166
ARG NEURONX_RUNTIME_LIB_VERSION=2.28.23.0-dd5879008
152167
ARG NEURONX_TOOLS_VERSION=2.26.14.0
153168

154-
ARG NEURONX_CC_VERSION=2.21.18209.0+043b1bf7
155-
ARG NEURONX_FRAMEWORK_VERSION=2.8.0.2.10.13553+1e4dd6ca
169+
ARG NEURONX_CC_VERSION=2.21.33363.0+82129205
170+
ARG NEURONX_FRAMEWORK_VERSION=2.8.0.2.10.16998+e9bf8a50
156171
ARG NEURONX_DISTRIBUTED_VERSION=0.15.22404+1f27bddf
157172
ARG NEURONX_DISTRIBUTED_INFERENCE_VERSION=0.6.10598+a59fdc00
158173

pytorch/training/2.8.0/Dockerfile.neuronx

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -102,8 +102,7 @@ RUN wget -q https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VER
102102
&& cd .. && rm -rf ../Python-$PYTHON_VERSION* \
103103
&& ln -s /usr/local/bin/pip3 /usr/bin/pip \
104104
&& ln -s /usr/local/bin/$PYTHON /usr/local/bin/python \
105-
&& ${PIP} --no-cache-dir install --upgrade \
106-
pip \
105+
&& ${PIP} --no-cache-dir install --upgrade pip \
107106
&& rm -rf ~/.cache/pip/*
108107

109108
WORKDIR /
@@ -174,7 +173,7 @@ RUN apt-get update \
174173
# torchvision needed for MLP. since it depends on torch and torch neuron/torch
175174
# is already installed install it with nodeps
176175
RUN pip3 install --no-cache-dir --no-deps -U \
177-
torchvision \
176+
torchvision==0.23.0 \
178177
# Needed for running bert training scripts
179178
&& pip3 install --no-cache-dir -U \
180179
graphviz \
@@ -218,8 +217,8 @@ RUN mkdir -p /etc/apt/keyrings \
218217
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.28.27.0-bc30ece58
219218
ARG NEURONX_RUNTIME_LIB_VERSION=2.28.23.0-dd5879008
220219
ARG NEURONX_TOOLS_VERSION=2.26.14.0
221-
ARG NEURONX_FRAMEWORK_VERSION=2.8.0.2.10.13553+1e4dd6ca
222-
ARG NEURONX_CC_VERSION=2.21.18209.0+043b1bf7
220+
ARG NEURONX_FRAMEWORK_VERSION=2.8.0.2.10.16998+e9bf8a50
221+
ARG NEURONX_CC_VERSION=2.21.33363.0+82129205
223222
ARG NEURONX_DISTRIBUTED_VERSION=0.15.22404+1f27bddf
224223
ARG NEURONX_DISTRIBUTED_TRAINING_VERSION=1.6.0
225224

@@ -269,9 +268,8 @@ RUN git clone https://github.com/NVIDIA/apex.git /root/apex \
269268
&& cd /root/apex \
270269
&& git checkout 23.05 \
271270
&& cp /root/apex_setup.py setup.py \
272-
&& python3 setup.py bdist_wheel \
273271
# Install dependencies from requirements and extras for SageMaker usecase
274-
&& ${PIP} install --no-cache-dir -r /root/nxdt_requirements.txt /root/apex/dist/apex-0.1-py3-none-any.whl \
272+
&& ${PIP} install --no-cache-dir --no-build-isolation -r /root/nxdt_requirements.txt /root/apex \
275273
&& /root/nxdt_install_setup.sh \
276274
&& ${PIP} install --force-reinstall "torch==2.8.0" \
277275
&& rm -rf ~/.cache/pip/*

vllm/inference/0.9.1/Dockerfile.neuronx

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,21 @@ RUN /opt/conda/bin/mamba install -c conda-forge \
8989
ipython \
9090
&& rm -rf ~/.cache/pip/*
9191

92+
# Install EFA
93+
RUN apt-get update \
94+
&& cd $HOME \
95+
&& curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz \
96+
&& wget https://efa-installer.amazonaws.com/aws-efa-installer.key && gpg --import aws-efa-installer.key \
97+
&& cat aws-efa-installer.key | gpg --fingerprint \
98+
&& wget https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz.sig && gpg --verify ./aws-efa-installer-latest.tar.gz.sig \
99+
&& tar -xf aws-efa-installer-latest.tar.gz \
100+
&& cd aws-efa-installer \
101+
&& ./efa_installer.sh -y -g --skip-kmod --skip-limit-conf --no-verify \
102+
&& cd $HOME \
103+
&& rm -rf /var/lib/apt/lists/* \
104+
&& rm -rf /tmp/tmp* \
105+
&& apt-get clean
106+
92107
COPY --chmod=755 vllm_entrypoint.py neuron-monitor.sh deep_learning_container.py /usr/local/bin/
93108

94109
### Mount Point ###
@@ -145,8 +160,8 @@ RUN mkdir -p /etc/apt/keyrings \
145160
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.28.27.0-bc30ece58
146161
ARG NEURONX_RUNTIME_LIB_VERSION=2.28.23.0-dd5879008
147162
ARG NEURONX_TOOLS_VERSION=2.26.14.0
148-
ARG NEURONX_CC_VERSION=2.21.18209.0+043b1bf7
149-
ARG NEURONX_FRAMEWORK_VERSION=2.8.0.2.10.13553+1e4dd6ca
163+
ARG NEURONX_CC_VERSION=2.21.33363.0+82129205
164+
ARG NEURONX_FRAMEWORK_VERSION=2.8.0.2.10.16998+e9bf8a50
150165
ARG NEURONX_DISTRIBUTED_VERSION=0.15.22404+1f27bddf
151166
ARG NEURONX_DISTRIBUTED_INFERENCE_VERSION=0.6.10598+a59fdc00
152167

@@ -159,7 +174,7 @@ RUN mkdir -p /root/.ssh && \
159174
WORKDIR /vllm
160175

161176
RUN --mount=type=secret,id=ssh_key,target=/root/.ssh/id_ed25519,mode=0600 \
162-
git clone -b 2.26.0 [email protected]:aws-neuron/private-neuronx-vllm-staging.git .
177+
git clone -b 2.26.1 [email protected]:aws-neuron/private-neuronx-vllm-staging.git .
163178

164179
FROM base AS repo
165180

@@ -207,7 +222,7 @@ RUN ${PIP} install --no-cache-dir \
207222
&& rm -rf ~/.cache/pip/*
208223

209224
# Install VLLM from source
210-
RUN git clone -b 2.26.0 https://github.com/aws-neuron/upstreaming-to-vllm.git /opt/vllm
225+
RUN git clone -b 2.26.1 https://github.com/aws-neuron/upstreaming-to-vllm.git /opt/vllm
211226
WORKDIR /opt/vllm
212227

213228
RUN ${PIP} install --no-cache-dir --no-deps -r requirements/neuron.txt \

0 commit comments

Comments
 (0)