Skip to content

Commit c61764f

Browse files
neuron-containers-ci and Fu Qiao authored
Updated Dockerfiles (#68)
Updated Dockerfiles --------- Co-authored-by: Fu Qiao <[email protected]>
1 parent 5669d22 commit c61764f

File tree

4 files changed

+132
-131
lines changed

4 files changed

+132
-131
lines changed

docker/jax/training/0.4/Dockerfile.neuronx renamed to docker/jax/training/0.5/Dockerfile.neuronx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ LABEL dlc_major_version="1"
44
LABEL maintainer="Amazon AI"
55

66
# Neuron SDK components version numbers
7-
ARG NEURONX_RUNTIME_LIB_VERSION=2.23.112.0-9b5179492
8-
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.23.135.0-3e70920f2
9-
ARG NEURONX_TOOLS_VERSION=2.20.204.0
10-
ARG NEURONX_CC_VERSION=2.16.372.0
11-
ARG NEURONX_JAX_TRAINING_VERSION=0.1.2
7+
ARG NEURONX_RUNTIME_LIB_VERSION=2.24.53.0-f239092cc
8+
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.24.59.0-838c7fc8b
9+
ARG NEURONX_TOOLS_VERSION=2.22.61.0
10+
ARG NEURONX_CC_VERSION=2.17.194.0
11+
ARG NEURONX_JAX_TRAINING_VERSION=0.1.3
1212

1313
ARG PYTHON=python3.10
1414
ARG PYTHON_VERSION=3.10.12

docker/pytorch/inference/2.5.1/Dockerfile.neuronx

Lines changed: 48 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,11 @@ LABEL dlc_major_version="1"
44
LABEL maintainer="Amazon AI"
55
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
66

7-
# Neuron SDK components version numbers
8-
ARG NEURONX_CC_VERSION=2.16.372.0
9-
ARG NEURONX_FRAMEWORK_VERSION=2.5.1.2.4.0
10-
ARG NEURONX_TRANSFORMERS_VERSION=0.13.380
11-
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.23.135.0-3e70920f2
12-
ARG NEURONX_RUNTIME_LIB_VERSION=2.23.112.0-9b5179492
13-
ARG NEURONX_TOOLS_VERSION=2.20.204.0
14-
ARG NEURONX_DISTRIBUTED_VERSION=0.10.1
15-
ARG NEURONX_DISTRIBUTED_INFERENCE_VERSION=0.1.1
16-
7+
ARG PIP=pip3
178
ARG PYTHON=python3.10
189
ARG PYTHON_VERSION=3.10.12
1910
ARG TORCHSERVE_VERSION=0.11.0
20-
ARG SM_TOOLKIT_VERSION=2.0.21
11+
ARG SM_TOOLKIT_VERSION=2.0.25
2112
ARG MAMBA_VERSION=23.1.0-4
2213

2314
# See http://bugs.python.org/issue19846
@@ -37,7 +28,6 @@ RUN apt-get update \
3728
curl \
3829
emacs \
3930
git \
40-
gnupg2 \
4131
gpg-agent \
4232
jq \
4333
libgl1-mesa-glx \
@@ -56,18 +46,6 @@ RUN apt-get update \
5646
&& rm -rf /tmp/tmp* \
5747
&& apt-get clean
5848

59-
RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list
60-
RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -
61-
62-
RUN apt-get update \
63-
&& apt-get install -y \
64-
aws-neuronx-tools=$NEURONX_TOOLS_VERSION \
65-
aws-neuronx-collectives=$NEURONX_COLLECTIVES_LIB_VERSION \
66-
aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION \
67-
&& rm -rf /var/lib/apt/lists/* \
68-
&& rm -rf /tmp/tmp* \
69-
&& apt-get clean
70-
7149
# https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files
7250
RUN keytool -importkeystore -srckeystore /etc/ssl/certs/java/cacerts -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -srcstorepass changeit -deststorepass changeit -noprompt; \
7351
mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \
@@ -100,9 +78,10 @@ RUN conda install -c conda-forge \
10078
&& ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
10179
&& pip install packaging \
10280
enum-compat \
103-
ipython
81+
ipython \
82+
&& rm -rf ~/.cache/pip/*
10483

105-
RUN pip install --no-cache-dir -U \
84+
# Version specifiers containing '<' or '>' must be quoted: in a shell-form RUN an
# unquoted '>' is a redirection, which would drop the constraint and write pip's
# output to a file named "=4.8.1.78".
RUN ${PIP} install --no-cache-dir -U \
10685
"opencv-python>=4.8.1.78" \
10786
"numpy<1.24,>1.21" \
10887
"scipy>=1.8.0" \
@@ -111,43 +90,30 @@ RUN pip install --no-cache-dir -U \
11190
"awscli<2" \
11291
pandas==1.* \
11392
boto3 \
114-
cryptography
115-
116-
RUN pip install -U --extra-index-url https://pip.repos.neuron.amazonaws.com \
117-
neuronx-cc==$NEURONX_CC_VERSION \
118-
torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
119-
transformers-neuronx==$NEURONX_TRANSFORMERS_VERSION \
120-
&& pip install -U "protobuf>=3.18.3,<4" \
93+
cryptography \
94+
"protobuf>=3.18.3,<4" \
12195
"transformers==4.45.*" \
12296
torchserve==${TORCHSERVE_VERSION} \
12397
torch-model-archiver==${TORCHSERVE_VERSION} \
124-
&& pip install --no-deps --no-cache-dir -U torchvision==0.20.* \
125-
&& pip install --no-deps -U --extra-index-url https://pip.repos.neuron.amazonaws.com neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION \
126-
&& pip install -U --extra-index-url https://pip.repos.neuron.amazonaws.com neuronx_distributed_inference==$NEURONX_DISTRIBUTED_INFERENCE_VERSION
98+
&& ${PIP} install --no-deps --no-cache-dir -U torchvision==0.20.* \
99+
&& rm -rf ~/.cache/pip/*
127100

128101
RUN useradd -m model-server \
129102
&& mkdir -p /home/model-server/tmp /opt/ml/model \
130103
&& chown -R model-server /home/model-server /opt/ml/model
131104

132-
COPY neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
133-
COPY neuron-monitor.sh /usr/local/bin/neuron-monitor.sh
134-
COPY torchserve-neuron.sh /usr/local/bin/entrypoint.sh
105+
COPY --chmod=755 neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
106+
COPY --chmod=755 neuron-monitor.sh deep_learning_container.py /usr/local/bin/
107+
COPY --chmod=755 torchserve-neuron.sh /usr/local/bin/entrypoint.sh
135108
COPY config.properties /home/model-server
136109

137-
RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
138-
&& chmod +x /usr/local/bin/neuron-monitor.sh \
139-
&& chmod +x /usr/local/bin/entrypoint.sh
140110

141-
ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py
142-
143-
RUN chmod +x /usr/local/bin/deep_learning_container.py
144-
145-
RUN pip install --no-cache-dir "sagemaker-pytorch-inference==${SM_TOOLKIT_VERSION}"
146-
147-
# patch default_pytorch_inference_handler.py to import torch_neuronx
148-
RUN DEST_DIR=$(python -c "import os.path, sagemaker_pytorch_serving_container; print(os.path.dirname(sagemaker_pytorch_serving_container.__file__))") \
111+
RUN ${PIP} install --no-cache-dir "sagemaker-pytorch-inference==${SM_TOOLKIT_VERSION}" \
112+
# patch default_pytorch_inference_handler.py to import torch_neuronx
113+
&& DEST_DIR=$(python -c "import os.path, sagemaker_pytorch_serving_container; print(os.path.dirname(sagemaker_pytorch_serving_container.__file__))") \
149114
&& DEST_FILE=${DEST_DIR}/default_pytorch_inference_handler.py \
150-
&& sed -i "s/import torch/import torch, torch_neuronx/" ${DEST_FILE}
115+
&& sed -i "s/import torch/import torch, torch_neuronx/" ${DEST_FILE} \
116+
&& rm -rf ~/.cache/pip/*
151117

152118
RUN HOME_DIR=/root \
153119
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
@@ -162,9 +128,39 @@ RUN HOME_DIR=/root \
162128

163129
RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.5/license.txt
164130

131+
# Neuron SDK pre-release packages
132+
ARG NEURON_ARTIFACT_PATH=/root/neuron_artifacts
133+
ARG NEURONX_RUNTIME_LIB_VERSION=2.24.53.0-f239092cc
134+
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.24.59.0-838c7fc8b
135+
ARG NEURONX_TOOLS_VERSION=2.22.61.0
136+
137+
RUN --mount=type=bind,source=apt,target=${NEURON_ARTIFACT_PATH}/apt \
138+
apt-get install -y \
139+
${NEURON_ARTIFACT_PATH}/apt/${NEURONX_TOOLS_VERSION} \
140+
${NEURON_ARTIFACT_PATH}/apt/${NEURONX_COLLECTIVES_LIB_VERSION} \
141+
${NEURON_ARTIFACT_PATH}/apt/${NEURONX_RUNTIME_LIB_VERSION} \
142+
&& rm -rf /var/lib/apt/lists/* \
143+
&& rm -rf /tmp/tmp* \
144+
&& apt-get clean
145+
146+
ARG NEURONX_FRAMEWORK_VERSION=2.5.1.2.6.0
147+
ARG NEURONX_TRANSFORMERS_VERSION=0.13.470
148+
ARG NEURONX_CC_VERSION=2.17.194.0
149+
ARG NEURONX_DISTRIBUTED_VERSION=0.11.0
150+
ARG NEURONX_DISTRIBUTED_INFERENCE_VERSION=0.2.0
151+
152+
# Install the Neuron pip packages from the bind-mounted artifact directory.
# --find-links takes a location argument, so it must be followed by the artifact
# directory; otherwise the next token (-U) is consumed as the location and the
# upgrade flag is silently lost.
RUN --mount=type=bind,source=pip,target=${NEURON_ARTIFACT_PATH}/pip \
153+
${PIP} install --no-cache-dir --find-links ${NEURON_ARTIFACT_PATH}/pip \
154+
${NEURON_ARTIFACT_PATH}/pip/${NEURONX_CC_VERSION} \
155+
${NEURON_ARTIFACT_PATH}/pip/${NEURONX_FRAMEWORK_VERSION} \
156+
${NEURON_ARTIFACT_PATH}/pip/${NEURONX_TRANSFORMERS_VERSION} \
157+
&& ${PIP} install --no-deps --find-links ${NEURON_ARTIFACT_PATH}/pip -U ${NEURON_ARTIFACT_PATH}/pip/${NEURONX_DISTRIBUTED_VERSION} \
158+
&& ${PIP} install --no-deps --find-links ${NEURON_ARTIFACT_PATH}/pip -U ${NEURON_ARTIFACT_PATH}/pip/${NEURONX_DISTRIBUTED_INFERENCE_VERSION} \
159+
&& rm -rf ~/.cache/pip/*
160+
165161
EXPOSE 8080 8081
166162

167163
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
168164
CMD ["/usr/local/bin/entrypoint.sh"]
169165

170-
HEALTHCHECK CMD curl --fail http://localhost:8080/ping || exit 1
166+
HEALTHCHECK CMD curl --fail http://localhost:8080/ping || exit 1

0 commit comments

Comments
 (0)