@@ -4,20 +4,11 @@ LABEL dlc_major_version="1"
44LABEL maintainer="Amazon AI"
55LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
66
7- # Neuron SDK components version numbers
8- ARG NEURONX_CC_VERSION=2.16.372.0
9- ARG NEURONX_FRAMEWORK_VERSION=2.5.1.2.4.0
10- ARG NEURONX_TRANSFORMERS_VERSION=0.13.380
11- ARG NEURONX_COLLECTIVES_LIB_VERSION=2.23.135.0-3e70920f2
12- ARG NEURONX_RUNTIME_LIB_VERSION=2.23.112.0-9b5179492
13- ARG NEURONX_TOOLS_VERSION=2.20.204.0
14- ARG NEURONX_DISTRIBUTED_VERSION=0.10.1
15- ARG NEURONX_DISTRIBUTED_INFERENCE_VERSION=0.1.1
16-
7+ ARG PIP=pip3
178ARG PYTHON=python3.10
189ARG PYTHON_VERSION=3.10.12
1910ARG TORCHSERVE_VERSION=0.11.0
20- ARG SM_TOOLKIT_VERSION=2.0.21
11+ ARG SM_TOOLKIT_VERSION=2.0.25
2112ARG MAMBA_VERSION=23.1.0-4
2213
2314# See http://bugs.python.org/issue19846
@@ -37,7 +28,6 @@ RUN apt-get update \
3728 curl \
3829 emacs \
3930 git \
40- gnupg2 \
4131 gpg-agent \
4232 jq \
4333 libgl1-mesa-glx \
@@ -56,18 +46,6 @@ RUN apt-get update \
5646 && rm -rf /tmp/tmp* \
5747 && apt-get clean
5848
59- RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list
60- RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -
61-
62- RUN apt-get update \
63- && apt-get install -y \
64- aws-neuronx-tools=$NEURONX_TOOLS_VERSION \
65- aws-neuronx-collectives=$NEURONX_COLLECTIVES_LIB_VERSION \
66- aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION \
67- && rm -rf /var/lib/apt/lists/* \
68- && rm -rf /tmp/tmp* \
69- && apt-get clean
70-
7149# https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files
7250RUN keytool -importkeystore -srckeystore /etc/ssl/certs/java/cacerts -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -srcstorepass changeit -deststorepass changeit -noprompt; \
7351 mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \
@@ -100,9 +78,10 @@ RUN conda install -c conda-forge \
10078 && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
10179 && pip install packaging \
10280 enum-compat \
103- ipython
81+ ipython \
82+ && rm -rf ~/.cache/pip/*
10483
105- RUN pip install --no-cache-dir -U \
84+ RUN ${PIP} install --no-cache-dir -U \
10685 "opencv-python>=4.8.1.78" \
10786 "numpy<1.24,>1.21" \
10887 "scipy>=1.8.0" \
@@ -111,43 +90,30 @@ RUN pip install --no-cache-dir -U \
11190 "awscli<2" \
11291 "pandas==1.*" \
11392 boto3 \
114- cryptography
115-
116- RUN pip install -U --extra-index-url https://pip.repos.neuron.amazonaws.com \
117- neuronx-cc==$NEURONX_CC_VERSION \
118- torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
119- transformers-neuronx==$NEURONX_TRANSFORMERS_VERSION \
120- && pip install -U "protobuf>=3.18.3,<4" \
93+ cryptography \
94+ "protobuf>=3.18.3,<4" \
12195 "transformers==4.45.*" \
12296 torchserve==${TORCHSERVE_VERSION} \
12397 torch-model-archiver==${TORCHSERVE_VERSION} \
124- && pip install --no-deps --no-cache-dir -U torchvision==0.20.* \
125- && pip install --no-deps -U --extra-index-url https://pip.repos.neuron.amazonaws.com neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION \
126- && pip install -U --extra-index-url https://pip.repos.neuron.amazonaws.com neuronx_distributed_inference==$NEURONX_DISTRIBUTED_INFERENCE_VERSION
98+ && ${PIP} install --no-deps --no-cache-dir -U "torchvision==0.20.*" \
99+ && rm -rf ~/.cache/pip/*
127100
128101RUN useradd -m model-server \
129102 && mkdir -p /home/model-server/tmp /opt/ml/model \
130103 && chown -R model-server /home/model-server /opt/ml/model
131104
132- COPY neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
133- COPY neuron-monitor.sh /usr/local/bin/neuron-monitor.sh
134- COPY torchserve-neuron.sh /usr/local/bin/entrypoint.sh
105+ COPY --chmod=755 neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
106+ COPY --chmod=755 neuron-monitor.sh deep_learning_container.py /usr/local/bin/
107+ COPY --chmod=755 torchserve-neuron.sh /usr/local/bin/entrypoint.sh
135108COPY config.properties /home/model-server
136109
137- RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
138- && chmod +x /usr/local/bin/neuron-monitor.sh \
139- && chmod +x /usr/local/bin/entrypoint.sh
140110
141- ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py
142-
143- RUN chmod +x /usr/local/bin/deep_learning_container.py
144-
145- RUN pip install --no-cache-dir "sagemaker-pytorch-inference==${SM_TOOLKIT_VERSION}"
146-
147- # patch default_pytorch_inference_handler.py to import torch_neuronx
148- RUN DEST_DIR=$(python -c "import os.path, sagemaker_pytorch_serving_container; print(os.path.dirname(sagemaker_pytorch_serving_container.__file__))") \
111+ RUN ${PIP} install --no-cache-dir "sagemaker-pytorch-inference==${SM_TOOLKIT_VERSION}" \
112+ # patch default_pytorch_inference_handler.py to import torch_neuronx
113+ && DEST_DIR=$(python -c "import os.path, sagemaker_pytorch_serving_container; print(os.path.dirname(sagemaker_pytorch_serving_container.__file__))") \
149114 && DEST_FILE=${DEST_DIR}/default_pytorch_inference_handler.py \
150- && sed -i "s/import torch/import torch, torch_neuronx/" ${DEST_FILE}
115+ && sed -i "s/import torch/import torch, torch_neuronx/" ${DEST_FILE} \
116+ && rm -rf ~/.cache/pip/*
151117
152118RUN HOME_DIR=/root \
153119 && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
@@ -162,9 +128,39 @@ RUN HOME_DIR=/root \
162128
163129RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.5/license.txt
164130
131+ # Neuron SDK pre-release packages: installed from local artifact files bind-mounted from the build context's apt/ directory; the package index is refreshed first so apt can resolve any dependencies of those local packages, and is removed again in the same layer
132+ ARG NEURON_ARTIFACT_PATH=/root/neuron_artifacts
133+ ARG NEURONX_RUNTIME_LIB_VERSION=2.24.53.0-f239092cc
134+ ARG NEURONX_COLLECTIVES_LIB_VERSION=2.24.59.0-838c7fc8b
135+ ARG NEURONX_TOOLS_VERSION=2.22.61.0
136+
137+ RUN --mount=type=bind,source=apt,target=${NEURON_ARTIFACT_PATH}/apt \
138+ apt-get update && apt-get install -y \
139+ ${NEURON_ARTIFACT_PATH}/apt/${NEURONX_TOOLS_VERSION} \
140+ ${NEURON_ARTIFACT_PATH}/apt/${NEURONX_COLLECTIVES_LIB_VERSION} \
141+ ${NEURON_ARTIFACT_PATH}/apt/${NEURONX_RUNTIME_LIB_VERSION} \
142+ && rm -rf /var/lib/apt/lists/* \
143+ && rm -rf /tmp/tmp* \
144+ && apt-get clean
145+
146+ ARG NEURONX_FRAMEWORK_VERSION=2.5.1.2.6.0
147+ ARG NEURONX_TRANSFORMERS_VERSION=0.13.470
148+ ARG NEURONX_CC_VERSION=2.17.194.0
149+ ARG NEURONX_DISTRIBUTED_VERSION=0.11.0
150+ ARG NEURONX_DISTRIBUTED_INFERENCE_VERSION=0.2.0
151+
152+ RUN --mount=type=bind,source=pip,target=${NEURON_ARTIFACT_PATH}/pip \
153+ ${PIP} install --no-cache-dir --find-links ${NEURON_ARTIFACT_PATH}/pip \
154+ ${NEURON_ARTIFACT_PATH}/pip/${NEURONX_CC_VERSION} \
155+ ${NEURON_ARTIFACT_PATH}/pip/${NEURONX_FRAMEWORK_VERSION} \
156+ ${NEURON_ARTIFACT_PATH}/pip/${NEURONX_TRANSFORMERS_VERSION} \
157+ && ${PIP} install --no-deps --no-cache-dir --find-links ${NEURON_ARTIFACT_PATH}/pip -U ${NEURON_ARTIFACT_PATH}/pip/${NEURONX_DISTRIBUTED_VERSION} \
158+ && ${PIP} install --no-deps --no-cache-dir --find-links ${NEURON_ARTIFACT_PATH}/pip -U ${NEURON_ARTIFACT_PATH}/pip/${NEURONX_DISTRIBUTED_INFERENCE_VERSION} \
159+ && rm -rf ~/.cache/pip/*
160+
165161EXPOSE 8080 8081
166162
167163ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
168164CMD ["/usr/local/bin/entrypoint.sh"]
169165
170- HEALTHCHECK CMD curl --fail http://localhost:8080/ping || exit 1
166+ HEALTHCHECK CMD curl --fail http://localhost:8080/ping || exit 1
0 commit comments