# Global build arg: selects which stage "final" is built from
# ("prod" = pinned Neuron versions, "repo" = latest from the Neuron repos).
# Declared before the first FROM so it is usable in FROM lines.
ARG BUILD_STAGE=prod

FROM public.ecr.aws/docker/library/ubuntu:22.04 AS base

LABEL dlc_major_version="1"
LABEL maintainer="Amazon AI"
# Lets SageMaker rebind the serving port via SAGEMAKER_BIND_TO_PORT.
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true

# ARG (not ENV) so the noninteractive apt frontend does not leak into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive
ARG PIP=pip3
ARG PYTHON=python3.11
ARG PYTHON_VERSION=3.11.13
ARG TORCHSERVE_VERSION=0.11.0
ARG SM_TOOLKIT_VERSION=2.0.25
ARG MINIFORGE_VERSION=25.3.1-0

# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Neuron runtime and conda libraries must resolve ahead of system defaults.
ENV LD_LIBRARY_PATH=/opt/aws/neuron/lib:/lib/x86_64-linux-gnu:/opt/conda/lib/:$LD_LIBRARY_PATH
ENV PATH=/opt/conda/bin:/opt/aws/neuron/bin:$PATH
21+
# Base OS packages (build toolchain, media libs for opencv, JDK for TorchServe).
# NOTE(review): package versions are unpinned (hadolint DL3008) and
# `apt-get upgrade` is flagged by DL3005 — both kept as-is, since DLC images
# rely on upgrade for CVE patching between base-image releases.
RUN apt-get update \
 && apt-get upgrade -y \
 && apt-get install -y --no-install-recommends \
    apt-transport-https \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    emacs \
    git \
    gnupg2 \
    gpg-agent \
    jq \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libcap-dev \
    libhwloc-dev \
    openjdk-11-jdk \
    unzip \
    vim \
    wget \
    zlib1g-dev \
 && rm -rf /var/lib/apt/lists/* \
 && rm -rf /tmp/tmp* \
 && apt-get clean
50+
# Convert the Java cacerts keystore to JKS so TorchServe's JVM can read it.
# https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files
# Fixed: commands were joined with `;`, which ignores failures — a broken
# keystore conversion would have passed the build silently. `&&` fails fast.
RUN keytool -importkeystore -srckeystore /etc/ssl/certs/java/cacerts -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -srcstorepass changeit -deststorepass changeit -noprompt \
 && mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts \
 && /var/lib/dpkg/info/ca-certificates-java.postinst configure
55+
# Install Miniforge and the base conda environment.
# Fixed: added `-f` so an HTTP error (e.g. a bad MINIFORGE_VERSION -> 404)
# aborts the build instead of saving an HTML error page as the installer.
RUN curl -f -L -o ~/miniforge.sh https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/Miniforge3-${MINIFORGE_VERSION}-Linux-x86_64.sh \
 && chmod +x ~/miniforge.sh \
 && ~/miniforge.sh -b -p /opt/conda \
 && rm ~/miniforge.sh \
 && /opt/conda/bin/conda update -y conda \
 && /opt/conda/bin/mamba install -c conda-forge -y \
    python=$PYTHON_VERSION \
    pyopenssl \
    cython \
    mkl-include \
    mkl \
    parso \
    typing \
    # Below 2 are included in miniconda base, but not mamba so need to install
    conda-content-trust \
    charset-normalizer \
 && /opt/conda/bin/conda clean -ya
73+
# Scientific-stack conda packages plus a baseline pip toolchain.
# Fixed: added `-y` to mamba install — the earlier mamba invocation passes it,
# and a non-interactive build must not depend on a confirmation prompt.
RUN /opt/conda/bin/mamba install -c conda-forge -y \
    python=$PYTHON_VERSION \
    scikit-learn \
    h5py \
    requests \
 && conda clean -ya \
 && pip install --upgrade pip \
    --trusted-host pypi.org --trusted-host files.pythonhosted.org \
 # Expose conda's pip as pip3 so the ${PIP} build arg resolves to it.
 && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
 && pip install \
    packaging \
    enum-compat \
    ipython \
 && rm -rf ~/.cache/pip/*
88+
# Python serving dependencies and TorchServe.
# Fixed: `opencv-python>=4.8.1.78` was UNQUOTED — the shell parsed `>` as an
# output redirection, so pip installed an unconstrained opencv-python and the
# command's stdout was written to a stray file named `=4.8.1.78`. Quoting the
# specifier (like the neighboring requirements) restores the version pin.
RUN ${PIP} install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
 && ${PIP} install --no-cache-dir -U \
    "opencv-python>=4.8.1.78" \
    "numpy<1.24,>1.21" \
    "scipy>=1.8.0" \
    six \
    "awscli<2" \
    pandas==1.* \
    boto3 \
    cryptography \
    "protobuf>=3.18.3,<4" \
    torchserve==${TORCHSERVE_VERSION} \
    torch-model-archiver==${TORCHSERVE_VERSION} \
 && rm -rf ~/.cache/pip/*
103+
# Module the SageMaker serving stack imports to start the inference server.
ENV SAGEMAKER_SERVING_MODULE=sagemaker_pytorch_serving_container.serving:main
# TorchServe scratch/temp directory.
ENV TEMP=/home/model-server/tmp

# Unprivileged account for TorchServe workers.
# NOTE(review): no USER directive appears anywhere in this file, so the
# container entrypoint still starts as root — presumably required by the
# entrypoint scripts; confirm and drop privileges if not.
RUN useradd -m model-server \
 && mkdir -p /home/model-server/tmp /opt/ml/model \
 && chown -R model-server /home/model-server /opt/ml/model
110+
# Entrypoint and monitoring scripts; --chmod at copy time avoids a separate
# chmod layer that would double the files' footprint.
COPY --chmod=755 neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY --chmod=755 neuron-monitor.sh deep_learning_container.py /usr/local/bin/
COPY --chmod=755 torchserve-neuron.sh /usr/local/bin/entrypoint.sh
# TorchServe configuration read at startup.
COPY config.properties /home/model-server
115+
# Install the SageMaker PyTorch inference toolkit, then patch its default
# handler so torch_neuronx is imported alongside torch.
# Fixed: the sed pattern was unanchored — a line such as "import torch.nn as nn"
# would have been rewritten to "import torch, torch_neuronx.nn as nn",
# silently rebinding `nn`. Anchoring with ^…$ patches only the bare import.
RUN ${PIP} install --no-cache-dir "sagemaker-pytorch-inference==${SM_TOOLKIT_VERSION}" \
 # patch default_pytorch_inference_handler.py to import torch_neuronx
 && DEST_DIR=$(python -c "import os.path, sagemaker_pytorch_serving_container; print(os.path.dirname(sagemaker_pytorch_serving_container.__file__))") \
 && DEST_FILE=${DEST_DIR}/default_pytorch_inference_handler.py \
 && sed -i "s/^import torch$/import torch, torch_neuronx/" ${DEST_FILE} \
 && rm -rf ~/.cache/pip/*
122+
# Compliance
# Fixed: added `-fsSL` so an S3/HTTP error aborts the build instead of
# letting unzip fail later on a downloaded error page.
RUN HOME_DIR=/root \
 && curl -fsSL -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
 && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
 && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
 && chmod +x /usr/local/bin/testOSSCompliance \
 && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
 && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
 && rm -rf ${HOME_DIR}/oss_compliance* \
 # conda leaves an empty /root/.cache/conda/notices.cache file which is not removed by conda clean -ya
 && rm -rf ${HOME_DIR}/.cache/conda
134+
# License for the PyTorch build shipped in this image; `-f` so an HTTP error
# fails the build rather than shipping an error page as /license.txt.
RUN curl -f -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.8/license.txt
136+
# Setting up APT and PIP repo for neuron artifacts
ARG NEURON_APT_REPO=apt.repos.neuron.amazonaws.com
# NOTE(review): the *_KEY build args below are written into
# /etc/apt/sources.list.d and the global pip config, so they persist in image
# layers and are visible via `docker history`. If these keys are secrets,
# switch to BuildKit `RUN --mount=type=secret` instead of --build-arg.
ARG NEURON_APT_REPO_KEY
ARG NEURON_PIP_REPO=pip.repos.neuron.amazonaws.com
ARG NEURON_PIP_REPO_KEY
# Fixed: added `-f` to curl so an auth/HTTP failure aborts the build instead of
# feeding an error page to `gpg --dearmor`.
# NOTE(review): the apt suite is "focal" while the base image is 22.04 (jammy)
# — presumably matching the Neuron repo layout; confirm.
RUN mkdir -p /etc/apt/keyrings \
 && APT_REPO_PREFIX=$([ -n "${NEURON_APT_REPO_KEY}" ] && echo "${NEURON_APT_REPO_KEY}@" || echo "") \
 && echo "deb [signed-by=/etc/apt/keyrings/neuron.gpg] https://${APT_REPO_PREFIX}${NEURON_APT_REPO} focal main" > /etc/apt/sources.list.d/neuron.list \
 && curl -f $([ -n "${NEURON_APT_REPO_KEY}" ] && echo "-u ${NEURON_APT_REPO_KEY}") -sSL "https://${NEURON_APT_REPO}/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB" | gpg --dearmor > /etc/apt/keyrings/neuron.gpg \
 && PIP_REPO_URL=$([ -n "${NEURON_PIP_REPO_KEY}" ] && echo "https://${NEURON_PIP_REPO_KEY}@${NEURON_PIP_REPO}" || echo "https://${NEURON_PIP_REPO}") \
 && ${PIP} config set global.extra-index-url "${PIP_REPO_URL}"
148+
# Neuron SDK components version numbers
# NOTE(review): these ARGs are declared inside the "base" stage and go out of
# scope at the end of it (Dockerfile ARG scoping) — they expand to empty in the
# "repo"/"prod" stages unless redeclared there.
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.28.27.0-bc30ece58
ARG NEURONX_RUNTIME_LIB_VERSION=2.28.23.0-dd5879008
ARG NEURONX_TOOLS_VERSION=2.26.14.0

ARG NEURONX_CC_VERSION=2.21.18209.0+043b1bf7
ARG NEURONX_FRAMEWORK_VERSION=2.8.0.2.10.13553+1e4dd6ca
ARG NEURONX_DISTRIBUTED_VERSION=0.15.22404+1f27bddf
ARG NEURONX_DISTRIBUTED_INFERENCE_VERSION=0.6.10598+a59fdc00
158+
FROM base AS repo

# Fixed: ARG values do not cross stage boundaries — ${PIP} from the "base"
# stage is empty here, so the pip RUN below would have executed coreutils'
# `install` instead of pip. Redeclare the ARG for this stage.
ARG PIP=pip3

# Install Neuron components from the apt and pip repos (latest versions)
RUN apt-get update \
 && apt-get install -y \
    aws-neuronx-tools \
    aws-neuronx-collectives \
    aws-neuronx-runtime-lib \
 && rm -rf /var/lib/apt/lists/* \
 && rm -rf /tmp/tmp* \
 && apt-get clean

RUN ${PIP} install --no-cache-dir \
    neuronx-cc \
    torch-neuronx \
    neuronx_distributed \
    neuronx_distributed_inference \
 && rm -rf ~/.cache/pip/*
177+
FROM base AS prod

# Fixed: ARG values do not cross stage boundaries — ${PIP} and every
# NEURONX_*_VERSION declared in the "base" stage expand to empty here, turning
# the pins below into `aws-neuronx-tools=` / `neuronx-cc==` (build failures).
# Redeclare them for this stage with the same defaults; --build-arg overrides
# still apply.
ARG PIP=pip3
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.28.27.0-bc30ece58
ARG NEURONX_RUNTIME_LIB_VERSION=2.28.23.0-dd5879008
ARG NEURONX_TOOLS_VERSION=2.26.14.0
ARG NEURONX_CC_VERSION=2.21.18209.0+043b1bf7
ARG NEURONX_FRAMEWORK_VERSION=2.8.0.2.10.13553+1e4dd6ca
ARG NEURONX_DISTRIBUTED_VERSION=0.15.22404+1f27bddf
ARG NEURONX_DISTRIBUTED_INFERENCE_VERSION=0.6.10598+a59fdc00

# Install Neuron components with specific versions
RUN apt-get update \
 && apt-get install -y \
    aws-neuronx-tools=$NEURONX_TOOLS_VERSION \
    aws-neuronx-collectives=$NEURONX_COLLECTIVES_LIB_VERSION \
    aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION \
 && rm -rf /var/lib/apt/lists/* \
 && rm -rf /tmp/tmp* \
 && apt-get clean

RUN ${PIP} install --no-cache-dir \
    neuronx-cc==$NEURONX_CC_VERSION \
    torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
    neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION \
    neuronx_distributed_inference==$NEURONX_DISTRIBUTED_INFERENCE_VERSION \
 && rm -rf ~/.cache/pip/*
196+
# BUILD_STAGE is a global (pre-FROM) ARG, so it is valid in this FROM line.
FROM ${BUILD_STAGE} AS final

# Inference (8080) and management (8081) TorchServe ports.
# EXPOSE is documentation only; ports must still be published at run time.
EXPOSE 8080 8081

ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["/usr/local/bin/entrypoint.sh"]

# Improved: explicit timings. --start-period keeps slow model loading from
# consuming the retry budget; the 5s timeout bounds a hung /ping probe.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
  CMD curl --fail http://localhost:8080/ping || exit 1