|
| 1 | +ARG TF_URL=https://storage.googleapis.com/tensorflow/versions/2.19.0/tensorflow_cpu-2.19.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl |
| 2 | +# TF_URL is declared before the first FROM, so each stage that needs it re-declares it with a bare ARG. |
| 3 | +FROM ubuntu:22.04 AS base_image |
| 4 | +# ubuntu:22.04 leaves LD_LIBRARY_PATH unset, so assign it outright; prepending the empty value would create an empty path entry, which the loader reads as the current directory. |
| 5 | +ENV DEBIAN_FRONTEND=noninteractive \ |
| 6 | + LD_LIBRARY_PATH="/usr/local/lib" |
| 7 | + |
| 8 | +# Bring the base packages up to date and drop the apt caches in the same layer. |
| 9 | +RUN apt-get update && apt-get upgrade -y \ |
| 10 | + && apt-get autoremove -y \ |
| 11 | + && apt-get clean \ |
| 12 | + && rm -rf /var/lib/apt/lists/* |
| 13 | + |
| 14 | +FROM base_image AS common |
| 15 | + |
| 16 | +LABEL maintainer="Amazon AI" \ |
| 17 | + dlc_major_version="1" |
| 18 | + |
| 19 | +# Build arguments: Open MPI release, pip and interpreter executables, exact CPython version, TensorFlow release line. |
| 20 | +ARG OMPI_VERSION=4.1.8 |
| 21 | +ARG PIP=pip3 |
| 22 | +ARG PYTHON=python3.12 |
| 23 | +ARG PYTHON_VERSION=3.12.11 |
| 24 | +ARG TF_VERSION=2.19 |
| 25 | + |
| 26 | +# Mirror the arguments into ENV so that the ec2 and sagemaker stages inherit them. |
| 27 | +ENV TF_VERSION=${TF_VERSION} \ |
| 28 | + PYTHON=${PYTHON} \ |
| 29 | + PYTHON_VERSION=${PYTHON_VERSION} |
| 30 | + |
| 31 | +ENV PIP=${PIP} |
| 32 | + |
| 33 | +# Keep apt/debconf from stopping the build to wait for user input. |
| 34 | +ENV DEBIAN_FRONTEND=noninteractive \ |
| 35 | + DEBCONF_NONINTERACTIVE_SEEN=true |
| 36 | + |
| 37 | +# Environment variables for MKL (the KMP_* settings). |
| 38 | +# TensorFlow's notes on running with MKL: |
| 39 | +# https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn |
| 40 | +ENV KMP_AFFINITY=granularity=fine,compact,1,0 \ |
| 41 | + KMP_BLOCKTIME=1 \ |
| 42 | + KMP_SETTINGS=0 |
| 43 | + |
| 44 | +# Python: no .pyc files, unbuffered stdio, UTF-8 I/O encoding; locale: C.UTF-8. |
| 45 | +ENV PYTHONDONTWRITEBYTECODE=1 \ |
| 46 | + PYTHONUNBUFFERED=1 \ |
| 47 | + PYTHONIOENCODING=UTF-8 \ |
| 48 | + LANG=C.UTF-8 LC_ALL=C.UTF-8 |
| 49 | + |
| 50 | +# Base toolchain and utilities: compiler, SSH client and server, download tools, editors, git, protobuf compiler, zlib and GTK headers. |
| 51 | +RUN apt-get update && apt-get install --no-install-recommends -y \ |
| 52 | + build-essential \ |
| 53 | + ca-certificates \ |
| 54 | + curl \ |
| 55 | + emacs \ |
| 56 | + git \ |
| 57 | + # GTK development files, a dependency of OpenCV |
| 58 | + libgtk2.0-dev \ |
| 59 | + libtemplate-perl \ |
| 60 | + openssh-client \ |
| 61 | + openssh-server \ |
| 62 | + openssl \ |
| 63 | + protobuf-compiler \ |
| 64 | + unzip \ |
| 65 | + vim \ |
| 66 | + wget \ |
| 67 | + zlib1g-dev \ |
| 68 | + && apt-get clean \ |
| 69 | + && rm -rf /var/lib/apt/lists/* |
| 70 | + |
| 71 | +# Build Open MPI ${OMPI_VERSION} from the upstream tarball, install it, refresh the linker cache, and delete the build tree. |
| 72 | +RUN mkdir /tmp/openmpi \ |
| 73 | + && cd /tmp/openmpi \ |
| 74 | + && wget -q "https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OMPI_VERSION}.tar.gz" \ |
| 75 | + && tar -xzf "openmpi-${OMPI_VERSION}.tar.gz" \ |
| 76 | + && cd "openmpi-${OMPI_VERSION}" \ |
| 77 | + && ./configure --enable-orterun-prefix-by-default \ |
| 78 | + && make -j "$(nproc)" all \ |
| 79 | + && make install \ |
| 80 | + && ldconfig \ |
| 81 | + && rm -rf /tmp/openmpi |
| 82 | + |
| 83 | +# Rename the real mpirun and put a bash wrapper in its place that always passes --allow-run-as-root. |
| 84 | +RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \ |
| 85 | + && printf '%s\n' \ |
| 86 | + '#!/bin/bash' 'mpirun.real --allow-run-as-root "$@"' > /usr/local/bin/mpirun \ |
| 87 | + && chmod a+x /usr/local/bin/mpirun |
| 88 | + |
| 89 | +RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \ |
| 90 | + && echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf |
| 91 | +# Prepend the Open MPI lib and bin directories to the loader and executable search paths. |
| 92 | +ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH \ |
| 93 | + PATH=/usr/local/openmpi/bin/:$PATH |
| 94 | + |
| 95 | +# Relax pam_loginuid from "required" to "optional" for sshd; otherwise the user is kicked off right after login. |
| 96 | +RUN sed -i 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' /etc/pam.d/sshd |
| 97 | + |
| 98 | +# Generate a passphrase-less RSA key for root and authorize it for root logins, |
| 99 | +# then turn off strict host-key checking for every host in root's SSH client config. |
| 100 | +RUN mkdir -p /root/.ssh/ /var/run/sshd \ |
| 101 | + && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \ |
| 102 | + && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \ |
| 103 | + && printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config |
| 104 | + |
| 105 | +WORKDIR / |
| 106 | + |
| 107 | +# Development headers (bz2, libc, curl, ffi, gdbm, lzma, ncurses, sqlite3, ssl, tk) installed ahead of the CPython source build, plus ffmpeg, libsm6 and libxext6. |
| 108 | +RUN apt-get update && apt-get install --no-install-recommends -y \ |
| 109 | + ffmpeg \ |
| 110 | + libbz2-dev \ |
| 111 | + libc6-dev \ |
| 112 | + libcurl4-openssl-dev \ |
| 113 | + libffi-dev \ |
| 114 | + libgdbm-dev \ |
| 115 | + liblzma-dev \ |
| 116 | + libncursesw5-dev \ |
| 117 | + libsm6 \ |
| 118 | + libsqlite3-dev \ |
| 119 | + libssl-dev \ |
| 120 | + libxext6 \ |
| 121 | + tk-dev \ |
| 122 | + && apt-get clean \ |
| 123 | + && rm -rf /var/lib/apt/lists/* |
| 124 | + |
| 125 | +# Compile CPython ${PYTHON_VERSION} from the python.org source tarball, install it, then delete the tarball and source tree. |
| 126 | +RUN wget "https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz" \ |
| 127 | + && tar -xvf "Python-${PYTHON_VERSION}.tgz" \ |
| 128 | + && cd "Python-${PYTHON_VERSION}" \ |
| 129 | + && ./configure \ |
| 130 | + && make -j "$(nproc)" && make install \ |
| 131 | + && rm -rf "../Python-${PYTHON_VERSION}"* |
| 132 | + |
| 133 | +# Upgrade pip and setuptools via ${PIP}. |
| 134 | +RUN ${PIP} --no-cache-dir install --upgrade pip setuptools |
| 135 | + |
| 136 | +# Some TF tools expect a "python" binary; ${PIP} is likewise exposed as /usr/bin/pip. |
| 137 | +# The substitutions are quoted so that ln fails the build if either tool is missing from PATH. |
| 138 | +RUN ln -s "$(which ${PYTHON})" /usr/local/bin/python \ |
| 139 | + && ln -s "$(which ${PIP})" /usr/bin/pip |
| 140 | + |
| 141 | +RUN ${PIP} install --no-cache-dir --upgrade \ |
| 142 | + "awscli<2" \ |
| 143 | + cmake \ |
| 144 | + mpi4py \ |
| 145 | + Pillow \ |
| 146 | + pybind11 \ |
| 147 | + python-dateutil \ |
| 148 | + requests \ |
| 149 | + scipy \ |
| 150 | + urllib3 \ |
| 151 | + # TensorFlow is installed separately, at the end, |
| 152 | + # so that its library version is not overwritten. |
| 153 | + && ${PIP} install --no-cache-dir --upgrade \ |
| 154 | + absl-py \ |
| 155 | + h5py \ |
| 156 | + opencv-python \ |
| 157 | + protobuf \ |
| 158 | + psutil \ |
| 159 | + werkzeug |
| 160 | + |
| 161 | +ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py |
| 162 | +COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh |
| 163 | +COPY dockerd_ec2_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh |
| 164 | + |
| 165 | +# Mark the three scripts executable and have /etc/bash.bashrc source the telemetry hook. |
| 166 | +RUN chmod +x /usr/local/bin/deep_learning_container.py \ |
| 167 | + /usr/local/bin/bash_telemetry.sh \ |
| 168 | + /usr/local/bin/dockerd_entrypoint.sh \ |
| 169 | + && echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc |
| 170 | + |
| 171 | +# Fetch the license text for this TensorFlow version; --fail stops the build on an HTTP error instead of saving the error response as /license.txt. |
| 172 | +RUN curl -fsS https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-${TF_VERSION}/license.txt -o /license.txt |
| 173 | + |
| 174 | +######################################################## |
| 175 | +# _____ ____ ____ ___ |
| 176 | +# | ____/ ___|___ \ |_ _|_ __ ___ __ _ __ _ ___ |
| 177 | +# | _|| | __) | | || '_ ` _ \ / _` |/ _` |/ _ \ |
| 178 | +# | |__| |___ / __/ | || | | | | | (_| | (_| | __/ |
| 179 | +# |_____\____|_____| |___|_| |_| |_|\__,_|\__, |\___| |
| 180 | +# |___/ |
| 181 | +# ____ _ |
| 182 | +# | _ \ ___ ___(_)_ __ ___ |
| 183 | +# | |_) / _ \/ __| | '_ \ / _ \ |
| 184 | +# | _ < __/ (__| | |_) | __/ |
| 185 | +# |_| \_\___|\___|_| .__/ \___| |
| 186 | +# |_| |
| 187 | +######################################################## |
| 188 | + |
| 189 | +FROM common AS ec2 |
| 190 | +ARG TF_URL |
| 191 | + |
| 192 | +# Install the TensorFlow wheel from TF_URL together with tensorflow-io 0.37.x and tensorflow-datasets. |
| 193 | +RUN ${PIP} install --no-cache-dir --upgrade \ |
| 194 | + "${TF_URL}" \ |
| 195 | + "tensorflow-io==0.37.*" tensorflow-datasets |
| 196 | + |
| 197 | +# Download the OSS compliance bundle, install its testOSSCompliance checker, run its generator script, then remove the bundle. |
| 198 | +RUN HOME_DIR=/root \ |
| 199 | + && curl -o "${HOME_DIR}/oss_compliance.zip" https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ |
| 200 | + && unzip "${HOME_DIR}/oss_compliance.zip" -d "${HOME_DIR}/" \ |
| 201 | + && cp "${HOME_DIR}/oss_compliance/test/testOSSCompliance" /usr/local/bin/testOSSCompliance \ |
| 202 | + && chmod +x /usr/local/bin/testOSSCompliance "${HOME_DIR}/oss_compliance/generate_oss_compliance.sh" \ |
| 203 | + && "${HOME_DIR}/oss_compliance/generate_oss_compliance.sh" "${HOME_DIR}" "${PYTHON}" \ |
| 204 | + && rm -rf "${HOME_DIR}"/oss_compliance* |
| 205 | + |
| 206 | +# Remove temporary files left in /tmp. |
| 207 | +RUN rm -rf /tmp/* |
| 208 | +# Start the entrypoint script under bash with job control enabled (-m); CMD supplies the default argument. |
| 209 | +ENTRYPOINT ["bash", "-m", "/usr/local/bin/dockerd_entrypoint.sh"] |
| 210 | + |
| 211 | +CMD ["/bin/bash"] |
| 212 | + |
| 213 | +################################################################# |
| 214 | +# ____ __ __ _ |
| 215 | +# / ___| __ _ __ _ ___| \/ | __ _| | _____ _ __ |
| 216 | +# \___ \ / _` |/ _` |/ _ \ |\/| |/ _` | |/ / _ \ '__| |
| 217 | +# ___) | (_| | (_| | __/ | | | (_| | < __/ | |
| 218 | +# |____/ \__,_|\__, |\___|_| |_|\__,_|_|\_\___|_| |
| 219 | +# |___/ |
| 220 | +# ___ ____ _ |
| 221 | +# |_ _|_ __ ___ __ _ __ _ ___ | _ \ ___ ___(_)_ __ ___ |
| 222 | +# | || '_ ` _ \ / _` |/ _` |/ _ \ | |_) / _ \/ __| | '_ \ / _ \ |
| 223 | +# | || | | | | | (_| | (_| | __/ | _ < __/ (__| | |_) | __/ |
| 224 | +# |___|_| |_| |_|\__,_|\__, |\___| |_| \_\___|\___|_| .__/ \___| |
| 225 | +# |___/ |_| |
| 226 | +################################################################# |
| 227 | + |
| 228 | +FROM common AS sagemaker |
| 229 | + |
| 230 | +LABEL maintainer="Amazon AI" \ |
| 231 | + dlc_major_version="1" |
| 232 | + |
| 233 | +ARG TF_URL |
| 234 | + |
| 235 | +# SageMaker-specific environment variable (the value is in module:function form). |
| 236 | +ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main |
| 237 | + |
| 238 | +# Dependencies for OpenCV. |
| 239 | +# The GPU image does not need these dependencies. |
| 240 | +RUN apt-get update \ |
| 241 | + && apt-get install --no-install-recommends -y \ |
| 242 | + krb5-user \ |
| 243 | + libgtk2.0-dev \ |
| 244 | + libkrb5-dev \ |
| 245 | + libsasl2-dev \ |
| 246 | + libsasl2-modules \ |
| 247 | + && apt-get clean \ |
| 248 | + && rm -rf /var/lib/apt/lists/* |
| 249 | + |
| 250 | +# https://github.com/yaml/pyyaml/issues/601 |
| 251 | +# PyYAML releases older than 6.0.1 fail to build with Cython 3 and above. |
| 252 | +# tf-models-official uses such older versions, which breaks its install. |
| 253 | +# Install the older PyYAML and Cython first so that tf-models-official can be installed. |
| 254 | +# The sagemaker package later moves PyYAML back to 6 to satisfy its own requirement; |
| 255 | +# that is fine, since sagemaker is more important than the models and |
| 256 | +# the models still work with PyYAML 6 in this context. |
| 257 | +# wheel has to be installed before the PyYAML workaround can be applied. |
| 258 | +RUN ${PIP} install --no-cache-dir -U \ |
| 259 | + wheel \ |
| 260 | + "cython<3" \ |
| 261 | + "pyyaml<6" \ |
| 262 | + --no-build-isolation |
| 263 | + |
| 264 | +# https://github.com/tensorflow/models/issues/9267 |
| 265 | +# tf-models-official ignores an existing TensorFlow install and always pulls in the open-source build, so that copy is uninstalled again and the wheel from TF_URL is installed in its place. |
| 266 | +RUN ${PIP} install \ |
| 267 | + --default-timeout 300 \ |
| 268 | + --retries=5 \ |
| 269 | + --no-cache-dir --upgrade \ |
| 270 | + "tf-models-official==${TF_VERSION}.1" \ |
| 271 | + "tensorflow-text==${TF_VERSION}.0" \ |
| 272 | + && ${PIP} uninstall --yes tensorflow tensorflow-gpu \ |
| 273 | + && ${PIP} install --no-cache-dir --upgrade \ |
| 274 | + "${TF_URL}" \ |
| 275 | + "tensorflow-io==0.37.*" \ |
| 276 | + tensorflow-datasets |
| 277 | + |
| 278 | +# Install rust and cargo via rustup. The installer is saved to a file first: piping curl into sh would hide a failed download, because only sh's exit status is checked. |
| 279 | +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh && sh /tmp/rustup-init.sh -y && rm -f /tmp/rustup-init.sh |
| 280 | +ENV PATH="/root/.cargo/bin:${PATH}" |
| 281 | + |
| 282 | +# Numerical, plotting, imaging and model-explanation libraries. |
| 283 | +RUN ${PYTHON} -m pip install --no-cache-dir --upgrade \ |
| 284 | + bokeh \ |
| 285 | + imageio \ |
| 286 | + numba \ |
| 287 | + opencv-python \ |
| 288 | + plotly \ |
| 289 | + seaborn shap |
| 290 | + |
| 291 | +# SageMaker SDK and training toolkits, Studio and sparkmagic extensions, smclarify and y-py. |
| 292 | +RUN ${PYTHON} -m pip install --no-cache-dir --upgrade \ |
| 293 | + "sparkmagic<1" \ |
| 294 | + sagemaker \ |
| 295 | + sagemaker-experiments \ |
| 296 | + sagemaker-studio-analytics-extension \ |
| 297 | + sagemaker-studio-sparkmagic-lib \ |
| 298 | + sagemaker-tensorflow-training \ |
| 299 | + sagemaker-training \ |
| 300 | + smclarify y-py |
| 301 | + |
| 302 | +# Remove the python3 Jupyter kernelspec that the sparkmagic install registered. |
| 303 | +RUN /usr/local/bin/jupyter-kernelspec remove -f python3 |
| 304 | + |
| 305 | +# Remove temporary files left in /tmp. |
| 306 | +RUN rm -rf /tmp/* |
| 307 | + |
| 308 | +# Download the OSS compliance bundle, install its testOSSCompliance checker, run its generator script, then remove the bundle. |
| 309 | +RUN HOME_DIR=/root \ |
| 310 | + && curl -o "${HOME_DIR}/oss_compliance.zip" https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ |
| 311 | + && unzip "${HOME_DIR}/oss_compliance.zip" -d "${HOME_DIR}/" \ |
| 312 | + && cp "${HOME_DIR}/oss_compliance/test/testOSSCompliance" /usr/local/bin/testOSSCompliance \ |
| 313 | + && chmod +x /usr/local/bin/testOSSCompliance "${HOME_DIR}/oss_compliance/generate_oss_compliance.sh" \ |
| 314 | + && "${HOME_DIR}/oss_compliance/generate_oss_compliance.sh" "${HOME_DIR}" "${PYTHON}" \ |
| 315 | + && rm -rf "${HOME_DIR}"/oss_compliance* |
| 316 | +# Start the entrypoint script under bash with job control enabled (-m); CMD supplies the default argument. |
| 317 | +ENTRYPOINT ["bash", "-m", "/usr/local/bin/dockerd_entrypoint.sh"] |
| 318 | + |
| 319 | +CMD ["/bin/bash"] |
0 commit comments