From 7c1a519d5367671c1dad0f6cdb48697bb220ade4 Mon Sep 17 00:00:00 2001 From: zhuofuAMZ Date: Thu, 3 Jul 2025 15:47:48 -0700 Subject: [PATCH 01/11] Figma --- base/x86_64/gpu/cu128/Dockerfile | 10 +++++----- dlc_developer_config.toml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/base/x86_64/gpu/cu128/Dockerfile b/base/x86_64/gpu/cu128/Dockerfile index 89c60b7205c1..350c637f0ff5 100644 --- a/base/x86_64/gpu/cu128/Dockerfile +++ b/base/x86_64/gpu/cu128/Dockerfile @@ -1,10 +1,10 @@ ARG PYTHON="python3" -ARG PYTHON_VERSION="3.12.10" -ARG PYTHON_SHORT_VERSION="3.12" +ARG PYTHON_VERSION="3.11.12" +ARG PYTHON_SHORT_VERSION="3.11" ARG CUDA_MAJOR="12" -ARG CUDA_MINOR="8" +ARG CUDA_MINOR="6" ARG EFA_VERSION="1.42.0" -FROM nvidia/cuda:12.8.1-base-ubuntu24.04 AS base-builder +FROM nvidia/cuda:12.6.3-base-ubuntu22.04 AS base-builder RUN mv /usr/local/cuda/compat /usr/local \ @@ -65,7 +65,7 @@ COPY install_cuda.sh install_cuda.sh RUN bash install_cuda.sh "${CUDA_MAJOR}.${CUDA_MINOR}" && rm install_cuda.sh ############################################################################## -FROM nvidia/cuda:12.8.1-base-ubuntu24.04 AS final +FROM nvidia/cuda:12.6.3-base-ubuntu22.04 AS final ARG PYTHON ARG PYTHON_SHORT_VERSION ARG CUDA_MAJOR diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 1962bfd69e21..e30b390cec8b 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -37,7 +37,7 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = [] +build_frameworks = ["base"] # By default we build both training and inference containers. Set true/false values to determine which to build. 
From 9ba22b5355c1e5e083c382cfb5527cf9945b700c Mon Sep 17 00:00:00 2001 From: zhuofuAMZ Date: Mon, 7 Jul 2025 16:08:43 -0700 Subject: [PATCH 02/11] fix install_cuda.sh --- scripts/install_cuda.sh | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/scripts/install_cuda.sh b/scripts/install_cuda.sh index cf176622ad39..5c0fc27700b5 100644 --- a/scripts/install_cuda.sh +++ b/scripts/install_cuda.sh @@ -46,6 +46,45 @@ function install_nvjpeg_for_cuda_below_129 { rm -rf /tmp/nvjpeg } +function install_cuda126_stack { + CUDNN_VERSION="9.7.0.66" + NCCL_VERSION="v2.24.3-1" + CUDA_HOME="/usr/local/cuda" + + # move cuda-compt and remove existing cuda dir from nvidia/cuda:**.*.*-base-* + rm -rf /usr/local/cuda-* + rm -rf /usr/local/cuda + + # install CUDA 12.6.3 + wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux.run + chmod +x cuda_12.6.3_560.35.05_linux.run + ./cuda_12.6.3_560.35.05_linux.run --toolkit --silent + rm -f cuda_12.6.3_560.35.05_linux.run + ln -s /usr/local/cuda-12.6 /usr/local/cuda + # bring back cuda-compat + mv /usr/local/compat /usr/local/cuda/compat 2>/dev/null || true + + # install cudnn + mkdir -p /tmp/cudnn + cd /tmp/cudnn + wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz + tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz + cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ + + # install nccl + mkdir -p /tmp/nccl + cd /tmp/nccl + git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git + cd nccl + make -j src.build + cp -a build/include/* /usr/local/cuda/include/ + cp -a build/lib/* /usr/local/cuda/lib64/ + + install_nvjpeg_for_cuda_below_129 + 
prune_cuda + ldconfig +} function install_cuda128_stack { CUDNN_VERSION="9.8.0.87" @@ -91,6 +130,8 @@ function install_cuda128_stack { while test $# -gt 0 do case "$1" in + 12.6) install_cuda126_stack; + ;; 12.8) install_cuda128_stack; ;; *) echo "bad argument $1"; exit 1 From 78332c1d0f524cbcfa975c8fb0c70c40da4fa805 Mon Sep 17 00:00:00 2001 From: zhuofuAMZ Date: Sun, 13 Jul 2025 09:45:35 -0700 Subject: [PATCH 03/11] upgrade setup tools --- base/x86_64/gpu/cu128/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/base/x86_64/gpu/cu128/Dockerfile b/base/x86_64/gpu/cu128/Dockerfile index 350c637f0ff5..ec70ab641cae 100644 --- a/base/x86_64/gpu/cu128/Dockerfile +++ b/base/x86_64/gpu/cu128/Dockerfile @@ -97,6 +97,9 @@ COPY --from=cuda-builder /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} /usr/local/ COPY install_efa.sh install_efa.sh COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh +RUN ${PIP} --no-cache-dir install --upgrade \ + pip \ + setuptools RUN chmod +x /usr/local/bin/deep_learning_container.py && \ chmod +x /usr/local/bin/bash_telemetry.sh && \ echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc && \ From 69951ed5e80ae7d0cd45ffa5bdc2ffd8b986f972 Mon Sep 17 00:00:00 2001 From: zhuofuAMZ Date: Sun, 13 Jul 2025 16:18:50 -0700 Subject: [PATCH 04/11] fix setuptool version --- base/buildspec.yml | 6 +++--- base/x86_64/gpu/cu128/Dockerfile | 4 +--- scripts/install_python.sh | 1 + 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/base/buildspec.yml b/base/buildspec.yml index 093a4f0caee1..733fd99ad30c 100644 --- a/base/buildspec.yml +++ b/base/buildspec.yml @@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID prod_account_id: &PROD_ACCOUNT_ID 763104351884 region: &REGION framework: &FRAMEWORK base -version: &VERSION 12.8.1 -short_version: &SHORT_VERSION "12.8" +version: &VERSION 12.6.3 +short_version: &SHORT_VERSION "12.6" arch_type: &ARCH_TYPE x86_64 autopatch_build:
"False" @@ -32,7 +32,7 @@ context: target: install_efa.sh images: - base_x86_64_gpu_cuda128: + base_x86_64_gpu_cuda126: <<: *BASE_REPOSITORY context: <<: *BASE_CONTEXT diff --git a/base/x86_64/gpu/cu128/Dockerfile b/base/x86_64/gpu/cu128/Dockerfile index ec70ab641cae..7b811e92e5b9 100644 --- a/base/x86_64/gpu/cu128/Dockerfile +++ b/base/x86_64/gpu/cu128/Dockerfile @@ -97,9 +97,7 @@ COPY --from=cuda-builder /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} /usr/local/ COPY install_efa.sh install_efa.sh COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh -RUN ${PIP} --no-cache-dir install --upgrade \ - pip \ - setuptools + RUN chmod +x /usr/local/bin/deep_learning_container.py && \ chmod +x /usr/local/bin/bash_telemetry.sh && \ echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc && \ diff --git a/scripts/install_python.sh b/scripts/install_python.sh index c8579da2e4dc..17df2dad6836 100644 --- a/scripts/install_python.sh +++ b/scripts/install_python.sh @@ -24,6 +24,7 @@ function install_python { # this will add pip systemlink to pip${PYTHON_MAJOR_VERSION} python -m pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org python -m pip install --no-cache-dir awscli boto3 requests setuptools + python -m pip install --no-cache-dir --upgrade setuptools } # idiomatic parameter and option handling in sh From 7090b542dbefc9391076a4711d00494c585d6040 Mon Sep 17 00:00:00 2001 From: zhuofuAMZ Date: Sun, 13 Jul 2025 16:24:13 -0700 Subject: [PATCH 05/11] setuptool version upgrade --- scripts/install_python.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/install_python.sh b/scripts/install_python.sh index 17df2dad6836..9ff49b4dd2f1 100644 --- a/scripts/install_python.sh +++ b/scripts/install_python.sh @@ -23,8 +23,7 @@ function install_python { # this will add pip systemlink to pip${PYTHON_MAJOR_VERSION} python -m pip install 
--upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org - python -m pip install --no-cache-dir awscli boto3 requests setuptools - python -m pip install --no-cache-dir --upgrade setuptools + python -m pip install --no-cache-dir awscli boto3 requests setuptools>=70.0 } # idiomatic parameter and option handling in sh From 8388af5e99f02a5729e82c9225e841e7eca78971 Mon Sep 17 00:00:00 2001 From: zhuofuAMZ Date: Sun, 13 Jul 2025 16:25:18 -0700 Subject: [PATCH 06/11] fix version --- scripts/install_python.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install_python.sh b/scripts/install_python.sh index 9ff49b4dd2f1..213cb0c95626 100644 --- a/scripts/install_python.sh +++ b/scripts/install_python.sh @@ -23,7 +23,7 @@ function install_python { # this will add pip systemlink to pip${PYTHON_MAJOR_VERSION} python -m pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org - python -m pip install --no-cache-dir awscli boto3 requests setuptools>=70.0 + python -m pip install --no-cache-dir awscli boto3 requests setuptools>=70.0.0 } # idiomatic parameter and option handling in sh From 7e017d46c0471b070d971e30f10ea5abf834f478 Mon Sep 17 00:00:00 2001 From: zhuofuAMZ Date: Sun, 13 Jul 2025 17:58:42 -0700 Subject: [PATCH 07/11] fix --- base/buildspec.yml | 6 +++--- scripts/install_python.sh | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/buildspec.yml b/base/buildspec.yml index 733fd99ad30c..90f61fe24a7f 100644 --- a/base/buildspec.yml +++ b/base/buildspec.yml @@ -38,10 +38,10 @@ images: <<: *BASE_CONTEXT image_size_baseline: 11000 device_type: &DEVICE_TYPE gpu - cuda_version: &CUDA_VERSION cu128 + cuda_version: &CUDA_VERSION cu126 python_version: &DOCKER_PYTHON_VERSION py3 - tag_python_version: &TAG_PYTHON_VERSION py312 - os_version: &OS_VERSION ubuntu24.04 + tag_python_version: &TAG_PYTHON_VERSION py311 + os_version: &OS_VERSION ubuntu22.04 tag: !join [ *VERSION, "-", 
*DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ] latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ] docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /, *CUDA_VERSION, /Dockerfile ] diff --git a/scripts/install_python.sh b/scripts/install_python.sh index 213cb0c95626..e94ba1f0daa3 100644 --- a/scripts/install_python.sh +++ b/scripts/install_python.sh @@ -23,7 +23,7 @@ function install_python { # this will add pip systemlink to pip${PYTHON_MAJOR_VERSION} python -m pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org - python -m pip install --no-cache-dir awscli boto3 requests setuptools>=70.0.0 + python -m pip install --no-cache-dir awscli boto3 requests setuptools>70.0.0 } # idiomatic parameter and option handling in sh From 9195ff1e9c2932f28d63053389520ff72ab956d4 Mon Sep 17 00:00:00 2001 From: zhuofuAMZ Date: Sun, 13 Jul 2025 18:04:32 -0700 Subject: [PATCH 08/11] setuptools --- scripts/install_python.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install_python.sh b/scripts/install_python.sh index e94ba1f0daa3..3b8d07e7459a 100644 --- a/scripts/install_python.sh +++ b/scripts/install_python.sh @@ -23,7 +23,7 @@ function install_python { # this will add pip systemlink to pip${PYTHON_MAJOR_VERSION} python -m pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org - python -m pip install --no-cache-dir awscli boto3 requests setuptools>70.0.0 + python -m pip install --no-cache-dir awscli boto3 requests setuptools>=78.1.1 } # idiomatic parameter and option handling in sh From 1ab4a8435fc2818e65cf4bd6432f1604aacf2a87 Mon Sep 17 00:00:00 2001 From: zhuofuAMZ Date: Sun, 13 Jul 2025 18:07:26 -0700 Subject: [PATCH 09/11] fix path --- base/x86_64/gpu/cu128/Dockerfile | 126 ------------------------------- 1 file changed, 126 deletions(-) delete mode 
100644 base/x86_64/gpu/cu128/Dockerfile diff --git a/base/x86_64/gpu/cu128/Dockerfile b/base/x86_64/gpu/cu128/Dockerfile deleted file mode 100644 index 7b811e92e5b9..000000000000 --- a/base/x86_64/gpu/cu128/Dockerfile +++ /dev/null @@ -1,126 +0,0 @@ -ARG PYTHON="python3" -ARG PYTHON_VERSION="3.11.12" -ARG PYTHON_SHORT_VERSION="3.11" -ARG CUDA_MAJOR="12" -ARG CUDA_MINOR="6" -ARG EFA_VERSION="1.42.0" -FROM nvidia/cuda:12.6.3-base-ubuntu22.04 AS base-builder - - -RUN mv /usr/local/cuda/compat /usr/local \ - && apt-get update \ - && apt-get -y upgrade --only-upgrade systemd \ - && apt-get install -y --allow-change-held-packages --no-install-recommends \ - automake \ - build-essential \ - ca-certificates \ - cmake \ - curl \ - emacs \ - git \ - jq \ - libcurl4-openssl-dev \ - libglib2.0-0 \ - libegl1 \ - libgl1 \ - libsm6 \ - libssl-dev \ - libxext6 \ - libxrender-dev \ - zlib1g-dev \ - unzip \ - vim \ - wget \ - libhwloc-dev \ - libgomp1 \ - libibverbs-dev \ - libnuma1 \ - libnuma-dev \ - libtool \ - openssl \ - python3-dev \ - autoconf \ - pkg-config \ - check \ - libsubunit0 \ - libsubunit-dev \ - libffi-dev \ - libbz2-dev \ - liblzma-dev \ - && apt-get autoremove -y \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -############################################################################## -FROM base-builder AS python-builder -ARG PYTHON_VERSION -COPY install_python.sh install_python.sh -RUN bash install_python.sh ${PYTHON_VERSION} && rm install_python.sh - -############################################################################## -FROM base-builder AS cuda-builder -ARG CUDA_MAJOR -ARG CUDA_MINOR -COPY install_cuda.sh install_cuda.sh -RUN bash install_cuda.sh "${CUDA_MAJOR}.${CUDA_MINOR}" && rm install_cuda.sh - -############################################################################## -FROM nvidia/cuda:12.6.3-base-ubuntu22.04 AS final -ARG PYTHON -ARG PYTHON_SHORT_VERSION -ARG CUDA_MAJOR -ARG CUDA_MINOR -ARG EFA_VERSION -LABEL 
maintainer="Amazon AI" -LABEL dlc_major_version="1" -ENV DEBIAN_FRONTEND=noninteractive \ - LANG=C.UTF-8 \ - LC_ALL=C.UTF-8 \ - DLC_CONTAINER_TYPE=base \ - # Python won’t try to write .pyc or .pyo files on the import of source modules - # Force stdin, stdout and stderr to be totally unbuffered. Good for logging - PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONIOENCODING=UTF-8 \ - CUDA_HOME="/usr/local/cuda" \ - PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}" \ - LD_LIBRARY_PATH="/usr/local/lib:/usr/local/cuda/lib64:/opt/amazon/ofi-nccl/lib:/opt/amazon/efa/lib:/opt/amazon/openmpi/lib:${LD_LIBRARY_PATH}" - -WORKDIR / - -# + python and pip packages (awscli, boto3, requests) -COPY --from=python-builder /usr/local/lib/python${PYTHON_SHORT_VERSION} /usr/local/lib/python${PYTHON_SHORT_VERSION} -COPY --from=python-builder /usr/local/include/python${PYTHON_SHORT_VERSION} /usr/local/include/python${PYTHON_SHORT_VERSION} -COPY --from=python-builder /usr/local/bin /usr/local/bin -# + cuda-toolkit, cudnn, nccl -COPY --from=cuda-builder /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} -COPY install_efa.sh install_efa.sh -COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py -COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh - -RUN chmod +x /usr/local/bin/deep_learning_container.py && \ - chmod +x /usr/local/bin/bash_telemetry.sh && \ - echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc && \ - # Install EFA - bash install_efa.sh ${EFA_VERSION} && \ - rm install_efa.sh && \ - # OSS compliance - apt-get update && \ - apt-get upgrade -y && \ - apt-get install -y --allow-change-held-packages --no-install-recommends \ - unzip \ - wget && \ - apt-get clean && \ - HOME_DIR=/root && \ - curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ - unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ - cp 
${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ - chmod +x /usr/local/bin/testOSSCompliance && \ - chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ - ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} && \ - rm -rf ${HOME_DIR}/oss_compliance* && \ - rm -rf /tmp/tmp* && \ - rm -rf /var/lib/apt/lists/* && \ - rm -rf /root/.cache | true - -CMD ["/bin/bash"] \ No newline at end of file From 0a0b4543526fb8818eb4577ebbde9728059efed7 Mon Sep 17 00:00:00 2001 From: zhuofuAMZ Date: Sun, 13 Jul 2025 18:09:10 -0700 Subject: [PATCH 10/11] fix path --- base/x86_64/gpu/cu126/Dockerfile | 126 +++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 base/x86_64/gpu/cu126/Dockerfile diff --git a/base/x86_64/gpu/cu126/Dockerfile b/base/x86_64/gpu/cu126/Dockerfile new file mode 100644 index 000000000000..7b811e92e5b9 --- /dev/null +++ b/base/x86_64/gpu/cu126/Dockerfile @@ -0,0 +1,126 @@ +ARG PYTHON="python3" +ARG PYTHON_VERSION="3.11.12" +ARG PYTHON_SHORT_VERSION="3.11" +ARG CUDA_MAJOR="12" +ARG CUDA_MINOR="6" +ARG EFA_VERSION="1.42.0" +FROM nvidia/cuda:12.6.3-base-ubuntu22.04 AS base-builder + + +RUN mv /usr/local/cuda/compat /usr/local \ + && apt-get update \ + && apt-get -y upgrade --only-upgrade systemd \ + && apt-get install -y --allow-change-held-packages --no-install-recommends \ + automake \ + build-essential \ + ca-certificates \ + cmake \ + curl \ + emacs \ + git \ + jq \ + libcurl4-openssl-dev \ + libglib2.0-0 \ + libegl1 \ + libgl1 \ + libsm6 \ + libssl-dev \ + libxext6 \ + libxrender-dev \ + zlib1g-dev \ + unzip \ + vim \ + wget \ + libhwloc-dev \ + libgomp1 \ + libibverbs-dev \ + libnuma1 \ + libnuma-dev \ + libtool \ + openssl \ + python3-dev \ + autoconf \ + pkg-config \ + check \ + libsubunit0 \ + libsubunit-dev \ + libffi-dev \ + libbz2-dev \ + liblzma-dev \ + && apt-get autoremove -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + 
+############################################################################## +FROM base-builder AS python-builder +ARG PYTHON_VERSION +COPY install_python.sh install_python.sh +RUN bash install_python.sh ${PYTHON_VERSION} && rm install_python.sh + +############################################################################## +FROM base-builder AS cuda-builder +ARG CUDA_MAJOR +ARG CUDA_MINOR +COPY install_cuda.sh install_cuda.sh +RUN bash install_cuda.sh "${CUDA_MAJOR}.${CUDA_MINOR}" && rm install_cuda.sh + +############################################################################## +FROM nvidia/cuda:12.6.3-base-ubuntu22.04 AS final +ARG PYTHON +ARG PYTHON_SHORT_VERSION +ARG CUDA_MAJOR +ARG CUDA_MINOR +ARG EFA_VERSION +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" +ENV DEBIAN_FRONTEND=noninteractive \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + DLC_CONTAINER_TYPE=base \ + # Python won’t try to write .pyc or .pyo files on the import of source modules + # Force stdin, stdout and stderr to be totally unbuffered. 
Good for logging + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONIOENCODING=UTF-8 \ + CUDA_HOME="/usr/local/cuda" \ + PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}" \ + LD_LIBRARY_PATH="/usr/local/lib:/usr/local/cuda/lib64:/opt/amazon/ofi-nccl/lib:/opt/amazon/efa/lib:/opt/amazon/openmpi/lib:${LD_LIBRARY_PATH}" + +WORKDIR / + +# + python and pip packages (awscli, boto3, requests) +COPY --from=python-builder /usr/local/lib/python${PYTHON_SHORT_VERSION} /usr/local/lib/python${PYTHON_SHORT_VERSION} +COPY --from=python-builder /usr/local/include/python${PYTHON_SHORT_VERSION} /usr/local/include/python${PYTHON_SHORT_VERSION} +COPY --from=python-builder /usr/local/bin /usr/local/bin +# + cuda-toolkit, cudnn, nccl +COPY --from=cuda-builder /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} /usr/local/cuda-${CUDA_MAJOR}.${CUDA_MINOR} +COPY install_efa.sh install_efa.sh +COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py +COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh + +RUN chmod +x /usr/local/bin/deep_learning_container.py && \ + chmod +x /usr/local/bin/bash_telemetry.sh && \ + echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc && \ + # Install EFA + bash install_efa.sh ${EFA_VERSION} && \ + rm install_efa.sh && \ + # OSS compliance + apt-get update && \ + apt-get upgrade -y && \ + apt-get install -y --allow-change-held-packages --no-install-recommends \ + unzip \ + wget && \ + apt-get clean && \ + HOME_DIR=/root && \ + curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \ + unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \ + cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \ + chmod +x /usr/local/bin/testOSSCompliance && \ + chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \ + ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} && \ + rm -rf 
${HOME_DIR}/oss_compliance* && \ + rm -rf /tmp/tmp* && \ + rm -rf /var/lib/apt/lists/* && \ + rm -rf /root/.cache | true + +CMD ["/bin/bash"] \ No newline at end of file From f3fd6a85a32837a4fb3253f3e6c3db889c28b17f Mon Sep 17 00:00:00 2001 From: zhuofuAMZ Date: Thu, 17 Jul 2025 00:21:19 -0700 Subject: [PATCH 11/11] setup tools --- scripts/install_python.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/install_python.sh b/scripts/install_python.sh index 3b8d07e7459a..acc572274443 100644 --- a/scripts/install_python.sh +++ b/scripts/install_python.sh @@ -23,7 +23,8 @@ function install_python { # this will add pip systemlink to pip${PYTHON_MAJOR_VERSION} python -m pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org - python -m pip install --no-cache-dir awscli boto3 requests setuptools>=78.1.1 + python -m pip install --no-cache-dir awscli boto3 requests + python -m pip install --no-cache-dir "setuptools>=78.1.1" } # idiomatic parameter and option handling in sh