diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 1962bfd69e21..cd7d59b57a9c 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -37,12 +37,12 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = [] +build_frameworks = ["pytorch"] # By default we build both training and inference containers. Set true/false values to determine which to build. build_training = true -build_inference = true +build_inference = false # Set do_build to "false" to skip builds and test the latest image built by this PR # Note: at least one build is required to set do_build to "false" @@ -65,13 +65,13 @@ ecs_tests = true eks_tests = true ec2_tests = true # Set it to true if you are preparing a Benchmark related PR -ec2_benchmark_tests = false +ec2_benchmark_tests = true ### Set ec2_tests_on_heavy_instances = true to be able to run any EC2 tests that use large/expensive instance types by ### default. If false, these types of tests will be skipped while other tests will run as usual. ### These tests are run in EC2 test jobs, so ec2_tests must be true if ec2_tests_on_heavy_instances is true. ### Off by default (set to false) -ec2_tests_on_heavy_instances = false +ec2_tests_on_heavy_instances = true ### SM specific tests ### On by default sagemaker_local_tests = true @@ -119,7 +119,7 @@ use_scheduler = false ### TRAINING PR JOBS ### # Standard Framework Training -dlc-pr-pytorch-training = "" +dlc-pr-pytorch-training = "pytorch/training/buildspec-2-8-ec2.yml" dlc-pr-tensorflow-2-training = "" dlc-pr-autogluon-training = "" diff --git a/pytorch/training/buildspec-2-8-ec2.yml b/pytorch/training/buildspec-2-8-ec2.yml new file mode 100644 index 000000000000..023d84b6867d --- /dev/null +++ b/pytorch/training/buildspec-2-8-ec2.yml @@ -0,0 +1,72 @@ +account_id: &ACCOUNT_ID +prod_account_id: &PROD_ACCOUNT_ID 763104351884 +region: ®ION +framework: &FRAMEWORK pytorch +version: &VERSION 2.8.0 +short_version: &SHORT_VERSION "2.8" +arch_type: x86 +# autopatch_build: "True" + +repository_info: + training_repository: &TRAINING_REPOSITORY + image_type: &TRAINING_IMAGE_TYPE training + root: !join [ *FRAMEWORK, "/", *TRAINING_IMAGE_TYPE ] + repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK, "-", *TRAINING_IMAGE_TYPE ] + repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ] + release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK, "-", *TRAINING_IMAGE_TYPE ] + release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ] + +context: + training_context: &TRAINING_CONTEXT + start_cuda_compat: + source: docker/build_artifacts/start_cuda_compat.sh + target: start_cuda_compat.sh + dockerd_entrypoint: + source: docker/build_artifacts/dockerd_entrypoint.sh + target: dockerd_entrypoint.sh + changehostname: + source: docker/build_artifacts/changehostname.c + target: changehostname.c + start_with_right_hostname: + source: docker/build_artifacts/start_with_right_hostname.sh + target: start_with_right_hostname.sh + example_mnist_file: + source: docker/build_artifacts/mnist.py + target: mnist.py + deep_learning_container: + 
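+      # Each source/target pair in this context block stages a host file into the
+      # Docker build context under the target name; e.g. this entry exposes
+      # ../../src/deep_learning_container.py (see `source` below) to the Dockerfiles'
+      # `COPY deep_learning_container.py ...` step.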
source: ../../src/deep_learning_container.py + target: deep_learning_container.py + +images: + BuildEC2CPUPTTrainPy3DockerImage: + <<: *TRAINING_REPOSITORY + build: &PYTORCH_CPU_TRAINING_PY3 false + image_size_baseline: 7200 + device_type: &DEVICE_TYPE cpu + python_version: &DOCKER_PYTHON_VERSION py3 + tag_python_version: &TAG_PYTHON_VERSION py312 + os_version: &OS_VERSION ubuntu22.04 + tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-ec2" ] + latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-ec2" ] + # skip_build: "False" + docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ] + target: ec2 + context: + <<: *TRAINING_CONTEXT + BuildEC2GPUPTTrainPy3cu129DockerImage: + <<: *TRAINING_REPOSITORY + build: &PYTORCH_GPU_TRAINING_PY3 false + image_size_baseline: 24000 + device_type: &DEVICE_TYPE gpu + python_version: &DOCKER_PYTHON_VERSION py3 + tag_python_version: &TAG_PYTHON_VERSION py312 + cuda_version: &CUDA_VERSION cu129 + os_version: &OS_VERSION ubuntu22.04 + tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ] + latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ] + # skip_build: "False" + docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile., + *DEVICE_TYPE ] + target: ec2 + context: + <<: *TRAINING_CONTEXT diff --git a/pytorch/training/buildspec-2-8-sm.yml b/pytorch/training/buildspec-2-8-sm.yml new file mode 100644 index 000000000000..aa7372fb0ad5 --- /dev/null +++ b/pytorch/training/buildspec-2-8-sm.yml @@ -0,0 +1,72 @@ +account_id: &ACCOUNT_ID +prod_account_id: &PROD_ACCOUNT_ID 763104351884 +region: ®ION +framework: &FRAMEWORK pytorch +version: &VERSION 2.8.0 +short_version: &SHORT_VERSION "2.8" +arch_type: x86 +# autopatch_build: "True" + +repository_info: + training_repository: &TRAINING_REPOSITORY + image_type: &TRAINING_IMAGE_TYPE training + root: !join [ *FRAMEWORK, "/", *TRAINING_IMAGE_TYPE ] + repository_name: &REPOSITORY_NAME !join [ pr, "-", *FRAMEWORK, "-", *TRAINING_IMAGE_TYPE ] + repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ] + release_repository_name: &RELEASE_REPOSITORY_NAME !join [ *FRAMEWORK, "-", *TRAINING_IMAGE_TYPE ] + release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ] + +context: + training_context: &TRAINING_CONTEXT + start_cuda_compat: + source: docker/build_artifacts/start_cuda_compat.sh + target: start_cuda_compat.sh + dockerd_entrypoint: + source: docker/build_artifacts/dockerd_entrypoint.sh + target: dockerd_entrypoint.sh + changehostname: + source: docker/build_artifacts/changehostname.c + target: changehostname.c + start_with_right_hostname: + source: docker/build_artifacts/start_with_right_hostname.sh + target: start_with_right_hostname.sh + example_mnist_file: + source: docker/build_artifacts/mnist.py + target: mnist.py + deep_learning_container: + source: ../../src/deep_learning_container.py + target: deep_learning_container.py + +images: + BuildSageMakerCPUPTTrainPy3DockerImage: + <<: *TRAINING_REPOSITORY + build: &PYTORCH_CPU_TRAINING_PY3 false + image_size_baseline: 7200 + device_type: &DEVICE_TYPE cpu + python_version: &DOCKER_PYTHON_VERSION py3 + tag_python_version: &TAG_PYTHON_VERSION py312 + 
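+    # Illustrative resolution, using the anchors defined above and just below
+    # (VERSION=2.8.0, DEVICE_TYPE=cpu, TAG_PYTHON_VERSION=py312, OS_VERSION=ubuntu22.04):
+    # the !join constructor concatenates its list items, so `tag` resolves to
+    # "2.8.0-cpu-py312-ubuntu22.04-sagemaker".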
os_version: &OS_VERSION ubuntu22.04 + tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ] + latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *OS_VERSION, "-sagemaker" ] + # skip_build: "False" + docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /Dockerfile., *DEVICE_TYPE ] + target: sagemaker + context: + <<: *TRAINING_CONTEXT + BuildSageMakerGPUPTTrainPy3DockerImage: + <<: *TRAINING_REPOSITORY + build: &PYTORCH_GPU_TRAINING_PY3 false + image_size_baseline: 24000 + device_type: &DEVICE_TYPE gpu + python_version: &DOCKER_PYTHON_VERSION py3 + tag_python_version: &TAG_PYTHON_VERSION py312 + cuda_version: &CUDA_VERSION cu129 + os_version: &OS_VERSION ubuntu22.04 + tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ] + latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-sagemaker" ] + # skip_build: "False" + docker_file: !join [ docker/, *SHORT_VERSION, /, *DOCKER_PYTHON_VERSION, /, *CUDA_VERSION, /Dockerfile., + *DEVICE_TYPE ] + target: sagemaker + context: + <<: *TRAINING_CONTEXT diff --git a/pytorch/training/buildspec.yml b/pytorch/training/buildspec.yml index b332931b2e40..78ac196ed806 100644 --- a/pytorch/training/buildspec.yml +++ b/pytorch/training/buildspec.yml @@ -1 +1 @@ -buildspec_pointer: buildspec-2-7-sm.yml +buildspec_pointer: buildspec-2-8-ec2.yml diff --git a/pytorch/training/docker/2.8/py3/Dockerfile.cpu b/pytorch/training/docker/2.8/py3/Dockerfile.cpu new file mode 100644 index 000000000000..7d78611d3290 --- /dev/null +++ b/pytorch/training/docker/2.8/py3/Dockerfile.cpu @@ -0,0 +1,364 @@ +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.12.10 +ARG PYTHON_SHORT_VERSION=3.12 +ARG PYTORCH_VERSION=2.8.0 + +ARG OPEN_MPI_VERSION=4.1.7 + +ARG TORCHTNT_VERSION=0.2.4 +ARG TORCHDATA_VERSION=0.11.0 +ARG TORCHAUDIO_VERSION=2.8.0 +ARG TORCHVISION_VERSION=0.23.0 + +FROM ubuntu:22.04 AS base_image + +# This arg required to stop docker build waiting for region configuration while installing tz data from ubuntu 20 +ENV DEBIAN_FRONTEND=noninteractive +ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" + +RUN apt-get update \ + && apt-get upgrade -y \ + && apt-get autoremove -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +################################################################# +# ____ +# / ___| ___ _ __ ___ _ __ ___ ___ _ __ +# | | / _ \| '_ ` _ \| '_ ` _ \ / _ \| '_ \ +# | |___ (_) | | | | | | | | | | | (_) | | | | +# \____|\___/|_| |_| |_|_| |_| |_|\___/|_| |_| +# ___ ____ _ +# |_ _|_ __ ___ __ _ __ _ ___ | _ \ ___ ___(_)_ __ ___ +# | || '_ ` _ \ / _` |/ _` |/ _ \ | |_) / _ \/ __| | '_ \ / _ \ +# | || | | | | | (_| | (_| | __/ | _ < __/ (__| | |_) | __/ +# |___|_| |_| |_|\__,_|\__, |\___| |_| \_\___|\___|_| .__/ \___| +# |___/ |_| +################################################################# + +FROM base_image AS common + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +ARG PYTHON +ARG PYTHON_VERSION +ARG PYTHON_SHORT_VERSION + +ARG OPEN_MPI_VERSION + +ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" + +# Python won’t try to write .pyc or .pyo files on the import of source modules +# Force stdin, stdout and stderr to be totally unbuffered. 
Good for logging +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +ENV PYTHONIOENCODING=UTF-8 +ENV LANG=C.UTF-8 +ENV LC_ALL=C.UTF-8 + +ENV DLC_CONTAINER_TYPE=training +WORKDIR / + +RUN apt-get update \ + && apt-get -y upgrade --only-upgrade systemd \ + && apt-get install -y --no-install-recommends \ + automake \ + build-essential \ + ca-certificates \ + cmake \ + curl \ + emacs \ + git \ + jq \ + libcurl4-openssl-dev \ + libglib2.0-0 \ + libgl1-mesa-glx \ + libsm6 \ + libssl-dev \ + libxext6 \ + libxrender-dev \ + zlib1g-dev \ + unzip \ + vim \ + wget \ + libbz2-dev \ + libreadline-dev \ + libsqlite3-dev \ + llvm \ + libncurses5-dev \ + libncursesw5-dev \ + xz-utils \ + tk-dev \ + liblzma-dev \ + libffi-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install Open MPI +RUN wget https://www.open-mpi.org/software/ompi/v4.1/downloads/openmpi-${OPEN_MPI_VERSION}.tar.gz \ + && gunzip -c openmpi-${OPEN_MPI_VERSION}.tar.gz | tar xf - \ + && cd openmpi-${OPEN_MPI_VERSION} \ + && ./configure --prefix=/home/.openmpi \ + && make all install \ + && cd .. \ + && rm openmpi-${OPEN_MPI_VERSION}.tar.gz \ + && rm -rf openmpi-${OPEN_MPI_VERSION} + +# The ENV variables declared below are changed in the previous section +# Grouping these ENV variables in the first section causes +# ompi_info to fail. This is only observed in CPU containers +ENV PATH="/home/.openmpi/bin:${PATH}" +ENV LD_LIBRARY_PATH="/home/.openmpi/lib:${LD_LIBRARY_PATH}" +RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value + +# Install OpenSSH for MPI to communicate between containers, allow OpenSSH to talk to containers without asking for confirmation +RUN apt-get update \ + && apt-get install -y --no-install-recommends openssh-client openssh-server \ + && mkdir -p /var/run/sshd \ + && cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \ + && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \ + && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Configure OpenSSH so that nodes can communicate with each other +RUN mkdir -p /var/run/sshd \ + && sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd + +RUN rm -rf /root/.ssh/ \ + && mkdir -p /root/.ssh/ \ + && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \ + && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \ + && printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config + + +# install python +RUN cd /tmp/ \ +&& wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz \ +&& tar xzf Python-${PYTHON_VERSION}.tgz \ +&& cd Python-${PYTHON_VERSION} \ +&& ./configure --enable-optimizations --with-lto --with-computed-gotos --with-system-ffi \ +&& make -j "$(nproc)" \ +&& make altinstall \ +&& cd .. 
\
+&& rm -rf Python-${PYTHON_VERSION} \
+&& rm Python-${PYTHON_VERSION}.tgz \
+&& ln -s /usr/local/bin/python${PYTHON_SHORT_VERSION} /usr/local/bin/python \
+&& ln -s /usr/local/bin/python${PYTHON_SHORT_VERSION} /usr/local/bin/python3 \
+# This installation generates a .python_history file in the root directory, which causes the sanity check to fail
+&& rm -f /root/.python_history
+
+# Python Path
+ENV PATH="/usr/local/bin:${PATH}"
+
+# this will add a pip symlink to pip${PYTHON_SHORT_VERSION}
+RUN python -m pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org
+
+# Install common packages
+RUN pip install --no-cache-dir \
+    cython \
+    cryptography \
+    pyOpenSSL \
+    pybind11 \
+    mkl \
+    mkl-include \
+    parso \
+    typing \
+    charset-normalizer \
+    packaging \
+    boto3 \
+    PyYAML \
+    numpy \
+    scipy \
+    click \
+    psutil \
+    ipython \
+    ipykernel \
+    pillow \
+    h5py \
+    fsspec \
+    "idna>=3.7" \
+    "tqdm>=4.66.3" \
+    "requests>=2.32.0" \
+    "setuptools>=70.0.0" \
+    "urllib3>=2.5.0" \
+    "awscli" \
+    # opencv-python 4.12.0.88 requires numpy<2.3.0, which is not compatible with the previous prod image (2.3.1)
+    opencv-python==4.11.0.86 \
+    mpi4py \
+    jinja2>=3.1.6 \
+    tornado>=6.5.1
+
+RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.8/license.txt
+
+COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh
+RUN chmod +x /usr/local/bin/bash_telemetry.sh
+RUN echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc
+
+# Removing the cache as it is needed for security verification
+RUN rm -rf /root/.cache | true
+
+########################################################
+#  _____ ____ ____    ___
+# | ____/ ___|___ \  |_ _|_ __ ___   __ _  __ _  ___
+# |  _|| |     __) |  | || '_ ` _ \ / _` |/ _` |/ _ \
+# | |__| |___ / __/   | || | | | | | (_| | (_| |  __/
+# |_____\____|_____| |___|_| |_| |_|\__,_|\__, |\___|
+#                                          |___/
+#  ____           _
+# |  _ \ ___  ___(_)_ __   ___
+# | |_) / _ \/ __| | '_ \ / _ \
+# |  _ <  __/ (__| | |_) |  __/
+# |_| \_\___|\___|_| .__/ \___|
+#                  |_|
+########################################################
+
+FROM common AS ec2
+
+ARG PYTHON
+ARG PYTHON_SHORT_VERSION
+ARG PYTORCH_VERSION
+ARG TORCHTNT_VERSION
+ARG TORCHDATA_VERSION
+ARG TORCHAUDIO_VERSION
+ARG TORCHVISION_VERSION
+
+WORKDIR /
+
+# Install PyTorch
+RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \
+    torchvision==${TORCHVISION_VERSION} \
+    torchaudio==${TORCHAUDIO_VERSION} \
+    --index-url https://download.pytorch.org/whl/cpu \
+    && pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \
+    torchdata==${TORCHDATA_VERSION} \
+    s3torchconnector \
+    fastai \
+    accelerate \
+    # pin numpy requirement for fastai dependency
+    # requires explicit declaration of spacy, thinc, blis
+    spacy \
+    thinc \
+    blis \
+    numpy \
+    && pip uninstall -y dataclasses
+
+RUN HOME_DIR=/root \
+    && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
+    && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
+    && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
+    && chmod +x /usr/local/bin/testOSSCompliance \
+    && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
+    && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
+    && rm -rf ${HOME_DIR}/oss_compliance* \
+    && rm -rf /tmp/tmp*
+
+# Removing the cache as it is needed for security 
verification +RUN rm -rf /root/.cache | true + +COPY dockerd_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh +RUN chmod +x /usr/local/bin/dockerd_entrypoint.sh +ENTRYPOINT ["bash", "-m", "dockerd_entrypoint.sh"] + +# Starts framework +CMD ["/bin/bash"] + +################################################################# +# ____ __ __ _ +# / ___| __ _ __ _ ___| \/ | __ _| | _____ _ __ +# \___ \ / _` |/ _` |/ _ \ |\/| |/ _` | |/ / _ \ '__| +# ___) | (_| | (_| | __/ | | | (_| | < __/ | +# |____/ \__,_|\__, |\___|_| |_|\__,_|_|\_\___|_| +# |___/ +# ___ ____ _ +# |_ _|_ __ ___ __ _ __ _ ___ | _ \ ___ ___(_)_ __ ___ +# | || '_ ` _ \ / _` |/ _` |/ _ \ | |_) / _ \/ __| | '_ \ / _ \ +# | || | | | | | (_| | (_| | __/ | _ < __/ (__| | |_) | __/ +# |___|_| |_| |_|\__,_|\__, |\___| |_| \_\___|\___|_| .__/ \___| +# |___/ |_| +################################################################# + +FROM common AS sagemaker + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +ARG PYTHON +ARG PYTHON_SHORT_VERSION +ARG PYTORCH_VERSION +ARG TORCHTNT_VERSION +ARG TORCHDATA_VERSION +ARG TORCHAUDIO_VERSION +ARG TORCHVISION_VERSION + +ENV SAGEMAKER_TRAINING_MODULE=sagemaker_pytorch_container.training:main + +WORKDIR / + +# Install PyTorch +RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \ + torchvision==${TORCHVISION_VERSION} \ + torchaudio==${TORCHAUDIO_VERSION} \ + --index-url https://download.pytorch.org/whl/cpu \ + && pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \ + torchdata==${TORCHDATA_VERSION} \ + s3torchconnector \ + fastai \ + accelerate \ + # pin numpy requirement for fastai dependency + # requires explicit declaration of spacy, thic, blis + spacy \ + thinc==8.3.4 \ + blis \ + numpy \ + && pip uninstall -y dataclasses + +# Install SM packages +RUN pip install --no-cache-dir -U \ + smclarify \ + "sagemaker>=2" \ + sagemaker-experiments \ + sagemaker-pytorch-training \ + sagemaker-training + +# Install extra packages +RUN pip install --no-cache-dir -U \ + bokeh \ + imageio \ + numba \ + pandas \ + plotly \ + scikit-learn \ + seaborn \ + shap \ + # pinned for sagemaker==2.233.0 + cloudpickle + +# Copy workaround script for incorrect hostname +COPY changehostname.c / +COPY start_with_right_hostname.sh /usr/local/bin/start_with_right_hostname.sh + +RUN chmod +x /usr/local/bin/start_with_right_hostname.sh + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* \ + && rm -rf /tmp/tmp* + +# Removing the cache as it is needed for security verification +RUN rm -rf /root/.cache | true + +ENTRYPOINT ["bash", "-m", "start_with_right_hostname.sh"] +CMD ["/bin/bash"] + diff --git a/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu b/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu new file mode 100644 index 000000000000..70d9ba03c31f --- /dev/null +++ b/pytorch/training/docker/2.8/py3/cu129/Dockerfile.gpu @@ -0,0 +1,463 @@ +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.12.10 +ARG PYTHON_SHORT_VERSION=3.12 +ARG PYTORCH_VERSION=2.8.0 +ARG TORCHTNT_VERSION=0.2.4 +ARG TORCHAUDIO_VERSION=2.8.0 +ARG 
TORCHVISION_VERSION=0.23.0 +ARG TORCHDATA_VERSION=0.11.0 + +ARG CUDA_VERSION=12.9.1 +ARG CUDNN_VERSION=9.10.2.21 +ARG NCCL_VERSION=2.27.3 +ARG EFA_VERSION=1.43.1 +ARG GDRCOPY_VERSION=2.5 +ARG TE_VERSION=2.5 +ARG FLASH_ATTN_VERSION=2.8.2 + +FROM nvidia/cuda:12.9.1-base-ubuntu22.04 AS base_image + +# This arg required to stop docker build waiting for region configuration while installing tz data from ubuntu 20 +ENV DEBIAN_FRONTEND=noninteractive +# ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" + +RUN apt-get update \ + && apt-get upgrade -y \ + && apt-get autoremove -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +################################################################# +# ____ +# / ___| ___ _ __ ___ _ __ ___ ___ _ __ +# | | / _ \| '_ ` _ \| '_ ` _ \ / _ \| '_ \ +# | |___ (_) | | | | | | | | | | | (_) | | | | +# \____|\___/|_| |_| |_|_| |_| |_|\___/|_| |_| +# ___ ____ _ +# |_ _|_ __ ___ __ _ __ _ ___ | _ \ ___ ___(_)_ __ ___ +# | || '_ ` _ \ / _` |/ _` |/ _ \ | |_) / _ \/ __| | '_ \ / _ \ +# | || | | | | | (_| | (_| | __/ | _ < __/ (__| | |_) | __/ +# |___|_| |_| |_|\__,_|\__, |\___| |_| \_\___|\___|_| .__/ \___| +# |___/ |_| +################################################################# + +FROM base_image AS common + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +ARG PYTHON +ARG PYTHON_VERSION +ARG PYTHON_SHORT_VERSION +ARG PYTORCH_VERSION +ARG TORCHDATA_VERSION +ARG TORCHAUDIO_VERSION +ARG TORCHVISION_VERSION +ARG TORCHTNT_VERSION + +ARG CUDA_VERSION +ARG CUDNN_VERSION +ARG NCCL_VERSION +ARG EFA_VERSION + +ENV CUDA_HOME="/usr/local/cuda" +# ENV LD_LIBRARY_PATH="/usr/local/lib:${LD_LIBRARY_PATH}" +# ENV LD_LIBRARY_PATH="/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" +ENV PATH="${CUDA_HOME}/bin:${PATH}" +ENV EFA_PATH="/opt/amazon/efa" +ENV OPEN_MPI_PATH="/opt/amazon/openmpi" + +# Python won’t try to write .pyc or .pyo files on the import of source modules +# Force stdin, stdout and stderr to be totally unbuffered. 
Good for logging +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +ENV PYTHONIOENCODING=UTF-8 +ENV LANG=C.UTF-8 +ENV LC_ALL=C.UTF-8 + +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" + +ENV DLC_CONTAINER_TYPE=training +WORKDIR / + +RUN apt-get update \ + && apt-get -y upgrade --only-upgrade systemd \ + && apt-get install -y --allow-change-held-packages --no-install-recommends \ + automake \ + build-essential \ + ca-certificates \ + cmake \ + curl \ + emacs \ + git \ + jq \ + libcurl4-openssl-dev \ + libglib2.0-0 \ + libgl1-mesa-glx \ + libsm6 \ + libssl-dev \ + libxext6 \ + libxrender-dev \ + zlib1g-dev \ + unzip \ + vim \ + wget \ + cuda-toolkit-12=${CUDA_VERSION}-1 \ + libcudnn9-cuda-12=${CUDNN_VERSION}-1 \ + libcudnn9-dev-cuda-12=${CUDNN_VERSION}-1 \ + libcudnn9-headers-cuda-12=${CUDNN_VERSION}-1 \ + libhwloc-dev \ + libgomp1 \ + libibverbs-dev \ + libnuma1 \ + libnuma-dev \ + libtool \ + openssl \ + python3-dev \ + autoconf \ + pkg-config \ + check \ + libsubunit0 \ + libsubunit-dev \ + libbz2-dev \ + libreadline-dev \ + libsqlite3-dev \ + llvm \ + libncurses5-dev \ + libncursesw5-dev \ + xz-utils \ + tk-dev \ + liblzma-dev \ + libffi-dev \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean + + # patch nvjpeg to fix CVE +RUN mkdir -p /tmp/nvjpeg \ +&& cd /tmp/nvjpeg \ +&& wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ +&& tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ +&& rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ +&& rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \ +&& cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \ +&& cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \ +&& rm -rf /tmp/nvjpeg \ +# patch cuobjdump and nvdisasm +&& rm -rf /usr/local/cuda/bin/cuobjdump* \ +&& rm -rf /usr/local/cuda/bin/nvdisasm* + +# For EFA, below flags are needed to install EFA on docker image +# -n, --no-verify Skip EFA device verification and test +# -l, --skip-limit-conf Skip EFA limit configuration +# -k, --skip-kmod Skip EFA kmod installation +# start from 0.38.0 EFA now bundles the AWS OFI NCCL plugin, +# which can now be found in /opt/amazon/ofi-nccl/lib/x86_64-linux-gnu rather than the original /opt/aws-ofi-nccl/. 
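+# An illustrative post-install sanity check (assumed paths, not run at build time):
+#   ls /opt/amazon/ofi-nccl/lib/x86_64-linux-gnu/libnccl-net.so   # OFI NCCL plugin present
+#   /opt/amazon/efa/bin/fi_info -p efa                            # libfabric reports the EFA provider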
+RUN mkdir /tmp/efa \ + && cd /tmp/efa \ + && curl -O https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \ + && tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \ + && cd aws-efa-installer \ + && apt-get update \ + && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \ + && rm -rf /tmp/efa \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean + +ENV PATH="${OPEN_MPI_PATH}/bin:${EFA_PATH}/bin:${PATH}" +ENV LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/x86_64-linux-gnu:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" + +# Configure Open MPI and configure NCCL parameters +RUN mv ${OPEN_MPI_PATH}/bin/mpirun ${OPEN_MPI_PATH}/bin/mpirun.real \ + && echo '#!/bin/bash' > ${OPEN_MPI_PATH}/bin/mpirun \ + && echo "${OPEN_MPI_PATH}/bin/mpirun.real --allow-run-as-root \"\$@\"" >> ${OPEN_MPI_PATH}/bin/mpirun \ + && chmod a+x ${OPEN_MPI_PATH}/bin/mpirun \ + && echo "hwloc_base_binding_policy = none" >> ${OPEN_MPI_PATH}/etc/openmpi-mca-params.conf \ + && echo "rmaps_base_mapping_policy = slot" >> ${OPEN_MPI_PATH}/etc/openmpi-mca-params.conf \ + && echo NCCL_DEBUG=INFO >> /etc/nccl.conf \ + && echo NCCL_SOCKET_IFNAME=^lo,docker >> /etc/nccl.conf + +# Install OpenSSH for MPI to communicate between containers, allow OpenSSH to talk to containers without asking for confirmation +RUN apt-get update \ + && apt-get install -y --no-install-recommends openssh-client openssh-server \ + && mkdir -p /var/run/sshd \ + && cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new \ + && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \ + && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean + +# Configure OpenSSH so that nodes can communicate with each other +RUN mkdir -p /var/run/sshd \ + && sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd + +RUN rm -rf /root/.ssh/ \ + && mkdir -p /root/.ssh/ \ + && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \ + && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \ + && printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config + +# install python +RUN cd /tmp/ \ +&& wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz \ +&& tar xzf Python-${PYTHON_VERSION}.tgz \ +&& cd Python-${PYTHON_VERSION} \ +&& ./configure --enable-optimizations --with-lto --with-computed-gotos --with-system-ffi \ +&& make -j "$(nproc)" \ +&& make altinstall \ +&& cd .. 
\
+&& rm -rf Python-${PYTHON_VERSION} \
+&& rm Python-${PYTHON_VERSION}.tgz \
+&& ln -s /usr/local/bin/python${PYTHON_SHORT_VERSION} /usr/local/bin/python \
+&& ln -s /usr/local/bin/python${PYTHON_SHORT_VERSION} /usr/local/bin/python3 \
+# This installation generates a .python_history file in the root directory, which causes the sanity check to fail
+&& rm -f /root/.python_history
+
+# Python Path
+ENV PATH="/usr/local/bin:${PATH}"
+
+# this will add a pip symlink to pip${PYTHON_SHORT_VERSION}
+RUN python -m pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org
+
+# Install common packages
+RUN pip install --no-cache-dir \
+    cython \
+    cryptography \
+    pyOpenSSL \
+    pybind11 \
+    mkl \
+    mkl-include \
+    parso \
+    typing \
+    charset-normalizer \
+    packaging \
+    boto3 \
+    PyYAML \
+    numpy \
+    scipy \
+    click \
+    psutil \
+    ipython \
+    ipykernel \
+    pillow \
+    h5py \
+    fsspec \
+    "idna>=3.7" \
+    "tqdm>=4.66.3" \
+    "requests>=2.32.0" \
+    "setuptools>=70.0.0" \
+    "urllib3>=2.5.0" \
+    "awscli" \
+    ninja \
+    opencv-python==4.11.0.86 \
+    mpi4py \
+    jinja2>=3.1.6 \
+    tornado>=6.5.1
+
+# Install PyTorch
+RUN pip install --no-cache-dir -U torch==${PYTORCH_VERSION} \
+    torchvision==${TORCHVISION_VERSION} \
+    torchaudio==${TORCHAUDIO_VERSION} \
+    --index-url https://download.pytorch.org/whl/cu129 \
+    && pip install --no-cache-dir -U torchtnt==${TORCHTNT_VERSION} \
+    torchdata==${TORCHDATA_VERSION} \
+    triton \
+    s3torchconnector \
+    fastai \
+    accelerate \
+    # pin numpy requirement for fastai dependency
+    # requires explicit declaration of spacy, thinc, blis
+    spacy \
+    thinc \
+    blis \
+    numpy \
+    && pip uninstall -y dataclasses
+
+RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.8/license.txt
+
+COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+COPY start_cuda_compat.sh /usr/local/bin/start_cuda_compat.sh
+RUN chmod +x /usr/local/bin/start_cuda_compat.sh
+
+COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh
+RUN chmod +x /usr/local/bin/bash_telemetry.sh
+RUN echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc
+
+# Removing the cache as it is needed for security verification
+RUN rm -rf /root/.cache | true
+
+########################################################
+#  _____ ____ ____    ___
+# | ____/ ___|___ \  |_ _|_ __ ___   __ _  __ _  ___
+# |  _|| |     __) |  | || '_ ` _ \ / _` |/ _` |/ _ \
+# | |__| |___ / __/   | || | | | | | (_| | (_| |  __/
+# |_____\____|_____| |___|_| |_| |_|\__,_|\__, |\___|
+#                                          |___/
+#  ____           _
+# |  _ \ ___  ___(_)_ __   ___
+# | |_) / _ \/ __| | '_ \ / _ \
+# |  _ <  __/ (__| | |_) |  __/
+# |_| \_\___|\___|_| .__/ \___|
+#                  |_|
+########################################################
+
+FROM common AS ec2
+
+ARG PYTHON
+ARG PYTHON_SHORT_VERSION
+ARG NCCL_VERSION
+ARG GDRCOPY_VERSION
+ARG TE_VERSION
+ARG FLASH_ATTN_VERSION
+
+WORKDIR /
+
+
+# Install GDRCopy which is a dependency of SM Distributed DataParallel binary
+# The test binaries requires cuda driver library which could be found in conda
+# So update the linker path to point to it to avoid -Lcuda not found
+RUN cd /tmp \
+    && git clone https://github.com/NVIDIA/gdrcopy.git -b v${GDRCOPY_VERSION} \
+    && cd gdrcopy \
+    && sed -ie '12s@$@ -L $(CUDA)/lib64/stubs@' tests/Makefile \
+    && CUDA=${CUDA_HOME} make install \
+    && rm -rf /tmp/gdrcopy
+
+# Install NCCL
+RUN cd /tmp \
+    && git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION}-1 \
+    && cd nccl \
+    && make -j64 
src.build BUILDDIR=/usr/local \ + && rm -rf /tmp/nccl + +# Install flash attn and NVIDIA transformer engine. +# Optionally set NVTE_FRAMEWORK to avoid bringing in additional frameworks during TE install +ENV NVTE_FRAMEWORK=pytorch +# Install flash-attn using instructions from https://github.com/Dao-AILab/flash-attention#installation-and-features +# Set MAX_JOBS=4 to avoid OOM issues in installation process +RUN MAX_JOBS=4 pip install --no-cache-dir flash-attn==${FLASH_ATTN_VERSION} --no-build-isolation +# Install TE using instructions from https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/installation.html +RUN pip install --no-cache-dir git+https://github.com/NVIDIA/TransformerEngine.git@release_v${TE_VERSION} --no-build-isolation + +COPY dockerd_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh +RUN chmod +x /usr/local/bin/dockerd_entrypoint.sh + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* \ + && rm -rf /tmp/tmp* + +# Removing the cache as it is needed for security verification +RUN rm -rf /root/.cache | true + +ENTRYPOINT ["bash", "-m", "dockerd_entrypoint.sh"] +CMD ["/bin/bash"] + + +################################################################# +# ____ __ __ _ +# / ___| __ _ __ _ ___| \/ | __ _| | _____ _ __ +# \___ \ / _` |/ _` |/ _ \ |\/| |/ _` | |/ / _ \ '__| +# ___) | (_| | (_| | __/ | | | (_| | < __/ | +# |____/ \__,_|\__, |\___|_| |_|\__,_|_|\_\___|_| +# |___/ +# ___ ____ _ +# |_ _|_ __ ___ __ _ __ _ ___ | _ \ ___ ___(_)_ __ ___ +# | || '_ ` _ \ / _` |/ _` |/ _ \ | |_) / _ \/ __| | '_ \ / _ \ +# | || | | | | | (_| | (_| | __/ | _ < __/ (__| | |_) | __/ +# |___|_| |_| |_|\__,_|\__, |\___| |_| \_\___|\___|_| .__/ \___| +# |___/ |_| +################################################################# + +FROM common AS sagemaker + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +ENV SAGEMAKER_TRAINING_MODULE=sagemaker_pytorch_container.training:main + +ARG PYTHON +ARG PYTHON_SHORT_VERSION +ARG NCCL_VERSION +ARG GDRCOPY_VERSION +ARG TE_VERSION +ARG FLASH_ATTN_VERSION + +WORKDIR / + +# Install GDRCopy which is a dependency of SM Distributed DataParallel binary +# The test binaries requires cuda driver library which could be found in conda +# So update the linker path to point to it to avoid -Lcuda not found +RUN cd /tmp \ + && git clone https://github.com/NVIDIA/gdrcopy.git -b v${GDRCOPY_VERSION} \ + && cd gdrcopy \ + && sed -ie '12s@$@ -L $(CUDA)/lib64/stubs@' tests/Makefile \ + && CUDA=${CUDA_HOME} make install \ + && rm -rf /tmp/gdrcopy + +# Install NCCL +RUN cd /tmp \ + && git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION}-1 \ + && cd nccl \ + && make -j64 src.build BUILDDIR=/usr/local \ + && rm -rf /tmp/nccl + +RUN pip uninstall -y ninja && pip install ninja + +# Install flash attn and NVIDIA transformer engine. 
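+# (Both are compiled against the torch build installed in the common stage above; an
+# import check such as `python -c "import flash_attn, transformer_engine.pytorch"` is
+# a reasonable smoke test for the resulting image.)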
+# Optionally set NVTE_FRAMEWORK to avoid bringing in additional frameworks during TE install +ENV NVTE_FRAMEWORK=pytorch +# Install flash-attn using instructions from https://github.com/Dao-AILab/flash-attention#installation-and-features +# Set MAX_JOBS=4 to avoid OOM issues in installation process +RUN MAX_JOBS=4 pip install --no-cache-dir flash-attn==${FLASH_ATTN_VERSION} --no-build-isolation +# Install TE using instructions from https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/installation.html +RUN pip install --no-cache-dir git+https://github.com/NVIDIA/TransformerEngine.git@release_v${TE_VERSION} --no-build-isolation + +# Install SM packages +RUN pip install --no-cache-dir -U \ + smclarify \ + "sagemaker>=2" \ + sagemaker-experiments \ + sagemaker-pytorch-training \ + sagemaker-training + +# Install extra packages +RUN pip install --no-cache-dir -U \ + bokeh \ + imageio \ + numba \ + pandas \ + plotly \ + shap \ + scikit-learn \ + seaborn \ + # pinned for sagemaker==2.233.0 + cloudpickle + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* \ + && rm -rf /tmp/tmp* + +# Removing the cache as it is needed for security verification +RUN rm -rf /root/.cache | true + +# Copy workaround script for incorrect hostname +COPY changehostname.c / +COPY start_with_right_hostname.sh /usr/local/bin/start_with_right_hostname.sh +RUN chmod +x /usr/local/bin/start_with_right_hostname.sh + +ENTRYPOINT ["bash", "-m", "start_with_right_hostname.sh"] +CMD ["/bin/bash"] diff --git a/test/dlc_tests/conftest.py b/test/dlc_tests/conftest.py index 788057ad4b9e..f18a289e5f94 100644 --- a/test/dlc_tests/conftest.py +++ b/test/dlc_tests/conftest.py @@ -55,6 +55,7 @@ # ECR repo name fixtures # PyTorch "pytorch_training", + "pytorch_training___2__8", "pytorch_training___2__7", "pytorch_training___2__6", "pytorch_training___2__5", @@ -943,6 +944,7 @@ def skip_smdebug_v1_test(request): ">=2.4,<2.6": ["cpu", "cu124"], ">=2.6,<2.7.1": ["cpu", "cu126"], ">=2.7.1,<2.8": ["cpu", "cu128"], + ">=2.8,<2.9": ["cpu", "cu129"], } if _validate_pytorch_framework_version(request, image_uri, "skip_smdebug_v1_test", skip_dict): pytest.skip(f"SM Profiler v1 is on path for deprecation, skipping test") @@ -967,6 +969,7 @@ def skip_dgl_test(request): ">=2.4,<2.6": ["cpu", "cu124"], ">=2.6,<2.7.1": ["cpu", "cu126"], ">=2.7.1,<2.8": ["cpu", "cu128"], + ">=2.8,<2.9": ["cpu", "cu129"], } if _validate_pytorch_framework_version(request, image_uri, "skip_dgl_test", skip_dict): pytest.skip(f"DGL binaries are removed, skipping test") @@ -1032,6 +1035,7 @@ def skip_serialized_release_pt_test(request): ">=2.4,<2.6": ["cpu", "cu124"], ">=2.6,<2.7.1": ["cpu", "cu126"], ">=2.7.1,<2.8": ["cpu", "cu128"], + ">=2.8,<2.9": ["cpu", "cu129"], } if _validate_pytorch_framework_version( request, image_uri, "skip_serialized_release_pt_test", skip_dict diff --git a/test/dlc_tests/container_tests/bin/efa/testEFA b/test/dlc_tests/container_tests/bin/efa/testEFA index 420cd711dc18..52f5664625d8 100755 --- a/test/dlc_tests/container_tests/bin/efa/testEFA +++ 
b/test/dlc_tests/container_tests/bin/efa/testEFA @@ -36,7 +36,7 @@ validate_all_reduce_performance_logs(){ # EFA 1.37.0 using "Using network Libfabric" instead of "Using network AWS Libfabric" grep -E "Using network (AWS )?Libfabric" ${TRAINING_LOG} || { echo "efa is not working, please check if it is installed correctly"; exit 1; } if [[ ${INSTANCE_TYPE} == p4d* || ${INSTANCE_TYPE} == p5* ]]; then - grep "Setting NCCL_TOPO_FILE environment variable to" ${TRAINING_LOG} + grep "NCCL_TOPO_FILE set by environment to" ${TRAINING_LOG} # EFA 1.37.0 change from NET/AWS Libfabric/0/GDRDMA to NET/Libfabric/0/GDRDMA grep -E "NET/(AWS )?Libfabric/0/GDRDMA" ${TRAINING_LOG} fi @@ -89,7 +89,7 @@ check_efa_nccl_all_reduce(){ RETURN_VAL=${PIPESTATUS[0]} # In case, if you would like see logs, uncomment below line - # RESULT=$(cat ${TRAINING_LOG}) + RESULT=$(cat ${TRAINING_LOG}) if [ ${RETURN_VAL} -eq 0 ]; then echo "***************************** check_efa_nccl_all_reduce passed *****************************" diff --git a/test/dlc_tests/ec2/pytorch/training/common_cases.py b/test/dlc_tests/ec2/pytorch/training/common_cases.py index 8f8cc7cc03e3..cd58a03eca10 100644 --- a/test/dlc_tests/ec2/pytorch/training/common_cases.py +++ b/test/dlc_tests/ec2/pytorch/training/common_cases.py @@ -1,4 +1,6 @@ import os +import sys +import logging from packaging.version import Version from packaging.specifiers import SpecifierSet @@ -19,6 +21,10 @@ get_efa_ec2_instance_type, ) +LOGGER = logging.getLogger(__name__) +LOGGER.setLevel(logging.INFO) +LOGGER.addHandler(logging.StreamHandler(sys.stderr)) + # Test functions PT_STANDALONE_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "pytorch_tests", "testPyTorchStandalone") PT_MNIST_CMD = os.path.join(CONTAINER_TESTS_PREFIX, "pytorch_tests", "testPyTorch") @@ -351,6 +357,7 @@ def pytorch_cudnn_match_gpu(pytorch_training, ec2_connection, region): """ Test cuDNN Package PT 2.1 reintroduces a dependency on CUDNN to support NVDA TransformerEngine. This test is to ensure that torch CUDNN matches system CUDNN in the container. + Checks both /usr/include/ and /usr/include/x86_64-linux-gnu/ paths to support different cuDNN package installations. 
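+    The x86_64-linux-gnu path is the usual location for Debian/Ubuntu cuDNN
+    packages, while /usr/include typically covers source or tarball installs.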
""" container_name = "pytorch_cudnn" account_id = get_account_id_from_image_uri(pytorch_training) @@ -360,9 +367,30 @@ def pytorch_cudnn_match_gpu(pytorch_training, ec2_connection, region): f"docker run --runtime=nvidia --gpus all --name {container_name} -itd {pytorch_training}", hide=True, ) - major_cmd = 'cat /usr/include/cudnn_version.h | grep "#define CUDNN_MAJOR"' - minor_cmd = 'cat /usr/include/cudnn_version.h | grep "#define CUDNN_MINOR"' - patch_cmd = 'cat /usr/include/cudnn_version.h | grep "#define CUDNN_PATCHLEVEL"' + + cudnn_paths = [ + "/usr/include/cudnn_version.h", + "/usr/include/x86_64-linux-gnu/cudnn_version.h" + ] + + for path in cudnn_paths: + check_cmd = f"[ -f {path} ] && echo 'Found'" + result = ec2_connection.run( + f"docker exec --user root {container_name} bash -c '{check_cmd}'", + hide=True, + warn=True + ) + if result.ok and result.stdout.strip() == 'Found': + cudnn_path = path + LOGGER.info(f"Found cuDNN header at: {cudnn_path}") + break + else: + raise FileNotFoundError("Could not find cudnn_version.h in any standard location") + + major_cmd = f'cat {cudnn_path} | grep "#define CUDNN_MAJOR"' + minor_cmd = f'cat {cudnn_path} | grep "#define CUDNN_MINOR"' + patch_cmd = f'cat {cudnn_path} | grep "#define CUDNN_PATCHLEVEL"' + major = ec2_connection.run( f"docker exec --user root {container_name} bash -c '{major_cmd}'", hide=True ).stdout.split()[-1] @@ -385,7 +413,7 @@ def pytorch_cudnn_match_gpu(pytorch_training, ec2_connection, region): assert ( system_cudnn == cudnn_from_torch - ), f"System CUDNN {system_cudnn} and torch cudnn {cudnn_from_torch} do not match. Please downgrade system CUDNN or recompile torch with correct CUDNN verson." + ), f"System CUDNN {system_cudnn} (from {cudnn_path}) and torch cudnn {cudnn_from_torch} do not match. Please downgrade system CUDNN or recompile torch with correct CUDNN verson." 
def pytorch_curand_gpu(pytorch_training, ec2_connection): diff --git a/test/dlc_tests/ec2/pytorch/training/test_pytorch_training_2_8.py b/test/dlc_tests/ec2/pytorch/training/test_pytorch_training_2_8.py new file mode 100644 index 000000000000..2a9b678105ab --- /dev/null +++ b/test/dlc_tests/ec2/pytorch/training/test_pytorch_training_2_8.py @@ -0,0 +1,137 @@ +import pytest + +import test.test_utils as test_utils + +from test.test_utils import ec2 + +from test.dlc_tests.ec2.pytorch.training import common_cases +from test.dlc_tests.ec2 import smclarify_cases + + +@pytest.mark.usefixtures("sagemaker") +@pytest.mark.integration("pytorch_gpu_tests") +@pytest.mark.model("N/A") +@pytest.mark.team("conda") +@pytest.mark.parametrize( + "ec2_instance_type, region", common_cases.PT_EC2_GPU_INSTANCE_TYPE_AND_REGION, indirect=True +) +def test_pytorch_2_8_gpu( + pytorch_training___2__8, ec2_connection, region, gpu_only, ec2_instance_type +): + pytorch_training = pytorch_training___2__8 + if test_utils.is_image_incompatible_with_instance_type(pytorch_training, ec2_instance_type): + pytest.skip( + f"Image {pytorch_training} is incompatible with instance type {ec2_instance_type}" + ) + + test_cases = [ + # (common_cases.pytorch_standalone, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_training_mnist, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_linear_regression_gpu, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_gloo, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_nccl, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_mpi, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_training_torchaudio, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_training_torchdata, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_cudnn_match_gpu, (pytorch_training, ec2_connection, region)), + # (common_cases.pytorch_curand_gpu, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_telemetry_bashrc_gpu, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_telemetry_entrypoint_gpu, (pytorch_training, ec2_connection)), + ] + + if "sagemaker" in pytorch_training: + test_cases.append( + (smclarify_cases.smclarify_metrics_gpu, (pytorch_training, ec2_connection)), + ) + + # AMP must be run on multi_gpu + if ec2.is_instance_multi_gpu(ec2_instance_type): + test_cases.append((common_cases.pytorch_amp, (pytorch_training, ec2_connection))) + + test_utils.execute_serial_test_cases(test_cases, test_description="PT 2.8 GPU") + + +@pytest.mark.usefixtures("sagemaker") +@pytest.mark.integration("pytorch_gpu_heavy_tests") +@pytest.mark.model("N/A") +@pytest.mark.team("conda") +@pytest.mark.parametrize( + "ec2_instance_type, region", + common_cases.PT_EC2_HEAVY_GPU_INSTANCE_TYPE_AND_REGION, + indirect=True, +) +@pytest.mark.skipif( + test_utils.is_pr_context() and not ec2.are_heavy_instance_ec2_tests_enabled(), + reason="Skip GPU Heavy tests in PR context unless explicitly enabled", +) +def test_pytorch_2_8_gpu_heavy( + pytorch_training___2__8, ec2_connection, region, gpu_only, ec2_instance_type +): + pytorch_training = pytorch_training___2__8 + if test_utils.is_image_incompatible_with_instance_type(pytorch_training, ec2_instance_type): + pytest.skip( + f"Image {pytorch_training} is incompatible with instance type {ec2_instance_type}" + ) + + test_cases = [ + # (common_cases.pytorch_gdrcopy, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_transformer_engine, (pytorch_training, 
ec2_connection)), + ] + + test_utils.execute_serial_test_cases(test_cases, test_description="PT 2.8 GPU Heavy") + + +@pytest.mark.usefixtures("sagemaker") +@pytest.mark.integration("inductor") +@pytest.mark.model("N/A") +@pytest.mark.team("training-compiler") +@pytest.mark.parametrize( + "ec2_instance_type, region", + common_cases.PT_EC2_GPU_INDUCTOR_INSTANCE_TYPE_AND_REGION, + indirect=True, +) +def test_pytorch_2_8_gpu_inductor( + pytorch_training___2__8, ec2_connection, region, gpu_only, ec2_instance_type +): + pytorch_training = pytorch_training___2__8 + if test_utils.is_image_incompatible_with_instance_type(pytorch_training, ec2_instance_type): + pytest.skip( + f"Image {pytorch_training} is incompatible with instance type {ec2_instance_type}" + ) + + test_cases = [ + (common_cases.pytorch_gloo_inductor_gpu, (pytorch_training, ec2_connection)), + (common_cases.pytorch_mpi_inductor_gpu, (pytorch_training, ec2_connection)), + (common_cases.pytorch_nccl_inductor, (pytorch_training, ec2_connection)), + (common_cases.pytorch_amp_inductor, (pytorch_training, ec2_connection)), + ] + + test_utils.execute_serial_test_cases(test_cases, test_description="PT 2.8 GPU Inductor") + + +@pytest.mark.usefixtures("sagemaker") +@pytest.mark.integration("pytorch_cpu_tests") +@pytest.mark.model("N/A") +@pytest.mark.team("conda") +@pytest.mark.parametrize("ec2_instance_type", common_cases.PT_EC2_CPU_INSTANCE_TYPE, indirect=True) +def test_pytorch_2_8_cpu(pytorch_training___2__8, ec2_connection, cpu_only): + pytorch_training = pytorch_training___2__8 + + test_cases = [ + # (common_cases.pytorch_standalone, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_training_mnist, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_linear_regression_cpu, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_gloo, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_mpi, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_training_torchaudio, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_training_torchdata, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_telemetry_bashrc_cpu, (pytorch_training, ec2_connection)), + # (common_cases.pytorch_telemetry_entrypoint_cpu, (pytorch_training, ec2_connection)), + ] + + if "sagemaker" in pytorch_training: + test_cases += [ + (smclarify_cases.smclarify_metrics_cpu, (pytorch_training, ec2_connection)), + ] + + test_utils.execute_serial_test_cases(test_cases, test_description="PT 2.8 CPU") diff --git a/test/dlc_tests/ec2/test_efa.py b/test/dlc_tests/ec2/test_efa.py index 9543d783f21c..053eb97c81ce 100644 --- a/test/dlc_tests/ec2/test_efa.py +++ b/test/dlc_tests/ec2/test_efa.py @@ -147,50 +147,50 @@ def test_efa_tensorflow( ) -@pytest.mark.skip( - "EFA healthcheck binaries are not maintained by DLC, we will skip these tests moving foward unless binaries are added otherwise." 
-) -@pytest.mark.processor("gpu") -@pytest.mark.model("N/A") -@pytest.mark.integration("efa") -@pytest.mark.usefixtures("sagemaker_only") -@pytest.mark.usefixtures("pt201_and_above_only") -@pytest.mark.allow_p4de_use -@pytest.mark.parametrize("ec2_instance_type,region", EC2_EFA_GPU_ONLY_P4_INSTANCE_TYPE_AND_REGION) -@pytest.mark.team("conda") -@pytest.mark.skipif( - is_pr_context() and not are_heavy_instance_ec2_tests_enabled(), - reason="Skip EFA test in PR context unless explicitly enabled", -) -def test_pytorch_efa_healthcheck( - pytorch_training, - efa_ec2_instances, - efa_ec2_connections, - ec2_instance_type, - region, - gpu_only, -): - """ - Run EFA Health Check tests on DLC. - :param pytorch_training: str PyTorch Training DLC image URI - :param efa_ec2_instances: list of tuples of instance-ids and SSH-keys for EFA-enabled instances - :param efa_ec2_connections: list of Fabric Connection objects for EFA-enabled instances - :param ec2_instance_type: str Instance Type being tested - :param region: str Region in which EFA-enabled instances are launched - :param gpu_only: pytest fixture to limit test only to GPU DLCs - """ - _setup_multinode_efa_instances( - pytorch_training, efa_ec2_instances, efa_ec2_connections, ec2_instance_type, region - ) - master_connection = efa_ec2_connections[0] - run_cmd_on_container(MASTER_CONTAINER_NAME, master_connection, EFA_SANITY_TEST_CMD, hide=False) - run_cmd_on_container( - MASTER_CONTAINER_NAME, - master_connection, - f"{EFA_PYTORCH_HEALTHCHECK_TEST_CMD}", - hide=False, - timeout=DEFAULT_EFA_TIMEOUT, - ) +# @pytest.mark.skip( +# "EFA healthcheck binaries are not maintained by DLC, we will skip these tests moving foward unless binaries are added otherwise." +# ) +# @pytest.mark.processor("gpu") +# @pytest.mark.model("N/A") +# @pytest.mark.integration("efa") +# @pytest.mark.usefixtures("sagemaker_only") +# @pytest.mark.usefixtures("pt201_and_above_only") +# @pytest.mark.allow_p4de_use +# @pytest.mark.parametrize("ec2_instance_type,region", EC2_EFA_GPU_ONLY_P4_INSTANCE_TYPE_AND_REGION) +# @pytest.mark.team("conda") +# @pytest.mark.skipif( +# is_pr_context() and not are_heavy_instance_ec2_tests_enabled(), +# reason="Skip EFA test in PR context unless explicitly enabled", +# ) +# def test_pytorch_efa_healthcheck( +# pytorch_training, +# efa_ec2_instances, +# efa_ec2_connections, +# ec2_instance_type, +# region, +# gpu_only, +# ): +# """ +# Run EFA Health Check tests on DLC. 
+# :param pytorch_training: str PyTorch Training DLC image URI +# :param efa_ec2_instances: list of tuples of instance-ids and SSH-keys for EFA-enabled instances +# :param efa_ec2_connections: list of Fabric Connection objects for EFA-enabled instances +# :param ec2_instance_type: str Instance Type being tested +# :param region: str Region in which EFA-enabled instances are launched +# :param gpu_only: pytest fixture to limit test only to GPU DLCs +# """ +# _setup_multinode_efa_instances( +# pytorch_training, efa_ec2_instances, efa_ec2_connections, ec2_instance_type, region +# ) +# master_connection = efa_ec2_connections[0] +# run_cmd_on_container(MASTER_CONTAINER_NAME, master_connection, EFA_SANITY_TEST_CMD, hide=False) +# run_cmd_on_container( +# MASTER_CONTAINER_NAME, +# master_connection, +# f"{EFA_PYTORCH_HEALTHCHECK_TEST_CMD}", +# hide=False, +# timeout=DEFAULT_EFA_TIMEOUT, +# ) def _setup_multinode_efa_instances( @@ -380,12 +380,33 @@ def _create_master_mpi_hosts_file(efa_ec2_connections, worker_instance_ids, inst for worker_ip in worker_instance_private_ips: hosts_string += f"\n{worker_ip} slots={slots} " + # TODO: remove logging + LOGGER.info(f"Attempting to create hosts file with content:\n{hosts_string}") + + LOGGER.info(f"""echo -e "{hosts_string}" > {HOSTS_FILE_LOCATION}""") run_cmd_on_container( MASTER_CONTAINER_NAME, master_connection, f"""echo -e "{hosts_string}" > {HOSTS_FILE_LOCATION}""", ) + # TODO: remove logging + LOGGER.info(f"Checking if hosts file exists:") + run_cmd_on_container( + MASTER_CONTAINER_NAME, + master_connection, + f"ls -l {HOSTS_FILE_LOCATION}", + hide=False + ) + + LOGGER.info(f"Checking hosts file contents:") + run_cmd_on_container( + MASTER_CONTAINER_NAME, + master_connection, + f"cat {HOSTS_FILE_LOCATION}", + hide=False + ) + def _setup_worker_efa_ssh_config(connection, master_pub_key): """ diff --git a/test/testrunner.py b/test/testrunner.py index 4746740437bc..86b2ed4692de 100644 --- a/test/testrunner.py +++ b/test/testrunner.py @@ -444,7 +444,8 @@ def main(): pytest_cmd = [ "-s", "-rA", - test_path, + # test_path, + os.path.join(test_path, "test_efa.py::test_pytorch_efa"), f"--junitxml={report}", "-n=auto", ]
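+    # NOTE: the test_efa.py::test_pytorch_efa entry above narrows this PR run to a
+    # single EFA test; restoring `test_path` re-enables the full collected suite.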