From b249d5c52cdbbc58641a75899c55670cb35c8da7 Mon Sep 17 00:00:00 2001 From: Jirka B Date: Tue, 9 Sep 2025 18:20:39 +0200 Subject: [PATCH 1/2] drop using dockers --- .github/checkgroup.yml | 28 --------- .github/label-change.yml | 6 -- dockers/README.md | 69 ----------------------- dockers/base-cuda/Dockerfile | 106 ----------------------------------- dockers/docs/Dockerfile | 67 ---------------------- dockers/nvidia/Dockerfile | 70 ----------------------- dockers/release/Dockerfile | 61 -------------------- 7 files changed, 407 deletions(-) delete mode 100644 dockers/README.md delete mode 100644 dockers/base-cuda/Dockerfile delete mode 100644 dockers/docs/Dockerfile delete mode 100644 dockers/nvidia/Dockerfile delete mode 100644 dockers/release/Dockerfile diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index 99375a2c48bce..e2a8b0c2fd7a4 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -111,34 +111,6 @@ subprojects: - "docs-make (pytorch, doctest)" - "docs-make (pytorch, html)" - - id: "pytorch_lightning: Docker" - paths: - - ".actions/*" - - ".github/workflows/docker-build.yml" - - "dockers/**" - - "requirements/pytorch/**" - - "requirements/fabric/**" - - "setup.py" - - "!requirements/*/docs.txt" - - "!*.md" - - "!**/*.md" - checks: - - "build-cuda (3.10, 2.1.2, 12.1.1)" - - "build-cuda (3.11, 2.2.2, 12.1.1)" - - "build-cuda (3.11, 2.3.1, 12.1.1)" - - "build-cuda (3.11, 2.4.1, 12.1.1)" - - "build-cuda (3.12, 2.5.1, 12.1.1)" - - "build-cuda (3.12, 2.6.0, 12.4.1)" - #- "build-NGC" - - "build-pl (3.10, 2.1, 12.1.1)" - - "build-pl (3.11, 2.2, 12.1.1)" - - "build-pl (3.11, 2.3, 12.1.1)" - - "build-pl (3.11, 2.4, 12.1.1)" - - "build-pl (3.12, 2.5, 12.1.1)" - - "build-pl (3.12, 2.6, 12.4.1)" - - "build-pl (3.12, 2.7, 12.6.3)" - - "build-pl (3.12, 2.8, 12.6.3, true)" - # SECTION: lightning_fabric - id: "lightning_fabric: CPU workflow" diff --git a/.github/label-change.yml b/.github/label-change.yml index 8312c612b1207..7d4f3c089de41 100644 --- a/.github/label-change.yml +++ b/.github/label-change.yml @@ -54,12 +54,6 @@ package: - "src/lightning_fabric/*" - "src/pytorch_lightning/*" -dockers: - - changed-files: - - any-glob-to-any-file: - - "dockers/**" - - ".github/workflows/docker-build.yml" - dependencies: - changed-files: - any-glob-to-any-file: diff --git a/dockers/README.md b/dockers/README.md deleted file mode 100644 index cff8bf542f95b..0000000000000 --- a/dockers/README.md +++ /dev/null @@ -1,69 +0,0 @@ -# Docker images - -## Build images from Dockerfiles - -You can build it on your own, note it takes lots of time, be prepared. - -```bash -git clone https://github.com/Lightning-AI/lightning.git - -# build with the default arguments -docker image build -t pytorch-lightning:latest -f dockers/base-cuda/Dockerfile . - -# build with specific arguments -docker image build \ - -t pytorch-lightning:base-cuda12.6.3-py3.10-torch2.8 \ - -f dockers/base-cuda/Dockerfile \ - --build-arg PYTHON_VERSION=3.10 \ - --build-arg PYTORCH_VERSION=2.8 \ - --build-arg CUDA_VERSION=12.6.3 \ - . -``` - -To run your docker use - -```bash -docker image list -docker run --rm -it pytorch-lightning:latest bash -``` - -and if you do not need it anymore, just clean it: - -```bash -docker image list -docker image rm pytorch-lightning:latest -``` - -## Run docker image with GPUs - -To run docker image with access to your GPUs, you need to install - -```bash -# Add the package repositories -distribution=$(. /etc/os-release;echo $ID$VERSION_ID) -curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - -curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list - -sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit -sudo systemctl restart docker -``` - -and later run the docker image with `--gpus all`. For example, - -``` -docker run --rm -it --gpus all pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.10-torch2.8 -``` - -## Run Jupyter server - -1. Build the docker image: - ```bash - docker image build -t pytorch-lightning:v2.5.1 -f dockers/nvidia/Dockerfile --build-arg LIGHTNING_VERSION=2.5.1 . - ``` -1. start the server and map ports: - ```bash - docker run --rm -it --gpus=all -p 8888:8888 pytorch-lightning:v2.5.1 - ``` -1. Connect in local browser: - - copy the generated path e.g. `http://hostname:8888/?token=0719fa7e1729778b0cec363541a608d5003e26d4910983c6` - - replace the `hostname` by `localhost` diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile deleted file mode 100644 index 41faf0ca55113..0000000000000 --- a/dockers/base-cuda/Dockerfile +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright The Lightning AI team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -ARG UBUNTU_VERSION=22.04 -ARG CUDA_VERSION=12.1.1 - - -FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} - -ARG PYTHON_VERSION=3.10 -ARG PYTORCH_VERSION=2.8 -ARG MAX_ALLOWED_NCCL=2.22.3 -ARG MAKE_FLAGS="-j$(nproc)" - -SHELL ["/bin/bash", "-c"] -# https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/ -ENV \ - DEBIAN_FRONTEND="noninteractive" \ - TZ="Etc/UTC" \ - PATH="$PATH:/root/.local/bin" \ - CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \ - MKL_THREADING_LAYER="GNU" \ - MAKEFLAGS=${MAKE_FLAGS} - -RUN \ - CUDA_VERSION_MM=${CUDA_VERSION%.*} && \ - apt-get update -qq --fix-missing && apt-get install -y wget && \ - NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \ - echo "NCCL version found: $NCCL_VER" && \ - TO_INSTALL_NCCL=$(echo -e "$MAX_ALLOWED_NCCL\n$NCCL_VER" | sort -V | head -n1)-1+cuda${CUDA_VERSION_MM} && \ - echo "NCCL version to install: $TO_INSTALL_NCCL" && \ - apt-get install -y --no-install-recommends --allow-downgrades --allow-change-held-packages \ - build-essential \ - pkg-config \ - cmake \ - git \ - wget \ - curl \ - unzip \ - ca-certificates \ - software-properties-common \ - libopenmpi-dev \ - openmpi-bin \ - ssh \ - ninja-build \ - libnccl2=$TO_INSTALL_NCCL \ - libnccl-dev=$TO_INSTALL_NCCL && \ - # Install python - add-apt-repository ppa:deadsnakes/ppa && \ - apt-get install -y \ - python${PYTHON_VERSION} \ - python${PYTHON_VERSION}-dev \ - && \ - update-alternatives --install /usr/bin/python${PYTHON_VERSION%%.*} python${PYTHON_VERSION%%.*} /usr/bin/python${PYTHON_VERSION} 1 && \ - update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \ - # Cleaning - apt-get autoremove -y && \ - apt-get clean && \ - rm -rf /root/.cache && \ - rm -rf /var/lib/apt/lists/* - -COPY requirements/pytorch/ requirements/pytorch/ -COPY requirements/_integrations/ requirements/_integrations/ - -ENV PYTHONPATH="/usr/lib/python${PYTHON_VERSION}/site-packages" - -RUN \ - curl https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} && \ - # Disable cache \ - pip config set global.cache-dir false && \ - # Install recent setuptools to obtain pkg_resources \ - pip install setuptools==75.6.0 && \ - # set particular PyTorch version \ - pip install -q wget packaging && \ - python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py && \ - for fpath in `ls requirements/**/*.txt`; do \ - python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \ - done && \ - CUDA_VERSION_MM=${CUDA_VERSION%.*} && \ - pip install --no-cache-dir \ - -r requirements/pytorch/base.txt \ - -r requirements/pytorch/extra.txt \ - -r requirements/pytorch/test.txt \ - -r requirements/pytorch/strategies.txt \ - --extra-index-url="https://download.pytorch.org/whl/cu${CUDA_VERSION_MM//'.'/''}/" \ - --extra-index-url="https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM//'.'/''}/" - -RUN \ - # Show what we have \ - pip --version && \ - pip list && \ - python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \ - python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" && \ - python requirements/pytorch/check-avail-extras.py && \ - rm -rf requirements/ diff --git a/dockers/docs/Dockerfile b/dockers/docs/Dockerfile deleted file mode 100644 index ec590bf182ee2..0000000000000 --- a/dockers/docs/Dockerfile +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright The Lightning AI team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -FROM ubuntu:20.04 - -LABEL maintainer="Lightning-AI " - -SHELL ["/bin/bash", "-c"] -# https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/ -ENV \ - DEBIAN_FRONTEND="noninteractive" \ - TZ="Etc/UTC" \ - PATH="$PATH:/root/.local/bin" \ - # MAKEFLAGS="-j$(nproc)" - MAKEFLAGS="-j2" - -RUN \ - apt-get update -qq --fix-missing && \ - apt-get install -y --no-install-recommends \ - build-essential \ - software-properties-common \ - ca-certificates \ - pkg-config \ - cmake \ - git \ - wget \ - curl \ - unzip \ - rsync \ - ninja-build \ - pandoc \ - texlive-latex-extra \ - dvipng \ - texlive-pictures \ - python3 \ - python3-setuptools \ - python3-dev \ - && \ - update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \ - # Cleaning - apt-get autoremove -y && \ - apt-get clean && \ - rm -rf /root/.cache && \ - rm -rf /var/lib/apt/lists/* - -COPY requirements/docs.txt docs.txt - -# NOTE: exclude specific `botocore` below as is seems to be corrupted version -RUN \ - wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll --no-check-certificate && \ - python get-pip.py && \ - rm get-pip.py && \ - pip install awscli gsutil --no-cache-dir && \ - pip install torch -r docs.txt --no-cache-dir \ - -f https://download.pytorch.org/whl/cpu/torch_stable.html && \ - pip list diff --git a/dockers/nvidia/Dockerfile b/dockers/nvidia/Dockerfile deleted file mode 100644 index ef329fd56433b..0000000000000 --- a/dockers/nvidia/Dockerfile +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright The Lightning AI team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -ARG PYTORCH_VERSION=24.05 - -# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes -FROM nvcr.io/nvidia/pytorch:${PYTORCH_VERSION}-py3 - -LABEL maintainer="Lightning-AI " - -ARG LIGHTNING_VERSION="" - -RUN python -c "import torch ; print(torch.__version__)" >> torch_version.info - -COPY ./ /workspace/pytorch-lightning/ - -RUN \ - cd /workspace && \ - # replace by specific version if asked - if [ ! -z "$LIGHTNING_VERSION" ] ; then \ - rm -rf pytorch-lightning ; \ - git clone https://github.com/Lightning-AI/pytorch-lightning.git ; \ - cd pytorch-lightning ; \ - git checkout ${LIGHTNING_VERSION} ; \ - git submodule update --init --recursive ; \ - cd .. ; \ - fi && \ -# save the examples \ - ls -lh pytorch-lightning/ && \ - rm -rf pytorch-lightning/.git && \ - rm -rf pytorch-lightning/_notebooks/.git && \ - mv pytorch-lightning/_notebooks/.notebooks/ notebooks && \ - cp -r pytorch-lightning/*examples . && \ - -# Installations \ - pip install "Pillow>=8.2, !=8.3.0" "cryptography>=3.4" "py>=1.10" --no-cache-dir && \ - PACKAGE_NAME=pytorch pip install './lightning[extra,loggers,strategies]' --no-cache-dir && \ - rm -rf pytorch-lightning && \ - pip list - -RUN pip install jupyterlab[all] -U --no-cache-dir - -# create jupyter_notebook_config.py -RUN mkdir -p /root/.jupyter && \ - echo "c.NotebookApp.contents_manager_class = 'notebook.services.contents.largefilemanager.LargeFileManager'" > /root/.jupyter/jupyter_notebook_config.py - -ENV PYTHONPATH="/workspace" - -RUN \ - TORCH_VERSION=$(cat torch_version.info) && \ - rm torch_version.info && \ - ls -lh . && \ - python --version && \ - pip --version && \ - pip list | grep torch && \ - python -c "import torch; assert torch.__version__.startswith('$TORCH_VERSION'), torch.__version__" && \ - python -c "import pytorch_lightning as pl; print(pl.__version__)" - -CMD ["jupyter", "notebook", "--port=8888", "--no-browser", "--ip=0.0.0.0", "--allow-root"] diff --git a/dockers/release/Dockerfile b/dockers/release/Dockerfile deleted file mode 100644 index b80c23dfc73f3..0000000000000 --- a/dockers/release/Dockerfile +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright The Lightning AI team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -ARG PYTHON_VERSION=3.10 -ARG PYTORCH_VERSION=2.8 -ARG CUDA_VERSION=12.6.3 - -FROM pytorchlightning/pytorch_lightning:base-cuda${CUDA_VERSION}-py${PYTHON_VERSION}-torch${PYTORCH_VERSION} - -LABEL maintainer="Lightning-AI " - -ARG LIGHTNING_VERSION="" -ARG PYTORCH_VERSION - -COPY ./ /home/pytorch-lightning/ - -# install dependencies -RUN \ - cd /home && \ - mv pytorch-lightning/_notebooks notebooks && \ - mv pytorch-lightning/examples . && \ - # replace by specific version if asked - if [ ! -z "$LIGHTNING_VERSION" ] ; then \ - rm -rf pytorch-lightning ; \ - wget https://github.com/Lightning-AI/pytorch-lightning/archive/${LIGHTNING_VERSION}.zip --progress=bar:force:noscroll ; \ - unzip ${LIGHTNING_VERSION}.zip ; \ - mv pytorch-lightning-*/ pytorch-lightning ; \ - rm *.zip ; \ - fi && \ - # otherwise there is collision with folder name and pkg name on Pypi - cd pytorch-lightning && \ - # pip install setuptools==75.6.0 && \ - pip install -U "lightning-utilities[cli]" && \ - # drop deepspeed since it is not supported by our minimal Torch requirements \ - echo "PYTORCH_VERSION is: '$PYTORCH_VERSION'" && \ - if [[ "$PYTORCH_VERSION" =~ ^(2\.1|2\.2|2\.3|2\.4)$ ]]; then \ - python -m lightning_utilities.cli requirements prune-pkgs --packages deepspeed --req_files requirements/fabric/strategies.txt ; \ - python -m lightning_utilities.cli requirements prune-pkgs --packages deepspeed --req_files requirements/pytorch/strategies.txt ; \ - fi && \ - PACKAGE_NAME=lightning pip install '.[extra,loggers,strategies]' --no-cache-dir && \ - PACKAGE_NAME=pytorch pip install '.[extra,loggers,strategies]' --no-cache-dir && \ - cd .. && \ - rm -rf pytorch-lightning - -RUN python --version && \ - pip --version && \ - pip list && \ - python -c "import pytorch_lightning as pl; print(pl.__version__)" - -# CMD ["/bin/bash"] From af78563be3969da604fde854bd4d9578cff7b608 Mon Sep 17 00:00:00 2001 From: Jirka B Date: Tue, 9 Sep 2025 18:29:16 +0200 Subject: [PATCH 2/2] drop build --- .github/workflows/docker-build.yml | 167 ----------------------------- 1 file changed, 167 deletions(-) delete mode 100644 .github/workflows/docker-build.yml diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml deleted file mode 100644 index 7b73bc7b0015a..0000000000000 --- a/.github/workflows/docker-build.yml +++ /dev/null @@ -1,167 +0,0 @@ -name: Docker builds - -on: - push: - branches: [master, "release/*"] - pull_request: - branches: [master, "release/*"] - types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped - paths: - - ".actions/*" - - ".github/workflows/docker-build.yml" - - "dockers/**" - - "requirements/*.txt" - - "requirements/pytorch/**" - - "requirements/fabric/**" - - "setup.py" - - "!requirements/*/docs.txt" - - "!*.md" - - "!**/*.md" - schedule: - - cron: "0 0 * * *" # at the end of every day - release: - types: [published] - workflow_dispatch: {} - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}-${{ github.event_name }} - cancel-in-progress: ${{ github.event_name == 'pull_request' }} - -env: - PUSH_NIGHTLY: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} - PUSH_RELEASE: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }} - -jobs: - build-pl: - # the images generated by this job are not used anywhere in this repository. they are just meant to be available - # for users - if: github.event.pull_request.draft == false - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - include: - # We only release one docker image per PyTorch version. - # Make sure the matrix here matches the one below. - - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.1" } - - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.1" } - - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.1" } - - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.1" } - - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.1" } - - { python_version: "3.12", pytorch_version: "2.6", cuda_version: "12.4.1" } - - { python_version: "3.12", pytorch_version: "2.7", cuda_version: "12.6.3" } - - { python_version: "3.12", pytorch_version: "2.8", cuda_version: "12.6.3", latest: "true" } - steps: - - uses: actions/checkout@v5 - with: - submodules: true - - uses: docker/setup-buildx-action@v3 - - uses: docker/login-action@v3 - if: env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI' - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Get release version - if: github.event_name == 'release' - # For workflows triggered by release, `GITHUB_REF` is the release tag created. - run: echo "RELEASE_VERSION=$(echo ${GITHUB_REF##*/})" >> $GITHUB_ENV - - name: Set tags - run: | - pip install -q -r .actions/requirements.txt - tags=$(python .actions/assistant.py generate_docker_tags \ - --release_version="${{ env.RELEASE_VERSION }}" \ - --python_version="${{ matrix.python_version }}" \ - --torch_version="${{ matrix.pytorch_version }}" \ - --cuda_version="${{ matrix.cuda_version }}" \ - --add_latest="${{ matrix.latest || 'false' }}") - echo "DOCKER_TAGS=$tags" >> $GITHUB_ENV - - - uses: docker/build-push-action@v6 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - CUDA_VERSION=${{ matrix.cuda_version }} - LIGHTNING_VERSION=${{ env.RELEASE_VERSION }} - file: dockers/release/Dockerfile - push: ${{ env.PUSH_RELEASE }} # pushed in release-docker.yml only when PL is released - tags: ${{ env.DOCKER_TAGS }} - timeout-minutes: 35 - - build-cuda: - if: github.event.pull_request.draft == false - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - # adding dome more images as Thunder mainly using python 3.10, - # and we need to support integrations as for example LitGPT - python_version: ["3.10"] - pytorch_version: ["2.7.1", "2.8.0"] - cuda_version: ["12.6.3"] - include: - # These are the base images for PL release docker images. - # Make sure the matrix here matches the one above. - - { python_version: "3.10", pytorch_version: "2.1.2", cuda_version: "12.1.1" } - - { python_version: "3.11", pytorch_version: "2.2.2", cuda_version: "12.1.1" } - - { python_version: "3.11", pytorch_version: "2.3.1", cuda_version: "12.1.1" } - - { python_version: "3.11", pytorch_version: "2.4.1", cuda_version: "12.1.1" } - - { python_version: "3.12", pytorch_version: "2.5.1", cuda_version: "12.1.1" } - - { python_version: "3.12", pytorch_version: "2.6.0", cuda_version: "12.4.1" } - - { python_version: "3.12", pytorch_version: "2.7.1", cuda_version: "12.6.3" } - - { python_version: "3.12", pytorch_version: "2.8.0", cuda_version: "12.6.3" } - steps: - - uses: actions/checkout@v5 - - uses: docker/setup-buildx-action@v3 - - uses: docker/login-action@v3 - if: env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI' - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: shorten Torch version - run: | - # convert 1.10.2 to 1.10 - pt_version=$(echo ${{ matrix.pytorch_version }} | cut -d. -f1,2) - echo "PT_VERSION=$pt_version" >> $GITHUB_ENV - - uses: docker/build-push-action@v6 - with: - build-args: | - PYTHON_VERSION=${{ matrix.python_version }} - PYTORCH_VERSION=${{ matrix.pytorch_version }} - CUDA_VERSION=${{ matrix.cuda_version }} - MAKE_FLAGS="-j2" - file: dockers/base-cuda/Dockerfile - push: ${{ env.PUSH_NIGHTLY }} - tags: "pytorchlightning/pytorch_lightning:base-cuda${{ matrix.cuda_version }}-py${{ matrix.python_version }}-torch${{ env.PT_VERSION }}" - timeout-minutes: 95 - - uses: ravsamhq/notify-slack-action@v2 - if: failure() && env.PUSH_NIGHTLY == 'true' - with: - status: ${{ job.status }} - token: ${{ secrets.GITHUB_TOKEN }} - notification_title: ${{ format('CUDA; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }} - message_format: "{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>" - env: - SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} - - build-NGC: - if: github.event.pull_request.draft == false - # fixme: use larger machine or optimize image size - # runs-on: ubuntu-latest-4-cores - # then drop continue-on-error - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v5 - - name: Build Conda Docker - # publish master/release - continue-on-error: true - uses: docker/build-push-action@v6 - with: - build-args: | - PYTORCH_VERSION="25.04" - file: dockers/nvidia/Dockerfile - push: false - timeout-minutes: 55