diff --git a/.azure/gpu-benchmarks.yml b/.azure/gpu-benchmarks.yml index 111589945e048..24b78542a798a 100644 --- a/.azure/gpu-benchmarks.yml +++ b/.azure/gpu-benchmarks.yml @@ -46,7 +46,7 @@ jobs: variables: DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' ) container: - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.4-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.5-cuda12.1.0" options: "--gpus=all --shm-size=32g" strategy: matrix: diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml index e63641b8ecc7d..ee7fe2e281478 100644 --- a/.azure/gpu-tests-fabric.yml +++ b/.azure/gpu-tests-fabric.yml @@ -60,7 +60,7 @@ jobs: image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0" PACKAGE_NAME: "fabric" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.4-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.5-cuda12.1.0" PACKAGE_NAME: "lightning" workspace: clean: all diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 4605e824426e9..1ece70f75e193 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -53,7 +53,7 @@ jobs: image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0" PACKAGE_NAME: "pytorch" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.4-cuda12.1.0" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.5-cuda12.1.0" PACKAGE_NAME: "lightning" pool: lit-rtx-3090 variables: diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index 20875df42c5a8..b9fcde984bc74 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -21,23 +21,26 @@ subprojects: checks: - "pl-cpu (macOS-13, lightning, 3.9, 2.1, oldest)" - "pl-cpu (macOS-14, lightning, 3.10, 2.1)" - - "pl-cpu (macOS-14, lightning, 3.11, 2.2)" + - "pl-cpu (macOS-14, lightning, 3.11, 2.2.2)" - "pl-cpu (macOS-14, lightning, 3.11, 2.3)" - - "pl-cpu (macOS-14, lightning, 3.12, 2.4)" + - "pl-cpu (macOS-14, lightning, 3.12, 2.4.1)" + - "pl-cpu (macOS-14, lightning, 3.12, 2.5.1)" - "pl-cpu (ubuntu-20.04, lightning, 3.9, 2.1, oldest)" - "pl-cpu (ubuntu-20.04, lightning, 3.10, 2.1)" - - "pl-cpu (ubuntu-20.04, lightning, 3.11, 2.2)" + - "pl-cpu (ubuntu-20.04, lightning, 3.11, 2.2.2)" - "pl-cpu (ubuntu-20.04, lightning, 3.11, 2.3)" - - "pl-cpu (ubuntu-20.04, lightning, 3.12, 2.4)" + - "pl-cpu (ubuntu-22.04, lightning, 3.12, 2.4.1)" + - "pl-cpu (ubuntu-22.04, lightning, 3.12, 2.5.1)" - "pl-cpu (windows-2022, lightning, 3.9, 2.1, oldest)" - "pl-cpu (windows-2022, lightning, 3.10, 2.1)" - - "pl-cpu (windows-2022, lightning, 3.11, 2.2)" + - "pl-cpu (windows-2022, lightning, 3.11, 2.2.2)" - "pl-cpu (windows-2022, lightning, 3.11, 2.3)" - - "pl-cpu (windows-2022, lightning, 3.12, 2.4)" + - "pl-cpu (windows-2022, lightning, 3.12, 2.4.1)" + - "pl-cpu (windows-2022, lightning, 3.12, 2.5.1)" - "pl-cpu (macOS-14, pytorch, 3.9, 2.1)" - "pl-cpu (ubuntu-20.04, pytorch, 3.9, 2.1)" - "pl-cpu (windows-2022, pytorch, 3.9, 2.1)" - - "pl-cpu (macOS-12, pytorch, 3.10, 2.1)" + - "pl-cpu (macOS-13, pytorch, 3.10, 2.1)" - "pl-cpu (ubuntu-22.04, pytorch, 3.10, 2.1)" - "pl-cpu (windows-2022, pytorch, 3.10, 2.1)" @@ -141,15 +144,17 @@ subprojects: - "!*.md" - "!**/*.md" checks: - - "build-cuda (3.11, 2.1, 12.1.0)" - - "build-cuda (3.11, 2.2, 12.1.0)" - - "build-cuda (3.11, 2.3, 12.1.0)" - - "build-cuda (3.12, 2.4, 12.1.0)" + - "build-cuda (3.10, 2.1.2, 
12.1.0)" + - "build-cuda (3.11, 2.2.2, 12.1.0)" + - "build-cuda (3.11, 2.3.1, 12.1.0)" + - "build-cuda (3.11, 2.4.1, 12.1.0)" + - "build-cuda (3.12, 2.5.1, 12.1.0)" #- "build-NGC" - - "build-pl (3.11, 2.1, 12.1.0)" + - "build-pl (3.10, 2.1, 12.1.0)" - "build-pl (3.11, 2.2, 12.1.0)" - "build-pl (3.11, 2.3, 12.1.0)" - - "build-pl (3.12, 2.4, 12.1.0)" + - "build-pl (3.11, 2.4, 12.1.0)" + - "build-pl (3.12, 2.5, 12.1.0)" # SECTION: lightning_fabric @@ -168,23 +173,26 @@ subprojects: checks: - "fabric-cpu (macOS-13, lightning, 3.9, 2.1, oldest)" - "fabric-cpu (macOS-14, lightning, 3.10, 2.1)" - - "fabric-cpu (macOS-14, lightning, 3.11, 2.2)" + - "fabric-cpu (macOS-14, lightning, 3.11, 2.2.2)" - "fabric-cpu (macOS-14, lightning, 3.11, 2.3)" - - "fabric-cpu (macOS-14, lightning, 3.12, 2.4)" + - "fabric-cpu (macOS-14, lightning, 3.12, 2.4.1)" + - "fabric-cpu (macOS-14, lightning, 3.12, 2.5.1)" - "fabric-cpu (ubuntu-20.04, lightning, 3.9, 2.1, oldest)" - "fabric-cpu (ubuntu-20.04, lightning, 3.10, 2.1)" - - "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.2)" + - "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.2.2)" - "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.3)" - - "fabric-cpu (ubuntu-20.04, lightning, 3.12, 2.4)" + - "fabric-cpu (ubuntu-22.04, lightning, 3.12, 2.4.1)" + - "fabric-cpu (ubuntu-22.04, lightning, 3.12, 2.5.1)" - "fabric-cpu (windows-2022, lightning, 3.9, 2.1, oldest)" - "fabric-cpu (windows-2022, lightning, 3.10, 2.1)" - - "fabric-cpu (windows-2022, lightning, 3.11, 2.2)" + - "fabric-cpu (windows-2022, lightning, 3.11, 2.2.2)" - "fabric-cpu (windows-2022, lightning, 3.11, 2.3)" - - "fabric-cpu (windows-2022, lightning, 3.12, 2.4)" + - "fabric-cpu (windows-2022, lightning, 3.12, 2.4.1)" + - "fabric-cpu (windows-2022, lightning, 3.12, 2.5.1)" - "fabric-cpu (macOS-14, fabric, 3.9, 2.1)" - "fabric-cpu (ubuntu-20.04, fabric, 3.9, 2.1)" - "fabric-cpu (windows-2022, fabric, 3.9, 2.1)" - - "fabric-cpu (macOS-12, fabric, 3.10, 2.1)" + - "fabric-cpu (macOS-13, fabric, 3.10, 2.1)" - "fabric-cpu (ubuntu-22.04, fabric, 3.10, 2.1)" - "fabric-cpu (windows-2022, fabric, 3.10, 2.1)" @@ -258,14 +266,14 @@ subprojects: - "install-pkg (ubuntu-22.04, lightning, 3.11)" - "install-pkg (ubuntu-22.04, notset, 3.9)" - "install-pkg (ubuntu-22.04, notset, 3.11)" - - "install-pkg (macOS-12, fabric, 3.9)" - - "install-pkg (macOS-12, fabric, 3.11)" - - "install-pkg (macOS-12, pytorch, 3.9)" - - "install-pkg (macOS-12, pytorch, 3.11)" - - "install-pkg (macOS-12, lightning, 3.9)" - - "install-pkg (macOS-12, lightning, 3.11)" - - "install-pkg (macOS-12, notset, 3.9)" - - "install-pkg (macOS-12, notset, 3.11)" + - "install-pkg (macOS-13, fabric, 3.9)" + - "install-pkg (macOS-13, fabric, 3.11)" + - "install-pkg (macOS-13, pytorch, 3.9)" + - "install-pkg (macOS-13, pytorch, 3.11)" + - "install-pkg (macOS-13, lightning, 3.9)" + - "install-pkg (macOS-13, lightning, 3.11)" + - "install-pkg (macOS-13, notset, 3.9)" + - "install-pkg (macOS-13, notset, 3.11)" - "install-pkg (windows-2022, fabric, 3.9)" - "install-pkg (windows-2022, fabric, 3.11)" - "install-pkg (windows-2022, pytorch, 3.9)" diff --git a/.github/workflows/call-clear-cache.yml b/.github/workflows/call-clear-cache.yml index 091e6a002ab3c..4c189879fb48c 100644 --- a/.github/workflows/call-clear-cache.yml +++ b/.github/workflows/call-clear-cache.yml @@ -23,18 +23,18 @@ on: jobs: cron-clear: if: github.event_name == 'schedule' || github.event_name == 'pull_request' - uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.7 + uses: 
Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.8 with: - scripts-ref: v0.11.7 + scripts-ref: v0.11.8 dry-run: ${{ github.event_name == 'pull_request' }} pattern: "latest|docs" age-days: 7 direct-clear: if: github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' - uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.11.8 with: - scripts-ref: v0.11.7 + scripts-ref: v0.11.8 dry-run: ${{ github.event_name == 'pull_request' }} pattern: ${{ inputs.pattern || 'pypi_wheels' }} # setting str in case of PR / debugging age-days: ${{ fromJSON(inputs.age-days) || 0 }} # setting 0 in case of PR / debugging diff --git a/.github/workflows/ci-check-md-links.yml b/.github/workflows/ci-check-md-links.yml index 53b06c207482d..af5378c4221e3 100644 --- a/.github/workflows/ci-check-md-links.yml +++ b/.github/workflows/ci-check-md-links.yml @@ -14,7 +14,7 @@ on: jobs: check-md-links: - uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.11.8 with: config-file: ".github/markdown-links-config.json" base-branch: "master" diff --git a/.github/workflows/ci-pkg-install.yml b/.github/workflows/ci-pkg-install.yml index d22a8d3ace1e2..f096a709595af 100644 --- a/.github/workflows/ci-pkg-install.yml +++ b/.github/workflows/ci-pkg-install.yml @@ -42,7 +42,7 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-22.04", "macOS-12", "windows-2022"] + os: ["ubuntu-22.04", "macOS-13", "windows-2022"] pkg-name: ["fabric", "pytorch", "lightning", "notset"] python-version: ["3.9", "3.11"] steps: diff --git a/.github/workflows/ci-schema.yml b/.github/workflows/ci-schema.yml index e5ae526f196b7..2ccaadd2f51f5 100644 --- a/.github/workflows/ci-schema.yml +++ b/.github/workflows/ci-schema.yml @@ -8,7 +8,7 @@ on: jobs: check: - uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.7 + uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.11.8 with: # skip azure due to the wrong schema file by MSFT # https://github.com/Lightning-AI/lightning-flash/pull/1455#issuecomment-1244793607 diff --git a/.github/workflows/ci-tests-fabric.yml b/.github/workflows/ci-tests-fabric.yml index 06616650deb9c..ca4dd0b845750 100644 --- a/.github/workflows/ci-tests-fabric.yml +++ b/.github/workflows/ci-tests-fabric.yml @@ -43,17 +43,20 @@ jobs: - { os: "macOS-14", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } - - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } - - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } + - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: 
"windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" } # only run PyTorch latest with Python latest, use Fabric scope to limit dependency issues - - { os: "macOS-12", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } + - { os: "macOS-13", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } - { os: "windows-2022", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.1" } # "oldest" versions tests, only on minimum Python diff --git a/.github/workflows/ci-tests-pytorch.yml b/.github/workflows/ci-tests-pytorch.yml index 4de22a24f36e6..0c7deddbe5923 100644 --- a/.github/workflows/ci-tests-pytorch.yml +++ b/.github/workflows/ci-tests-pytorch.yml @@ -47,17 +47,20 @@ jobs: - { os: "macOS-14", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.10", pytorch-version: "2.1" } - - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } - - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } - - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } + - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.2.2" } - { os: "macOS-14", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - { os: "windows-2022", pkg-name: "lightning", python-version: "3.11", pytorch-version: "2.3" } - - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - - { os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } - - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.4.1" } + - { os: "macOS-14", pkg-name: "lightning", python-version: "3.12", pytorch-version: 
"2.5.1" } + - { os: "ubuntu-22.04", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" } + - { os: "windows-2022", pkg-name: "lightning", python-version: "3.12", pytorch-version: "2.5.1" } # only run PyTorch latest with Python latest, use PyTorch scope to limit dependency issues - - { os: "macOS-12", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } + - { os: "macOS-13", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } - { os: "ubuntu-22.04", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } - { os: "windows-2022", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.1" } # "oldest" versions tests, only on minimum Python diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 6df2b8cbb73d3..09ae3adc45ac6 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -43,10 +43,11 @@ jobs: include: # We only release one docker image per PyTorch version. # Make sure the matrix here matches the one below. - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } + - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" } - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" } - - { python_version: "3.12", pytorch_version: "2.4", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.0" } + - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 with: @@ -103,10 +104,11 @@ jobs: include: # These are the base images for PL release docker images. # Make sure the matrix here matches the one above. - - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" } - - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" } - - { python_version: "3.12", pytorch_version: "2.4", cuda_version: "12.1.0" } + - { python_version: "3.10", pytorch_version: "2.1.2", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.2.2", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.3.1", cuda_version: "12.1.0" } + - { python_version: "3.11", pytorch_version: "2.4.1", cuda_version: "12.1.0" } + - { python_version: "3.12", pytorch_version: "2.5.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 - uses: docker/setup-buildx-action@v3 @@ -115,6 +117,12 @@ jobs: with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} + + - name: shorten Torch version + run: | + # convert 1.10.2 to 1.10 + pt_version=$(echo ${{ matrix.pytorch_version }} | cut -d. 
-f1,2) + echo "PT_VERSION=$pt_version" >> $GITHUB_ENV - uses: docker/build-push-action@v6 with: build-args: | @@ -123,7 +131,7 @@ jobs: CUDA_VERSION=${{ matrix.cuda_version }} file: dockers/base-cuda/Dockerfile push: ${{ env.PUSH_NIGHTLY }} - tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }}" + tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ env.PT_VERSION }}-cuda${{ matrix.cuda_version }}" timeout-minutes: 95 - uses: ravsamhq/notify-slack-action@v2 if: failure() && env.PUSH_NIGHTLY == 'true' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cb7604831767b..24fc40566b152 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -58,7 +58,7 @@ repos: #args: ["--write-changes"] # uncomment if you want to get automatic fixing - repo: https://github.com/PyCQA/docformatter - rev: v1.7.5 + rev: 06907d0267368b49b9180eed423fae5697c1e909 # todo: fix for docformatter after last 1.7.5 hooks: - id: docformatter additional_dependencies: [tomli] diff --git a/_notebooks b/_notebooks index d527353491441..b83fde09c7243 160000 --- a/_notebooks +++ b/_notebooks @@ -1 +1 @@ -Subproject commit d5273534914411886ed45d59536f6042d24f6fe0 +Subproject commit b83fde09c724311af0d528e810b2ba606f31c95e diff --git a/docs/source-fabric/fundamentals/launch.rst b/docs/source-fabric/fundamentals/launch.rst index f8c0deecf4e25..81b6cd9d186f1 100644 --- a/docs/source-fabric/fundamentals/launch.rst +++ b/docs/source-fabric/fundamentals/launch.rst @@ -116,7 +116,7 @@ This is essentially the same as running ``python path/to/your/script.py``, but i machine. --precision [16-mixed|bf16-mixed|32-true|64-true|64|32|16|bf16] Double precision (``64-true`` or ``64``), - full precision (``32-true`` or ``64``), half + full precision (``32-true`` or ``32``), half precision (``16-mixed`` or ``16``) or bfloat16 precision (``bf16-mixed`` or ``bf16``) diff --git a/docs/source-pytorch/accelerators/tpu_advanced.rst b/docs/source-pytorch/accelerators/tpu_advanced.rst index e410c6e82539f..d74f9b07374c9 100644 --- a/docs/source-pytorch/accelerators/tpu_advanced.rst +++ b/docs/source-pytorch/accelerators/tpu_advanced.rst @@ -52,7 +52,7 @@ Example: model = WeightSharingModule() trainer = Trainer(max_epochs=1, accelerator="tpu") -See `XLA Documentation `_ +See `XLA Documentation `_ ---- @@ -61,4 +61,4 @@ XLA XLA is the library that interfaces PyTorch with the TPUs. For more information check out `XLA `_. -Guide for `troubleshooting XLA `_ +Guide for `troubleshooting XLA `_ diff --git a/docs/source-pytorch/accelerators/tpu_basic.rst b/docs/source-pytorch/accelerators/tpu_basic.rst index fb4e2b7bde244..217b76106aea9 100644 --- a/docs/source-pytorch/accelerators/tpu_basic.rst +++ b/docs/source-pytorch/accelerators/tpu_basic.rst @@ -108,7 +108,7 @@ There are cases in which training on TPUs is slower when compared with GPUs, for - XLA Graph compilation during the initial steps `Reference `_ - Some tensor ops are not fully supported on TPU, or not supported at all. These operations will be performed on CPU (context switch). -The official PyTorch XLA `performance guide `_ +The official PyTorch XLA `performance guide `_ has more detailed information on how PyTorch code can be optimized for TPU. In particular, the -`metrics report `_ allows +`metrics report `_ allows one to identify operations that lead to context switching. 
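(As an aside, a minimal sketch of printing such a metrics report from a training script, assuming the ``torch_xla`` package is installed and keeps its current ``debug.metrics`` module layout — not part of this diff, only an illustration of the doc text above:)

    import torch_xla.debug.metrics as met

    # ... run a few training steps on the XLA device first ...
    # Counters prefixed with "aten::" flag ops that fell back to the CPU
    # (context switches); timers show where compilation and execution time goes.
    print(met.metrics_report())
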
diff --git a/docs/source-pytorch/accelerators/tpu_faq.rst b/docs/source-pytorch/accelerators/tpu_faq.rst index f4b2c60633d26..109449ef2cc9a 100644 --- a/docs/source-pytorch/accelerators/tpu_faq.rst +++ b/docs/source-pytorch/accelerators/tpu_faq.rst @@ -78,7 +78,7 @@ A lot of PyTorch operations aren't lowered to XLA, which could lead to significa These operations are moved to the CPU memory and evaluated, and then the results are transferred back to the XLA device(s). By using the `xla_debug` Strategy, users could create a metrics report to diagnose issues. -The report includes things like (`XLA Reference `_): +The report includes things like (`XLA Reference `_): * how many times we issue XLA compilations and time spent on issuing. * how many times we execute and time spent on execution diff --git a/docs/source-pytorch/common/progress_bar.rst b/docs/source-pytorch/common/progress_bar.rst index e0c29fccdc494..106c2289e5c7b 100644 --- a/docs/source-pytorch/common/progress_bar.rst +++ b/docs/source-pytorch/common/progress_bar.rst @@ -36,6 +36,10 @@ You can update ``refresh_rate`` (rate (number of batches) at which the progress trainer = Trainer(callbacks=[TQDMProgressBar(refresh_rate=10)]) +.. note:: + + The ``smoothing`` option has no effect when using the default implementation of :class:`~lightning.pytorch.callbacks.TQDMProgressBar`, as the progress bar is updated using the ``bar.refresh()`` method instead of ``bar.update()``. This can cause the progress bar to become desynchronized with the actual progress. To avoid this issue, you can use the ``bar.update()`` method instead, but this may require customizing the :class:`~lightning.pytorch.callbacks.TQDMProgressBar` class. + By default the training progress bar is reset (overwritten) at each new epoch. If you wish for a new progress bar to be displayed at the end of every epoch, set :paramref:`TQDMProgressBar.leave ` to ``True``. 
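(A minimal sketch of the customization hinted at in the note above, assuming the public ``TQDMProgressBar`` attributes — ``train_progress_bar``, ``refresh_rate``, ``is_enabled``, ``get_metrics`` — keep their current behavior; illustration only, not part of this diff:)

    from lightning.pytorch import Trainer
    from lightning.pytorch.callbacks import TQDMProgressBar

    class UpdatingTQDMProgressBar(TQDMProgressBar):
        """Advance the training bar with ``update()`` so tqdm's ``smoothing`` option takes effect."""

        def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
            n = batch_idx + 1
            if self.is_enabled and (n % self.refresh_rate == 0 or n == self.train_progress_bar.total):
                # The default implementation assigns ``bar.n`` and calls ``bar.refresh()``,
                # which bypasses tqdm's rate smoothing; ``update()`` preserves it.
                self.train_progress_bar.update(n - self.train_progress_bar.n)
                self.train_progress_bar.set_postfix(self.get_metrics(trainer, pl_module))

    # Usage (validation/test bars keep the default refresh-based behavior):
    trainer = Trainer(callbacks=[UpdatingTQDMProgressBar(refresh_rate=10)])
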
diff --git a/docs/source-pytorch/upgrade/sections/2_0_regular.rst b/docs/source-pytorch/upgrade/sections/2_0_regular.rst index 192f20bc669b9..2f94ef7ab66fd 100644 --- a/docs/source-pytorch/upgrade/sections/2_0_regular.rst +++ b/docs/source-pytorch/upgrade/sections/2_0_regular.rst @@ -6,7 +6,7 @@ - Then - Ref - * - used PyTorch 3.11 + * - used PyTorch 1.11 - upgrade to PyTorch 2.1 or higher - `PR18691`_ diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt index 0a99614a46870..42c055e85ca7d 100644 --- a/requirements/fabric/base.txt +++ b/requirements/fabric/base.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torch >=2.1.0, <2.5.0 +torch >=2.1.0, <2.6.0 fsspec[http] >=2022.5.0, <2024.4.0 packaging >=20.0, <=23.1 typing-extensions >=4.4.0, <4.10.0 diff --git a/requirements/fabric/examples.txt b/requirements/fabric/examples.txt index cb4135da2409a..3352db77d8bd9 100644 --- a/requirements/fabric/examples.txt +++ b/requirements/fabric/examples.txt @@ -1,6 +1,6 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torchvision >=0.16.0, <0.20.0 -torchmetrics >=0.10.0, <1.3.0 +torchvision >=0.16.0, <0.21.0 +torchmetrics >=0.10.0, <1.5.0 lightning-utilities >=0.8.0, <0.12.0 diff --git a/requirements/fabric/strategies.txt b/requirements/fabric/strategies.txt index 4aee89d9f68e7..394aceb39cd6b 100644 --- a/requirements/fabric/strategies.txt +++ b/requirements/fabric/strategies.txt @@ -6,4 +6,5 @@ # note: is a bug around 0.10 with `MPS_Accelerator must implement all abstract methods` # shall be resolved by https://github.com/microsoft/DeepSpeed/issues/4372 deepspeed >=0.8.2, <=0.9.3; platform_system != "Windows" and platform_system != "Darwin" # strict -bitsandbytes >=0.42.0,<0.43.0 +bitsandbytes >=0.44.0,<0.44.2; sys_platform == 'linux' or sys_platform == 'win32' +bitsandbytes >=0.42.0,<0.43.0 ; sys_platform == 'darwin' diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index 8fb9122051eec..2da6ae8854d64 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -7,4 +7,4 @@ pytest-rerunfailures ==12.0 pytest-random-order ==1.1.0 click ==8.1.7 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute -torchmetrics >=0.7.0, <1.3.0 # needed for using fixed compare_version +torchmetrics >=0.7.0, <1.5.0 # needed for using fixed compare_version diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt index 6ff628d7edfb5..94aca759c37e2 100644 --- a/requirements/pytorch/base.txt +++ b/requirements/pytorch/base.txt @@ -1,11 +1,11 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torch >=2.1.0, <2.5.0 +torch >=2.1.0, <2.6.0 tqdm >=4.57.0, <4.67.0 PyYAML >=5.4, <6.1.0 fsspec[http] >=2022.5.0, <2024.4.0 -torchmetrics >=0.7.0, <1.3.0 # needed for using fixed compare_version +torchmetrics >=0.7.0, <1.5.0 # needed for using fixed compare_version packaging >=20.0, <=23.1 typing-extensions >=4.4.0, <4.10.0 
lightning-utilities >=0.10.0, <0.12.0 diff --git a/requirements/pytorch/examples.txt b/requirements/pytorch/examples.txt index 9a6ae7e47dfb8..2e793e0045da9 100644 --- a/requirements/pytorch/examples.txt +++ b/requirements/pytorch/examples.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment requests <2.32.0 -torchvision >=0.16.0, <0.20.0 +torchvision >=0.16.0, <0.21.0 ipython[all] <8.15.0 -torchmetrics >=0.10.0, <1.3.0 +torchmetrics >=0.10.0, <1.5.0 lightning-utilities >=0.8.0, <0.12.0 diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt index 6962da858c4ab..12bbdf5a70ab0 100644 --- a/requirements/pytorch/extra.txt +++ b/requirements/pytorch/extra.txt @@ -8,4 +8,5 @@ hydra-core >=1.2.0, <1.4.0 jsonargparse[signatures] >=4.27.7, <4.28.0 rich >=12.3.0, <13.6.0 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute -bitsandbytes >=0.42.0,<0.43.0 +bitsandbytes >=0.44.0,<0.44.2; sys_platform == 'linux' or sys_platform == 'win32' +bitsandbytes >=0.42.0,<0.43.0 ; sys_platform == 'darwin' diff --git a/requirements/typing.txt b/requirements/typing.txt index 0323edfd6098a..71414998dd7f3 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,5 +1,5 @@ mypy==1.11.0 -torch==2.4.1 +torch==2.5.1 types-Markdown types-PyYAML diff --git a/src/lightning/fabric/__init__.py b/src/lightning/fabric/__init__.py index 921d3d61e60fe..d675b21e5d1d2 100644 --- a/src/lightning/fabric/__init__.py +++ b/src/lightning/fabric/__init__.py @@ -2,6 +2,7 @@ import logging import os +import sys from lightning_utilities.core.imports import package_available @@ -26,6 +27,10 @@ # https://github.com/pytorch/pytorch/issues/83973 os.environ["PYTORCH_NVML_BASED_CUDA_CHECK"] = "1" +# see https://github.com/pytorch/pytorch/issues/139990 +if sys.platform == "win32": + os.environ["USE_LIBUV"] = "0" + from lightning.fabric.fabric import Fabric # noqa: E402 from lightning.fabric.utilities.seed import seed_everything # noqa: E402 diff --git a/src/lightning/fabric/cli.py b/src/lightning/fabric/cli.py index 5ca46ba331622..7c81afa916196 100644 --- a/src/lightning/fabric/cli.py +++ b/src/lightning/fabric/cli.py @@ -140,7 +140,7 @@ def _main() -> None: type=click.Choice(get_args(_PRECISION_INPUT_STR) + get_args(_PRECISION_INPUT_STR_ALIAS)), default=None, help=( - "Double precision (``64-true`` or ``64``), full precision (``32-true`` or ``64``), " + "Double precision (``64-true`` or ``64``), full precision (``32-true`` or ``32``), " "half precision (``16-mixed`` or ``16``) or bfloat16 precision (``bf16-mixed`` or ``bf16``)" ), ) diff --git a/src/lightning/fabric/plugins/precision/bitsandbytes.py b/src/lightning/fabric/plugins/precision/bitsandbytes.py index 0f524dd67fad9..394415452890a 100644 --- a/src/lightning/fabric/plugins/precision/bitsandbytes.py +++ b/src/lightning/fabric/plugins/precision/bitsandbytes.py @@ -43,7 +43,7 @@ class BitsandbytesPrecision(Precision): - """Plugin for quantizing weights with `bitsandbytes `__. + """Plugin for quantizing weights with `bitsandbytes `__. .. warning:: This is an :ref:`experimental ` feature. 
@@ -184,11 +184,15 @@ def _replace_param( if param.device.type == "meta": if isinstance(param, bnb.nn.Params4bit): return bnb.nn.Params4bit( - data, + data=data, requires_grad=data.requires_grad, quant_state=quant_state, + blocksize=param.blocksize, compress_statistics=param.compress_statistics, quant_type=param.quant_type, + quant_storage=param.quant_storage, + module=param.module, + bnb_quantized=param.bnb_quantized, ) return torch.nn.Parameter(data, requires_grad=data.requires_grad) param.data = data @@ -322,6 +326,7 @@ def quantize_(self, weight: Optional[torch.Tensor] = None, device: Optional[torc return assert isinstance(self.weight, bnb.nn.Params4bit) self.weight = self.quantize(self.weight, weight, device) + self.weight.bnb_quantized = True @staticmethod def quantize( @@ -337,6 +342,7 @@ def quantize( blocksize=params4bit.blocksize, compress_statistics=params4bit.compress_statistics, quant_type=params4bit.quant_type, + quant_storage=params4bit.quant_storage, ) return _replace_param(params4bit, w_4bit, quant_state) diff --git a/src/lightning/fabric/strategies/deepspeed.py b/src/lightning/fabric/strategies/deepspeed.py index 93a17f10c8998..e71b8e2db3d58 100644 --- a/src/lightning/fabric/strategies/deepspeed.py +++ b/src/lightning/fabric/strategies/deepspeed.py @@ -598,7 +598,7 @@ def _initialize_engine( ) -> Tuple["DeepSpeedEngine", Optimizer]: """Initialize one model and one optimizer with an optional learning rate scheduler. - This calls :func:`deepspeed.initialize` internally. + This calls ``deepspeed.initialize`` internally. """ import deepspeed diff --git a/src/lightning/fabric/strategies/xla_fsdp.py b/src/lightning/fabric/strategies/xla_fsdp.py index 6da693bafb1c8..e4c080d8110db 100644 --- a/src/lightning/fabric/strategies/xla_fsdp.py +++ b/src/lightning/fabric/strategies/xla_fsdp.py @@ -56,7 +56,7 @@ class XLAFSDPStrategy(ParallelStrategy, _Sharded): .. warning:: This is an :ref:`experimental ` feature. 
- For more information check out https://github.com/pytorch/xla/blob/master/docs/fsdp.md + For more information check out https://github.com/pytorch/xla/blob/v2.5.0/docs/fsdp.md Args: auto_wrap_policy: Same as ``auto_wrap_policy`` parameter in diff --git a/src/lightning/pytorch/callbacks/progress/rich_progress.py b/src/lightning/pytorch/callbacks/progress/rich_progress.py index 497e96e11b9c4..896de71267835 100644 --- a/src/lightning/pytorch/callbacks/progress/rich_progress.py +++ b/src/lightning/pytorch/callbacks/progress/rich_progress.py @@ -206,14 +206,14 @@ class RichProgressBarTheme: """ - description: Union[str, "Style"] = "white" + description: Union[str, "Style"] = "" progress_bar: Union[str, "Style"] = "#6206E0" progress_bar_finished: Union[str, "Style"] = "#6206E0" progress_bar_pulse: Union[str, "Style"] = "#6206E0" - batch_progress: Union[str, "Style"] = "white" - time: Union[str, "Style"] = "grey54" - processing_speed: Union[str, "Style"] = "grey70" - metrics: Union[str, "Style"] = "white" + batch_progress: Union[str, "Style"] = "" + time: Union[str, "Style"] = "dim" + processing_speed: Union[str, "Style"] = "dim underline" + metrics: Union[str, "Style"] = "italic" metrics_text_delimiter: str = " " metrics_format: str = ".3f" @@ -280,7 +280,6 @@ def __init__( self._metric_component: Optional[MetricsTextColumn] = None self._progress_stopped: bool = False self.theme = theme - self._update_for_light_colab_theme() @property def refresh_rate(self) -> float: @@ -318,13 +317,6 @@ def test_progress_bar(self) -> "Task": assert self.test_progress_bar_id is not None return self.progress.tasks[self.test_progress_bar_id] - def _update_for_light_colab_theme(self) -> None: - if _detect_light_colab_theme(): - attributes = ["description", "batch_progress", "metrics"] - for attr in attributes: - if getattr(self.theme, attr) == "white": - setattr(self.theme, attr, "black") - @override def disable(self) -> None: self._enabled = False @@ -449,7 +441,7 @@ def on_validation_batch_start( def _add_task(self, total_batches: Union[int, float], description: str, visible: bool = True) -> "TaskID": assert self.progress is not None return self.progress.add_task( - f"[{self.theme.description}]{description}", + f"[{self.theme.description}]{description}" if self.theme.description else description, total=total_batches, visible=visible, ) @@ -656,20 +648,3 @@ def __getstate__(self) -> Dict: state["progress"] = None state["_console"] = None return state - - -def _detect_light_colab_theme() -> bool: - """Detect if it's light theme in Colab.""" - try: - import get_ipython - except (NameError, ModuleNotFoundError): - return False - ipython = get_ipython() - if "google.colab" in str(ipython.__class__): - try: - from google.colab import output - - return output.eval_js('document.documentElement.matches("[theme=light]")') - except ModuleNotFoundError: - return False - return False diff --git a/src/lightning/pytorch/core/module.py b/src/lightning/pytorch/core/module.py index 782fc40d928ef..d8374ef7ea5e8 100644 --- a/src/lightning/pytorch/core/module.py +++ b/src/lightning/pytorch/core/module.py @@ -531,7 +531,7 @@ def log( logger=logger, on_step=on_step, on_epoch=on_epoch, - reduce_fx=reduce_fx, # type: ignore[arg-type] + reduce_fx=reduce_fx, enable_graph=enable_graph, add_dataloader_idx=add_dataloader_idx, batch_size=batch_size, @@ -1405,7 +1405,9 @@ def forward(self, x): input_sample = self._apply_batch_transfer_handler(input_sample) file_path = str(file_path) if isinstance(file_path, Path) else file_path - 
torch.onnx.export(self, input_sample, file_path, **kwargs) + # PyTorch (2.5) declares file_path to be str | PathLike[Any] | None, but + # BytesIO does work, too. + torch.onnx.export(self, input_sample, file_path, **kwargs) # type: ignore self.train(mode) @torch.no_grad() diff --git a/src/lightning/pytorch/plugins/precision/bitsandbytes.py b/src/lightning/pytorch/plugins/precision/bitsandbytes.py index 62acc7bf77c8d..3a2daa828bc3c 100644 --- a/src/lightning/pytorch/plugins/precision/bitsandbytes.py +++ b/src/lightning/pytorch/plugins/precision/bitsandbytes.py @@ -16,7 +16,7 @@ class BitsandbytesPrecision(Precision, FabricBNBPrecision): - """Plugin for quantizing weights with `bitsandbytes `__. + """Plugin for quantizing weights with `bitsandbytes `__. .. warning:: This is an :ref:`experimental ` feature. diff --git a/src/lightning/pytorch/strategies/deepspeed.py b/src/lightning/pytorch/strategies/deepspeed.py index 382f8070898f8..1eaa5bab75fbe 100644 --- a/src/lightning/pytorch/strategies/deepspeed.py +++ b/src/lightning/pytorch/strategies/deepspeed.py @@ -414,7 +414,7 @@ def _setup_model_and_optimizer( ) -> Tuple["deepspeed.DeepSpeedEngine", Optimizer]: """Initialize one model and one optimizer with an optional learning rate scheduler. - This calls :func:`deepspeed.initialize` internally. + This calls ``deepspeed.initialize`` internally. """ import deepspeed diff --git a/src/lightning/pytorch/trainer/connectors/logger_connector/result.py b/src/lightning/pytorch/trainer/connectors/logger_connector/result.py index 583105c3660e0..62cc7844d3897 100644 --- a/src/lightning/pytorch/trainer/connectors/logger_connector/result.py +++ b/src/lightning/pytorch/trainer/connectors/logger_connector/result.py @@ -351,6 +351,7 @@ def _extract_batch_size(self, value: _ResultMetric, batch_size: Optional[int], m return batch_size + @torch.compiler.disable def log( self, fx: str, @@ -413,6 +414,7 @@ def log( batch_size = self._extract_batch_size(self[key], batch_size, meta) self.update_metrics(key, value, batch_size) + @torch.compiler.disable def update_metrics(self, key: str, value: _VALUE, batch_size: int) -> None: result_metric = self[key] # performance: avoid calling `__call__` to avoid the checks in `torch.nn.Module._call_impl` diff --git a/tests/run_standalone_tests.sh b/tests/run_standalone_tests.sh index 0aa0bacff168a..8a4d8e180d112 100755 --- a/tests/run_standalone_tests.sh +++ b/tests/run_standalone_tests.sh @@ -48,6 +48,7 @@ function show_batched_output { # heuristic: stop if there's mentions of errors. this can prevent false negatives when only some of the ranks fail if perl -nle 'print if /error|(?