From d016ee965ba6e00b5624379994d5b82c66f5768e Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Wed, 19 Mar 2025 15:15:38 +0100
Subject: [PATCH 001/112] ci: bump next `torch` 2.7 (#20656)

* ci: bump next `torch` 2.7

* cuda_version: "12.6.3"

(cherry picked from commit 28f0fcb2861fc3d0e45d6bef858db27a93559726)
---
 .azure/gpu-tests-fabric.yml | 3 +++
 .azure/gpu-tests-pytorch.yml | 3 +++
 .github/workflows/ci-tests-fabric.yml | 6 +++---
 .github/workflows/ci-tests-pytorch.yml | 6 +++---
 .github/workflows/docker-build.yml | 3 ++-
 5 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml
index 4d738d9110599..9e514fdd853cd 100644
--- a/.azure/gpu-tests-fabric.yml
+++ b/.azure/gpu-tests-fabric.yml
@@ -62,6 +62,9 @@ jobs:
       "Fabric | latest":
         image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.6-cuda12.4.1"
         PACKAGE_NAME: "fabric"
+      "Fabric | future":
+        image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
+        PACKAGE_NAME: "fabric"
       "Lightning | latest":
         image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.6-cuda12.4.1"
         PACKAGE_NAME: "lightning"
diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml
index 414f98dab3f66..986be6ae74a1a 100644
--- a/.azure/gpu-tests-pytorch.yml
+++ b/.azure/gpu-tests-pytorch.yml
@@ -55,6 +55,9 @@ jobs:
       "PyTorch | latest":
         image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.6-cuda12.4.1"
         PACKAGE_NAME: "pytorch"
+      "PyTorch | future":
+        image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
+        PACKAGE_NAME: "pytorch"
       "Lightning | latest":
         image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.6-cuda12.4.1"
         PACKAGE_NAME: "lightning"
diff --git a/.github/workflows/ci-tests-fabric.yml b/.github/workflows/ci-tests-fabric.yml
index b94772b4fbcbc..51ee7cfeead4f 100644
--- a/.github/workflows/ci-tests-fabric.yml
+++ b/.github/workflows/ci-tests-fabric.yml
@@ -68,9 +68,9 @@ jobs:
           - { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.5" }
           - { os: "windows-2022", pkg-name: "fabric", python-version: "3.10", pytorch-version: "2.5" }
           # adding recently cut Torch 2.7 - FUTURE
-          # - { os: "macOS-14", pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.7" }
-          # - { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.7" }
-          # - { os: "windows-2022", pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.7" }
+          - { os: "macOS-14", pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.7" }
+          - { os: "ubuntu-22.04", pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.7" }
+          - { os: "windows-2022", pkg-name: "fabric", python-version: "3.12", pytorch-version: "2.7" }
     timeout-minutes: 25 # because of building grpcio on Mac
     env:
       PACKAGE_NAME: ${{ matrix.pkg-name }}
diff --git a/.github/workflows/ci-tests-pytorch.yml b/.github/workflows/ci-tests-pytorch.yml
index a731723b25fad..1e4b0bc4979ff 100644
--- a/.github/workflows/ci-tests-pytorch.yml
+++ b/.github/workflows/ci-tests-pytorch.yml
@@ -72,9 +72,9 @@ jobs:
           - { os: "ubuntu-22.04", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.5" }
           - { os: "windows-2022", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "2.5" }
           # adding recently cut Torch 2.7 - FUTURE
-          # - { os: "macOS-14", pkg-name: "pytorch", python-version: "3.12", pytorch-version: "2.7" }
-          # - { os: "ubuntu-22.04", pkg-name: "pytorch", python-version: "3.12", pytorch-version: "2.7" }
-          # - { os: "windows-2022", pkg-name: "pytorch", python-version: "3.12", pytorch-version: "2.7" }
+          - { os: "macOS-14", pkg-name: "pytorch", python-version: "3.12", pytorch-version: "2.7" }
+          - { os: "ubuntu-22.04", pkg-name: "pytorch", python-version: "3.12", pytorch-version: "2.7" }
+          - { os: "windows-2022", pkg-name: "pytorch", python-version: "3.12", pytorch-version: "2.7" }
     timeout-minutes: 50
    env:
       PACKAGE_NAME: ${{ matrix.pkg-name }}
diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
index b623cdc9337f3..21842413f7301 100644
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -101,6 +101,7 @@ jobs:
           - { python_version: "3.11", pytorch_version: "2.4.1", cuda_version: "12.1.1" }
           - { python_version: "3.12", pytorch_version: "2.5.1", cuda_version: "12.1.1" }
           - { python_version: "3.12", pytorch_version: "2.6.0", cuda_version: "12.4.1" }
+          - { python_version: "3.12", pytorch_version: "2.7.0", cuda_version: "12.6.3" }
     steps:
       - uses: actions/checkout@v4
       - uses: docker/setup-buildx-action@v3
@@ -131,7 +132,7 @@ jobs:
           status: ${{ job.status }}
           token: ${{ secrets.GITHUB_TOKEN }}
           notification_title: ${{ format('CUDA; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }}
-          message_format: "{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>" # akihironitta
+          message_format: "{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>"
         env:
           SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

From 29d0ef3846fb6c058e0d9459ea8fda9c7e3f0c2f Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Wed, 19 Mar 2025 15:15:58 +0100
Subject: [PATCH 002/112] docker: releasing PL with latest PT 2.6 (#20655)

* docker: releasing PL with PT 2.6

* cuda_version: "12.4.1"

(cherry picked from commit 5cbfb8649ad987701504178d8a9ae086865b0369)
---
 .github/checkgroup.yml | 4 +++-
 .github/workflows/docker-build.yml | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml
index 271284635b638..5dbf9fdbb1763 100644
--- a/.github/checkgroup.yml
+++ b/.github/checkgroup.yml
@@ -127,12 +127,14 @@ subprojects:
       - "build-cuda (3.11, 2.3.1, 12.1.1)"
       - "build-cuda (3.11, 2.4.1, 12.1.1)"
       - "build-cuda (3.12, 2.5.1, 12.1.1)"
+      - "build-cuda (3.12, 2.6.0, 12.4.1)"
       #- "build-NGC"
       - "build-pl (3.10, 2.1, 12.1.1)"
       - "build-pl (3.11, 2.2, 12.1.1)"
       - "build-pl (3.11, 2.3, 12.1.1)"
       - "build-pl (3.11, 2.4, 12.1.1)"
-      - "build-pl (3.12, 2.5, 12.1.1, true)"
+      - "build-pl (3.12, 2.5, 12.1.1)"
+      - "build-pl (3.12, 2.6, 12.4.1, true)"

   # SECTION: lightning_fabric

diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
index 21842413f7301..97031afaa5ab2 100644
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -47,7 +47,8 @@ jobs:
           - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.1" }
           - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.1" }
           - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.1" }
-          - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.1", latest: "true" }
+          - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.1" }
+          - { python_version: "3.12", pytorch_version: "2.6", cuda_version: "12.4.1", latest: "true" }
     steps:
       - uses: actions/checkout@v4
         with:

From c8f30decec51b673ea0ff81b1bb47e9cf0040fa0 Mon Sep 17 00:00:00 2001
From: PL Ghost <75324987+pl-ghost@users.noreply.github.com>
Date: Fri, 21 Mar 2025 08:37:24 +0100
Subject: [PATCH 003/112] Adding test for legacy checkpoint created with 2.5.1 (#20659)

update tutorials to `2.5.1`

Co-authored-by: Borda
(cherry picked from commit d04fc349a7b1227367059737ab351003fbd5cd36)
---
 tests/legacy/back-compatible-versions.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/legacy/back-compatible-versions.txt b/tests/legacy/back-compatible-versions.txt
index eb49457dfa157..996f1340747dc 100644
--- a/tests/legacy/back-compatible-versions.txt
+++ b/tests/legacy/back-compatible-versions.txt
@@ -104,3 +104,4 @@
 2.3.1
 2.3.2
 2.3.3
+2.5.1

From aca3e2eaf39dc7cfd5eb285fb923839fabc890f4 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Fri, 21 Mar 2025 10:43:26 +0100
Subject: [PATCH 004/112] fix case-sensitive model name (#20661)

(cherry picked from commit 851d022ef8ad711de703dc7c4a65fff15b19f89c)
---
 src/lightning/pytorch/utilities/model_registry.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/lightning/pytorch/utilities/model_registry.py b/src/lightning/pytorch/utilities/model_registry.py
index a9ed495eb37d8..104da2514f5c2 100644
--- a/src/lightning/pytorch/utilities/model_registry.py
+++ b/src/lightning/pytorch/utilities/model_registry.py
@@ -63,7 +63,7 @@ def _parse_registry_model_version(ckpt_path: Optional[_PATH]) -> tuple[str, str]
     ('model-name', '1.0')
     >>> _parse_registry_model_version("registry:model-name")
     ('model-name', '')
-    >>> _parse_registry_model_version("registry:version:v2")
+    >>> _parse_registry_model_version("registry:VERSION:v2")
     ('', 'v2')

     """
     if not ckpt_path:
         raise ValueError(f"Invalid registry path: {ckpt_path}")

     # Split the path by ':'
-    parts = str(ckpt_path).lower().split(":")
+    parts = str(ckpt_path).split(":")
     # Default values
     model_name, version = "", ""
     # Extract the model name and version based on the parts
-    if len(parts) >= 2 and parts[1] != "version":
+    if len(parts) >= 2 and parts[1].lower() != "version":
         model_name = parts[1]
-    if len(parts) == 3 and parts[1] == "version":
+    if len(parts) == 3 and parts[1].lower() == "version":
         version = parts[2]
-    elif len(parts) == 4 and parts[2] == "version":
+    elif len(parts) == 4 and parts[2].lower() == "version":
         version = parts[3]
     return model_name, version

From a43288b0d8232449a57afc2dd36ddb7d88383860 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 24 Mar 2025 21:06:52 +0100
Subject: [PATCH 005/112] build(deps): bump Lightning-AI/utilities from 0.14.1 to 0.14.2 (#20667)

* build(deps): bump Lightning-AI/utilities from 0.14.1 to 0.14.2

Bumps [Lightning-AI/utilities](https://github.com/lightning-ai/utilities) from 0.14.1 to 0.14.2.
- [Release notes](https://github.com/lightning-ai/utilities/releases)
- [Changelog](https://github.com/Lightning-AI/utilities/blob/main/CHANGELOG.md)
- [Commits](https://github.com/lightning-ai/utilities/compare/v0.14.1...v0.14.2)

---
updated-dependencies:
- dependency-name: Lightning-AI/utilities
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]

* Apply suggestions from code review

---------

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
(cherry picked from commit 44eb839880d595e1c061ff6d92d868739413deb5)
---
 .github/workflows/call-clear-cache.yml | 8 ++++----
 .github/workflows/ci-check-md-links.yml | 2 +-
 .github/workflows/ci-schema.yml | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/call-clear-cache.yml b/.github/workflows/call-clear-cache.yml
index b736d2a91f55f..bfadd100fa65d 100644
--- a/.github/workflows/call-clear-cache.yml
+++ b/.github/workflows/call-clear-cache.yml
@@ -23,18 +23,18 @@ on:
 jobs:
   cron-clear:
     if: github.event_name == 'schedule' || github.event_name == 'pull_request'
-    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.14.1
+    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.14.2
     with:
-      scripts-ref: v0.14.1
+      scripts-ref: v0.14.2
       dry-run: ${{ github.event_name == 'pull_request' }}
       pattern: "latest|docs"
       age-days: 7

   direct-clear:
     if: github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
-    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.14.1
+    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.14.2
     with:
-      scripts-ref: v0.14.1
+      scripts-ref: v0.14.2
      dry-run: ${{ github.event_name == 'pull_request' }}
       pattern: ${{ inputs.pattern || 'pypi_wheels' }} # setting str in case of PR / debugging
       age-days: ${{ fromJSON(inputs.age-days) || 0 }} # setting 0 in case of PR / debugging
diff --git a/.github/workflows/ci-check-md-links.yml b/.github/workflows/ci-check-md-links.yml
index b619b756c1349..413138f8f677a 100644
--- a/.github/workflows/ci-check-md-links.yml
+++ b/.github/workflows/ci-check-md-links.yml
@@ -14,7 +14,7 @@ on:

 jobs:
   check-md-links:
-    uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.14.1
+    uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.14.2
     with:
       config-file: ".github/markdown-links-config.json"
       base-branch: "master"
diff --git a/.github/workflows/ci-schema.yml b/.github/workflows/ci-schema.yml
index aec5f9b4bc261..08273029a3141 100644
--- a/.github/workflows/ci-schema.yml
+++ b/.github/workflows/ci-schema.yml
@@ -8,7 +8,7 @@ on:

 jobs:
   check:
-    uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.14.1
+    uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.14.2
     with:
       # skip azure due to the wrong schema file by MSFT
       # https://github.com/Lightning-AI/lightning-flash/pull/1455#issuecomment-1244793607

From dd2dc0ba821f14b34a7e6891143f08ce58054d77 Mon Sep 17 00:00:00 2001
From: Robin <167366979+allrob23@users.noreply.github.com>
Date: Fri, 28 Mar 2025 05:47:24 -0300
Subject: [PATCH 006/112] Improve performance of available_accelerators by returning a set (#20672)

perf[registry]: update available_accelerators to return a set

(cherry picked from commit 5cc8471cf7088ce346952330052a9b659d7ec62d)
---
 src/lightning/fabric/accelerators/registry.py | 6 +++---
 tests/tests_fabric/accelerators/test_registry.py | 2 +-
 tests/tests_pytorch/accelerators/test_registry.py | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/lightning/fabric/accelerators/registry.py b/src/lightning/fabric/accelerators/registry.py
index 17d5233336d50..4959a0fb9426a 100644
--- a/src/lightning/fabric/accelerators/registry.py
+++ b/src/lightning/fabric/accelerators/registry.py
@@ -107,9 +107,9 @@ def remove(self, name: str) -> None:
         """Removes the registered accelerator by name."""
         self.pop(name)

-    def available_accelerators(self) -> list[str]:
-        """Returns a list of registered accelerators."""
-        return list(self.keys())
+    def available_accelerators(self) -> set[str]:
+        """Returns a set of registered accelerators."""
+        return set(self.keys())

     def __str__(self) -> str:
         return "Registered Accelerators: {}".format(", ".join(self.available_accelerators()))
diff --git a/tests/tests_fabric/accelerators/test_registry.py b/tests/tests_fabric/accelerators/test_registry.py
index 28bfbb8ffd97c..8036a6f45b8a0 100644
--- a/tests/tests_fabric/accelerators/test_registry.py
+++ b/tests/tests_fabric/accelerators/test_registry.py
@@ -70,4 +70,4 @@ def is_available():

 def test_available_accelerators_in_registry():
-    assert ACCELERATOR_REGISTRY.available_accelerators() == ["cpu", "cuda", "mps", "tpu"]
+    assert ACCELERATOR_REGISTRY.available_accelerators() == {"cpu", "cuda", "mps", "tpu"}
diff --git a/tests/tests_pytorch/accelerators/test_registry.py b/tests/tests_pytorch/accelerators/test_registry.py
index 1c4358fea9696..8b29c9e937247 100644
--- a/tests/tests_pytorch/accelerators/test_registry.py
+++ b/tests/tests_pytorch/accelerators/test_registry.py
@@ -16,7 +16,7 @@

 def test_available_accelerators_in_registry():
     """Tests the accelerators available by default, not including external, third-party accelerators."""
-    available = set(AcceleratorRegistry.available_accelerators())
+    available = AcceleratorRegistry.available_accelerators()
     expected = {"cpu", "cuda", "mps", "tpu"}
     # Note: the registry is global, other tests may register new strategies as a side effect
     assert expected.issubset(available)

From 6865957163273451438d752d1e9551fb8a5d2b79 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Fri, 28 Mar 2025 09:48:48 +0100
Subject: [PATCH 007/112] drop duplicated `torchmetrics` requirement from Fabric's testing (#20675)

(cherry picked from commit ca13f77eab5d8b85ee84eb9fd7484c324ba198b1)
---
 requirements/fabric/test.txt | 1 -
 requirements/pytorch/base.txt | 2 +-
 src/lightning/fabric/utilities/spike.py | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt
index 2da6ae8854d64..4ce2ff40f5bce 100644
--- a/requirements/fabric/test.txt
+++ b/requirements/fabric/test.txt
@@ -7,4 +7,3 @@ pytest-rerunfailures ==12.0
 pytest-random-order ==1.1.0
 click ==8.1.7
 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute
-torchmetrics >=0.7.0, <1.5.0 # needed for using fixed compare_version
diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt
index cdf3cc03e2985..7a87aa86278c4 100644
--- a/requirements/pytorch/base.txt
+++ b/requirements/pytorch/base.txt
@@ -5,7 +5,7 @@ torch >=2.1.0, <2.6.0
 tqdm >=4.57.0, <4.67.0
 PyYAML >=5.4, <6.1.0
 fsspec[http] >=2022.5.0, <2024.4.0
-torchmetrics >=0.7.0, <1.5.0 # needed for using fixed compare_version
+torchmetrics >=0.7.0, <1.5.0
 packaging >=20.0, <=23.1
 typing-extensions >=4.4.0, <4.11.0
 lightning-utilities >=0.10.0, <0.12.0
diff --git a/src/lightning/fabric/utilities/spike.py b/src/lightning/fabric/utilities/spike.py
index 04c554461c58c..e96eccd75b1a2 100644
--- a/src/lightning/fabric/utilities/spike.py
+++ b/src/lightning/fabric/utilities/spike.py
@@ -52,7 +52,7 @@ def __init__(
             from torchmetrics.aggregation import MeanMetric
             from torchmetrics.wrappers import Running
         else:
-            raise RuntimeError("SpikeDetection requires torchmetrics>=1.0.0! Please upgrade your version!")
+            raise RuntimeError("SpikeDetection requires `torchmetrics>=1.0.0`. Please upgrade your version.")

         super().__init__()
         self.last_val: Union[torch.Tensor, float] = 0.0

From 9e24cc2d43cd25206896c6bd479cf003ca26b8a6 Mon Sep 17 00:00:00 2001
From: 0x8RI4NY33 <47711448+briannnyee@users.noreply.github.com>
Date: Thu, 3 Apr 2025 18:54:46 +0200
Subject: [PATCH 008/112] Update the doc to use `breakpoint()` (#20691)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to my friend ChatGPT, using pdb.set_trace() is too old-school.
Update the doc to use breakpoint() instead to stay young 😎

(cherry picked from commit 831870a15a17cca4152ffde5c8a3ebc535c68ab0)
---
 docs/source-pytorch/debug/debugging_basic.rst | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/docs/source-pytorch/debug/debugging_basic.rst b/docs/source-pytorch/debug/debugging_basic.rst
index ef7ad2b5bb15c..8d725fca673cb 100644
--- a/docs/source-pytorch/debug/debugging_basic.rst
+++ b/docs/source-pytorch/debug/debugging_basic.rst
@@ -33,9 +33,7 @@ A breakpoint stops your code execution so you can inspect variables, etc... and
     x = 2

     # set breakpoint
-    import pdb
-
-    pdb.set_trace()
+    breakpoint()
     y = x**2

 In this example, the code will stop before executing the ``y = x**2`` line.

From 0c2766b69f9e4a51f01f927b07da56c0d7e030fb Mon Sep 17 00:00:00 2001
From: PL Ghost <75324987+pl-ghost@users.noreply.github.com>
Date: Mon, 7 Apr 2025 13:12:41 +0200
Subject: [PATCH 009/112] docs: update ref to latest tutorials (#20698)

update tutorials to `cc67b8ba`

Co-authored-by: Borda
(cherry picked from commit f6fd9c01d2361c400221fec6e4acf92182669a39)
---
 _notebooks | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_notebooks b/_notebooks
index 1e0e807329216..cc67b8ba1842d 160000
--- a/_notebooks
+++ b/_notebooks
@@ -1 +1 @@
-Subproject commit 1e0e80732921606b641a4ab0e2eeebc93a60308f
+Subproject commit cc67b8ba1842d176b14e59727ad1938b3ea47344

From acfb3a550d9fa8d6695fcf9e489827585417b098 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 7 Apr 2025 13:41:16 +0200
Subject: [PATCH 010/112] build(deps): bump Lightning-AI/utilities from 0.14.2 to 0.14.3 (#20700)

* build(deps): bump Lightning-AI/utilities from 0.14.2 to 0.14.3

Bumps [Lightning-AI/utilities](https://github.com/lightning-ai/utilities) from 0.14.2 to 0.14.3.
- [Release notes](https://github.com/lightning-ai/utilities/releases)
- [Changelog](https://github.com/Lightning-AI/utilities/blob/main/CHANGELOG.md)
- [Commits](https://github.com/lightning-ai/utilities/compare/v0.14.2...v0.14.3)

---
updated-dependencies:
- dependency-name: Lightning-AI/utilities
  dependency-version: 0.14.3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]

* Apply suggestions from code review

---------

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
(cherry picked from commit 5e0209ab4113d7f725174569863c161e9a2bddf3)
---
 .github/workflows/call-clear-cache.yml | 8 ++++----
 .github/workflows/ci-check-md-links.yml | 2 +-
 .github/workflows/ci-schema.yml | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/call-clear-cache.yml b/.github/workflows/call-clear-cache.yml
index bfadd100fa65d..6422e856e09ff 100644
--- a/.github/workflows/call-clear-cache.yml
+++ b/.github/workflows/call-clear-cache.yml
@@ -23,18 +23,18 @@ on:
 jobs:
   cron-clear:
     if: github.event_name == 'schedule' || github.event_name == 'pull_request'
-    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.14.2
+    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.14.3
     with:
-      scripts-ref: v0.14.2
+      scripts-ref: v0.14.3
       dry-run: ${{ github.event_name == 'pull_request' }}
       pattern: "latest|docs"
       age-days: 7

   direct-clear:
     if: github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
-    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.14.2
+    uses: Lightning-AI/utilities/.github/workflows/cleanup-caches.yml@v0.14.3
     with:
-      scripts-ref: v0.14.2
+      scripts-ref: v0.14.3
      dry-run: ${{ github.event_name == 'pull_request' }}
       pattern: ${{ inputs.pattern || 'pypi_wheels' }} # setting str in case of PR / debugging
       age-days: ${{ fromJSON(inputs.age-days) || 0 }} # setting 0 in case of PR / debugging
diff --git a/.github/workflows/ci-check-md-links.yml b/.github/workflows/ci-check-md-links.yml
index 413138f8f677a..efae72a87f86c 100644
--- a/.github/workflows/ci-check-md-links.yml
+++ b/.github/workflows/ci-check-md-links.yml
@@ -14,7 +14,7 @@ on:

 jobs:
   check-md-links:
-    uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.14.2
+    uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.14.3
     with:
       config-file: ".github/markdown-links-config.json"
       base-branch: "master"
diff --git a/.github/workflows/ci-schema.yml b/.github/workflows/ci-schema.yml
index 08273029a3141..fe8cbfbc7ddb4 100644
--- a/.github/workflows/ci-schema.yml
+++ b/.github/workflows/ci-schema.yml
@@ -8,7 +8,7 @@ on:

 jobs:
   check:
-    uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.14.2
+    uses: Lightning-AI/utilities/.github/workflows/check-schema.yml@v0.14.3
     with:
       # skip azure due to the wrong schema file by MSFT
       # https://github.com/Lightning-AI/lightning-flash/pull/1455#issuecomment-1244793607

From 8512d1d062023e29d78d31df3eb9fdaa2fb33241 Mon Sep 17 00:00:00 2001
From: Tom Hu <88201630+thomasrockhu-codecov@users.noreply.github.com>
Date: Mon, 7 Apr 2025 04:57:07 -0700
Subject: [PATCH 011/112] feat: add Codecov test results (#20694)

* feat: add Codecov test results

* linter

---------

Co-authored-by: Jirka B
Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
(cherry picked from commit 02311d03fb982560246eead7c08104481fac9579)
---
 .github/workflows/ci-tests-fabric.yml | 3 ++-
 .github/workflows/ci-tests-pytorch.yml | 9 ++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci-tests-fabric.yml b/.github/workflows/ci-tests-fabric.yml
index 51ee7cfeead4f..b3204c71e00b7 100644
--- a/.github/workflows/ci-tests-fabric.yml
+++ b/.github/workflows/ci-tests-fabric.yml
@@ -159,7 +159,8 @@ jobs:
         run: |
           echo $GITHUB_RUN_ID
           python -m coverage run --source ${{ env.COVERAGE_SCOPE }} \
-            -m pytest -v --timeout=30 --durations=50 --random-order-seed=$GITHUB_RUN_ID
+            -m pytest -v --timeout=30 --durations=50 --random-order-seed=$GITHUB_RUN_ID \
+            --junitxml=junit.xml -o junit_family=legacy # NOTE: for Codecov's test results

       - name: Statistics
         if: success()
diff --git a/.github/workflows/ci-tests-pytorch.yml b/.github/workflows/ci-tests-pytorch.yml
index 1e4b0bc4979ff..d295d5475942a 100644
--- a/.github/workflows/ci-tests-pytorch.yml
+++ b/.github/workflows/ci-tests-pytorch.yml
@@ -196,7 +196,8 @@ jobs:
         run: |
           echo $GITHUB_RUN_ID
           python -m coverage run --source ${{ env.COVERAGE_SCOPE }} \
-            -m pytest . -v --timeout=60 --durations=50 --random-order-seed=$GITHUB_RUN_ID
+            -m pytest . -v --timeout=60 --durations=50 --random-order-seed=$GITHUB_RUN_ID \
+            --junitxml=junit.xml -o junit_family=legacy # NOTE: for Codecov's test results

       - name: Statistics
         if: success()
@@ -205,6 +206,12 @@ jobs:
           coverage report
           coverage xml

+      - name: Upload test results to Codecov
+        if: ${{ !cancelled() }} # Run even if tests fail
+        uses: codecov/test-results-action@v1
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v5
         # see: https://github.com/actions/toolkit/issues/399

From 1222da4ef41f2d0abdb30e3878240008125bc619 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 8 Apr 2025 12:44:19 +0200
Subject: [PATCH 012/112] [pre-commit.ci] pre-commit suggestions (#20703)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* [pre-commit.ci] pre-commit suggestions

updates:
- [github.com/codespell-project/codespell: v2.3.0 → v2.4.1](https://github.com/codespell-project/codespell/compare/v2.3.0...v2.4.1)
- [github.com/PyCQA/docformatter: 06907d0267368b49b9180eed423fae5697c1e909 → v1.7.5](https://github.com/PyCQA/docformatter/compare/06907d0267368b49b9180eed423fae5697c1e909...v1.7.5)
- [github.com/astral-sh/ruff-pre-commit: v0.8.6 → v0.11.4](https://github.com/astral-sh/ruff-pre-commit/compare/v0.8.6...v0.11.4)
- [github.com/executablebooks/mdformat: 0.7.21 → 0.7.22](https://github.com/executablebooks/mdformat/compare/0.7.21...0.7.22)
- [github.com/pre-commit/mirrors-prettier: v3.1.0 → v4.0.0-alpha.8](https://github.com/pre-commit/mirrors-prettier/compare/v3.1.0...v4.0.0-alpha.8)

* Apply suggestions from code review

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
(cherry picked from commit cd785a4ba437e0549646fede6c1bf515d1a39124)
---
 .pre-commit-config.yaml | 6 +++---
 src/lightning/pytorch/callbacks/timer.py | 4 ++--
 tests/tests_pytorch/helpers/datasets.py | 2 +-
 .../tests_pytorch/trainer/connectors/test_data_connector.py | 2 +-
 .../tests_pytorch/trainer/logging_/test_logger_connector.py | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f2e475f602913..4723638fc5e4a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -51,7 +51,7 @@ repos:
       - id: detect-private-key

   - repo: https://github.com/codespell-project/codespell
-    rev: v2.3.0
+    rev: v2.4.1
    hooks:
       - id: codespell
         additional_dependencies: [tomli]
@@ -70,7 +70,7 @@ repos:
       - id: sphinx-lint

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.6
+    rev: v0.11.4
     hooks:
       # try to fix what is possible
       - id: ruff
@@ -81,7 +81,7 @@ repos:
       - id: ruff

   - repo: https://github.com/executablebooks/mdformat
-    rev: 0.7.21
+    rev: 0.7.22
     hooks:
       - id: mdformat
         additional_dependencies:
diff --git a/src/lightning/pytorch/callbacks/timer.py b/src/lightning/pytorch/callbacks/timer.py
index b6b74d280427c..91f5fd0e75d9b 100644
--- a/src/lightning/pytorch/callbacks/timer.py
+++ b/src/lightning/pytorch/callbacks/timer.py
@@ -111,8 +111,8 @@ def __init__(
         self._duration = duration.total_seconds() if duration is not None else None
         self._interval = interval
         self._verbose = verbose
-        self._start_time: dict[RunningStage, Optional[float]] = {stage: None for stage in RunningStage}
-        self._end_time: dict[RunningStage, Optional[float]] = {stage: None for stage in RunningStage}
+        self._start_time: dict[RunningStage, Optional[float]] = dict.fromkeys(RunningStage)
+        self._end_time: dict[RunningStage, Optional[float]] = dict.fromkeys(RunningStage)
         self._offset = 0

     def start_time(self, stage: str = RunningStage.TRAINING) -> Optional[float]:
diff --git a/tests/tests_pytorch/helpers/datasets.py b/tests/tests_pytorch/helpers/datasets.py
index 014fb374e5d5e..638d3a2946a74 100644
--- a/tests/tests_pytorch/helpers/datasets.py
+++ b/tests/tests_pytorch/helpers/datasets.py
@@ -148,7 +148,7 @@ def __init__(self, root: str, num_samples: int = 100, digits: Optional[Sequence]

     @staticmethod
     def _prepare_subset(full_data: Tensor, full_targets: Tensor, num_samples: int, digits: Sequence):
-        classes = {d: 0 for d in digits}
+        classes = dict.fromkeys(digits, 0)
         indexes = []
         for idx, target in enumerate(full_targets):
             label = target.item()
diff --git a/tests/tests_pytorch/trainer/connectors/test_data_connector.py b/tests/tests_pytorch/trainer/connectors/test_data_connector.py
index ceb0418f2cb1d..1bb0d1478e7d3 100644
--- a/tests/tests_pytorch/trainer/connectors/test_data_connector.py
+++ b/tests/tests_pytorch/trainer/connectors/test_data_connector.py
@@ -497,7 +497,7 @@ def test_dataloader_source_request_from_module():

 @pytest.mark.parametrize(
-    "hook_name", ("on_before_batch_transfer", "transfer_batch_to_device", "on_after_batch_transfer")
+    "hook_name", ["on_before_batch_transfer", "transfer_batch_to_device", "on_after_batch_transfer"]
 )
 class TestDataHookSelector:
     def overridden_func(self, batch, *args, **kwargs):
diff --git a/tests/tests_pytorch/trainer/logging_/test_logger_connector.py b/tests/tests_pytorch/trainer/logging_/test_logger_connector.py
index faf88a09f6499..d3d355edb003b 100644
--- a/tests/tests_pytorch/trainer/logging_/test_logger_connector.py
+++ b/tests/tests_pytorch/trainer/logging_/test_logger_connector.py
@@ -246,7 +246,7 @@ def test_fx_validator_integration(tmp_path):
     })
     trainer.test(model, verbose=False)

-    not_supported.update({k: "result collection is not registered yet" for k in not_supported})
+    not_supported.update(dict.fromkeys(not_supported, "result collection is not registered yet"))
     not_supported.update({
         "predict_dataloader": "result collection is not registered yet",
         "on_predict_model_eval": "result collection is not registered yet",

From 99ed7e10850dffc332445b065eb18d30ff519dba Mon Sep 17 00:00:00 2001
From: PL Ghost <75324987+pl-ghost@users.noreply.github.com>
Date: Mon, 14 Apr 2025 09:33:56 +0200
Subject: [PATCH 013/112] docs: update ref to latest tutorials (#20712)

update tutorials to `1c1160e5`

Co-authored-by: Borda
(cherry picked from commit 03635d2d2e23b5286c675cc90479f3246de6814e)
---
 _notebooks | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_notebooks b/_notebooks
index cc67b8ba1842d..1c1160e543bb5 160000
--- a/_notebooks
+++ b/_notebooks
@@ -1 +1 @@
-Subproject commit cc67b8ba1842d176b14e59727ad1938b3ea47344
+Subproject commit 1c1160e543bb56760886a45dcb7e1e03a22f634c

From fce15516462f571af5ee791d9a1454d23c11b74f Mon Sep 17 00:00:00 2001
From: Deependu
Date: Thu, 17 Apr 2025 23:16:01 +0530
Subject: [PATCH 014/112] feat: add tests for `save_hyperparameters` with `ignore` behavior and composition handling (#20718)

* add tests to check for `save_hyperparameters: ignore`

* update

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
(cherry picked from commit 8055717b9c73c7c270d09ffbaec6b0798d9c56a4)
---
 tests/tests_pytorch/models/test_hparams.py | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tests/tests_pytorch/models/test_hparams.py b/tests/tests_pytorch/models/test_hparams.py
index 6fd400aab2724..3c7838f11a85a 100644
--- a/tests/tests_pytorch/models/test_hparams.py
+++ b/tests/tests_pytorch/models/test_hparams.py
@@ -440,6 +440,41 @@ def __init__(self, same_arg="parent_default", other_arg="other"):
     assert parent.child.hparams == {"same_arg": "cocofruit"}


+@pytest.mark.parametrize("base_class", [HyperparametersMixin, LightningModule, LightningDataModule])
+def test_save_hyperparameters_ignore(base_class):
+    """Test if `save_hyperparameters` applies the ignore list correctly during initialization."""
+
+    class PLSubclass(base_class):
+        def __init__(self, learning_rate=1e-3, optimizer="adam"):
+            super().__init__()
+            self.save_hyperparameters(ignore=["learning_rate"])
+
+    pl_instance = PLSubclass(learning_rate=0.01, optimizer="sgd")
+    assert pl_instance.hparams == {"optimizer": "sgd"}
+
+
+@pytest.mark.parametrize("base_class", [HyperparametersMixin, LightningModule, LightningDataModule])
+def test_save_hyperparameters_ignore_under_composition(base_class):
+    """Test that in a composed system, hyperparameter saving skips ignored fields from nested modules."""
+
+    class ChildModule(base_class):
+        def __init__(self, dropout, activation, init_method):
+            super().__init__()
+            self.save_hyperparameters(ignore=["dropout", "activation"])
+
+    class ParentModule(base_class):
+        def __init__(self, batch_size, optimizer):
+            super().__init__()
+            self.child = ChildModule(dropout=0.1, activation="relu", init_method="xavier")
+
+    class PipelineWrapper:  # not a Lightning subclass on purpose
+        def __init__(self, run_id="abc123", seed=42):
+            self.parent_module = ParentModule(batch_size=64, optimizer="adam")
+
+    pipeline = PipelineWrapper()
+    assert pipeline.parent_module.child.hparams == {"init_method": "xavier", "batch_size": 64, "optimizer": "adam"}
+
+
 class LocalVariableModelSuperLast(BoringModel):
     """This model has the super().__init__() call at the end."""


From ba2438f118cc38703f166486299a771a7e778467 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 21 Apr 2025 20:34:40 +0200
Subject: [PATCH 015/112] build(deps): update tensorboard requirement from <2.15.0,>=2.9.1 to >=2.9.1,<2.20.0 in /requirements (#20735)

build(deps): update tensorboard requirement in /requirements

Updates the requirements on [tensorboard](https://github.com/tensorflow/tensorboard) to permit the latest version.
- [Release notes](https://github.com/tensorflow/tensorboard/releases)
- [Changelog](https://github.com/tensorflow/tensorboard/blob/master/RELEASE.md)
- [Commits](https://github.com/tensorflow/tensorboard/compare/2.9.1...2.19.0)

---
updated-dependencies:
- dependency-name: tensorboard
  dependency-version: 2.19.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(cherry picked from commit df18e4c65ee7a2a7c0e827722e8a84abc33c82d1)
---
 requirements/pytorch/test.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt
index 4e1da300dd2fd..b9f4bd5d34be7 100644
--- a/requirements/pytorch/test.txt
+++ b/requirements/pytorch/test.txt
@@ -16,4 +16,4 @@ pandas >1.0, <2.2.0 # needed in benchmarks
 fastapi # for `ServableModuleValidator` # not setting version as re-defined in App
 uvicorn # for `ServableModuleValidator` # not setting version as re-defined in App

-tensorboard >=2.9.1, <2.15.0 # for `TensorBoardLogger`
+tensorboard >=2.9.1, <2.20.0 # for `TensorBoardLogger`

From 312e9467957012374a660ff704fa3537f6883cf1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 22 Apr 2025 11:05:18 +0200
Subject: [PATCH 016/112] build(deps): update ipython[all] requirement from <8.15.0 to <8.19.0 in /requirements (#20733)

build(deps): update ipython[all] requirement in /requirements

Updates the requirements on [ipython[all]](https://github.com/ipython/ipython) to permit the latest version.
- [Release notes](https://github.com/ipython/ipython/releases)
- [Commits](https://github.com/ipython/ipython/compare/rel-0.8.4...8.18.1)

---
updated-dependencies:
- dependency-name: ipython[all]
  dependency-version: 8.18.1
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(cherry picked from commit 58827d3889a76571c0b229bd3a731f07efa7d8a9)
---
 requirements/pytorch/examples.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/pytorch/examples.txt b/requirements/pytorch/examples.txt
index 2e793e0045da9..b19fa5b9b98ab 100644
--- a/requirements/pytorch/examples.txt
+++ b/requirements/pytorch/examples.txt
@@ -3,6 +3,6 @@

 requests <2.32.0
 torchvision >=0.16.0, <0.21.0
-ipython[all] <8.15.0
+ipython[all] <8.19.0
 torchmetrics >=0.10.0, <1.5.0
 lightning-utilities >=0.8.0, <0.12.0

From 711eeada959d099c80ffdb9f35d6d79dd428b11d Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Tue, 22 Apr 2025 11:57:40 +0200
Subject: [PATCH 017/112] ci: try to suppress false failing check for create legacy checkpoint (#20745)

(cherry picked from commit 08a04de85aa6c1304841f801ed4c6a2e397f7bed)
---
 .github/workflows/_legacy-checkpoints.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/_legacy-checkpoints.yml b/.github/workflows/_legacy-checkpoints.yml
index de9db06251e77..432645a41ea53 100644
--- a/.github/workflows/_legacy-checkpoints.yml
+++ b/.github/workflows/_legacy-checkpoints.yml
@@ -101,7 +101,7 @@ jobs:
       - name: "Determine: Keep artifact & DryRun"
         run: |
           python -c "print('KEEP_DAYS=' + str(30 if '${{ github.event_name }}'.startswith('pull_request') else 0))" >> $GITHUB_ENV
-          python -c "print('AWS_RUN=' + str('' if '${{inputs.push_to_s3}}' == 'true' else '--dryrun'))" >> $GITHUB_ENV
+          python -c "print('AWS_RUN=' + str('' if '${{inputs.push_to_s3}}' == 'true' else '--region us-east-1 --dryrun'))" >> $GITHUB_ENV

       - name: Upload checkpoints to GitHub Actions artifact
         uses: actions/upload-artifact@v4

From ea9c032c721c228a5bfaffc3f6a4a3d73cfc6d43 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Tue, 22 Apr 2025 15:09:35 +0200
Subject: [PATCH 018/112] ci: try to suppress false failing check for create legacy checkpoint (#20746)

(cherry picked from commit c88bb407b3e7d340cbf1653f14df6c31bf8b24fd)
---
 .github/workflows/_legacy-checkpoints.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/_legacy-checkpoints.yml b/.github/workflows/_legacy-checkpoints.yml
index 432645a41ea53..9306170f549d3 100644
--- a/.github/workflows/_legacy-checkpoints.yml
+++ b/.github/workflows/_legacy-checkpoints.yml
@@ -101,7 +101,7 @@ jobs:
       - name: "Determine: Keep artifact & DryRun"
         run: |
           python -c "print('KEEP_DAYS=' + str(30 if '${{ github.event_name }}'.startswith('pull_request') else 0))" >> $GITHUB_ENV
-          python -c "print('AWS_RUN=' + str('' if '${{inputs.push_to_s3}}' == 'true' else '--region us-east-1 --dryrun'))" >> $GITHUB_ENV
+          python -c "print('AWS_RUN=' + str('' if '${{inputs.push_to_s3}}' == 'true' else '--dryrun'))" >> $GITHUB_ENV

       - name: Upload checkpoints to GitHub Actions artifact
         uses: actions/upload-artifact@v4
@@ -113,6 +113,7 @@ jobs:
       - run: pip install -r requirements/ci.txt

       - name: Upload checkpoints to S3
+        if: secrets.AWS_REGION != ''
         working-directory: ${{ env.LEGACY_FOLDER }}
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY }}

From f59ae96b2131a3ca688c6c0d1761e754ad69a1eb Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 22 Apr 2025 15:12:35 +0200
Subject: [PATCH 019/112] build(deps): update torchmetrics requirement from <1.5.0,>=0.10.0 to >=0.10.0,<1.8.0 in /requirements (#20739)

build(deps): update torchmetrics requirement in /requirements

Updates the requirements on [torchmetrics](https://github.com/Lightning-AI/torchmetrics) to permit the latest version.
- [Release notes](https://github.com/Lightning-AI/torchmetrics/releases)
- [Changelog](https://github.com/Lightning-AI/torchmetrics/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Lightning-AI/torchmetrics/compare/v0.10.0...v1.7.1)

---
updated-dependencies:
- dependency-name: torchmetrics
  dependency-version: 1.7.1
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(cherry picked from commit 156aad119db30c62f6654dde9d6ff40b0b6b0cc3)
---
 requirements/fabric/examples.txt | 2 +-
 requirements/pytorch/base.txt | 2 +-
 requirements/pytorch/examples.txt | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/requirements/fabric/examples.txt b/requirements/fabric/examples.txt
index 3352db77d8bd9..6be089ebb9767 100644
--- a/requirements/fabric/examples.txt
+++ b/requirements/fabric/examples.txt
@@ -2,5 +2,5 @@
 # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

 torchvision >=0.16.0, <0.21.0
-torchmetrics >=0.10.0, <1.5.0
+torchmetrics >=0.10.0, <1.8.0
 lightning-utilities >=0.8.0, <0.12.0
diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt
index 7a87aa86278c4..4b32e44d2cacc 100644
--- a/requirements/pytorch/base.txt
+++ b/requirements/pytorch/base.txt
@@ -5,7 +5,7 @@ torch >=2.1.0, <2.6.0
 tqdm >=4.57.0, <4.67.0
 PyYAML >=5.4, <6.1.0
 fsspec[http] >=2022.5.0, <2024.4.0
-torchmetrics >=0.7.0, <1.5.0
+torchmetrics >=0.7.0, <1.8.0
 packaging >=20.0, <=23.1
 typing-extensions >=4.4.0, <4.11.0
 lightning-utilities >=0.10.0, <0.12.0
diff --git a/requirements/pytorch/examples.txt b/requirements/pytorch/examples.txt
index b19fa5b9b98ab..8a19179b813e0 100644
--- a/requirements/pytorch/examples.txt
+++ b/requirements/pytorch/examples.txt
@@ -4,5 +4,5 @@ requests <2.32.0
 torchvision >=0.16.0, <0.21.0
 ipython[all] <8.19.0
-torchmetrics >=0.10.0, <1.5.0
+torchmetrics >=0.10.0, <1.8.0
 lightning-utilities >=0.8.0, <0.12.0

From d795f63f75c2674e106300eb81064ff25e47ce2a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 22 Apr 2025 19:30:17 +0200
Subject: [PATCH 020/112] build(deps): update cloudpickle requirement from <2.3.0,>=1.3 to >=1.3,<3.2.0 in /requirements (#20740)

build(deps): update cloudpickle requirement in /requirements

Updates the requirements on [cloudpickle](https://github.com/cloudpipe/cloudpickle) to permit the latest version.
- [Release notes](https://github.com/cloudpipe/cloudpickle/releases)
- [Changelog](https://github.com/cloudpipe/cloudpickle/blob/master/CHANGES.md)
- [Commits](https://github.com/cloudpipe/cloudpickle/compare/v1.3.0...v3.1.1)

---
updated-dependencies:
- dependency-name: cloudpickle
  dependency-version: 3.1.1
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(cherry picked from commit fd1a38b27516ce073cef2c687a382d3c0484a45c)
---
 requirements/pytorch/test.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt
index b9f4bd5d34be7..137d0b857f659 100644
--- a/requirements/pytorch/test.txt
+++ b/requirements/pytorch/test.txt
@@ -6,7 +6,7 @@ pytest-rerunfailures ==12.0
 pytest-random-order ==1.1.0

 # needed in tests
-cloudpickle >=1.3, <2.3.0
+cloudpickle >=1.3, <3.2.0
 scikit-learn >0.22.1, <1.4.0
 numpy >=1.17.2, <1.27.0
 onnx >=1.12.0, <1.17.0

From 1eca4f396fb53bc53bcb674f5bbaa9f263ca30ce Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 22 Apr 2025 19:39:15 +0200
Subject: [PATCH 021/112] build(deps): update onnxruntime requirement from <1.19.0,>=1.12.0 to >=1.12.0,<1.21.0 in /requirements (#20738)

build(deps): update onnxruntime requirement in /requirements

Updates the requirements on [onnxruntime](https://github.com/microsoft/onnxruntime) to permit the latest version.
- [Release notes](https://github.com/microsoft/onnxruntime/releases)
- [Changelog](https://github.com/microsoft/onnxruntime/blob/main/docs/ReleaseManagement.md)
- [Commits](https://github.com/microsoft/onnxruntime/compare/v1.12.0...v1.20.1)

---
updated-dependencies:
- dependency-name: onnxruntime
  dependency-version: 1.20.1
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(cherry picked from commit ffec8706b2c7b22b89d7b12f4d77572a0582b2ae)
---
 requirements/pytorch/test.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt
index 137d0b857f659..fcb0d50c8efda 100644
--- a/requirements/pytorch/test.txt
+++ b/requirements/pytorch/test.txt
@@ -10,7 +10,7 @@ cloudpickle >=1.3, <3.2.0
 scikit-learn >0.22.1, <1.4.0
 numpy >=1.17.2, <1.27.0
 onnx >=1.12.0, <1.17.0
-onnxruntime >=1.12.0, <1.19.0
+onnxruntime >=1.12.0, <1.21.0
 psutil <5.9.6 # for `DeviceStatsMonitor`
 pandas >1.0, <2.2.0 # needed in benchmarks
 fastapi # for `ServableModuleValidator` # not setting version as re-defined in App

From 77f82da45769f763ca7e954e2342c8c97999b388 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 22 Apr 2025 19:43:44 +0200
Subject: [PATCH 022/112] build(deps): update packaging requirement from <24.2 to <25.1 in /requirements (#20734)

build(deps): update packaging requirement in /requirements

Updates the requirements on [packaging](https://github.com/pypa/packaging) to permit the latest version.
- [Release notes](https://github.com/pypa/packaging/releases)
- [Changelog](https://github.com/pypa/packaging/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/pypa/packaging/compare/20.0...25.0)

---
updated-dependencies:
- dependency-name: packaging
  dependency-version: '25.0'
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(cherry picked from commit 396038a7d4337288bbbf101b32eb1d28b8c0c5f7)
---
 requirements/ci.txt | 2 +-
 requirements/fabric/base.txt | 2 +-
 requirements/pytorch/base.txt | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/requirements/ci.txt b/requirements/ci.txt
index 6ea6bbadbdc96..d6a693ad50ca3 100644
--- a/requirements/ci.txt
+++ b/requirements/ci.txt
@@ -5,4 +5,4 @@ twine ==6.0.1
 importlib-metadata <8.0.0
 wget
 pkginfo ==1.12.0
-packaging <24.2
+packaging <25.1
diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt
index 70cd75c1c0d37..fc12498cbbdd5 100644
--- a/requirements/fabric/base.txt
+++ b/requirements/fabric/base.txt
@@ -3,6 +3,6 @@

 torch >=2.1.0, <2.6.0
 fsspec[http] >=2022.5.0, <2024.4.0
-packaging >=20.0, <=23.1
+packaging >=20.0, <=25.0
 typing-extensions >=4.4.0, <4.11.0
 lightning-utilities >=0.10.0, <0.12.0
diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt
index 4b32e44d2cacc..54fa58cc92fc0 100644
--- a/requirements/pytorch/base.txt
+++ b/requirements/pytorch/base.txt
@@ -6,6 +6,6 @@ tqdm >=4.57.0, <4.67.0
 PyYAML >=5.4, <6.1.0
 fsspec[http] >=2022.5.0, <2024.4.0
 torchmetrics >=0.7.0, <1.8.0
-packaging >=20.0, <=23.1
+packaging >=20.0, <=25.0
 typing-extensions >=4.4.0, <4.11.0
 lightning-utilities >=0.10.0, <0.12.0

From ce38c8d81f366f8082f4c40e65f23bc2b78e7882 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 22 Apr 2025 20:55:05 +0200
Subject: [PATCH 023/112] build(deps): update lightning-utilities requirement from <0.12.0,>=0.11.1 to >=0.11.1,<0.15.0 in /requirements (#20737)

build(deps): update lightning-utilities requirement in /requirements

Updates the requirements on [lightning-utilities](https://github.com/Lightning-AI/utilities) to permit the latest version.
- [Release notes](https://github.com/Lightning-AI/utilities/releases)
- [Changelog](https://github.com/Lightning-AI/utilities/blob/main/CHANGELOG.md)
- [Commits](https://github.com/Lightning-AI/utilities/compare/v0.11.1...v0.14.3)

---
updated-dependencies:
- dependency-name: lightning-utilities
  dependency-version: 0.14.3
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com>
(cherry picked from commit 948577b8cd12ffbaef70b62c90e9421bae9482f9)
---
 requirements/docs.txt | 2 +-
 requirements/fabric/base.txt | 2 +-
 requirements/fabric/examples.txt | 2 +-
 requirements/pytorch/base.txt | 2 +-
 requirements/pytorch/examples.txt | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/requirements/docs.txt b/requirements/docs.txt
index c1ceaa92e6a43..c1b93d987a6e9 100644
--- a/requirements/docs.txt
+++ b/requirements/docs.txt
@@ -17,7 +17,7 @@ sphinx-rtd-dark-mode
 sphinxcontrib-video ==0.2.0
 jinja2 <3.2.0

-lightning-utilities >=0.11.1, <0.12.0
+lightning-utilities >=0.11.1, <0.15.0

 # installed from S3 location and fetched in advance
 lai-sphinx-theme
diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt
index fc12498cbbdd5..96d24861b0d41 100644
--- a/requirements/fabric/base.txt
+++ b/requirements/fabric/base.txt
@@ -5,4 +5,4 @@ torch >=2.1.0, <2.6.0
 fsspec[http] >=2022.5.0, <2024.4.0
 packaging >=20.0, <=25.0
 typing-extensions >=4.4.0, <4.11.0
-lightning-utilities >=0.10.0, <0.12.0
+lightning-utilities >=0.10.0, <0.15.0
diff --git a/requirements/fabric/examples.txt b/requirements/fabric/examples.txt
index 6be089ebb9767..fe69b50f9d943 100644
--- a/requirements/fabric/examples.txt
+++ b/requirements/fabric/examples.txt
@@ -3,4 +3,4 @@

 torchvision >=0.16.0, <0.21.0
 torchmetrics >=0.10.0, <1.8.0
-lightning-utilities >=0.8.0, <0.12.0
+lightning-utilities >=0.8.0, <0.15.0
diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt
index 54fa58cc92fc0..6a3ee0e94883b 100644
--- a/requirements/pytorch/base.txt
+++ b/requirements/pytorch/base.txt
@@ -8,4 +8,4 @@ fsspec[http] >=2022.5.0, <2024.4.0
 torchmetrics >=0.7.0, <1.8.0
 packaging >=20.0, <=25.0
 typing-extensions >=4.4.0, <4.11.0
-lightning-utilities >=0.10.0, <0.12.0
+lightning-utilities >=0.10.0, <0.15.0
diff --git a/requirements/pytorch/examples.txt b/requirements/pytorch/examples.txt
index 8a19179b813e0..8d629188ef5a0 100644
--- a/requirements/pytorch/examples.txt
+++ b/requirements/pytorch/examples.txt
@@ -5,4 +5,4 @@ requests <2.32.0
 torchvision >=0.16.0, <0.21.0
 ipython[all] <8.19.0
 torchmetrics >=0.10.0, <1.8.0
-lightning-utilities >=0.8.0, <0.12.0
+lightning-utilities >=0.8.0, <0.15.0

From ed2e57575361bb31275dde6a8d0d5c81ceb3a26d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 22 Apr 2025 21:11:11 +0200
Subject: [PATCH 024/112] build(deps): bump pytest from 7.4.0 to 8.3.5 in /requirements (#20732)

* build(deps): bump pytest from 7.4.0 to 8.3.5 in /requirements

Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.4.0 to 8.3.5.
- [Release notes](https://github.com/pytest-dev/pytest/releases)
- [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/pytest-dev/pytest/compare/7.4.0...8.3.5)

---
updated-dependencies:
- dependency-name: pytest
  dependency-version: 8.3.5
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot]

* pytest-doctestplus ==1.4.0

---------

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jirka B
(cherry picked from commit 3fb8dcf1d8c1ea7aa98972776de353f49acfeb33)
---
 requirements/doctests.txt | 4 ++--
 requirements/fabric/test.txt | 2 +-
 requirements/pytorch/test.txt | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/requirements/doctests.txt b/requirements/doctests.txt
index 703f221660c70..96794a9afb460 100644
--- a/requirements/doctests.txt
+++ b/requirements/doctests.txt
@@ -1,2 +1,2 @@
-pytest ==7.4.0
-pytest-doctestplus ==1.0.0
+pytest ==8.3.5
+pytest-doctestplus ==1.4.0
diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt
index 4ce2ff40f5bce..4801f2c5c2a1f 100644
--- a/requirements/fabric/test.txt
+++ b/requirements/fabric/test.txt
@@ -1,6 +1,6 @@
 coverage ==7.3.1
 numpy >=1.17.2, <1.27.0
-pytest ==7.4.0
+pytest ==8.3.5
 pytest-cov ==4.1.0
 pytest-timeout ==2.1.0
 pytest-rerunfailures ==12.0
diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt
index fcb0d50c8efda..a55d4713a8eca 100644
--- a/requirements/pytorch/test.txt
+++ b/requirements/pytorch/test.txt
@@ -1,5 +1,5 @@
 coverage ==7.3.1
-pytest ==7.4.0
+pytest ==8.3.5
 pytest-cov ==4.1.0
 pytest-timeout ==2.1.0
 pytest-rerunfailures ==12.0

From ebe9deb29b9e0f5895539f872d1f03544b9c20df Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 23 Apr 2025 11:28:36 +0200
Subject: [PATCH 025/112] build(deps): bump `torch` from 2.5.1 to 2.6.0 & `torchvision` from <0.21.0,>=0.16.0 to >=0.16.0,<0.22.0 in /requirements (#20728)

* build(deps): bump torch from 2.5.1 to 2.6.0 in /requirements

Bumps [torch](https://github.com/pytorch/pytorch) from 2.5.1 to 2.6.0.
- [Release notes](https://github.com/pytorch/pytorch/releases)
- [Changelog](https://github.com/pytorch/pytorch/blob/main/RELEASE.md)
- [Commits](https://github.com/pytorch/pytorch/compare/v2.5.1...v2.6.0)

---
updated-dependencies:
- dependency-name: torch
  dependency-version: 2.6.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]

* build(deps): update torchvision requirement from <0.21.0,>=0.16.0 to >=0.16.0,<0.22.0 in /requirements (#20736)

build(deps): update torchvision requirement in /requirements

Updates the requirements on [torchvision](https://github.com/pytorch/vision) to permit the latest version.
- [Release notes](https://github.com/pytorch/vision/releases)
- [Commits](https://github.com/pytorch/vision/compare/v0.16.0...v0.21.0)

---
updated-dependencies:
- dependency-name: torchvision
  dependency-version: 0.21.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* # type: ignore[arg-type]

* type: ignore[arg-type]

---------

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Jirka B
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
(cherry picked from commit 0c9d4147e3569e24bf6130e78ae21b136562766e)
---
 requirements/fabric/base.txt | 2 +-
 requirements/fabric/examples.txt | 2 +-
 requirements/pytorch/base.txt | 2 +-
 requirements/pytorch/examples.txt | 2 +-
 requirements/typing.txt | 2 +-
 .../plugins/collectives/torch_collective.py | 18 +++++++++---------
 src/lightning/fabric/strategies/xla_fsdp.py | 1 +
 src/lightning/fabric/utilities/init.py | 3 ++-
 src/lightning/pytorch/callbacks/finetuning.py | 2 +-
 .../pytorch/callbacks/throughput_monitor.py | 2 +-
 src/lightning/pytorch/overrides/distributed.py | 4 +---
 .../pytorch/plugins/precision/double.py | 4 +++-
 12 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt
index 96d24861b0d41..3568a39f4779a 100644
--- a/requirements/fabric/base.txt
+++ b/requirements/fabric/base.txt
@@ -1,7 +1,7 @@
 # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package
 # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

-torch >=2.1.0, <2.6.0
+torch >=2.1.0, <2.7.0
 fsspec[http] >=2022.5.0, <2024.4.0
 packaging >=20.0, <=25.0
 typing-extensions >=4.4.0, <4.11.0
diff --git a/requirements/fabric/examples.txt b/requirements/fabric/examples.txt
index fe69b50f9d943..ac3d16354f17e 100644
--- a/requirements/fabric/examples.txt
+++ b/requirements/fabric/examples.txt
@@ -1,6 +1,6 @@
 # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package
 # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

-torchvision >=0.16.0, <0.21.0
+torchvision >=0.16.0, <0.22.0
 torchmetrics >=0.10.0, <1.8.0
 lightning-utilities >=0.8.0, <0.15.0
diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt
index 6a3ee0e94883b..4badafd7cc73d 100644
--- a/requirements/pytorch/base.txt
+++ b/requirements/pytorch/base.txt
@@ -1,7 +1,7 @@
 # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package
 # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

-torch >=2.1.0, <2.6.0
+torch >=2.1.0, <2.7.0
 tqdm >=4.57.0, <4.67.0
 PyYAML >=5.4, <6.1.0
 fsspec[http] >=2022.5.0, <2024.4.0
diff --git a/requirements/pytorch/examples.txt b/requirements/pytorch/examples.txt
index 8d629188ef5a0..184d0f0449c99 100644
--- a/requirements/pytorch/examples.txt
+++ b/requirements/pytorch/examples.txt
@@ -2,7 +2,7 @@
 # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

 requests <2.32.0
-torchvision >=0.16.0, <0.21.0
+torchvision >=0.16.0, <0.22.0
 ipython[all] <8.19.0
 torchmetrics >=0.10.0, <1.8.0
 lightning-utilities >=0.8.0, <0.15.0
diff --git a/requirements/typing.txt b/requirements/typing.txt
index 71414998dd7f3..52f4df899e9d8 100644
--- a/requirements/typing.txt
+++ b/requirements/typing.txt
@@ -1,5 +1,5 @@
 mypy==1.11.0
-torch==2.5.1
+torch==2.6.0

 types-Markdown
 types-PyYAML
diff --git a/src/lightning/fabric/plugins/collectives/torch_collective.py b/src/lightning/fabric/plugins/collectives/torch_collective.py
index 81e15a33cb983..883380bb881aa 100644
--- a/src/lightning/fabric/plugins/collectives/torch_collective.py
+++ b/src/lightning/fabric/plugins/collectives/torch_collective.py
@@ -50,7 +50,7 @@ def world_size(self) -> int:

     @override
     def broadcast(self, tensor: Tensor, src: int) -> Tensor:
-        dist.broadcast(tensor, src, group=self.group)
+        dist.broadcast(tensor, src, group=self.group)  # type: ignore[arg-type]
         return tensor

     @override
@@ -62,7 +62,7 @@ def all_reduce(self, tensor: Tensor, op: Union[str, ReduceOp, RedOpType] = "sum"
     @override
     def reduce(self, tensor: Tensor, dst: int, op: Union[str, ReduceOp, RedOpType] = "sum") -> Tensor:
         op = self._convert_to_native_op(op)
-        dist.reduce(tensor, dst, op=op, group=self.group)
+        dist.reduce(tensor, dst, op=op, group=self.group)  # type: ignore[arg-type]
         return tensor

     @override
@@ -72,12 +72,12 @@ def all_gather(self, tensor_list: list[Tensor], tensor: Tensor) -> list[Tensor]:

     @override
     def gather(self, tensor: Tensor, gather_list: list[Tensor], dst: int = 0) -> list[Tensor]:
-        dist.gather(tensor, gather_list, dst, group=self.group)
+        dist.gather(tensor, gather_list, dst, group=self.group)  # type: ignore[arg-type]
         return gather_list

     @override
     def scatter(self, tensor: Tensor, scatter_list: list[Tensor], src: int = 0) -> Tensor:
-        dist.scatter(tensor, scatter_list, src, group=self.group)
+        dist.scatter(tensor, scatter_list, src, group=self.group)  # type: ignore[arg-type]
         return tensor

     @override
@@ -109,27 +109,27 @@ def all_gather_object(self, object_list: list[Any], obj: Any) -> list[Any]:
     def broadcast_object_list(
         self, object_list: list[Any], src: int, device: Optional[torch.device] = None
     ) -> list[Any]:
-        dist.broadcast_object_list(object_list, src, group=self.group, device=device)
+        dist.broadcast_object_list(object_list, src, group=self.group, device=device)  # type: ignore[arg-type]
         return object_list

     def gather_object(self, obj: Any, object_gather_list: list[Any], dst: int = 0) -> list[Any]:
-        dist.gather_object(obj, object_gather_list, dst, group=self.group)
+        dist.gather_object(obj, object_gather_list, dst, group=self.group)  # type: ignore[arg-type]
         return object_gather_list

     def scatter_object_list(
         self, scatter_object_output_list: list[Any], scatter_object_input_list: list[Any], src: int = 0
     ) -> list[Any]:
-        dist.scatter_object_list(scatter_object_output_list, scatter_object_input_list, src, group=self.group)
+        dist.scatter_object_list(scatter_object_output_list, scatter_object_input_list, src, group=self.group)  # type: ignore[arg-type]
         return scatter_object_output_list

     @override
     def barrier(self, device_ids: Optional[list[int]] = None) -> None:
         if self.group == dist.GroupMember.NON_GROUP_MEMBER:
             return
-        dist.barrier(group=self.group, device_ids=device_ids)
+        dist.barrier(group=self.group, device_ids=device_ids)  # type: ignore[arg-type]

     def monitored_barrier(self, timeout: Optional[datetime.timedelta] = None, wait_all_ranks: bool = False) -> None:
-        dist.monitored_barrier(group=self.group, timeout=timeout, wait_all_ranks=wait_all_ranks)
+        dist.monitored_barrier(group=self.group, timeout=timeout, wait_all_ranks=wait_all_ranks)  # type: ignore[arg-type]

     @override
     def setup(self, main_address: Optional[str] = None, main_port: Optional[str] = None, **kwargs: Any) -> Self:
diff --git a/src/lightning/fabric/strategies/xla_fsdp.py b/src/lightning/fabric/strategies/xla_fsdp.py
index 935ef72713bcc..87e45293e5e47 100644
--- a/src/lightning/fabric/strategies/xla_fsdp.py
+++ b/src/lightning/fabric/strategies/xla_fsdp.py
@@ -295,6 +295,7 @@ def clip_gradients_norm(
     ) -> Tensor:
         """Clip gradients by norm."""
         self.precision.unscale_gradients(optimizer)
+        assert callable(module.clip_grad_norm_)
         return module.clip_grad_norm_(max_norm=max_norm, norm_type=norm_type)

     @override
diff --git a/src/lightning/fabric/utilities/init.py b/src/lightning/fabric/utilities/init.py
index 2760c6bd227c1..4f8519eec9610 100644
--- a/src/lightning/fabric/utilities/init.py
+++ b/src/lightning/fabric/utilities/init.py
@@ -67,7 +67,8 @@ def _materialize(module: Module, device: _DEVICE) -> None:
             f"Materialization requires that the `{type(module).__name__}.reset_parameters` method is implemented."
             " This method is used to initialize any children parameters or buffers in this module."
         )
-    module.reset_parameters()
+    if callable(module.reset_parameters):
+        module.reset_parameters()


 def _materialize_meta_tensors(module: Module, device: _DEVICE) -> None:
diff --git a/src/lightning/pytorch/callbacks/finetuning.py b/src/lightning/pytorch/callbacks/finetuning.py
index 356ab221777ae..cec83fee0f4d7 100644
--- a/src/lightning/pytorch/callbacks/finetuning.py
+++ b/src/lightning/pytorch/callbacks/finetuning.py
@@ -133,7 +133,7 @@ def flatten_modules(modules: Union[Module, Iterable[Union[Module, Iterable]]]) -

         if isinstance(modules, Iterable):
             _flatten_modules = []
-            for m in modules:  # type: ignore[union-attr]
+            for m in modules:
                 _flatten_modules.extend(BaseFinetuning.flatten_modules(m))

             _modules = iter(_flatten_modules)
diff --git a/src/lightning/pytorch/callbacks/throughput_monitor.py b/src/lightning/pytorch/callbacks/throughput_monitor.py
index a49610a912e57..8b618ae2be912 100644
--- a/src/lightning/pytorch/callbacks/throughput_monitor.py
+++ b/src/lightning/pytorch/callbacks/throughput_monitor.py
@@ -140,7 +140,7 @@ def _update(self, trainer: "Trainer", pl_module: "LightningModule", batch: Any,
             # this assumes that all iterations used the same batch size
             samples=iter_num * batch_size,
             lengths=None if self.length_fn is None else self._lengths[stage],
-            flops=flops_per_batch,
+            flops=flops_per_batch,  # type: ignore[arg-type]
         )

     def _compute(self, trainer: "Trainer", iter_num: Optional[int] = None) -> None:
diff --git a/src/lightning/pytorch/overrides/distributed.py b/src/lightning/pytorch/overrides/distributed.py
index 196008b7ed29f..92d444338ff0f 100644
--- a/src/lightning/pytorch/overrides/distributed.py
+++ b/src/lightning/pytorch/overrides/distributed.py
@@ -163,9 +163,7 @@ def _register_ddp_comm_hook(

 def _sync_module_states(module: torch.nn.Module) -> None:
     """Taken from https://github.com/pytorch/pytorch/blob/v2.0.0/torch/nn/parallel/distributed.py#L675-L682."""
-    parameters_to_ignore = (
-        set(module._ddp_params_and_buffers_to_ignore) if hasattr(module, "_ddp_params_and_buffers_to_ignore") else set()
-    )
+    parameters_to_ignore = set(getattr(module, "_ddp_params_and_buffers_to_ignore", []))
     from torch.distributed.distributed_c10d import _get_default_group
     from torch.distributed.utils import _sync_module_states as torch_sync_module_states
diff --git a/src/lightning/pytorch/plugins/precision/double.py b/src/lightning/pytorch/plugins/precision/double.py
index efa1aa008a35e..739d8f1d06526 100644
--- a/src/lightning/pytorch/plugins/precision/double.py
+++ b/src/lightning/pytorch/plugins/precision/double.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF 
ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from collections.abc import Generator +from collections.abc import Generator, Iterable from contextlib import AbstractContextManager, contextmanager from typing import Any, Literal @@ -72,6 +72,8 @@ class LightningDoublePrecisionModule(_DeviceDtypeModuleMixin, nn.Module): """ + _ddp_params_and_buffers_to_ignore: Iterable[str] + def __init__(self, pl_module: "pl.LightningModule") -> None: super().__init__() rank_zero_deprecation( From 51f3bf8db9029a85e5d858fabfb033cc8c08ef9f Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Fri, 25 Apr 2025 20:29:32 +0200 Subject: [PATCH 026/112] bump: testing latest PT on GPU to `2.7` (#20754) (cherry picked from commit 69d8fa9b95261da552a42fc3bbde36d6de452e99) --- .azure/gpu-tests-fabric.yml | 8 ++++---- .azure/gpu-tests-pytorch.yml | 8 ++++---- .github/checkgroup.yml | 3 ++- .github/workflows/docker-build.yml | 3 ++- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml index 9e514fdd853cd..583451fa0cdfa 100644 --- a/.azure/gpu-tests-fabric.yml +++ b/.azure/gpu-tests-fabric.yml @@ -60,13 +60,13 @@ jobs: image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.1" PACKAGE_NAME: "fabric" "Fabric | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.6-cuda12.4.1" - PACKAGE_NAME: "fabric" - "Fabric | future": image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3" PACKAGE_NAME: "fabric" + #"Fabric | future": + # image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3" + # PACKAGE_NAME: "fabric" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.6-cuda12.4.1" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3" PACKAGE_NAME: "lightning" workspace: clean: all diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 986be6ae74a1a..803460c770c13 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -53,13 +53,13 @@ jobs: image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.1" PACKAGE_NAME: "pytorch" "PyTorch | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.6-cuda12.4.1" - PACKAGE_NAME: "pytorch" - "PyTorch | future": image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3" PACKAGE_NAME: "pytorch" + #"PyTorch | future": + # image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3" + # PACKAGE_NAME: "pytorch" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.6-cuda12.4.1" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3" PACKAGE_NAME: "lightning" pool: lit-rtx-3090 variables: diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index 5dbf9fdbb1763..5269704f40698 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -134,7 +134,8 @@ subprojects: - "build-pl (3.11, 2.3, 12.1.1)" - "build-pl (3.11, 2.4, 12.1.1)" - "build-pl (3.12, 2.5, 12.1.1)" - - "build-pl (3.12, 2.6, 12.4.1, true)" + - "build-pl (3.12, 2.6, 12.4.1)" + - "build-pl (3.12, 2.7, 12.6.3, true)" # SECTION: lightning_fabric diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 97031afaa5ab2..fe0c9c0d560bd 100644 --- 
a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -48,7 +48,8 @@ jobs: - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.1" } - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.1" } - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.1" } - - { python_version: "3.12", pytorch_version: "2.6", cuda_version: "12.4.1", latest: "true" } + - { python_version: "3.12", pytorch_version: "2.6", cuda_version: "12.4.1" } + - { python_version: "3.12", pytorch_version: "2.7", cuda_version: "12.6.3", latest: "true" } steps: - uses: actions/checkout@v4 with: From 626480ace334dfc83181ff33514970b1050e06bd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 09:08:04 +0200 Subject: [PATCH 027/112] build(deps): update fsspec[http] requirement from <2024.4.0,>=2022.5.0 to >=2022.5.0,<2025.4.0 in /requirements (#20767) build(deps): update fsspec[http] requirement in /requirements Updates the requirements on [fsspec[http]](https://github.com/fsspec/filesystem_spec) to permit the latest version. - [Commits](https://github.com/fsspec/filesystem_spec/compare/2022.5.0...2025.3.2) --- updated-dependencies: - dependency-name: fsspec[http] dependency-version: 2025.3.2 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 90354eb407e8bb137af3e44df762d778feddfe2f) --- requirements/fabric/base.txt | 2 +- requirements/pytorch/base.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt index 3568a39f4779a..7740c623a46e3 100644 --- a/requirements/fabric/base.txt +++ b/requirements/fabric/base.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment torch >=2.1.0, <2.7.0 -fsspec[http] >=2022.5.0, <2024.4.0 +fsspec[http] >=2022.5.0, <2025.4.0 packaging >=20.0, <=25.0 typing-extensions >=4.4.0, <4.11.0 lightning-utilities >=0.10.0, <0.15.0 diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt index 4badafd7cc73d..25dbb518f4d54 100644 --- a/requirements/pytorch/base.txt +++ b/requirements/pytorch/base.txt @@ -4,7 +4,7 @@ torch >=2.1.0, <2.7.0 tqdm >=4.57.0, <4.67.0 PyYAML >=5.4, <6.1.0 -fsspec[http] >=2022.5.0, <2024.4.0 +fsspec[http] >=2022.5.0, <2025.4.0 torchmetrics >=0.7.0, <1.8.0 packaging >=20.0, <=25.0 typing-extensions >=4.4.0, <4.11.0 From e5a2eed910e084877bb23662b50e091a66ef6c3b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 09:08:19 +0200 Subject: [PATCH 028/112] build(deps): update awscli requirement from <1.31.0,>=1.30.0 to >=1.30.0,<1.41.0 in /requirements (#20766) build(deps): update awscli requirement in /requirements Updates the requirements on [awscli](https://github.com/aws/aws-cli) to permit the latest version. - [Release notes](https://github.com/aws/aws-cli/releases) - [Commits](https://github.com/aws/aws-cli/compare/1.30.0...1.40.2) --- updated-dependencies: - dependency-name: awscli dependency-version: 1.40.2 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 3fda67be536588b895048252ae923df527203c27) --- requirements/ci.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/ci.txt b/requirements/ci.txt index d6a693ad50ca3..4646d84b37e9c 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -1,6 +1,6 @@ setuptools <70.1.1 wheel <0.44.0 -awscli >=1.30.0, <1.31.0 +awscli >=1.30.0, <1.41.0 twine ==6.0.1 importlib-metadata <8.0.0 wget From f2712c0c7ddbfbc814eee2ea1036e7712e834e2c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 09:08:58 +0200 Subject: [PATCH 029/112] build(deps): update requests requirement from <2.32.0 to <2.33.0 in /requirements (#20762) build(deps): update requests requirement in /requirements Updates the requirements on [requests](https://github.com/psf/requests) to permit the latest version. - [Release notes](https://github.com/psf/requests/releases) - [Changelog](https://github.com/psf/requests/blob/main/HISTORY.md) - [Commits](https://github.com/psf/requests/compare/v0.2.0...v2.32.3) --- updated-dependencies: - dependency-name: requests dependency-version: 2.32.3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit efb02d79ebada7fb9d507f24293b5c23b3f545de) --- requirements/pytorch/examples.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/pytorch/examples.txt b/requirements/pytorch/examples.txt index 184d0f0449c99..307d4cdb179b6 100644 --- a/requirements/pytorch/examples.txt +++ b/requirements/pytorch/examples.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -requests <2.32.0 +requests <2.33.0 torchvision >=0.16.0, <0.22.0 ipython[all] <8.19.0 torchmetrics >=0.10.0, <1.8.0 From 5168acf5011b4377ebfd47ca69010477e1285e55 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 10:30:23 +0200 Subject: [PATCH 030/112] build(deps): bump pytest-cov from 4.1.0 to 6.1.1 in /requirements (#20768) * build(deps): bump pytest-cov from 4.1.0 to 6.1.1 in /requirements Bumps [pytest-cov](https://github.com/pytest-dev/pytest-cov) from 4.1.0 to 6.1.1. - [Changelog](https://github.com/pytest-dev/pytest-cov/blob/master/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest-cov/compare/v4.1.0...v6.1.1) --- updated-dependencies: - dependency-name: pytest-cov dependency-version: 6.1.1 dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] * coverage ==7.8.0 --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka B (cherry picked from commit 8220d057f5c934292499528a5e38092d9c5053fa) --- requirements/fabric/test.txt | 4 ++-- requirements/pytorch/test.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index 4801f2c5c2a1f..00496eb58d4d0 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -1,7 +1,7 @@ -coverage ==7.3.1 +coverage ==7.8.0 numpy >=1.17.2, <1.27.0 pytest ==8.3.5 -pytest-cov ==4.1.0 +pytest-cov ==6.1.1 pytest-timeout ==2.1.0 pytest-rerunfailures ==12.0 pytest-random-order ==1.1.0 diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index a55d4713a8eca..ebc9e4eebf485 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -1,6 +1,6 @@ -coverage ==7.3.1 +coverage ==7.8.0 pytest ==8.3.5 -pytest-cov ==4.1.0 +pytest-cov ==6.1.1 pytest-timeout ==2.1.0 pytest-rerunfailures ==12.0 pytest-random-order ==1.1.0 From 307014b47659151146f4339aadde287ad91daea1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 11:03:21 +0200 Subject: [PATCH 031/112] build(deps): update wheel requirement from <0.44.0 to <0.46.0 in /requirements (#20763) build(deps): update wheel requirement in /requirements Updates the requirements on [wheel](https://github.com/pypa/wheel) to permit the latest version. - [Release notes](https://github.com/pypa/wheel/releases) - [Changelog](https://github.com/pypa/wheel/blob/main/docs/news.rst) - [Commits](https://github.com/pypa/wheel/compare/0.5...0.45.1) --- updated-dependencies: - dependency-name: wheel dependency-version: 0.45.1 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit c71cc8a100ec87ded8ff17ff3a891aab4ac7433e) --- requirements/ci.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/ci.txt b/requirements/ci.txt index 4646d84b37e9c..f4f38126f440f 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -1,5 +1,5 @@ setuptools <70.1.1 -wheel <0.44.0 +wheel <0.46.0 awscli >=1.30.0, <1.41.0 twine ==6.0.1 importlib-metadata <8.0.0 From 79f8cc96c4a0ed774f21d14b238261983abb1615 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 11:06:16 +0200 Subject: [PATCH 032/112] build(deps): bump torch from 2.6.0 to 2.7.0 in /requirements (#20765) * build(deps): bump torch from 2.6.0 to 2.7.0 in /requirements Bumps [torch](https://github.com/pytorch/pytorch) from 2.6.0 to 2.7.0. - [Release notes](https://github.com/pytorch/pytorch/releases) - [Changelog](https://github.com/pytorch/pytorch/blob/main/RELEASE.md) - [Commits](https://github.com/pytorch/pytorch/compare/v2.6.0...v2.7.0) --- updated-dependencies: - dependency-name: torch dependency-version: 2.7.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * torchvision >=0.16.0, <0.23.0 --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka B (cherry picked from commit 82c584aca949589bc21c021b58706117df03f871) --- requirements/fabric/base.txt | 2 +- requirements/fabric/examples.txt | 2 +- requirements/pytorch/base.txt | 2 +- requirements/pytorch/examples.txt | 2 +- requirements/typing.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt index 7740c623a46e3..3fe9168c48e11 100644 --- a/requirements/fabric/base.txt +++ b/requirements/fabric/base.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torch >=2.1.0, <2.7.0 +torch >=2.1.0, <2.8.0 fsspec[http] >=2022.5.0, <2025.4.0 packaging >=20.0, <=25.0 typing-extensions >=4.4.0, <4.11.0 diff --git a/requirements/fabric/examples.txt b/requirements/fabric/examples.txt index ac3d16354f17e..72eabb238f3bb 100644 --- a/requirements/fabric/examples.txt +++ b/requirements/fabric/examples.txt @@ -1,6 +1,6 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torchvision >=0.16.0, <0.22.0 +torchvision >=0.16.0, <0.23.0 torchmetrics >=0.10.0, <1.8.0 lightning-utilities >=0.8.0, <0.15.0 diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt index 25dbb518f4d54..7bc20cec191d7 100644 --- a/requirements/pytorch/base.txt +++ b/requirements/pytorch/base.txt @@ -1,7 +1,7 @@ # NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment -torch >=2.1.0, <2.7.0 +torch >=2.1.0, <2.8.0 tqdm >=4.57.0, <4.67.0 PyYAML >=5.4, <6.1.0 fsspec[http] >=2022.5.0, <2025.4.0 diff --git a/requirements/pytorch/examples.txt b/requirements/pytorch/examples.txt index 307d4cdb179b6..d9ad8150693b9 100644 --- a/requirements/pytorch/examples.txt +++ b/requirements/pytorch/examples.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment requests <2.33.0 -torchvision >=0.16.0, <0.22.0 +torchvision >=0.16.0, <0.23.0 ipython[all] <8.19.0 torchmetrics >=0.10.0, <1.8.0 lightning-utilities >=0.8.0, <0.15.0 diff --git a/requirements/typing.txt b/requirements/typing.txt index 52f4df899e9d8..dfdb747eb8a66 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,5 +1,5 @@ mypy==1.11.0 -torch==2.6.0 +torch==2.7.0 types-Markdown types-PyYAML From 28780a91d3a24c79b297cd3f7f28b6bab222b51d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 11:06:30 +0200 Subject: [PATCH 033/112] build(deps): update pandas requirement from <2.2.0,>1.0 to >1.0,<2.3.0 in /requirements (#20764) build(deps): update pandas requirement in /requirements Updates the requirements on [pandas](https://github.com/pandas-dev/pandas) to permit the latest version. 
- [Release notes](https://github.com/pandas-dev/pandas/releases) - [Commits](https://github.com/pandas-dev/pandas/compare/v1.0.1...v2.2.3) --- updated-dependencies: - dependency-name: pandas dependency-version: 2.2.3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit c51b3472852ea7e3a9b38d2aad6035dce8707b2c) --- requirements/pytorch/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index ebc9e4eebf485..55e2a56dd8e9d 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -12,7 +12,7 @@ numpy >=1.17.2, <1.27.0 onnx >=1.12.0, <1.17.0 onnxruntime >=1.12.0, <1.21.0 psutil <5.9.6 # for `DeviceStatsMonitor` -pandas >1.0, <2.2.0 # needed in benchmarks +pandas >1.0, <2.3.0 # needed in benchmarks fastapi # for `ServableModuleValidator` # not setting version as re-defined in App uvicorn # for `ServableModuleValidator` # not setting version as re-defined in App From c0fdf6913d7af6d283d0d7a8ea76a04eb542a15d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 11:07:51 +0200 Subject: [PATCH 034/112] build(deps): update scikit-learn requirement from <1.4.0,>0.22.1 to >0.22.1,<1.7.0 in /requirements (#20760) build(deps): update scikit-learn requirement in /requirements Updates the requirements on [scikit-learn](https://github.com/scikit-learn/scikit-learn) to permit the latest version. - [Release notes](https://github.com/scikit-learn/scikit-learn/releases) - [Commits](https://github.com/scikit-learn/scikit-learn/compare/0.22.2...1.6.1) --- updated-dependencies: - dependency-name: scikit-learn dependency-version: 1.6.1 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 2b5d38d4d467c293c33a585740eb6156f536d622) --- requirements/pytorch/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index 55e2a56dd8e9d..7c526b93d3ad8 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -7,7 +7,7 @@ pytest-random-order ==1.1.0 # needed in tests cloudpickle >=1.3, <3.2.0 -scikit-learn >0.22.1, <1.4.0 +scikit-learn >0.22.1, <1.7.0 numpy >=1.17.2, <1.27.0 onnx >=1.12.0, <1.17.0 onnxruntime >=1.12.0, <1.21.0 From 5fedcb3b291b2a983ba3ca7fab68496c37bf16ed Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Mon, 28 Apr 2025 14:32:02 +0200 Subject: [PATCH 035/112] docs: update repo link preventing flakiness (#20769) (cherry picked from commit 383c980b0f75e116b13361207779bad2941e3e59) --- .github/CONTRIBUTING.md | 2 +- .github/markdown-links-config.json | 5 +- docs/source-pytorch/data/iterables.rst | 2 +- .../upgrade/sections/1_4_advanced.rst | 12 +- .../upgrade/sections/1_4_regular.rst | 18 +- .../upgrade/sections/1_5_advanced.rst | 12 +- .../upgrade/sections/1_5_devel.rst | 8 +- .../upgrade/sections/1_5_regular.rst | 12 +- .../upgrade/sections/1_6_advanced.rst | 16 +- .../upgrade/sections/1_6_devel.rst | 8 +- .../upgrade/sections/1_6_regular.rst | 32 +- .../upgrade/sections/1_7_advanced.rst | 36 +- .../upgrade/sections/1_7_devel.rst | 40 +- .../upgrade/sections/1_7_regular.rst | 12 +- .../upgrade/sections/1_8_advanced.rst | 14 +- .../upgrade/sections/1_8_devel.rst | 6 +- .../upgrade/sections/1_8_regular.rst | 6 +- .../upgrade/sections/1_9_advanced.rst | 38 +- .../upgrade/sections/1_9_devel.rst | 70 +- .../upgrade/sections/1_9_regular.rst | 14 +- .../upgrade/sections/2_0_advanced.rst | 6 +- .../upgrade/sections/2_0_devel.rst | 2 +- .../upgrade/sections/2_0_regular.rst | 16 +- src/lightning/fabric/CHANGELOG.md | 374 +- src/lightning/fabric/strategies/xla.py | 2 +- src/lightning/fabric/strategies/xla_fsdp.py | 2 +- src/lightning/pytorch/CHANGELOG.md | 6154 ++++++++--------- .../pytorch/demos/mnist_datamodule.py | 2 +- src/lightning/pytorch/loops/utilities.py | 2 +- src/lightning/pytorch/strategies/xla.py | 2 +- .../trainer/configuration_validator.py | 6 +- .../pytorch/utilities/migration/utils.py | 2 +- tests/tests_fabric/strategies/test_xla.py | 2 +- .../utilities/test_distributed.py | 2 +- tests/tests_pytorch/models/test_tpu.py | 2 +- 35 files changed, 3468 insertions(+), 3471 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 91cf94023786c..f3edd2bae51ab 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -97,7 +97,7 @@ _**Note**, even if you do not find the solution, sending a PR with a test coveri 1. Add/update the relevant tests! -- [This PR](https://github.com/Lightning-AI/lightning/pull/2671) is a good example for adding a new metric, and [this one for a new logger](https://github.com/Lightning-AI/lightning/pull/2721). +- [This PR](https://github.com/Lightning-AI/pytorch-lightning/pull/2671) is a good example for adding a new metric, and [this one for a new logger](https://github.com/Lightning-AI/pytorch-lightning/pull/2721). 
### Test cases: diff --git a/.github/markdown-links-config.json b/.github/markdown-links-config.json index 2568c8c8f65ba..1447e5c8ea832 100644 --- a/.github/markdown-links-config.json +++ b/.github/markdown-links-config.json @@ -1,10 +1,7 @@ { "ignorePatterns": [ { - "pattern": "^https://github.com/Lightning-AI/lightning/pull/.*" - }, - { - "pattern": "^https://dev.azure.com/Lightning-AI/lightning/_apis/build/status.*" + "pattern": "^https://github.com/Lightning-AI/pytorch-lightning/pull/" } ], "httpHeaders": [ diff --git a/docs/source-pytorch/data/iterables.rst b/docs/source-pytorch/data/iterables.rst index 759400714d3de..276af8b8eab72 100644 --- a/docs/source-pytorch/data/iterables.rst +++ b/docs/source-pytorch/data/iterables.rst @@ -53,7 +53,7 @@ Currently, the ``trainer.predict`` method only supports the ``"sequential"`` mod Support for this feature is tracked in this `issue `__. Note that when using the ``"sequential"`` mode, you need to add an additional argument ``dataloader_idx`` to some specific hooks. -Lightning will `raise an error `__ informing you of this requirement. +Lightning will `raise an error `__ informing you of this requirement. Using LightningDataModule ------------------------- diff --git a/docs/source-pytorch/upgrade/sections/1_4_advanced.rst b/docs/source-pytorch/upgrade/sections/1_4_advanced.rst index b43a31618f05c..7accf98d13392 100644 --- a/docs/source-pytorch/upgrade/sections/1_4_advanced.rst +++ b/docs/source-pytorch/upgrade/sections/1_4_advanced.rst @@ -35,9 +35,9 @@ - `PR6388`_ -.. _pr7201: https://github.com/Lightning-AI/lightning/pull/7201 -.. _pr4945: https://github.com/Lightning-AI/lightning/pull/4945 -.. _pr7292: https://github.com/Lightning-AI/lightning/pull/7292 -.. _pr6834: https://github.com/Lightning-AI/lightning/pull/6834 -.. _pr7180: https://github.com/Lightning-AI/lightning/pull/7180 -.. _pr6388: https://github.com/Lightning-AI/lightning/pull/6388 +.. _pr7201: https://github.com/Lightning-AI/pytorch-lightning/pull/7201 +.. _pr4945: https://github.com/Lightning-AI/pytorch-lightning/pull/4945 +.. _pr7292: https://github.com/Lightning-AI/pytorch-lightning/pull/7292 +.. _pr6834: https://github.com/Lightning-AI/pytorch-lightning/pull/6834 +.. _pr7180: https://github.com/Lightning-AI/pytorch-lightning/pull/7180 +.. _pr6388: https://github.com/Lightning-AI/pytorch-lightning/pull/6388 diff --git a/docs/source-pytorch/upgrade/sections/1_4_regular.rst b/docs/source-pytorch/upgrade/sections/1_4_regular.rst index a422775197836..67fe19b5aee30 100644 --- a/docs/source-pytorch/upgrade/sections/1_4_regular.rst +++ b/docs/source-pytorch/upgrade/sections/1_4_regular.rst @@ -48,12 +48,12 @@ .. _torchmetrics: https://torchmetrics.readthedocs.io/en/stable -.. _pr7339: https://github.com/Lightning-AI/lightning/pull/7339 -.. _pr7323: https://github.com/Lightning-AI/lightning/pull/7323 -.. _pr7066: https://github.com/Lightning-AI/lightning/pull/7066 -.. _pr6146: https://github.com/Lightning-AI/lightning/pull/6146 -.. _pr6621: https://github.com/Lightning-AI/lightning/pull/6621 -.. _pr6349: https://github.com/Lightning-AI/lightning/pull/6349 -.. _pr6993: https://github.com/Lightning-AI/lightning/pull/6993 -.. _pr7253: https://github.com/Lightning-AI/lightning/pull/7253 -.. _pr7168: https://github.com/Lightning-AI/lightning/pull/7168 +.. _pr7339: https://github.com/Lightning-AI/pytorch-lightning/pull/7339 +.. _pr7323: https://github.com/Lightning-AI/pytorch-lightning/pull/7323 +.. _pr7066: https://github.com/Lightning-AI/pytorch-lightning/pull/7066 +.. 
_pr6146: https://github.com/Lightning-AI/pytorch-lightning/pull/6146 +.. _pr6621: https://github.com/Lightning-AI/pytorch-lightning/pull/6621 +.. _pr6349: https://github.com/Lightning-AI/pytorch-lightning/pull/6349 +.. _pr6993: https://github.com/Lightning-AI/pytorch-lightning/pull/6993 +.. _pr7253: https://github.com/Lightning-AI/pytorch-lightning/pull/7253 +.. _pr7168: https://github.com/Lightning-AI/pytorch-lightning/pull/7168 diff --git a/docs/source-pytorch/upgrade/sections/1_5_advanced.rst b/docs/source-pytorch/upgrade/sections/1_5_advanced.rst index 22ba000bebe52..ac85322ca5f79 100644 --- a/docs/source-pytorch/upgrade/sections/1_5_advanced.rst +++ b/docs/source-pytorch/upgrade/sections/1_5_advanced.rst @@ -67,9 +67,9 @@ - `PR8291`_ -.. _pr7891: https://github.com/Lightning-AI/lightning/pull/7891 -.. _pr7918: https://github.com/Lightning-AI/lightning/pull/7918 -.. _pr7994: https://github.com/Lightning-AI/lightning/pull/7994 -.. _pr7657: https://github.com/Lightning-AI/lightning/pull/7657 -.. _pr8203: https://github.com/Lightning-AI/lightning/pull/8203 -.. _pr8291: https://github.com/Lightning-AI/lightning/pull/8291 +.. _pr7891: https://github.com/Lightning-AI/pytorch-lightning/pull/7891 +.. _pr7918: https://github.com/Lightning-AI/pytorch-lightning/pull/7918 +.. _pr7994: https://github.com/Lightning-AI/pytorch-lightning/pull/7994 +.. _pr7657: https://github.com/Lightning-AI/pytorch-lightning/pull/7657 +.. _pr8203: https://github.com/Lightning-AI/pytorch-lightning/pull/8203 +.. _pr8291: https://github.com/Lightning-AI/pytorch-lightning/pull/8291 diff --git a/docs/source-pytorch/upgrade/sections/1_5_devel.rst b/docs/source-pytorch/upgrade/sections/1_5_devel.rst index dcaefe42782e2..c4129b77a2560 100644 --- a/docs/source-pytorch/upgrade/sections/1_5_devel.rst +++ b/docs/source-pytorch/upgrade/sections/1_5_devel.rst @@ -24,7 +24,7 @@ - `PR8229`_ -.. _pr7652: https://github.com/Lightning-AI/lightning/pull/7652 -.. _pr7422: https://github.com/Lightning-AI/lightning/pull/7422 -.. _pr8025: https://github.com/Lightning-AI/lightning/pull/8025 -.. _pr8229: https://github.com/Lightning-AI/lightning/pull/8229 +.. _pr7652: https://github.com/Lightning-AI/pytorch-lightning/pull/7652 +.. _pr7422: https://github.com/Lightning-AI/pytorch-lightning/pull/7422 +.. _pr8025: https://github.com/Lightning-AI/pytorch-lightning/pull/8025 +.. _pr8229: https://github.com/Lightning-AI/pytorch-lightning/pull/8229 diff --git a/docs/source-pytorch/upgrade/sections/1_5_regular.rst b/docs/source-pytorch/upgrade/sections/1_5_regular.rst index 994cfae13f1f8..78be0dd8c9c77 100644 --- a/docs/source-pytorch/upgrade/sections/1_5_regular.rst +++ b/docs/source-pytorch/upgrade/sections/1_5_regular.rst @@ -39,9 +39,9 @@ - `PR8575`_ -.. _pr7431: https://github.com/Lightning-AI/lightning/pull/7431 -.. _pr7026: https://github.com/Lightning-AI/lightning/pull/7026 -.. _pr7907: https://github.com/Lightning-AI/lightning/pull/7907 -.. _pr8383: https://github.com/Lightning-AI/lightning/pull/8383 -.. _pr5043: https://github.com/Lightning-AI/lightning/pull/5043 -.. _pr8575: https://github.com/Lightning-AI/lightning/pull/8575 +.. _pr7431: https://github.com/Lightning-AI/pytorch-lightning/pull/7431 +.. _pr7026: https://github.com/Lightning-AI/pytorch-lightning/pull/7026 +.. _pr7907: https://github.com/Lightning-AI/pytorch-lightning/pull/7907 +.. _pr8383: https://github.com/Lightning-AI/pytorch-lightning/pull/8383 +.. _pr5043: https://github.com/Lightning-AI/pytorch-lightning/pull/5043 +.. 
_pr8575: https://github.com/Lightning-AI/pytorch-lightning/pull/8575 diff --git a/docs/source-pytorch/upgrade/sections/1_6_advanced.rst b/docs/source-pytorch/upgrade/sections/1_6_advanced.rst index c17be4ac0e26f..2f6357c4d197e 100644 --- a/docs/source-pytorch/upgrade/sections/1_6_advanced.rst +++ b/docs/source-pytorch/upgrade/sections/1_6_advanced.rst @@ -51,11 +51,11 @@ - `PR8851`_ -.. _pr8958: https://github.com/Lightning-AI/lightning/pull/8958 -.. _pr9222: https://github.com/Lightning-AI/lightning/pull/9222 -.. _pr9118: https://github.com/Lightning-AI/lightning/pull/9118 -.. _pr9525: https://github.com/Lightning-AI/lightning/pull/9525 -.. _pr9691: https://github.com/Lightning-AI/lightning/pull/9691 -.. _pr9677: https://github.com/Lightning-AI/lightning/pull/9677 -.. _pr10066: https://github.com/Lightning-AI/lightning/pull/10066 -.. _pr8851: https://github.com/Lightning-AI/lightning/pull/8851 +.. _pr8958: https://github.com/Lightning-AI/pytorch-lightning/pull/8958 +.. _pr9222: https://github.com/Lightning-AI/pytorch-lightning/pull/9222 +.. _pr9118: https://github.com/Lightning-AI/pytorch-lightning/pull/9118 +.. _pr9525: https://github.com/Lightning-AI/pytorch-lightning/pull/9525 +.. _pr9691: https://github.com/Lightning-AI/pytorch-lightning/pull/9691 +.. _pr9677: https://github.com/Lightning-AI/pytorch-lightning/pull/9677 +.. _pr10066: https://github.com/Lightning-AI/pytorch-lightning/pull/10066 +.. _pr8851: https://github.com/Lightning-AI/pytorch-lightning/pull/8851 diff --git a/docs/source-pytorch/upgrade/sections/1_6_devel.rst b/docs/source-pytorch/upgrade/sections/1_6_devel.rst index 9369de0d9d10a..654cd7c39fe55 100644 --- a/docs/source-pytorch/upgrade/sections/1_6_devel.rst +++ b/docs/source-pytorch/upgrade/sections/1_6_devel.rst @@ -31,7 +31,7 @@ - `PR10105`_ -.. _pr9422: https://github.com/Lightning-AI/lightning/pull/9422 -.. _pr10101: https://github.com/Lightning-AI/lightning/pull/10101 -.. _pr10105: https://github.com/Lightning-AI/lightning/pull/10105 -.. _pr10106: https://github.com/Lightning-AI/lightning/pull/10106 +.. _pr9422: https://github.com/Lightning-AI/pytorch-lightning/pull/9422 +.. _pr10101: https://github.com/Lightning-AI/pytorch-lightning/pull/10101 +.. _pr10105: https://github.com/Lightning-AI/pytorch-lightning/pull/10105 +.. _pr10106: https://github.com/Lightning-AI/pytorch-lightning/pull/10106 diff --git a/docs/source-pytorch/upgrade/sections/1_6_regular.rst b/docs/source-pytorch/upgrade/sections/1_6_regular.rst index e36d328698a4a..81ba818dcc83b 100644 --- a/docs/source-pytorch/upgrade/sections/1_6_regular.rst +++ b/docs/source-pytorch/upgrade/sections/1_6_regular.rst @@ -87,19 +87,19 @@ - `PR9924`_ -.. _pr9175: https://github.com/Lightning-AI/lightning/pull/9175 -.. _pr9699: https://github.com/Lightning-AI/lightning/pull/9699 -.. _pr9754: https://github.com/Lightning-AI/lightning/pull/9754 -.. _pr8989: https://github.com/Lightning-AI/lightning/pull/8989 -.. _pr9366: https://github.com/Lightning-AI/lightning/pull/9366 -.. _pr9460: https://github.com/Lightning-AI/lightning/pull/9460 -.. _pr9693: https://github.com/Lightning-AI/lightning/pull/9693 -.. _pr9921: https://github.com/Lightning-AI/lightning/pull/9921 -.. _pr9616: https://github.com/Lightning-AI/lightning/pull/9616 -.. _pr8513: https://github.com/Lightning-AI/lightning/pull/8513 -.. _pr8495: https://github.com/Lightning-AI/lightning/pull/8495 -.. _pr9098: https://github.com/Lightning-AI/lightning/pull/9098 -.. _pr9260: https://github.com/Lightning-AI/lightning/pull/9260 -.. 
_pr9065: https://github.com/Lightning-AI/lightning/pull/9065 -.. _pr10134: https://github.com/Lightning-AI/lightning/pull/10134 -.. _pr9924: https://github.com/Lightning-AI/lightning/pull/9924 +.. _pr9175: https://github.com/Lightning-AI/pytorch-lightning/pull/9175 +.. _pr9699: https://github.com/Lightning-AI/pytorch-lightning/pull/9699 +.. _pr9754: https://github.com/Lightning-AI/pytorch-lightning/pull/9754 +.. _pr8989: https://github.com/Lightning-AI/pytorch-lightning/pull/8989 +.. _pr9366: https://github.com/Lightning-AI/pytorch-lightning/pull/9366 +.. _pr9460: https://github.com/Lightning-AI/pytorch-lightning/pull/9460 +.. _pr9693: https://github.com/Lightning-AI/pytorch-lightning/pull/9693 +.. _pr9921: https://github.com/Lightning-AI/pytorch-lightning/pull/9921 +.. _pr9616: https://github.com/Lightning-AI/pytorch-lightning/pull/9616 +.. _pr8513: https://github.com/Lightning-AI/pytorch-lightning/pull/8513 +.. _pr8495: https://github.com/Lightning-AI/pytorch-lightning/pull/8495 +.. _pr9098: https://github.com/Lightning-AI/pytorch-lightning/pull/9098 +.. _pr9260: https://github.com/Lightning-AI/pytorch-lightning/pull/9260 +.. _pr9065: https://github.com/Lightning-AI/pytorch-lightning/pull/9065 +.. _pr10134: https://github.com/Lightning-AI/pytorch-lightning/pull/10134 +.. _pr9924: https://github.com/Lightning-AI/pytorch-lightning/pull/9924 diff --git a/docs/source-pytorch/upgrade/sections/1_7_advanced.rst b/docs/source-pytorch/upgrade/sections/1_7_advanced.rst index 21461c070058f..cfe05564a03ae 100644 --- a/docs/source-pytorch/upgrade/sections/1_7_advanced.rst +++ b/docs/source-pytorch/upgrade/sections/1_7_advanced.rst @@ -119,21 +119,21 @@ - `PR11887`_ -.. _pr14026: https://github.com/Lightning-AI/lightning/pull/14026 -.. _pr11141: https://github.com/Lightning-AI/lightning/pull/11141 -.. _pr11120: https://github.com/Lightning-AI/lightning/pull/11120 -.. _pr10505: https://github.com/Lightning-AI/lightning/pull/10505 -.. _pr10503: https://github.com/Lightning-AI/lightning/pull/10503 -.. _pr13868: https://github.com/Lightning-AI/lightning/pull/13868 -.. _pr14834: https://github.com/Lightning-AI/lightning/pull/14834 -.. _pr10940: https://github.com/Lightning-AI/lightning/pull/10940 -.. _pr12388: https://github.com/Lightning-AI/lightning/pull/12388 -.. _pr12436: https://github.com/Lightning-AI/lightning/pull/12436 -.. _pr12384: https://github.com/Lightning-AI/lightning/pull/12384 -.. _pr12386: https://github.com/Lightning-AI/lightning/pull/12386 -.. _pr12437: https://github.com/Lightning-AI/lightning/pull/12437 -.. _pr14830: https://github.com/Lightning-AI/lightning/pull/14830 -.. _pr11832: https://github.com/Lightning-AI/lightning/pull/11832 -.. _pr11871: https://github.com/Lightning-AI/lightning/pull/11871 -.. _pr11696: https://github.com/Lightning-AI/lightning/pull/11696 -.. _pr11887: https://github.com/Lightning-AI/lightning/pull/11887 +.. _pr14026: https://github.com/Lightning-AI/pytorch-lightning/pull/14026 +.. _pr11141: https://github.com/Lightning-AI/pytorch-lightning/pull/11141 +.. _pr11120: https://github.com/Lightning-AI/pytorch-lightning/pull/11120 +.. _pr10505: https://github.com/Lightning-AI/pytorch-lightning/pull/10505 +.. _pr10503: https://github.com/Lightning-AI/pytorch-lightning/pull/10503 +.. _pr13868: https://github.com/Lightning-AI/pytorch-lightning/pull/13868 +.. _pr14834: https://github.com/Lightning-AI/pytorch-lightning/pull/14834 +.. _pr10940: https://github.com/Lightning-AI/pytorch-lightning/pull/10940 +.. 
_pr12388: https://github.com/Lightning-AI/pytorch-lightning/pull/12388 +.. _pr12436: https://github.com/Lightning-AI/pytorch-lightning/pull/12436 +.. _pr12384: https://github.com/Lightning-AI/pytorch-lightning/pull/12384 +.. _pr12386: https://github.com/Lightning-AI/pytorch-lightning/pull/12386 +.. _pr12437: https://github.com/Lightning-AI/pytorch-lightning/pull/12437 +.. _pr14830: https://github.com/Lightning-AI/pytorch-lightning/pull/14830 +.. _pr11832: https://github.com/Lightning-AI/pytorch-lightning/pull/11832 +.. _pr11871: https://github.com/Lightning-AI/pytorch-lightning/pull/11871 +.. _pr11696: https://github.com/Lightning-AI/pytorch-lightning/pull/11696 +.. _pr11887: https://github.com/Lightning-AI/pytorch-lightning/pull/11887 diff --git a/docs/source-pytorch/upgrade/sections/1_7_devel.rst b/docs/source-pytorch/upgrade/sections/1_7_devel.rst index 211b56e861ae9..54518491a3446 100644 --- a/docs/source-pytorch/upgrade/sections/1_7_devel.rst +++ b/docs/source-pytorch/upgrade/sections/1_7_devel.rst @@ -123,23 +123,23 @@ - `PR12182`_ -.. _pr14415: https://github.com/Lightning-AI/lightning/pull/14415 -.. _pr11000: https://github.com/Lightning-AI/lightning/pull/11000 -.. _pr11747: https://github.com/Lightning-AI/lightning/pull/11747 -.. _pr12072: https://github.com/Lightning-AI/lightning/pull/12072 -.. _pr14401: https://github.com/Lightning-AI/lightning/pull/14401 -.. _pr12150: https://github.com/Lightning-AI/lightning/pull/12150 -.. _pr11745: https://github.com/Lightning-AI/lightning/pull/11745 -.. _pr11978: https://github.com/Lightning-AI/lightning/pull/11978 -.. _pr12262: https://github.com/Lightning-AI/lightning/pull/12262 -.. _pr12312: https://github.com/Lightning-AI/lightning/pull/12312 -.. _pr12315: https://github.com/Lightning-AI/lightning/pull/12315 -.. _pr10931: https://github.com/Lightning-AI/lightning/pull/10931 -.. _pr11068: https://github.com/Lightning-AI/lightning/pull/11068 -.. _pr11155: https://github.com/Lightning-AI/lightning/pull/11155 -.. _pr11282: https://github.com/Lightning-AI/lightning/pull/11282 -.. _pr11444: https://github.com/Lightning-AI/lightning/pull/11444 -.. _pr10979: https://github.com/Lightning-AI/lightning/pull/10979 -.. _pr12102: https://github.com/Lightning-AI/lightning/pull/12102 -.. _pr11254: https://github.com/Lightning-AI/lightning/pull/11254 -.. _pr12182: https://github.com/Lightning-AI/lightning/pull/12182 +.. _pr14415: https://github.com/Lightning-AI/pytorch-lightning/pull/14415 +.. _pr11000: https://github.com/Lightning-AI/pytorch-lightning/pull/11000 +.. _pr11747: https://github.com/Lightning-AI/pytorch-lightning/pull/11747 +.. _pr12072: https://github.com/Lightning-AI/pytorch-lightning/pull/12072 +.. _pr14401: https://github.com/Lightning-AI/pytorch-lightning/pull/14401 +.. _pr12150: https://github.com/Lightning-AI/pytorch-lightning/pull/12150 +.. _pr11745: https://github.com/Lightning-AI/pytorch-lightning/pull/11745 +.. _pr11978: https://github.com/Lightning-AI/pytorch-lightning/pull/11978 +.. _pr12262: https://github.com/Lightning-AI/pytorch-lightning/pull/12262 +.. _pr12312: https://github.com/Lightning-AI/pytorch-lightning/pull/12312 +.. _pr12315: https://github.com/Lightning-AI/pytorch-lightning/pull/12315 +.. _pr10931: https://github.com/Lightning-AI/pytorch-lightning/pull/10931 +.. _pr11068: https://github.com/Lightning-AI/pytorch-lightning/pull/11068 +.. _pr11155: https://github.com/Lightning-AI/pytorch-lightning/pull/11155 +.. _pr11282: https://github.com/Lightning-AI/pytorch-lightning/pull/11282 +.. 
_pr11444: https://github.com/Lightning-AI/pytorch-lightning/pull/11444 +.. _pr10979: https://github.com/Lightning-AI/pytorch-lightning/pull/10979 +.. _pr12102: https://github.com/Lightning-AI/pytorch-lightning/pull/12102 +.. _pr11254: https://github.com/Lightning-AI/pytorch-lightning/pull/11254 +.. _pr12182: https://github.com/Lightning-AI/pytorch-lightning/pull/12182 diff --git a/docs/source-pytorch/upgrade/sections/1_7_regular.rst b/docs/source-pytorch/upgrade/sections/1_7_regular.rst index acfb74152a7fc..c6b37ba004e97 100644 --- a/docs/source-pytorch/upgrade/sections/1_7_regular.rst +++ b/docs/source-pytorch/upgrade/sections/1_7_regular.rst @@ -43,9 +43,9 @@ - `PR14424`_ -.. _pr12147: https://github.com/Lightning-AI/lightning/pull/12147 -.. _pr11443: https://github.com/Lightning-AI/lightning/pull/11443 -.. _pr14727: https://github.com/Lightning-AI/lightning/pull/14727 -.. _pr11887: https://github.com/Lightning-AI/lightning/pull/11887 -.. _pr14315: https://github.com/Lightning-AI/lightning/pull/14315 -.. _pr14424: https://github.com/Lightning-AI/lightning/pull/14424 +.. _pr12147: https://github.com/Lightning-AI/pytorch-lightning/pull/12147 +.. _pr11443: https://github.com/Lightning-AI/pytorch-lightning/pull/11443 +.. _pr14727: https://github.com/Lightning-AI/pytorch-lightning/pull/14727 +.. _pr11887: https://github.com/Lightning-AI/pytorch-lightning/pull/11887 +.. _pr14315: https://github.com/Lightning-AI/pytorch-lightning/pull/14315 +.. _pr14424: https://github.com/Lightning-AI/pytorch-lightning/pull/14424 diff --git a/docs/source-pytorch/upgrade/sections/1_8_advanced.rst b/docs/source-pytorch/upgrade/sections/1_8_advanced.rst index 2d4a45efbaa47..47893fae9496a 100644 --- a/docs/source-pytorch/upgrade/sections/1_8_advanced.rst +++ b/docs/source-pytorch/upgrade/sections/1_8_advanced.rst @@ -43,10 +43,10 @@ - `PR9921`_ -.. _pr13031: https://github.com/Lightning-AI/lightning/pull/13031 -.. _pr13043: https://github.com/Lightning-AI/lightning/pull/13043 -.. _pr13767: https://github.com/Lightning-AI/lightning/pull/13767 -.. _pr12308: https://github.com/Lightning-AI/lightning/pull/12308 -.. _pr13636: https://github.com/Lightning-AI/lightning/pull/13636 -.. _pr15689: https://github.com/Lightning-AI/lightning/pull/15689 -.. _pr9921: https://github.com/Lightning-AI/lightning/pull/9921 +.. _pr13031: https://github.com/Lightning-AI/pytorch-lightning/pull/13031 +.. _pr13043: https://github.com/Lightning-AI/pytorch-lightning/pull/13043 +.. _pr13767: https://github.com/Lightning-AI/pytorch-lightning/pull/13767 +.. _pr12308: https://github.com/Lightning-AI/pytorch-lightning/pull/12308 +.. _pr13636: https://github.com/Lightning-AI/pytorch-lightning/pull/13636 +.. _pr15689: https://github.com/Lightning-AI/pytorch-lightning/pull/15689 +.. _pr9921: https://github.com/Lightning-AI/pytorch-lightning/pull/9921 diff --git a/docs/source-pytorch/upgrade/sections/1_8_devel.rst b/docs/source-pytorch/upgrade/sections/1_8_devel.rst index 8c69736e0cd62..07e86b7e70178 100644 --- a/docs/source-pytorch/upgrade/sections/1_8_devel.rst +++ b/docs/source-pytorch/upgrade/sections/1_8_devel.rst @@ -19,6 +19,6 @@ - `PR12106`_ -.. _pr12014: https://github.com/Lightning-AI/lightning/pull/12014 -.. _pr12150: https://github.com/Lightning-AI/lightning/pull/12150 -.. _pr12106: https://github.com/Lightning-AI/lightning/pull/12106 +.. _pr12014: https://github.com/Lightning-AI/pytorch-lightning/pull/12014 +.. _pr12150: https://github.com/Lightning-AI/pytorch-lightning/pull/12150 +.. 
_pr12106: https://github.com/Lightning-AI/pytorch-lightning/pull/12106 diff --git a/docs/source-pytorch/upgrade/sections/1_8_regular.rst b/docs/source-pytorch/upgrade/sections/1_8_regular.rst index 7dbcb24b1a3d7..ffdd89dd2c4d1 100644 --- a/docs/source-pytorch/upgrade/sections/1_8_regular.rst +++ b/docs/source-pytorch/upgrade/sections/1_8_regular.rst @@ -19,6 +19,6 @@ - `PR12740`_ -.. _pr12804: https://github.com/Lightning-AI/lightning/pull/12804 -.. _pr12184: https://github.com/Lightning-AI/lightning/pull/12184 -.. _pr12740: https://github.com/Lightning-AI/lightning/pull/12740 +.. _pr12804: https://github.com/Lightning-AI/pytorch-lightning/pull/12804 +.. _pr12184: https://github.com/Lightning-AI/pytorch-lightning/pull/12184 +.. _pr12740: https://github.com/Lightning-AI/pytorch-lightning/pull/12740 diff --git a/docs/source-pytorch/upgrade/sections/1_9_advanced.rst b/docs/source-pytorch/upgrade/sections/1_9_advanced.rst index fab97e957a89a..fba7c19981497 100644 --- a/docs/source-pytorch/upgrade/sections/1_9_advanced.rst +++ b/docs/source-pytorch/upgrade/sections/1_9_advanced.rst @@ -264,22 +264,22 @@ .. _lightning-ColossalAI: https://lightning.ai/docs/pytorch/2.1.0/integrations/strategies/colossalai.html .. _lightning-Fairscale: https://github.com/Lightning-Sandbox/lightning-Fairscale -.. _pr15953: https://github.com/Lightning-AI/lightning/pull/15953 -.. _pr16748: https://github.com/Lightning-AI/lightning/pull/16748 -.. _pr16520: https://github.com/Lightning-AI/lightning/pull/16520 -.. _pr16800: https://github.com/Lightning-AI/lightning/pull/16800 -.. _pr16358: https://github.com/Lightning-AI/lightning/pull/16358 -.. _pr16745: https://github.com/Lightning-AI/lightning/pull/16745 -.. _pr16537: https://github.com/Lightning-AI/lightning/pull/16537 -.. _pr16538: https://github.com/Lightning-AI/lightning/pull/16538 -.. _pr16039: https://github.com/Lightning-AI/lightning/pull/16039 -.. _pr16400: https://github.com/Lightning-AI/lightning/pull/16400 -.. _pr14998: https://github.com/Lightning-AI/lightning/pull/14998 -.. _pr16539: https://github.com/Lightning-AI/lightning/pull/16539 -.. _pr16355: https://github.com/Lightning-AI/lightning/pull/16355 -.. _pr16750: https://github.com/Lightning-AI/lightning/pull/16750 -.. _pr16791: https://github.com/Lightning-AI/lightning/pull/16791 -.. _pr16809: https://github.com/Lightning-AI/lightning/pull/16809 -.. _pr16192: https://github.com/Lightning-AI/lightning/pull/16192 -.. _pr16655: https://github.com/Lightning-AI/lightning/pull/16655 -.. _pr16389: https://github.com/Lightning-AI/lightning/pull/16389 +.. _pr15953: https://github.com/Lightning-AI/pytorch-lightning/pull/15953 +.. _pr16748: https://github.com/Lightning-AI/pytorch-lightning/pull/16748 +.. _pr16520: https://github.com/Lightning-AI/pytorch-lightning/pull/16520 +.. _pr16800: https://github.com/Lightning-AI/pytorch-lightning/pull/16800 +.. _pr16358: https://github.com/Lightning-AI/pytorch-lightning/pull/16358 +.. _pr16745: https://github.com/Lightning-AI/pytorch-lightning/pull/16745 +.. _pr16537: https://github.com/Lightning-AI/pytorch-lightning/pull/16537 +.. _pr16538: https://github.com/Lightning-AI/pytorch-lightning/pull/16538 +.. _pr16039: https://github.com/Lightning-AI/pytorch-lightning/pull/16039 +.. _pr16400: https://github.com/Lightning-AI/pytorch-lightning/pull/16400 +.. _pr14998: https://github.com/Lightning-AI/pytorch-lightning/pull/14998 +.. _pr16539: https://github.com/Lightning-AI/pytorch-lightning/pull/16539 +.. 
_pr16355: https://github.com/Lightning-AI/pytorch-lightning/pull/16355 +.. _pr16750: https://github.com/Lightning-AI/pytorch-lightning/pull/16750 +.. _pr16791: https://github.com/Lightning-AI/pytorch-lightning/pull/16791 +.. _pr16809: https://github.com/Lightning-AI/pytorch-lightning/pull/16809 +.. _pr16192: https://github.com/Lightning-AI/pytorch-lightning/pull/16192 +.. _pr16655: https://github.com/Lightning-AI/pytorch-lightning/pull/16655 +.. _pr16389: https://github.com/Lightning-AI/pytorch-lightning/pull/16389 diff --git a/docs/source-pytorch/upgrade/sections/1_9_devel.rst b/docs/source-pytorch/upgrade/sections/1_9_devel.rst index 440b21dfc1b8c..cc9eea5e75d6e 100644 --- a/docs/source-pytorch/upgrade/sections/1_9_devel.rst +++ b/docs/source-pytorch/upgrade/sections/1_9_devel.rst @@ -275,38 +275,38 @@ - `PR16618`_ -.. _pr16386: https://github.com/Lightning-AI/lightning/pull/16386 -.. _pr16748: https://github.com/Lightning-AI/lightning/pull/16748 -.. _pr16039: https://github.com/Lightning-AI/lightning/pull/16039 -.. _pr16803: https://github.com/Lightning-AI/lightning/pull/16803 -.. _pr16703: https://github.com/Lightning-AI/lightning/pull/16703 -.. _pr14514: https://github.com/Lightning-AI/lightning/pull/14514 -.. _pr14550: https://github.com/Lightning-AI/lightning/pull/14550 -.. _pr14492: https://github.com/Lightning-AI/lightning/pull/14492 -.. _pr14753: https://github.com/Lightning-AI/lightning/pull/14753 -.. _pr14515: https://github.com/Lightning-AI/lightning/pull/14515 -.. _pr14516: https://github.com/Lightning-AI/lightning/pull/14516 -.. _pr14537: https://github.com/Lightning-AI/lightning/pull/14537 -.. _pr16424: https://github.com/Lightning-AI/lightning/pull/16424 -.. _pr16390: https://github.com/Lightning-AI/lightning/pull/16390 -.. _pr16440: https://github.com/Lightning-AI/lightning/pull/16440 -.. _pr16439: https://github.com/Lightning-AI/lightning/pull/16439 -.. _pr16422: https://github.com/Lightning-AI/lightning/pull/16422 -.. _pr16172: https://github.com/Lightning-AI/lightning/pull/16172 -.. _pr16437: https://github.com/Lightning-AI/lightning/pull/16437 -.. _pr16708: https://github.com/Lightning-AI/lightning/pull/16708 -.. _pr15364: https://github.com/Lightning-AI/lightning/pull/15364 -.. _pr16204: https://github.com/Lightning-AI/lightning/pull/16204 -.. _pr16999: https://github.com/Lightning-AI/lightning/pull/16999 -.. _pr16436: https://github.com/Lightning-AI/lightning/pull/16436 -.. _pr16516: https://github.com/Lightning-AI/lightning/pull/16516 -.. _pr16533: https://github.com/Lightning-AI/lightning/pull/16533 -.. _pr16826: https://github.com/Lightning-AI/lightning/pull/16826 -.. _pr16726: https://github.com/Lightning-AI/lightning/pull/16726 -.. _pr16203: https://github.com/Lightning-AI/lightning/pull/16203 -.. _pr16462: https://github.com/Lightning-AI/lightning/pull/16462 -.. _pr16714: https://github.com/Lightning-AI/lightning/pull/16714 -.. _pr17058: https://github.com/Lightning-AI/lightning/pull/17058 -.. _pr16760: https://github.com/Lightning-AI/lightning/pull/16760 -.. _pr16759: https://github.com/Lightning-AI/lightning/pull/16759 -.. _pr16618: https://github.com/Lightning-AI/lightning/pull/16618 +.. _pr16386: https://github.com/Lightning-AI/pytorch-lightning/pull/16386 +.. _pr16748: https://github.com/Lightning-AI/pytorch-lightning/pull/16748 +.. _pr16039: https://github.com/Lightning-AI/pytorch-lightning/pull/16039 +.. _pr16803: https://github.com/Lightning-AI/pytorch-lightning/pull/16803 +.. _pr16703: https://github.com/Lightning-AI/pytorch-lightning/pull/16703 +.. 
_pr14514: https://github.com/Lightning-AI/pytorch-lightning/pull/14514 +.. _pr14550: https://github.com/Lightning-AI/pytorch-lightning/pull/14550 +.. _pr14492: https://github.com/Lightning-AI/pytorch-lightning/pull/14492 +.. _pr14753: https://github.com/Lightning-AI/pytorch-lightning/pull/14753 +.. _pr14515: https://github.com/Lightning-AI/pytorch-lightning/pull/14515 +.. _pr14516: https://github.com/Lightning-AI/pytorch-lightning/pull/14516 +.. _pr14537: https://github.com/Lightning-AI/pytorch-lightning/pull/14537 +.. _pr16424: https://github.com/Lightning-AI/pytorch-lightning/pull/16424 +.. _pr16390: https://github.com/Lightning-AI/pytorch-lightning/pull/16390 +.. _pr16440: https://github.com/Lightning-AI/pytorch-lightning/pull/16440 +.. _pr16439: https://github.com/Lightning-AI/pytorch-lightning/pull/16439 +.. _pr16422: https://github.com/Lightning-AI/pytorch-lightning/pull/16422 +.. _pr16172: https://github.com/Lightning-AI/pytorch-lightning/pull/16172 +.. _pr16437: https://github.com/Lightning-AI/pytorch-lightning/pull/16437 +.. _pr16708: https://github.com/Lightning-AI/pytorch-lightning/pull/16708 +.. _pr15364: https://github.com/Lightning-AI/pytorch-lightning/pull/15364 +.. _pr16204: https://github.com/Lightning-AI/pytorch-lightning/pull/16204 +.. _pr16999: https://github.com/Lightning-AI/pytorch-lightning/pull/16999 +.. _pr16436: https://github.com/Lightning-AI/pytorch-lightning/pull/16436 +.. _pr16516: https://github.com/Lightning-AI/pytorch-lightning/pull/16516 +.. _pr16533: https://github.com/Lightning-AI/pytorch-lightning/pull/16533 +.. _pr16826: https://github.com/Lightning-AI/pytorch-lightning/pull/16826 +.. _pr16726: https://github.com/Lightning-AI/pytorch-lightning/pull/16726 +.. _pr16203: https://github.com/Lightning-AI/pytorch-lightning/pull/16203 +.. _pr16462: https://github.com/Lightning-AI/pytorch-lightning/pull/16462 +.. _pr16714: https://github.com/Lightning-AI/pytorch-lightning/pull/16714 +.. _pr17058: https://github.com/Lightning-AI/pytorch-lightning/pull/17058 +.. _pr16760: https://github.com/Lightning-AI/pytorch-lightning/pull/16760 +.. _pr16759: https://github.com/Lightning-AI/pytorch-lightning/pull/16759 +.. _pr16618: https://github.com/Lightning-AI/pytorch-lightning/pull/16618 diff --git a/docs/source-pytorch/upgrade/sections/1_9_regular.rst b/docs/source-pytorch/upgrade/sections/1_9_regular.rst index 2f35957bcd21b..195a9df478f26 100644 --- a/docs/source-pytorch/upgrade/sections/1_9_regular.rst +++ b/docs/source-pytorch/upgrade/sections/1_9_regular.rst @@ -66,10 +66,10 @@ - use callbacks ``LearningRateFinder`` callback instead and the ``Trainer.tune()`` method was removed - -.. _pr16579: https://github.com/Lightning-AI/lightning/pull/16579 -.. _pr16492: https://github.com/Lightning-AI/lightning/pull/16492 -.. _pr10061: https://github.com/Lightning-AI/lightning/pull/10061 -.. _pr16171: https://github.com/Lightning-AI/lightning/pull/16171 -.. _pr16184: https://github.com/Lightning-AI/lightning/pull/16184 -.. _pr16729: https://github.com/Lightning-AI/lightning/pull/16729 -.. _pr16359: https://github.com/Lightning-AI/lightning/pull/16359 +.. _pr16579: https://github.com/Lightning-AI/pytorch-lightning/pull/16579 +.. _pr16492: https://github.com/Lightning-AI/pytorch-lightning/pull/16492 +.. _pr10061: https://github.com/Lightning-AI/pytorch-lightning/pull/10061 +.. _pr16171: https://github.com/Lightning-AI/pytorch-lightning/pull/16171 +.. _pr16184: https://github.com/Lightning-AI/pytorch-lightning/pull/16184 +.. 
_pr16729: https://github.com/Lightning-AI/pytorch-lightning/pull/16729
+.. _pr16359: https://github.com/Lightning-AI/pytorch-lightning/pull/16359
diff --git a/docs/source-pytorch/upgrade/sections/2_0_advanced.rst b/docs/source-pytorch/upgrade/sections/2_0_advanced.rst
index c53700c184ae4..b1ce4a3198ddb 100644
--- a/docs/source-pytorch/upgrade/sections/2_0_advanced.rst
+++ b/docs/source-pytorch/upgrade/sections/2_0_advanced.rst
@@ -35,6 +35,6 @@
 - `PR18137`_


-.. _pr17995: https://github.com/Lightning-AI/lightning/pull/17995
-.. _pr18390: https://github.com/Lightning-AI/lightning/pull/18390
-.. _pr18137: https://github.com/Lightning-AI/lightning/pull/18390
+.. _pr17995: https://github.com/Lightning-AI/pytorch-lightning/pull/17995
+.. _pr18390: https://github.com/Lightning-AI/pytorch-lightning/pull/18390
+.. _pr18137: https://github.com/Lightning-AI/pytorch-lightning/pull/18137
diff --git a/docs/source-pytorch/upgrade/sections/2_0_devel.rst b/docs/source-pytorch/upgrade/sections/2_0_devel.rst
index f46ea270f7d86..661f9159adbb7 100644
--- a/docs/source-pytorch/upgrade/sections/2_0_devel.rst
+++ b/docs/source-pytorch/upgrade/sections/2_0_devel.rst
@@ -15,4 +15,4 @@
 - `PR17381`_


-.. _pr17381: https://github.com/Lightning-AI/lightning/pull/17381
+.. _pr17381: https://github.com/Lightning-AI/pytorch-lightning/pull/17381
diff --git a/docs/source-pytorch/upgrade/sections/2_0_regular.rst b/docs/source-pytorch/upgrade/sections/2_0_regular.rst
index 2f94ef7ab66fd..e36f7d8860b31 100644
--- a/docs/source-pytorch/upgrade/sections/2_0_regular.rst
+++ b/docs/source-pytorch/upgrade/sections/2_0_regular.rst
@@ -47,11 +47,11 @@
 - `PR18386`_


-.. _pr18691: https://github.com/Lightning-AI/lightning/pull/18691
-.. _pr16579: https://github.com/Lightning-AI/lightning/pull/16579
-.. _pr17309: https://github.com/Lightning-AI/lightning/pull/17309
-.. _pr17227: https://github.com/Lightning-AI/lightning/pull/17227
-.. _pr17368: https://github.com/Lightning-AI/lightning/pull/17368
-.. _pr18441: https://github.com/Lightning-AI/lightning/pull/18441
-.. _pr18291: https://github.com/Lightning-AI/lightning/pull/18291
-.. _pr18386: https://github.com/Lightning-AI/lightning/pull/18386
+.. _pr18691: https://github.com/Lightning-AI/pytorch-lightning/pull/18691
+.. _pr16579: https://github.com/Lightning-AI/pytorch-lightning/pull/16579
+.. _pr17309: https://github.com/Lightning-AI/pytorch-lightning/pull/17309
+.. _pr17227: https://github.com/Lightning-AI/pytorch-lightning/pull/17227
+.. _pr17368: https://github.com/Lightning-AI/pytorch-lightning/pull/17368
+.. _pr18441: https://github.com/Lightning-AI/pytorch-lightning/pull/18441
+.. _pr18291: https://github.com/Lightning-AI/pytorch-lightning/pull/18291
+.. _pr18386: https://github.com/Lightning-AI/pytorch-lightning/pull/18386
diff --git a/src/lightning/fabric/CHANGELOG.md b/src/lightning/fabric/CHANGELOG.md
index 0c922f0d31d0e..55cb317399f5e 100644
--- a/src/lightning/fabric/CHANGELOG.md
+++ b/src/lightning/fabric/CHANGELOG.md
@@ -53,13 +53,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
### Removed -- Removed support for PyTorch 2.1 ([#20009](https://github.com/Lightning-AI/lightning/pull/20009)) -- Removed support for Python 3.8 ([#20071](https://github.com/Lightning-AI/lightning/pull/20071)) +- Removed support for PyTorch 2.1 ([#20009](https://github.com/Lightning-AI/pytorch-lightning/pull/20009)) +- Removed support for Python 3.8 ([#20071](https://github.com/Lightning-AI/pytorch-lightning/pull/20071)) ### Fixed -- Fixed an attribute error when loading a checkpoint into a quantized model using the `_lazy_load()` function ([#20121](https://github.com/Lightning-AI/lightning/pull/20121)) -- Fixed `_optimizer_to_device` logic for special 'step' key in optimizer state causing performance regression ([#20019](https://github.com/Lightning-AI/lightning/pull/20019)) +- Fixed an attribute error when loading a checkpoint into a quantized model using the `_lazy_load()` function ([#20121](https://github.com/Lightning-AI/pytorch-lightning/pull/20121)) +- Fixed `_optimizer_to_device` logic for special 'step' key in optimizer state causing performance regression ([#20019](https://github.com/Lightning-AI/pytorch-lightning/pull/20019)) ## [2.3.0] - 2024-06-13 @@ -77,17 +77,17 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed - Renamed `lightning run model` to `fabric run` ([#19442](https://github.com/Lightning-AI/pytorch-lightning/pull/19442), [#19527](https://github.com/Lightning-AI/pytorch-lightning/pull/19527)) -- The `Fabric.rank_zero_first` context manager now uses a barrier without timeout to avoid long-running tasks to be interrupted ([#19448](https://github.com/Lightning-AI/lightning/pull/19448)) -- Fabric now raises an error if you forget to call `fabric.backward()` when it is needed by the strategy or precision selection ([#19447](https://github.com/Lightning-AI/lightning/pull/19447), [#19493](https://github.com/Lightning-AI/lightning/pull/19493)) -- `_BackwardSyncControl` can now control what to do when gradient accumulation is disabled ([#19577](https://github.com/Lightning-AI/lightning/pull/19577)) +- The `Fabric.rank_zero_first` context manager now uses a barrier without timeout to avoid long-running tasks to be interrupted ([#19448](https://github.com/Lightning-AI/pytorch-lightning/pull/19448)) +- Fabric now raises an error if you forget to call `fabric.backward()` when it is needed by the strategy or precision selection ([#19447](https://github.com/Lightning-AI/pytorch-lightning/pull/19447), [#19493](https://github.com/Lightning-AI/pytorch-lightning/pull/19493)) +- `_BackwardSyncControl` can now control what to do when gradient accumulation is disabled ([#19577](https://github.com/Lightning-AI/pytorch-lightning/pull/19577)) ### Removed -- Removed support for PyTorch 1.13 ([#19706](https://github.com/Lightning-AI/lightning/pull/19706)) +- Removed support for PyTorch 1.13 ([#19706](https://github.com/Lightning-AI/pytorch-lightning/pull/19706)) ### Fixed -- Fixed a matrix shape mismatch issue when running a model loaded from a quantized checkpoint (bitsandbytes) ([#19886](https://github.com/Lightning-AI/lightning/pull/19886)) +- Fixed a matrix shape mismatch issue when running a model loaded from a quantized checkpoint (bitsandbytes) ([#19886](https://github.com/Lightning-AI/pytorch-lightning/pull/19886)) ## [2.2.2] - 2024-04-11 @@ -103,181 +103,181 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
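> Editor's note on the `Fabric.rank_zero_first` and `fabric.backward()` entries above: a minimal single-process sketch of the intended usage (illustrative only, not part of the patch; the random tensor stands in for a one-time download):

```python
import torch
from lightning.fabric import Fabric

fabric = Fabric(accelerator="cpu", devices=1)

# With multiple processes, rank 0 enters this block first (e.g. to download
# a dataset once); the other ranks wait at a barrier that, per the change
# above, no longer times out while rank 0 works.
with fabric.rank_zero_first():
    data = torch.randn(8, 4)  # stand-in for a one-time data download

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
model, optimizer = fabric.setup(model, optimizer)

loss = model(data).sum()
fabric.backward(loss)  # skipping this now raises an error when the strategy or precision selection requires it
optimizer.step()
```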
### Fixed -- Fixed an issue with CSVLogger trying to append to file from a previous run when the version is set manually ([#19446](https://github.com/Lightning-AI/lightning/pull/19446)) +- Fixed an issue with CSVLogger trying to append to file from a previous run when the version is set manually ([#19446](https://github.com/Lightning-AI/pytorch-lightning/pull/19446)) ## [2.2.0] - 2024-02-08 ### Added -- Added `lightning.fabric.utilities.ThroughputMonitor` and `lightning.fabric.utilities.Throughput` to track throughput and log it ([#18848](https://github.com/Lightning-AI/lightning/pull/18848)) -- Added `lightning.fabric.utilities.AttributeDict` for convenient dict-attribute access to represent state in script ([#18943](https://github.com/Lightning-AI/lightning/pull/18943)) -- Added support for meta-device initialization and materialization of 4-bit Bitsandbytes layers ([#19150](https://github.com/Lightning-AI/lightning/pull/19150)) -- Added `TransformerEnginePrecision(fallback_compute_dtype=)` to control the dtype of operations that don't support fp8 ([#19082](https://github.com/Lightning-AI/lightning/pull/19082)) -- Added support for clipping gradients by value with FSDP ([#19236](https://github.com/Lightning-AI/lightning/pull/19236)) -- Added a utility function and CLI to consolidate FSDP sharded checkpoints into a single file ([#19213](https://github.com/Lightning-AI/lightning/pull/19213)) -- Added support for re-compiling the model inside `Fabric.setup()` over the FSDP/DDP wrappers ([#19280](https://github.com/Lightning-AI/lightning/pull/19280)) +- Added `lightning.fabric.utilities.ThroughputMonitor` and `lightning.fabric.utilities.Throughput` to track throughput and log it ([#18848](https://github.com/Lightning-AI/pytorch-lightning/pull/18848)) +- Added `lightning.fabric.utilities.AttributeDict` for convenient dict-attribute access to represent state in script ([#18943](https://github.com/Lightning-AI/pytorch-lightning/pull/18943)) +- Added support for meta-device initialization and materialization of 4-bit Bitsandbytes layers ([#19150](https://github.com/Lightning-AI/pytorch-lightning/pull/19150)) +- Added `TransformerEnginePrecision(fallback_compute_dtype=)` to control the dtype of operations that don't support fp8 ([#19082](https://github.com/Lightning-AI/pytorch-lightning/pull/19082)) +- Added support for clipping gradients by value with FSDP ([#19236](https://github.com/Lightning-AI/pytorch-lightning/pull/19236)) +- Added a utility function and CLI to consolidate FSDP sharded checkpoints into a single file ([#19213](https://github.com/Lightning-AI/pytorch-lightning/pull/19213)) +- Added support for re-compiling the model inside `Fabric.setup()` over the FSDP/DDP wrappers ([#19280](https://github.com/Lightning-AI/pytorch-lightning/pull/19280)) ### Changed -- `seed_everything()` without passing in a seed no longer randomly selects a seed, and now defaults to `0` ([#18846](https://github.com/Lightning-AI/lightning/pull/18846)) -- Changed the `TransformerEnginePrecision(dtype=)` argument to `weights_dtype` and made it required ([#19082](https://github.com/Lightning-AI/lightning/pull/19082)) -- The columns in the `metrics.csv` file produced by `CSVLogger` are now sorted alphabetically ([#19159](https://github.com/Lightning-AI/lightning/pull/19159)) +- `seed_everything()` without passing in a seed no longer randomly selects a seed, and now defaults to `0` ([#18846](https://github.com/Lightning-AI/pytorch-lightning/pull/18846)) +- Changed the `TransformerEnginePrecision(dtype=)` argument 
to `weights_dtype` and made it required ([#19082](https://github.com/Lightning-AI/pytorch-lightning/pull/19082)) +- The columns in the `metrics.csv` file produced by `CSVLogger` are now sorted alphabetically ([#19159](https://github.com/Lightning-AI/pytorch-lightning/pull/19159)) ### Removed -- Removed support for PyTorch 1.12 ([#19300](https://github.com/Lightning-AI/lightning/pull/19300)) +- Removed support for PyTorch 1.12 ([#19300](https://github.com/Lightning-AI/pytorch-lightning/pull/19300)) ### Fixed -- Fixed parsing of v100s GPUs in `get_available_flops` ([#18952](https://github.com/Lightning-AI/lightning/pull/18952)) -- Fixed issue where the `precision="transformer-engine"` argument would not replace layers by default ([#19082](https://github.com/Lightning-AI/lightning/pull/19082)) -- Fixed the input validation logic in `FSDPStrategy` to accept a `device_mesh` ([#19392](https://github.com/Lightning-AI/lightning/pull/19392)) +- Fixed parsing of v100s GPUs in `get_available_flops` ([#18952](https://github.com/Lightning-AI/pytorch-lightning/pull/18952)) +- Fixed issue where the `precision="transformer-engine"` argument would not replace layers by default ([#19082](https://github.com/Lightning-AI/pytorch-lightning/pull/19082)) +- Fixed the input validation logic in `FSDPStrategy` to accept a `device_mesh` ([#19392](https://github.com/Lightning-AI/pytorch-lightning/pull/19392)) ## [2.1.4] - 2024-01-31 ### Fixed -- Fixed an issue preventing Fabric to run on CPU when the system's CUDA driver is outdated or broken ([#19234](https://github.com/Lightning-AI/lightning/pull/19234)) -- Fixed typo in kwarg in SpikeDetection ([#19282](https://github.com/Lightning-AI/lightning/pull/19282)) +- Fixed an issue preventing Fabric to run on CPU when the system's CUDA driver is outdated or broken ([#19234](https://github.com/Lightning-AI/pytorch-lightning/pull/19234)) +- Fixed typo in kwarg in SpikeDetection ([#19282](https://github.com/Lightning-AI/pytorch-lightning/pull/19282)) ## [2.1.3] - 2023-12-21 ### Fixed -- Avoid moving the model to device if `move_to_device=False` is passed ([#19152](https://github.com/Lightning-AI/lightning/pull/19152)) -- Fixed broadcast at initialization in `MPIEnvironment` ([#19074](https://github.com/Lightning-AI/lightning/pull/19074)) +- Avoid moving the model to device if `move_to_device=False` is passed ([#19152](https://github.com/Lightning-AI/pytorch-lightning/pull/19152)) +- Fixed broadcast at initialization in `MPIEnvironment` ([#19074](https://github.com/Lightning-AI/pytorch-lightning/pull/19074)) ## [2.1.2] - 2023-11-15 ### Fixed -- Fixed precision default from environment ([#18928](https://github.com/Lightning-AI/lightning/pull/18928)) +- Fixed precision default from environment ([#18928](https://github.com/Lightning-AI/pytorch-lightning/pull/18928)) ## [2.1.1] - 2023-11-06 ### Changed -- Calling a method other than `forward` that invokes submodules is now an error when the model is wrapped (e.g., with DDP) ([#18819](https://github.com/Lightning-AI/lightning/pull/18819)) +- Calling a method other than `forward` that invokes submodules is now an error when the model is wrapped (e.g., with DDP) ([#18819](https://github.com/Lightning-AI/pytorch-lightning/pull/18819)) ### Fixed -- Fixed false-positive warnings about method calls on the Fabric-wrapped module ([#18819](https://github.com/Lightning-AI/lightning/pull/18819)) -- Refined the FSDP saving logic and error messaging when path exists ([#18884](https://github.com/Lightning-AI/lightning/pull/18884)) -- Fixed 
layer conversion under `Fabric.init_module()` context manager when using the `BitsandbytesPrecision` plugin ([#18914](https://github.com/Lightning-AI/lightning/pull/18914)) +- Fixed false-positive warnings about method calls on the Fabric-wrapped module ([#18819](https://github.com/Lightning-AI/pytorch-lightning/pull/18819)) +- Refined the FSDP saving logic and error messaging when path exists ([#18884](https://github.com/Lightning-AI/pytorch-lightning/pull/18884)) +- Fixed layer conversion under `Fabric.init_module()` context manager when using the `BitsandbytesPrecision` plugin ([#18914](https://github.com/Lightning-AI/pytorch-lightning/pull/18914)) ## [2.1.0] - 2023-10-11 ### Added -- Added support for the TPU-v4 architecture ([#17227](https://github.com/Lightning-AI/lightning/pull/17227)) -- Added support for XLA's new PJRT runtime ([#17352](https://github.com/Lightning-AI/lightning/pull/17352)) -- Added support for Fully Sharded Data Parallel (FSDP) training with XLA ([#18126](https://github.com/Lightning-AI/lightning/pull/18126), [#18424](https://github.com/Lightning-AI/lightning/pull/18424), [#18430](https://github.com/Lightning-AI/lightning/pull/18430)) -- Check for invalid TPU device inputs ([#17227](https://github.com/Lightning-AI/lightning/pull/17227)) -- Added `XLAStrategy(sync_module_states=bool)` to control whether to broadcast the parameters to all devices ([#17522](https://github.com/Lightning-AI/lightning/pull/17522)) -- Added support for joint setup of model and optimizer with FSDP ([#17305](https://github.com/Lightning-AI/lightning/pull/17305)) -- Added support for handling multiple parameter groups in optimizers set up with FSDP ([#17305](https://github.com/Lightning-AI/lightning/pull/17305)) -- Added support for saving and loading sharded model and optimizer state with `FSDPStrategy` ([#17323](https://github.com/Lightning-AI/lightning/pull/17323)) -- Added a warning when calling methods on `_FabricModule` that bypass the strategy-specific wrappers ([#17424](https://github.com/Lightning-AI/lightning/pull/17424)) -- Added `Fabric.init_tensor()` context manager to instantiate tensors efficiently directly on device and dtype ([#17488](https://github.com/Lightning-AI/lightning/pull/17488)) -- Added `Fabric.init_module()` context manager to instantiate large models efficiently directly on device, dtype, and with sharding support ([#17462](https://github.com/Lightning-AI/lightning/pull/17462)) +- Added support for the TPU-v4 architecture ([#17227](https://github.com/Lightning-AI/pytorch-lightning/pull/17227)) +- Added support for XLA's new PJRT runtime ([#17352](https://github.com/Lightning-AI/pytorch-lightning/pull/17352)) +- Added support for Fully Sharded Data Parallel (FSDP) training with XLA ([#18126](https://github.com/Lightning-AI/pytorch-lightning/pull/18126), [#18424](https://github.com/Lightning-AI/pytorch-lightning/pull/18424), [#18430](https://github.com/Lightning-AI/pytorch-lightning/pull/18430)) +- Check for invalid TPU device inputs ([#17227](https://github.com/Lightning-AI/pytorch-lightning/pull/17227)) +- Added `XLAStrategy(sync_module_states=bool)` to control whether to broadcast the parameters to all devices ([#17522](https://github.com/Lightning-AI/pytorch-lightning/pull/17522)) +- Added support for joint setup of model and optimizer with FSDP ([#17305](https://github.com/Lightning-AI/pytorch-lightning/pull/17305)) +- Added support for handling multiple parameter groups in optimizers set up with FSDP 
([#17305](https://github.com/Lightning-AI/pytorch-lightning/pull/17305)) +- Added support for saving and loading sharded model and optimizer state with `FSDPStrategy` ([#17323](https://github.com/Lightning-AI/pytorch-lightning/pull/17323)) +- Added a warning when calling methods on `_FabricModule` that bypass the strategy-specific wrappers ([#17424](https://github.com/Lightning-AI/pytorch-lightning/pull/17424)) +- Added `Fabric.init_tensor()` context manager to instantiate tensors efficiently directly on device and dtype ([#17488](https://github.com/Lightning-AI/pytorch-lightning/pull/17488)) +- Added `Fabric.init_module()` context manager to instantiate large models efficiently directly on device, dtype, and with sharding support ([#17462](https://github.com/Lightning-AI/pytorch-lightning/pull/17462)) * Creates the model parameters in the desired dtype (`torch.float32`, `torch.float64`, `torch.float16`, or `torch.bfloat16`) depending on the 'true' precision choice in `Fabric(precision='32-true'|'64-true'|'16-true'|'bf16-true')` * Handles initialization for FSDP models before wrapping and the Zero stage 3 initialization for DeepSpeed before sharding -- Added support for empty weight initialization with `Fabric.init_module(empty_init=True)` for checkpoint loading ([#17627](https://github.com/Lightning-AI/lightning/pull/17627)) -- Added support for meta-device initialization with `Fabric.init_module(empty_init=True)` in FSDP ([#18122](https://github.com/Lightning-AI/lightning/pull/18122)) -- Added `lightning.fabric.plugins.Precision.module_init_context()` and `lightning.fabric.strategies.Strategy.module_init_context()` context managers to control model and tensor instantiation ([#17462](https://github.com/Lightning-AI/lightning/pull/17462)) -- `lightning.fabric.strategies.Strategy.tensor_init_context()` context manager to instantiate tensors efficiently directly on device and dtype ([#17607](https://github.com/Lightning-AI/lightning/pull/17607)) -- Run the DDP wrapper in a CUDA stream ([#17334](https://github.com/Lightning-AI/lightning/pull/17334)) -- Added support for true half-precision as `Fabric(precision="16-true"|"bf16-true")` ([#17287](https://github.com/Lightning-AI/lightning/pull/17287)) -- Added support for mixed 8-bit precision as `Fabric(precision="transformer-engine")` using [Nvidia's Transformer Engine](https://docs.nvidia.com/deeplearning/transformer-engine) ([#17597](https://github.com/Lightning-AI/lightning/pull/17597)) -- Added support for linear layer quantization with `Fabric(plugins=BitsandbytesPrecision())` using [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) ([#18655](https://github.com/Lightning-AI/lightning/pull/18655)) -- Added error messaging for missed `.launch()` when it is required ([#17570](https://github.com/Lightning-AI/lightning/pull/17570)) -- Added support for saving checkpoints with either full state-dict or sharded state dict via `FSDPStrategy(state_dict_type="full"|"sharded")` ([#17526](https://github.com/Lightning-AI/lightning/pull/17526)) -- Added support for loading a full-state checkpoint file into a sharded model ([#17623](https://github.com/Lightning-AI/lightning/pull/17623)) -- Added support for calling hooks on a LightningModule via `Fabric.call` ([#17874](https://github.com/Lightning-AI/lightning/pull/17874)) -- Added the parameter `Fabric.load(..., strict=True|False)` to enable non-strict loading of partial checkpoint state ([#17645](https://github.com/Lightning-AI/lightning/pull/17645)) -- Added the parameter `Fabric.save(..., 
filter=...)` to enable saving a partial checkpoint state ([#17845](https://github.com/Lightning-AI/lightning/pull/17845)) -- Added support for loading optimizer states from a full-state checkpoint file ([#17747](https://github.com/Lightning-AI/lightning/pull/17747)) -- Automatically call `xla_model.mark_step()` before saving checkpoints with XLA ([#17882](https://github.com/Lightning-AI/lightning/pull/17882)) -- Automatically call `xla_model.mark_step()` after `optimizer.step()` with XLA ([#17883](https://github.com/Lightning-AI/lightning/pull/17883)) -- Added support for all half-precision modes in FSDP precision plugin ([#17807](https://github.com/Lightning-AI/lightning/pull/17807)) -- Added `FSDPStrategy(activation_checkpointing_policy=...)` to customize the layer policy for automatic activation checkpointing (requires torch>=2.1) ([#18045](https://github.com/Lightning-AI/lightning/pull/18045)) -- Added a callback for spike-detection ([#18014](https://github.com/Lightning-AI/lightning/pull/18014)) -- Added the ability to set the `torch.distributed.fsdp.ShardingStrategy` via string in `FSDPStrategy` ([#18087](https://github.com/Lightning-AI/lightning/pull/18087)) -- Improved error messages when attempting to load a DeepSpeed checkpoint at an invalid path ([#17795](https://github.com/Lightning-AI/lightning/pull/17795)) -- Added `Fabric.load_raw()` for loading raw PyTorch state dict checkpoints for model or optimizer objects ([#18049](https://github.com/Lightning-AI/lightning/pull/18049)) -- Allowed accessing rank information in the main process before processes are launched when using the `XLAStrategy` ([#18194](https://github.com/Lightning-AI/lightning/pull/18194)) -- Added automatic process cleanup to avoid zombie child processes and stalls when exceptions are raised ([#18218](https://github.com/Lightning-AI/lightning/pull/18218)) -- Added validation of user input for `devices` and `num_nodes` when running with `SLURM` or `TorchElastic` ([#18292](https://github.com/Lightning-AI/lightning/pull/18292)) -- Improved the error messaging and instructions when handling custom batch samplers in distributed settings ([#18402](https://github.com/Lightning-AI/lightning/pull/18402)) -- Added support for saving and loading stateful objects other than modules and optimizers ([#18513](https://github.com/Lightning-AI/lightning/pull/18513)) -- Enabled the default process group configuration for FSDP's hybrid sharding ([#18583](https://github.com/Lightning-AI/lightning/pull/18583)) -- Added `lightning.fabric.utilities.suggested_max_num_workers` to assist with setting a good value in distributed settings ([#18591](https://github.com/Lightning-AI/lightning/pull/18591)) -- Added `lightning.fabric.utilities.is_shared_filesystem` utility function to automatically check whether the filesystem is shared between machines ([#18586](https://github.com/Lightning-AI/lightning/pull/18586)) -- Removed support for PyTorch 1.11 ([#18691](https://github.com/Lightning-AI/lightning/pull/18691)) -- Added support for passing the argument `.load_state_dict(..., assign=True|False)` on Fabric-wrapped modules in PyTorch 2.1 or newer ([#18690](https://github.com/Lightning-AI/lightning/pull/18690)) +- Added support for empty weight initialization with `Fabric.init_module(empty_init=True)` for checkpoint loading ([#17627](https://github.com/Lightning-AI/pytorch-lightning/pull/17627)) +- Added support for meta-device initialization with `Fabric.init_module(empty_init=True)` in FSDP 
([#18122](https://github.com/Lightning-AI/pytorch-lightning/pull/18122)) +- Added `lightning.fabric.plugins.Precision.module_init_context()` and `lightning.fabric.strategies.Strategy.module_init_context()` context managers to control model and tensor instantiation ([#17462](https://github.com/Lightning-AI/pytorch-lightning/pull/17462)) +- `lightning.fabric.strategies.Strategy.tensor_init_context()` context manager to instantiate tensors efficiently directly on device and dtype ([#17607](https://github.com/Lightning-AI/pytorch-lightning/pull/17607)) +- Run the DDP wrapper in a CUDA stream ([#17334](https://github.com/Lightning-AI/pytorch-lightning/pull/17334)) +- Added support for true half-precision as `Fabric(precision="16-true"|"bf16-true")` ([#17287](https://github.com/Lightning-AI/pytorch-lightning/pull/17287)) +- Added support for mixed 8-bit precision as `Fabric(precision="transformer-engine")` using [Nvidia's Transformer Engine](https://docs.nvidia.com/deeplearning/transformer-engine) ([#17597](https://github.com/Lightning-AI/pytorch-lightning/pull/17597)) +- Added support for linear layer quantization with `Fabric(plugins=BitsandbytesPrecision())` using [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) ([#18655](https://github.com/Lightning-AI/pytorch-lightning/pull/18655)) +- Added error messaging for missed `.launch()` when it is required ([#17570](https://github.com/Lightning-AI/pytorch-lightning/pull/17570)) +- Added support for saving checkpoints with either full state-dict or sharded state dict via `FSDPStrategy(state_dict_type="full"|"sharded")` ([#17526](https://github.com/Lightning-AI/pytorch-lightning/pull/17526)) +- Added support for loading a full-state checkpoint file into a sharded model ([#17623](https://github.com/Lightning-AI/pytorch-lightning/pull/17623)) +- Added support for calling hooks on a LightningModule via `Fabric.call` ([#17874](https://github.com/Lightning-AI/pytorch-lightning/pull/17874)) +- Added the parameter `Fabric.load(..., strict=True|False)` to enable non-strict loading of partial checkpoint state ([#17645](https://github.com/Lightning-AI/pytorch-lightning/pull/17645)) +- Added the parameter `Fabric.save(..., filter=...)` to enable saving a partial checkpoint state ([#17845](https://github.com/Lightning-AI/pytorch-lightning/pull/17845)) +- Added support for loading optimizer states from a full-state checkpoint file ([#17747](https://github.com/Lightning-AI/pytorch-lightning/pull/17747)) +- Automatically call `xla_model.mark_step()` before saving checkpoints with XLA ([#17882](https://github.com/Lightning-AI/pytorch-lightning/pull/17882)) +- Automatically call `xla_model.mark_step()` after `optimizer.step()` with XLA ([#17883](https://github.com/Lightning-AI/pytorch-lightning/pull/17883)) +- Added support for all half-precision modes in FSDP precision plugin ([#17807](https://github.com/Lightning-AI/pytorch-lightning/pull/17807)) +- Added `FSDPStrategy(activation_checkpointing_policy=...)` to customize the layer policy for automatic activation checkpointing (requires torch>=2.1) ([#18045](https://github.com/Lightning-AI/pytorch-lightning/pull/18045)) +- Added a callback for spike-detection ([#18014](https://github.com/Lightning-AI/pytorch-lightning/pull/18014)) +- Added the ability to set the `torch.distributed.fsdp.ShardingStrategy` via string in `FSDPStrategy` ([#18087](https://github.com/Lightning-AI/pytorch-lightning/pull/18087)) +- Improved error messages when attempting to load a DeepSpeed checkpoint at an invalid path 
([#17795](https://github.com/Lightning-AI/pytorch-lightning/pull/17795)) +- Added `Fabric.load_raw()` for loading raw PyTorch state dict checkpoints for model or optimizer objects ([#18049](https://github.com/Lightning-AI/pytorch-lightning/pull/18049)) +- Allowed accessing rank information in the main process before processes are launched when using the `XLAStrategy` ([#18194](https://github.com/Lightning-AI/pytorch-lightning/pull/18194)) +- Added automatic process cleanup to avoid zombie child processes and stalls when exceptions are raised ([#18218](https://github.com/Lightning-AI/pytorch-lightning/pull/18218)) +- Added validation of user input for `devices` and `num_nodes` when running with `SLURM` or `TorchElastic` ([#18292](https://github.com/Lightning-AI/pytorch-lightning/pull/18292)) +- Improved the error messaging and instructions when handling custom batch samplers in distributed settings ([#18402](https://github.com/Lightning-AI/pytorch-lightning/pull/18402)) +- Added support for saving and loading stateful objects other than modules and optimizers ([#18513](https://github.com/Lightning-AI/pytorch-lightning/pull/18513)) +- Enabled the default process group configuration for FSDP's hybrid sharding ([#18583](https://github.com/Lightning-AI/pytorch-lightning/pull/18583)) +- Added `lightning.fabric.utilities.suggested_max_num_workers` to assist with setting a good value in distributed settings ([#18591](https://github.com/Lightning-AI/pytorch-lightning/pull/18591)) +- Added `lightning.fabric.utilities.is_shared_filesystem` utility function to automatically check whether the filesystem is shared between machines ([#18586](https://github.com/Lightning-AI/pytorch-lightning/pull/18586)) +- Removed support for PyTorch 1.11 ([#18691](https://github.com/Lightning-AI/pytorch-lightning/pull/18691)) +- Added support for passing the argument `.load_state_dict(..., assign=True|False)` on Fabric-wrapped modules in PyTorch 2.1 or newer ([#18690](https://github.com/Lightning-AI/pytorch-lightning/pull/18690)) ### Changed -- Allow using iterable-style datasets with TPUs ([#17331](https://github.com/Lightning-AI/lightning/pull/17331)) -- Increased the minimum XLA requirement to 1.13 ([#17368](https://github.com/Lightning-AI/lightning/pull/17368)) -- Fabric argument validation now only raises an error if conflicting settings are set through the CLI ([#17679](https://github.com/Lightning-AI/lightning/pull/17679)) -- DataLoader re-instantiation is now only performed when a distributed sampler is required ([#18191](https://github.com/Lightning-AI/lightning/pull/18191)) -- Improved the formatting of emitted warnings ([#18288](https://github.com/Lightning-AI/lightning/pull/18288)) -- Broadcast and reduction of tensors with XLA-based strategies now preserve the input's device ([#18275](https://github.com/Lightning-AI/lightning/pull/18275)) -- Due to lack of reliability, Fabric now only runs on one GPU instead of all GPUs in a Jupyter notebook if `devices="auto"` (default) ([#18291](https://github.com/Lightning-AI/lightning/pull/18291)) -- Enabled launching via `torchrun` in a SLURM environment; the `TorchElasticEnvironment` now gets chosen over the `SLURMEnvironment` if both are detected ([#18618](https://github.com/Lightning-AI/lightning/pull/18618)) -- If not set by the user, Lightning will set `OMP_NUM_THREADS` to `num_cpus / num_processes` when launching subprocesses (e.g. 
when DDP is used) to avoid system overload for CPU-intensive tasks ([#18677](https://github.com/Lightning-AI/lightning/pull/18677)) +- Allow using iterable-style datasets with TPUs ([#17331](https://github.com/Lightning-AI/pytorch-lightning/pull/17331)) +- Increased the minimum XLA requirement to 1.13 ([#17368](https://github.com/Lightning-AI/pytorch-lightning/pull/17368)) +- Fabric argument validation now only raises an error if conflicting settings are set through the CLI ([#17679](https://github.com/Lightning-AI/pytorch-lightning/pull/17679)) +- DataLoader re-instantiation is now only performed when a distributed sampler is required ([#18191](https://github.com/Lightning-AI/pytorch-lightning/pull/18191)) +- Improved the formatting of emitted warnings ([#18288](https://github.com/Lightning-AI/pytorch-lightning/pull/18288)) +- Broadcast and reduction of tensors with XLA-based strategies now preserve the input's device ([#18275](https://github.com/Lightning-AI/pytorch-lightning/pull/18275)) +- Due to lack of reliability, Fabric now only runs on one GPU instead of all GPUs in a Jupyter notebook if `devices="auto"` (default) ([#18291](https://github.com/Lightning-AI/pytorch-lightning/pull/18291)) +- Enabled launching via `torchrun` in a SLURM environment; the `TorchElasticEnvironment` now gets chosen over the `SLURMEnvironment` if both are detected ([#18618](https://github.com/Lightning-AI/pytorch-lightning/pull/18618)) +- If not set by the user, Lightning will set `OMP_NUM_THREADS` to `num_cpus / num_processes` when launching subprocesses (e.g. when DDP is used) to avoid system overload for CPU-intensive tasks ([#18677](https://github.com/Lightning-AI/pytorch-lightning/pull/18677)) ### Deprecated -- Deprecated the `DDPStrategy.is_distributed` property. This strategy is distributed by definition ([#17381](https://github.com/Lightning-AI/lightning/pull/17381)) -- Deprecated the `SingleTPUStrategy` (`strategy="single_tpu"`) in favor of `SingleDeviceXLAStrategy` (`strategy="single_xla"`) ([#17383](https://github.com/Lightning-AI/lightning/pull/17383)) -- Deprecated the `TPUAccelerator` in favor of `XLAAccelerator` ([#17383](https://github.com/Lightning-AI/lightning/pull/17383)) -- Deprecated the `TPUPrecision` in favor of `XLAPrecision` ([#17383](https://github.com/Lightning-AI/lightning/pull/17383)) -- Deprecated the `TPUBf16Precision` in favor of `XLABf16Precision` ([#17383](https://github.com/Lightning-AI/lightning/pull/17383)) +- Deprecated the `DDPStrategy.is_distributed` property. This strategy is distributed by definition ([#17381](https://github.com/Lightning-AI/pytorch-lightning/pull/17381)) +- Deprecated the `SingleTPUStrategy` (`strategy="single_tpu"`) in favor of `SingleDeviceXLAStrategy` (`strategy="single_xla"`) ([#17383](https://github.com/Lightning-AI/pytorch-lightning/pull/17383)) +- Deprecated the `TPUAccelerator` in favor of `XLAAccelerator` ([#17383](https://github.com/Lightning-AI/pytorch-lightning/pull/17383)) +- Deprecated the `TPUPrecision` in favor of `XLAPrecision` ([#17383](https://github.com/Lightning-AI/pytorch-lightning/pull/17383)) +- Deprecated the `TPUBf16Precision` in favor of `XLABf16Precision` ([#17383](https://github.com/Lightning-AI/pytorch-lightning/pull/17383)) ### Removed -- Removed automatic sharding support with `Fabric.run` or using `fabric.launch(fn)`. This only impacts FSDP and DeepSpeed strategy users. 
Please instantiate your module under the newly added `fabric.init_module` context manager ([#17832](https://github.com/Lightning-AI/lightning/pull/17832)) -- Removed the unsupported `checkpoint_io` argument from the `FSDPStrategy` ([#18192](https://github.com/Lightning-AI/lightning/pull/18192)) +- Removed automatic sharding support with `Fabric.run` or using `fabric.launch(fn)`. This only impacts FSDP and DeepSpeed strategy users. Please instantiate your module under the newly added `fabric.init_module` context manager ([#17832](https://github.com/Lightning-AI/pytorch-lightning/pull/17832)) +- Removed the unsupported `checkpoint_io` argument from the `FSDPStrategy` ([#18192](https://github.com/Lightning-AI/pytorch-lightning/pull/18192)) ### Fixed -- Fixed issue where running on TPUs would select the wrong device index ([#17227](https://github.com/Lightning-AI/lightning/pull/17227)) -- Removed the need to call `.launch()` when using the DP-strategy (`strategy="dp"`) ([#17931](https://github.com/Lightning-AI/lightning/pull/17931)) -- Fixed FSDP re-applying activation checkpointing when the user had manually applied it already ([#18006](https://github.com/Lightning-AI/lightning/pull/18006)) -- Fixed FSDP re-wrapping the module root when the user had manually wrapped the model ([#18054](https://github.com/Lightning-AI/lightning/pull/18054)) -- Fixed issue where unexpected exceptions would leave the default torch dtype modified when using true precision settings ([#18500](https://github.com/Lightning-AI/lightning/pull/18500)) -- Fixed redundant input-type casting in FSDP precision ([#18630](https://github.com/Lightning-AI/lightning/pull/18630)) -- Fixed an issue with `find_usable_cuda_devices(0)` incorrectly returning a list of devices ([#18722](https://github.com/Lightning-AI/lightning/pull/18722)) -- Fixed redundant file writes in `CSVLogger` ([#18567](https://github.com/Lightning-AI/lightning/pull/18567)) +- Fixed issue where running on TPUs would select the wrong device index ([#17227](https://github.com/Lightning-AI/pytorch-lightning/pull/17227)) +- Removed the need to call `.launch()` when using the DP-strategy (`strategy="dp"`) ([#17931](https://github.com/Lightning-AI/pytorch-lightning/pull/17931)) +- Fixed FSDP re-applying activation checkpointing when the user had manually applied it already ([#18006](https://github.com/Lightning-AI/pytorch-lightning/pull/18006)) +- Fixed FSDP re-wrapping the module root when the user had manually wrapped the model ([#18054](https://github.com/Lightning-AI/pytorch-lightning/pull/18054)) +- Fixed issue where unexpected exceptions would leave the default torch dtype modified when using true precision settings ([#18500](https://github.com/Lightning-AI/pytorch-lightning/pull/18500)) +- Fixed redundant input-type casting in FSDP precision ([#18630](https://github.com/Lightning-AI/pytorch-lightning/pull/18630)) +- Fixed an issue with `find_usable_cuda_devices(0)` incorrectly returning a list of devices ([#18722](https://github.com/Lightning-AI/pytorch-lightning/pull/18722)) +- Fixed redundant file writes in `CSVLogger` ([#18567](https://github.com/Lightning-AI/pytorch-lightning/pull/18567)) ## [2.0.9] - 2023-09-14 ### Fixed -- Fixed an issue causing the `_FabricOptimizer.state` to remain outdated after loading with `load_state_dict` ([#18488](https://github.com/Lightning-AI/lightning/pull/18488)) +- Fixed an issue causing the `_FabricOptimizer.state` to remain outdated after loading with `load_state_dict` 
([#18488](https://github.com/Lightning-AI/pytorch-lightning/pull/18488)) ## [2.0.8] - 2023-08-29 ### Changed -- On XLA, avoid setting the global rank before processes have been launched as this will initialize the PJRT computation client in the main process ([#16966](https://github.com/Lightning-AI/lightning/pull/16966)) +- On XLA, avoid setting the global rank before processes have been launched as this will initialize the PJRT computation client in the main process ([#16966](https://github.com/Lightning-AI/pytorch-lightning/pull/16966)) ### Fixed -- Fixed model parameters getting shared between processes when running with `strategy="ddp_spawn"` and `accelerator="cpu"`; this has a necessary memory impact, as parameters are replicated for each process now ([#18238](https://github.com/Lightning-AI/lightning/pull/18238)) -- Removed false positive warning when using `fabric.no_backward_sync` with XLA strategies ([#17761](https://github.com/Lightning-AI/lightning/pull/17761)) -- Fixed issue where Fabric would not initialize the global rank, world size, and rank-zero-only rank after initialization and before launch ([#16966](https://github.com/Lightning-AI/lightning/pull/16966)) -- Fixed FSDP full-precision `param_dtype` training (`16-mixed`, `bf16-mixed` and `32-true` configurations) to avoid FSDP assertion errors with PyTorch < 2.0 ([#18278](https://github.com/Lightning-AI/lightning/pull/18278)) +- Fixed model parameters getting shared between processes when running with `strategy="ddp_spawn"` and `accelerator="cpu"`; this has a necessary memory impact, as parameters are replicated for each process now ([#18238](https://github.com/Lightning-AI/pytorch-lightning/pull/18238)) +- Removed false positive warning when using `fabric.no_backward_sync` with XLA strategies ([#17761](https://github.com/Lightning-AI/pytorch-lightning/pull/17761)) +- Fixed issue where Fabric would not initialize the global rank, world size, and rank-zero-only rank after initialization and before launch ([#16966](https://github.com/Lightning-AI/pytorch-lightning/pull/16966)) +- Fixed FSDP full-precision `param_dtype` training (`16-mixed`, `bf16-mixed` and `32-true` configurations) to avoid FSDP assertion errors with PyTorch < 2.0 ([#18278](https://github.com/Lightning-AI/pytorch-lightning/pull/18278)) @@ -285,64 +285,64 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
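> Editor's note: the `fabric.no_backward_sync` fix above concerns the standard gradient-accumulation pattern; a minimal sketch, assuming a plain stream of random batches and an arbitrary accumulation factor of 4:

```python
import torch
from lightning.fabric import Fabric

fabric = Fabric(accelerator="cpu", devices=1)

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
model, optimizer = fabric.setup(model, optimizer)

accumulate = 4  # arbitrary accumulation factor for illustration
for i in range(16):
    batch = torch.randn(8, 4)
    is_accumulating = (i + 1) % accumulate != 0
    # Skip the cross-process gradient sync on accumulation steps; under
    # distributed strategies this avoids redundant all-reduces.
    with fabric.no_backward_sync(model, enabled=is_accumulating):
        loss = model(batch).sum()
        fabric.backward(loss)
    if not is_accumulating:
        optimizer.step()
        optimizer.zero_grad()
```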
### Changed

-- Disabled the auto-detection of the Kubeflow environment ([#18137](https://github.com/Lightning-AI/lightning/pull/18137))
+- Disabled the auto-detection of the Kubeflow environment ([#18137](https://github.com/Lightning-AI/pytorch-lightning/pull/18137))

### Fixed

-- Fixed issue where DDP subprocesses that used Hydra would set hydra's working directory to current directory ([#18145](https://github.com/Lightning-AI/lightning/pull/18145))
-- Fixed an issue that would prevent the user to set the multiprocessing start method after importing lightning ([#18177](https://github.com/Lightning-AI/lightning/pull/18177))
-- Fixed an issue with `Fabric.all_reduce()` not performing an inplace operation for all backends consistently ([#18235](https://github.com/Lightning-AI/lightning/pull/18235))
+- Fixed issue where DDP subprocesses that used Hydra would set hydra's working directory to current directory ([#18145](https://github.com/Lightning-AI/pytorch-lightning/pull/18145))
+- Fixed an issue that would prevent the user from setting the multiprocessing start method after importing lightning ([#18177](https://github.com/Lightning-AI/pytorch-lightning/pull/18177))
+- Fixed an issue with `Fabric.all_reduce()` not performing an inplace operation for all backends consistently ([#18235](https://github.com/Lightning-AI/pytorch-lightning/pull/18235))

## [2.0.6] - 2023-07-20

### Fixed

-- Fixed `TensorBoardLogger.log_graph` not unwrapping the `_FabricModule` ([#17844](https://github.com/Lightning-AI/lightning/pull/17844))
+- Fixed `TensorBoardLogger.log_graph` not unwrapping the `_FabricModule` ([#17844](https://github.com/Lightning-AI/pytorch-lightning/pull/17844))

## [2.0.5] - 2023-07-07

### Added

-- Added validation against misconfigured device selection when using the DeepSpeed strategy ([#17952](https://github.com/Lightning-AI/lightning/pull/17952))
+- Added validation against misconfigured device selection when using the DeepSpeed strategy ([#17952](https://github.com/Lightning-AI/pytorch-lightning/pull/17952))

### Changed

-- Avoid info message when loading 0 entry point callbacks ([#17990](https://github.com/Lightning-AI/lightning/pull/17990))
+- Avoid info message when loading 0 entry point callbacks ([#17990](https://github.com/Lightning-AI/pytorch-lightning/pull/17990))

### Fixed

-- Fixed the emission of a false-positive warning when calling a method on the Fabric-wrapped module that accepts no arguments ([#17875](https://github.com/Lightning-AI/lightning/pull/17875))
-- Fixed check for FSDP's flat parameters in all parameter groups ([#17914](https://github.com/Lightning-AI/lightning/pull/17914))
-- Fixed automatic step tracking in Fabric's CSVLogger ([#17942](https://github.com/Lightning-AI/lightning/pull/17942))
-- Fixed an issue causing the `torch.set_float32_matmul_precision` info message to show multiple times ([#17960](https://github.com/Lightning-AI/lightning/pull/17960))
-- Fixed loading model state when `Fabric.load()` is called after `Fabric.setup()` ([#17997](https://github.com/Lightning-AI/lightning/pull/17997))
+- Fixed the emission of a false-positive warning when calling a method on the Fabric-wrapped module that accepts no arguments ([#17875](https://github.com/Lightning-AI/pytorch-lightning/pull/17875))
+- Fixed check for FSDP's flat parameters in all parameter groups ([#17914](https://github.com/Lightning-AI/pytorch-lightning/pull/17914))
+- Fixed automatic step tracking in Fabric's CSVLogger ([#17942](https://github.com/Lightning-AI/pytorch-lightning/pull/17942))
+- Fixed an 
issue causing the `torch.set_float32_matmul_precision` info message to show multiple times ([#17960](https://github.com/Lightning-AI/pytorch-lightning/pull/17960))
+- Fixed loading model state when `Fabric.load()` is called after `Fabric.setup()` ([#17997](https://github.com/Lightning-AI/pytorch-lightning/pull/17997))

## [2.0.4] - 2023-06-22

### Fixed

-- Fixed validation of parameters of `plugins.precision.MixedPrecision` ([#17687](https://github.com/Lightning-AI/lightning/pull/17687))
-- Fixed an issue with hpu imports leading to performance degradation ([#17788](https://github.com/Lightning-AI/lightning/pull/17788))
+- Fixed validation of parameters of `plugins.precision.MixedPrecision` ([#17687](https://github.com/Lightning-AI/pytorch-lightning/pull/17687))
+- Fixed an issue with hpu imports leading to performance degradation ([#17788](https://github.com/Lightning-AI/pytorch-lightning/pull/17788))

-- Fixed computing the next version folder in `CSVLogger` ([#17139](https://github.com/Lightning-AI/lightning/pull/17139), [#17139](https://github.com/Lightning-AI/lightning/pull/17986))
+- Fixed computing the next version folder in `CSVLogger` ([#17139](https://github.com/Lightning-AI/pytorch-lightning/pull/17139), [#17986](https://github.com/Lightning-AI/pytorch-lightning/pull/17986))

## [2.0.3] - 2023-06-07

-- Added support for `Callback` registration through entry points ([#17756](https://github.com/Lightning-AI/lightning/pull/17756))
+- Added support for `Callback` registration through entry points ([#17756](https://github.com/Lightning-AI/pytorch-lightning/pull/17756))

### Changed

-- Made type hints public ([#17100](https://github.com/Lightning-AI/lightning/pull/17100))
-- Support compiling a module after it was set up by Fabric ([#17529](https://github.com/Lightning-AI/lightning/pull/17529))
+- Made type hints public ([#17100](https://github.com/Lightning-AI/pytorch-lightning/pull/17100))
+- Support compiling a module after it was set up by Fabric ([#17529](https://github.com/Lightning-AI/pytorch-lightning/pull/17529))

### Fixed

-- Fixed computing the next version folder in `CSVLogger` ([#17139](https://github.com/Lightning-AI/lightning/pull/17139))
+- Fixed computing the next version folder in `CSVLogger` ([#17139](https://github.com/Lightning-AI/pytorch-lightning/pull/17139))

- Fixed inconsistent settings for FSDP Precision ([#17670](https://github.com/Lightning-AI/pytorch-lightning/issues/17670))


@@ -350,143 +350,143 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
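> Editor's note: several entries above touch `Fabric.save`/`Fabric.load`, including the fix for calling `Fabric.load()` after `Fabric.setup()`; a minimal sketch (the checkpoint path and state keys are illustrative):

```python
import torch
from lightning.fabric import Fabric

fabric = Fabric(accelerator="cpu", devices=1)

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
model, optimizer = fabric.setup(model, optimizer)

state = {"model": model, "optimizer": optimizer, "step": 0}
fabric.save("checkpoint.ckpt", state)  # illustrative path

# Loading after setup() is the case fixed above; `Fabric.load` restores
# the objects in place and returns anything it did not consume.
remainder = fabric.load("checkpoint.ckpt", state)
```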
### Changed -- Enabled precision autocast for LightningModule step methods in Fabric ([#17439](https://github.com/Lightning-AI/lightning/pull/17439)) +- Enabled precision autocast for LightningModule step methods in Fabric ([#17439](https://github.com/Lightning-AI/pytorch-lightning/pull/17439)) ### Fixed -- Fixed an issue with `LightningModule.*_step` methods bypassing the DDP/FSDP wrapper ([#17424](https://github.com/Lightning-AI/lightning/pull/17424)) -- Fixed device handling in `Fabric.setup()` when the model has no parameters ([#17441](https://github.com/Lightning-AI/lightning/pull/17441)) +- Fixed an issue with `LightningModule.*_step` methods bypassing the DDP/FSDP wrapper ([#17424](https://github.com/Lightning-AI/pytorch-lightning/pull/17424)) +- Fixed device handling in `Fabric.setup()` when the model has no parameters ([#17441](https://github.com/Lightning-AI/pytorch-lightning/pull/17441)) ## [2.0.1] - 2023-03-30 ### Changed -- Generalized `Optimizer` validation to accommodate both FSDP 1.x and 2.x ([#16733](https://github.com/Lightning-AI/lightning/pull/16733)) +- Generalized `Optimizer` validation to accommodate both FSDP 1.x and 2.x ([#16733](https://github.com/Lightning-AI/pytorch-lightning/pull/16733)) ## [2.0.0] - 2023-03-15 ### Added -- Added `Fabric.all_reduce` ([#16459](https://github.com/Lightning-AI/lightning/pull/16459)) -- Added support for saving and loading DeepSpeed checkpoints through `Fabric.save/load()` ([#16452](https://github.com/Lightning-AI/lightning/pull/16452)) -- Added support for automatically calling `set_epoch` on the `dataloader.batch_sampler.sampler` ([#16841](https://github.com/Lightning-AI/lightning/pull/16841)) -- Added support for writing logs to remote file systems with the `CSVLogger` ([#16880](https://github.com/Lightning-AI/lightning/pull/16880)) -- Added support for frozen dataclasses in the optimizer state ([#16656](https://github.com/Lightning-AI/lightning/pull/16656)) -- Added `lightning.fabric.is_wrapped` to check whether a module, optimizer, or dataloader was already wrapped by Fabric ([#16953](https://github.com/Lightning-AI/lightning/pull/16953)) +- Added `Fabric.all_reduce` ([#16459](https://github.com/Lightning-AI/pytorch-lightning/pull/16459)) +- Added support for saving and loading DeepSpeed checkpoints through `Fabric.save/load()` ([#16452](https://github.com/Lightning-AI/pytorch-lightning/pull/16452)) +- Added support for automatically calling `set_epoch` on the `dataloader.batch_sampler.sampler` ([#16841](https://github.com/Lightning-AI/pytorch-lightning/pull/16841)) +- Added support for writing logs to remote file systems with the `CSVLogger` ([#16880](https://github.com/Lightning-AI/pytorch-lightning/pull/16880)) +- Added support for frozen dataclasses in the optimizer state ([#16656](https://github.com/Lightning-AI/pytorch-lightning/pull/16656)) +- Added `lightning.fabric.is_wrapped` to check whether a module, optimizer, or dataloader was already wrapped by Fabric ([#16953](https://github.com/Lightning-AI/pytorch-lightning/pull/16953)) ### Changed -- Fabric now chooses `accelerator="auto", strategy="auto", devices="auto"` as defaults ([#16842](https://github.com/Lightning-AI/lightning/pull/16842)) -- Checkpoint saving and loading redesign ([#16434](https://github.com/Lightning-AI/lightning/pull/16434)) +- Fabric now chooses `accelerator="auto", strategy="auto", devices="auto"` as defaults ([#16842](https://github.com/Lightning-AI/pytorch-lightning/pull/16842)) +- Checkpoint saving and loading redesign 
([#16434](https://github.com/Lightning-AI/pytorch-lightning/pull/16434))
  * Changed the method signature of `Fabric.save` and `Fabric.load`
  * Changed the method signature of `Strategy.save_checkpoint` and `Fabric.load_checkpoint`
  * `Fabric.save` accepts a state that can contain model and optimizer references
  * `Fabric.load` can now load state in-place onto models and optimizers
  * `Fabric.load` returns a dictionary of objects that weren't loaded into the state
  * `Strategy.save_checkpoint` and `Fabric.load_checkpoint` are now responsible for accessing the state of the model and optimizers
-- `DataParallelStrategy.get_module_state_dict()` and `DDPStrategy.get_module_state_dict()` now correctly extracts the state dict without keys prefixed with 'module' ([#16487](https://github.com/Lightning-AI/lightning/pull/16487))
-- "Native" suffix removal ([#16490](https://github.com/Lightning-AI/lightning/pull/16490))
+- `DataParallelStrategy.get_module_state_dict()` and `DDPStrategy.get_module_state_dict()` now correctly extract the state dict without keys prefixed with 'module' ([#16487](https://github.com/Lightning-AI/pytorch-lightning/pull/16487))
+- "Native" suffix removal ([#16490](https://github.com/Lightning-AI/pytorch-lightning/pull/16490))
  * `strategy="fsdp_full_shard_offload"` is now `strategy="fsdp_cpu_offload"`
  * `lightning.fabric.plugins.precision.native_amp` is now `lightning.fabric.plugins.precision.amp`
-- Enabled all shorthand strategy names that can be supported in the CLI ([#16485](https://github.com/Lightning-AI/lightning/pull/16485))
-- Renamed `strategy='tpu_spawn'` to `strategy='xla'` and `strategy='tpu_spawn_debug'` to `strategy='xla_debug'` ([#16781](https://github.com/Lightning-AI/lightning/pull/16781))
-- Changed arguments for precision settings (from [64|32|16|bf16] to ["64-true"|"32-true"|"16-mixed"|"bf16-mixed"]) ([#16767](https://github.com/Lightning-AI/lightning/pull/16767))
-- The selection `Fabric(strategy="ddp_spawn", ...)` no longer falls back to "ddp" when a cluster environment gets detected ([#16780](https://github.com/Lightning-AI/lightning/pull/16780))
-- Renamed `setup_dataloaders(replace_sampler=...)` to `setup_dataloaders(use_distributed_sampler=...)` ([#16829](https://github.com/Lightning-AI/lightning/pull/16829))
+- Enabled all shorthand strategy names that can be supported in the CLI ([#16485](https://github.com/Lightning-AI/pytorch-lightning/pull/16485))
+- Renamed `strategy='tpu_spawn'` to `strategy='xla'` and `strategy='tpu_spawn_debug'` to `strategy='xla_debug'` ([#16781](https://github.com/Lightning-AI/pytorch-lightning/pull/16781))
+- Changed arguments for precision settings (from [64|32|16|bf16] to ["64-true"|"32-true"|"16-mixed"|"bf16-mixed"]) ([#16767](https://github.com/Lightning-AI/pytorch-lightning/pull/16767))
+- The selection `Fabric(strategy="ddp_spawn", ...)` no longer falls back to "ddp" when a cluster environment gets detected ([#16780](https://github.com/Lightning-AI/pytorch-lightning/pull/16780))
+- Renamed `setup_dataloaders(replace_sampler=...)` to `setup_dataloaders(use_distributed_sampler=...)` ([#16829](https://github.com/Lightning-AI/pytorch-lightning/pull/16829))

### Removed

-- Removed support for PyTorch 1.10 ([#16492](https://github.com/Lightning-AI/lightning/pull/16492))
-- Removed support for Python 3.7 ([#16579](https://github.com/Lightning-AI/lightning/pull/16579))
+- Removed support for PyTorch 1.10 ([#16492](https://github.com/Lightning-AI/pytorch-lightning/pull/16492))
+- Removed support for Python 3.7 
([#16579](https://github.com/Lightning-AI/pytorch-lightning/pull/16579)) ### Fixed -- Fixed issue where the wrapped dataloader `iter()` would be called twice ([#16841](https://github.com/Lightning-AI/lightning/pull/16841)) +- Fixed issue where the wrapped dataloader `iter()` would be called twice ([#16841](https://github.com/Lightning-AI/pytorch-lightning/pull/16841)) -- Improved the error message for installing tensorboard or tensorboardx ([#17053](https://github.com/Lightning-AI/lightning/pull/17053)) +- Improved the error message for installing tensorboard or tensorboardx ([#17053](https://github.com/Lightning-AI/pytorch-lightning/pull/17053)) ## [1.9.4] - 2023-03-01 ### Added -- Added `Fabric(strategy="auto")` support ([#16916](https://github.com/Lightning-AI/lightning/pull/16916)) +- Added `Fabric(strategy="auto")` support ([#16916](https://github.com/Lightning-AI/pytorch-lightning/pull/16916)) ### Fixed -- Fixed edge cases in parsing device ids using NVML ([#16795](https://github.com/Lightning-AI/lightning/pull/16795)) -- Fixed DDP spawn hang on TPU Pods ([#16844](https://github.com/Lightning-AI/lightning/pull/16844)) -- Fixed an error when passing `find_usable_cuda_devices(num_devices=-1)` ([#16866](https://github.com/Lightning-AI/lightning/pull/16866)) +- Fixed edge cases in parsing device ids using NVML ([#16795](https://github.com/Lightning-AI/pytorch-lightning/pull/16795)) +- Fixed DDP spawn hang on TPU Pods ([#16844](https://github.com/Lightning-AI/pytorch-lightning/pull/16844)) +- Fixed an error when passing `find_usable_cuda_devices(num_devices=-1)` ([#16866](https://github.com/Lightning-AI/pytorch-lightning/pull/16866)) ## [1.9.3] - 2023-02-21 ### Fixed -- Fixed an issue causing a wrong environment plugin to be selected when `accelerator=tpu` and `devices > 1` ([#16806](https://github.com/Lightning-AI/lightning/pull/16806)) -- Fixed parsing of defaults for `--accelerator` and `--precision` in Fabric CLI when `accelerator` and `precision` are set to non-default values in the code ([#16818](https://github.com/Lightning-AI/lightning/pull/16818)) +- Fixed an issue causing a wrong environment plugin to be selected when `accelerator=tpu` and `devices > 1` ([#16806](https://github.com/Lightning-AI/pytorch-lightning/pull/16806)) +- Fixed parsing of defaults for `--accelerator` and `--precision` in Fabric CLI when `accelerator` and `precision` are set to non-default values in the code ([#16818](https://github.com/Lightning-AI/pytorch-lightning/pull/16818)) ## [1.9.2] - 2023-02-15 ### Fixed -- Fixed an attribute error and improved input validation for invalid strategy types being passed to Trainer ([#16693](https://github.com/Lightning-AI/lightning/pull/16693)) +- Fixed an attribute error and improved input validation for invalid strategy types being passed to Trainer ([#16693](https://github.com/Lightning-AI/pytorch-lightning/pull/16693)) ## [1.9.1] - 2023-02-10 ### Fixed -- Fixed error handling for `accelerator="mps"` and `ddp` strategy pairing ([#16455](https://github.com/Lightning-AI/lightning/pull/16455)) -- Fixed strict availability check for `torch_xla` requirement ([#16476](https://github.com/Lightning-AI/lightning/pull/16476)) -- Fixed an issue where PL would wrap DataLoaders with XLA's MpDeviceLoader more than once ([#16571](https://github.com/Lightning-AI/lightning/pull/16571)) -- Fixed the batch_sampler reference for DataLoaders wrapped with XLA's MpDeviceLoader ([#16571](https://github.com/Lightning-AI/lightning/pull/16571)) -- Fixed an import error when 
`torch.distributed` is not available ([#16658](https://github.com/Lightning-AI/lightning/pull/16658)) +- Fixed error handling for `accelerator="mps"` and `ddp` strategy pairing ([#16455](https://github.com/Lightning-AI/pytorch-lightning/pull/16455)) +- Fixed strict availability check for `torch_xla` requirement ([#16476](https://github.com/Lightning-AI/pytorch-lightning/pull/16476)) +- Fixed an issue where PL would wrap DataLoaders with XLA's MpDeviceLoader more than once ([#16571](https://github.com/Lightning-AI/pytorch-lightning/pull/16571)) +- Fixed the batch_sampler reference for DataLoaders wrapped with XLA's MpDeviceLoader ([#16571](https://github.com/Lightning-AI/pytorch-lightning/pull/16571)) +- Fixed an import error when `torch.distributed` is not available ([#16658](https://github.com/Lightning-AI/pytorch-lightning/pull/16658)) ## [1.9.0] - 2023-01-17 ### Added -- Added `Fabric.launch()` to programmatically launch processes (e.g. in Jupyter notebook) ([#14992](https://github.com/Lightning-AI/lightning/pull/14992)) -- Added the option to launch Fabric scripts from the CLI, without the need to wrap the code into the `run` method ([#14992](https://github.com/Lightning-AI/lightning/pull/14992)) -- Added `Fabric.setup_module()` and `Fabric.setup_optimizers()` to support strategies that need to set up the model before an optimizer can be created ([#15185](https://github.com/Lightning-AI/lightning/pull/15185)) -- Added support for Fully Sharded Data Parallel (FSDP) training in Lightning Lite ([#14967](https://github.com/Lightning-AI/lightning/pull/14967)) -- Added `lightning.fabric.accelerators.find_usable_cuda_devices` utility function ([#16147](https://github.com/Lightning-AI/lightning/pull/16147)) -- Added basic support for LightningModules ([#16048](https://github.com/Lightning-AI/lightning/pull/16048)) -- Added support for managing callbacks via `Fabric(callbacks=...)` and emitting events through `Fabric.call()` ([#16074](https://github.com/Lightning-AI/lightning/pull/16074)) -- Added Logger support ([#16121](https://github.com/Lightning-AI/lightning/pull/16121)) +- Added `Fabric.launch()` to programmatically launch processes (e.g. 
in Jupyter notebook) ([#14992](https://github.com/Lightning-AI/pytorch-lightning/pull/14992)) +- Added the option to launch Fabric scripts from the CLI, without the need to wrap the code into the `run` method ([#14992](https://github.com/Lightning-AI/pytorch-lightning/pull/14992)) +- Added `Fabric.setup_module()` and `Fabric.setup_optimizers()` to support strategies that need to set up the model before an optimizer can be created ([#15185](https://github.com/Lightning-AI/pytorch-lightning/pull/15185)) +- Added support for Fully Sharded Data Parallel (FSDP) training in Lightning Lite ([#14967](https://github.com/Lightning-AI/pytorch-lightning/pull/14967)) +- Added `lightning.fabric.accelerators.find_usable_cuda_devices` utility function ([#16147](https://github.com/Lightning-AI/pytorch-lightning/pull/16147)) +- Added basic support for LightningModules ([#16048](https://github.com/Lightning-AI/pytorch-lightning/pull/16048)) +- Added support for managing callbacks via `Fabric(callbacks=...)` and emitting events through `Fabric.call()` ([#16074](https://github.com/Lightning-AI/pytorch-lightning/pull/16074)) +- Added Logger support ([#16121](https://github.com/Lightning-AI/pytorch-lightning/pull/16121)) * Added `Fabric(loggers=...)` to support different Logger frameworks in Fabric * Added `Fabric.log` for logging scalars using multiple loggers * Added `Fabric.log_dict` for logging a dictionary of multiple metrics at once * Added `Fabric.loggers` and `Fabric.logger` attributes to access the individual logger instances * Added support for calling `self.log` and `self.log_dict` in a LightningModule when using Fabric * Added access to `self.logger` and `self.loggers` in a LightningModule when using Fabric -- Added `lightning.fabric.loggers.TensorBoardLogger` ([#16121](https://github.com/Lightning-AI/lightning/pull/16121)) -- Added `lightning.fabric.loggers.CSVLogger` ([#16346](https://github.com/Lightning-AI/lightning/pull/16346)) -- Added support for a consistent `.zero_grad(set_to_none=...)` on the wrapped optimizer regardless of which strategy is used ([#16275](https://github.com/Lightning-AI/lightning/pull/16275)) +- Added `lightning.fabric.loggers.TensorBoardLogger` ([#16121](https://github.com/Lightning-AI/pytorch-lightning/pull/16121)) +- Added `lightning.fabric.loggers.CSVLogger` ([#16346](https://github.com/Lightning-AI/pytorch-lightning/pull/16346)) +- Added support for a consistent `.zero_grad(set_to_none=...)` on the wrapped optimizer regardless of which strategy is used ([#16275](https://github.com/Lightning-AI/pytorch-lightning/pull/16275)) ### Changed -- Renamed the class `LightningLite` to `Fabric` ([#15932](https://github.com/Lightning-AI/lightning/pull/15932), [#15938](https://github.com/Lightning-AI/lightning/pull/15938)) -- The `Fabric.run()` method is no longer abstract ([#14992](https://github.com/Lightning-AI/lightning/pull/14992)) -- The `XLAStrategy` now inherits from `ParallelStrategy` instead of `DDPSpawnStrategy` ([#15838](https://github.com/Lightning-AI/lightning/pull/15838)) -- Merged the implementation of `DDPSpawnStrategy` into `DDPStrategy` and removed `DDPSpawnStrategy` ([#14952](https://github.com/Lightning-AI/lightning/pull/14952)) -- The dataloader wrapper returned from `.setup_dataloaders()` now calls `.set_epoch()` on the distributed sampler if one is used ([#16101](https://github.com/Lightning-AI/lightning/pull/16101)) -- Renamed `Strategy.reduce` to `Strategy.all_reduce` in all strategies ([#16370](https://github.com/Lightning-AI/lightning/pull/16370)) -- 
When using multiple devices, the strategy now defaults to "ddp" instead of "ddp_spawn" when none is set ([#16388](https://github.com/Lightning-AI/lightning/pull/16388)) +- Renamed the class `LightningLite` to `Fabric` ([#15932](https://github.com/Lightning-AI/pytorch-lightning/pull/15932), [#15938](https://github.com/Lightning-AI/pytorch-lightning/pull/15938)) +- The `Fabric.run()` method is no longer abstract ([#14992](https://github.com/Lightning-AI/pytorch-lightning/pull/14992)) +- The `XLAStrategy` now inherits from `ParallelStrategy` instead of `DDPSpawnStrategy` ([#15838](https://github.com/Lightning-AI/pytorch-lightning/pull/15838)) +- Merged the implementation of `DDPSpawnStrategy` into `DDPStrategy` and removed `DDPSpawnStrategy` ([#14952](https://github.com/Lightning-AI/pytorch-lightning/pull/14952)) +- The dataloader wrapper returned from `.setup_dataloaders()` now calls `.set_epoch()` on the distributed sampler if one is used ([#16101](https://github.com/Lightning-AI/pytorch-lightning/pull/16101)) +- Renamed `Strategy.reduce` to `Strategy.all_reduce` in all strategies ([#16370](https://github.com/Lightning-AI/pytorch-lightning/pull/16370)) +- When using multiple devices, the strategy now defaults to "ddp" instead of "ddp_spawn" when none is set ([#16388](https://github.com/Lightning-AI/pytorch-lightning/pull/16388)) ### Removed -- Removed support for FairScale's sharded training (`strategy='ddp_sharded'|'ddp_sharded_spawn'`). Use Fully-Sharded Data Parallel instead (`strategy='fsdp'`) ([#16329](https://github.com/Lightning-AI/lightning/pull/16329)) +- Removed support for FairScale's sharded training (`strategy='ddp_sharded'|'ddp_sharded_spawn'`). Use Fully-Sharded Data Parallel instead (`strategy='fsdp'`) ([#16329](https://github.com/Lightning-AI/pytorch-lightning/pull/16329)) ### Fixed -- Restored sampling parity between PyTorch and Fabric dataloaders when using the `DistributedSampler` ([#16101](https://github.com/Lightning-AI/lightning/pull/16101)) -- Fixes an issue where the error message wouldn't tell the user the real value that was passed through the CLI ([#16334](https://github.com/Lightning-AI/lightning/pull/16334)) +- Restored sampling parity between PyTorch and Fabric dataloaders when using the `DistributedSampler` ([#16101](https://github.com/Lightning-AI/pytorch-lightning/pull/16101)) +- Fixed an issue where the error message wouldn't tell the user the real value that was passed through the CLI ([#16334](https://github.com/Lightning-AI/pytorch-lightning/pull/16334)) ## [1.8.6] - 2022-12-21 @@ -503,26 +503,26 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
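A minimal sketch of the Fabric logging API added under [1.9.0] above (assumes `lightning>=1.9` with `tensorboard` installed; the directories, metric names, and values are illustrative):

```python
from lightning.fabric import Fabric
from lightning.fabric.loggers import CSVLogger, TensorBoardLogger

# Fabric fans each `log`/`log_dict` call out to every configured logger.
fabric = Fabric(accelerator="cpu", devices=1, loggers=[CSVLogger("logs"), TensorBoardLogger("tb_logs")])
fabric.launch()

fabric.log("train_loss", 0.42, step=0)                     # one scalar
fabric.log_dict({"train_loss": 0.40, "lr": 1e-3}, step=1)  # several metrics at once
print(fabric.loggers)  # the individual logger instances remain accessible
```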
### Fixed -- Fixed `shuffle=False` having no effect when using DDP/DistributedSampler ([#15931](https://github.com/Lightning-AI/lightning/pull/15931)) +- Fixed `shuffle=False` having no effect when using DDP/DistributedSampler ([#15931](https://github.com/Lightning-AI/pytorch-lightning/pull/15931)) ## [1.8.3] - 2022-11-22 ### Changed -- Temporarily removed support for Hydra multi-run ([#15737](https://github.com/Lightning-AI/lightning/pull/15737)) +- Temporarily removed support for Hydra multi-run ([#15737](https://github.com/Lightning-AI/pytorch-lightning/pull/15737)) ## [1.8.2] - 2022-11-17 ### Fixed -- Fixed the automatic fallback from `LightningLite(strategy="ddp_spawn", ...)` to `LightningLite(strategy="ddp", ...)` when on an LSF cluster ([#15103](https://github.com/Lightning-AI/lightning/pull/15103)) +- Fixed the automatic fallback from `LightningLite(strategy="ddp_spawn", ...)` to `LightningLite(strategy="ddp", ...)` when on an LSF cluster ([#15103](https://github.com/Lightning-AI/pytorch-lightning/pull/15103)) ## [1.8.1] - 2022-11-10 ### Fixed -- Fix an issue with the SLURM `srun` detection causing permission errors ([#15485](https://github.com/Lightning-AI/lightning/pull/15485)) -- Fixed the import of `lightning_lite` causing a warning 'Redirects are currently not supported in Windows or MacOs' ([#15610](https://github.com/Lightning-AI/lightning/pull/15610)) +- Fixed an issue with the SLURM `srun` detection causing permission errors ([#15485](https://github.com/Lightning-AI/pytorch-lightning/pull/15485)) +- Fixed the import of `lightning_lite` causing a warning 'Redirects are currently not supported in Windows or MacOs' ([#15610](https://github.com/Lightning-AI/pytorch-lightning/pull/15610)) diff --git a/src/lightning/fabric/strategies/xla.py b/src/lightning/fabric/strategies/xla.py index 3b2e10e87b0a7..5fd6651b12710 100644 --- a/src/lightning/fabric/strategies/xla.py +++ b/src/lightning/fabric/strategies/xla.py @@ -133,7 +133,7 @@ def setup_environment(self) -> None: assert self.parallel_devices is not None if len(self.parallel_devices) == 1: # spawning only 1 device with PjRT is not supported: - # https://github.com/Lightning-AI/lightning/pull/17408#discussion_r1170671732 + # https://github.com/Lightning-AI/pytorch-lightning/pull/17408#discussion_r1170671732 raise NotImplementedError( f"The {type(self).__name__} does not support running on a single device with the PjRT runtime." " Try using all devices or the `SingleDeviceXLAStrategy` strategy" diff --git a/src/lightning/fabric/strategies/xla_fsdp.py b/src/lightning/fabric/strategies/xla_fsdp.py index 87e45293e5e47..8e19fc032e910 100644 --- a/src/lightning/fabric/strategies/xla_fsdp.py +++ b/src/lightning/fabric/strategies/xla_fsdp.py @@ -184,7 +184,7 @@ def setup_environment(self) -> None: assert self.parallel_devices is not None if len(self.parallel_devices) == 1: # spawning only 1 device with PjRT is not supported: - # https://github.com/Lightning-AI/lightning/pull/17408#discussion_r1170671732 + # https://github.com/Lightning-AI/pytorch-lightning/pull/17408#discussion_r1170671732 raise NotImplementedError( f"The {type(self).__name__} does not support running on a single device with the PjRT runtime." 
" Try using all devices or the `SingleDeviceXLAStrategy` strategy" diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index 627e8790cb940..a24ff385cb12c 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -89,15 +89,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Removed -- Removed support for PyTorch 2.1 ([#20009](https://github.com/Lightning-AI/lightning/pull/20009)) -- Removed support for Python 3.8 ([#20071](https://github.com/Lightning-AI/lightning/pull/20071)) +- Removed support for PyTorch 2.1 ([#20009](https://github.com/Lightning-AI/pytorch-lightning/pull/20009)) +- Removed support for Python 3.8 ([#20071](https://github.com/Lightning-AI/pytorch-lightning/pull/20071)) ### Fixed - Avoid LightningCLI saving hyperparameters with `class_path` and `init_args` since this would be a breaking change ([#20068](https://github.com/Lightning-AI/pytorch-lightning/pull/20068)) - Fixed an issue that would cause too many printouts of the seed info when using `seed_everything()` ([#20108](https://github.com/Lightning-AI/pytorch-lightning/pull/20108)) - Fixed `_LoggerConnector`'s `_ResultMetric` to move all registered keys to the device of the logged value if needed ([#19814](https://github.com/Lightning-AI/pytorch-lightning/issues/19814)) -- Fixed `_optimizer_to_device` logic for special 'step' key in optimizer state causing performance regression ([#20019](https://github.com/Lightning-AI/lightning/pull/20019)) +- Fixed `_optimizer_to_device` logic for special 'step' key in optimizer state causing performance regression ([#20019](https://github.com/Lightning-AI/pytorch-lightning/pull/20019)) - Fixed parameter counts in `ModelSummary` when model has distributed parameters (DTensor) ([#20163](https://github.com/Lightning-AI/pytorch-lightning/pull/20163)) - Fixed PyTorch Lightning FSDP taking more memory than PyTorch FSDP ([#20323](https://github.com/Lightning-AI/pytorch-lightning/pull/20323)) @@ -106,8 +106,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added -- The `ModelSummary` and `RichModelSummary` callbacks now display the training mode of each layer in the column "Mode" ([#19468](https://github.com/Lightning-AI/lightning/pull/19468)) -- Added `load_from_checkpoint` support for `LightningCLI` when using dependency injection ([#18105](https://github.com/Lightning-AI/lightning/pull/18105)) +- The `ModelSummary` and `RichModelSummary` callbacks now display the training mode of each layer in the column "Mode" ([#19468](https://github.com/Lightning-AI/pytorch-lightning/pull/19468)) +- Added `load_from_checkpoint` support for `LightningCLI` when using dependency injection ([#18105](https://github.com/Lightning-AI/pytorch-lightning/pull/18105)) - Added robust timer duration parsing with an informative error message when parsing fails ([#19513](https://github.com/Lightning-AI/pytorch-lightning/pull/19513)) - Added `on_exception` hook to `LightningDataModule` ([#19601](https://github.com/Lightning-AI/pytorch-lightning/pull/19601)) - Added support for PyTorch 2.3 ([#19708](https://github.com/Lightning-AI/pytorch-lightning/pull/19708)) @@ -117,18 +117,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
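The `on_exception` datamodule hook noted in the Added entries above can be overridden as in this minimal sketch (the class name and cleanup body are illustrative, not from the source):

```python
from lightning.pytorch import LightningDataModule


class StreamingDataModule(LightningDataModule):
    # Illustrative datamodule that releases a resource when training crashes.

    def setup(self, stage: str) -> None:
        self.connection = open("data.txt")  # stand-in for a real resource

    def on_exception(self, exception: BaseException) -> None:
        # Invoked when the Trainer hits an unhandled exception mid-run,
        # giving the datamodule a chance to clean up after itself.
        self.connection.close()
```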
### Changed -- The `prepare_data()` hook in `LightningModule` and `LightningDataModule` is now subject to a barrier without timeout to avoid long-running tasks to be interrupted ([#19448](https://github.com/Lightning-AI/lightning/pull/19448)) +- The `prepare_data()` hook in `LightningModule` and `LightningDataModule` is now subject to a barrier without timeout to avoid long-running tasks being interrupted ([#19448](https://github.com/Lightning-AI/pytorch-lightning/pull/19448)) - Relaxed the requirement for custom batch samplers to expose `drop_last` for prediction ([#19678](https://github.com/Lightning-AI/pytorch-lightning/pull/19678)) - It is no longer allowed to skip `training_step()` by returning `None` in distributed training ([#19918](https://github.com/Lightning-AI/pytorch-lightning/pull/19918)) ### Removed -- Removed the Bagua integration (`Trainer(strategy="bagua")`) ([#19445](https://github.com/Lightning-AI/lightning/pull/19445)) -- Removed support for PyTorch 1.13 ([#19706](https://github.com/Lightning-AI/lightning/pull/19706)) +- Removed the Bagua integration (`Trainer(strategy="bagua")`) ([#19445](https://github.com/Lightning-AI/pytorch-lightning/pull/19445)) +- Removed support for PyTorch 1.13 ([#19706](https://github.com/Lightning-AI/pytorch-lightning/pull/19706)) ### Fixed -- Fixed a matrix shape mismatch issue when running a model loaded from a quantized checkpoint (bitsandbytes) ([#19886](https://github.com/Lightning-AI/lightning/pull/19886)) +- Fixed a matrix shape mismatch issue when running a model loaded from a quantized checkpoint (bitsandbytes) ([#19886](https://github.com/Lightning-AI/pytorch-lightning/pull/19886)) - Fixed `WandbLogger.log_hyperparameters()` raising an error if hyperparameters are not JSON serializable ([#19769](https://github.com/Lightning-AI/pytorch-lightning/pull/19769)) - Fixed an issue with the LightningCLI not being able to set the `ModelCheckpoint(save_last=...)` argument ([#19808](https://github.com/Lightning-AI/pytorch-lightning/pull/19808)) - Fixed an issue causing ValueError for certain objects such as TorchMetrics when dumping hyperparameters to YAML ([#19804](https://github.com/Lightning-AI/pytorch-lightning/pull/19804)) @@ -148,8 +148,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed -- Fixed an issue with CSVLogger trying to append to file from a previous run when the version is set manually ([#19446](https://github.com/Lightning-AI/lightning/pull/19446)) -- Fixed the divisibility check for `Trainer.accumulate_grad_batches` and `Trainer.log_every_n_steps` in ThroughputMonitor ([#19470](https://github.com/Lightning-AI/lightning/pull/19470)) +- Fixed an issue with CSVLogger trying to append to file from a previous run when the version is set manually ([#19446](https://github.com/Lightning-AI/pytorch-lightning/pull/19446)) +- Fixed the divisibility check for `Trainer.accumulate_grad_batches` and `Trainer.log_every_n_steps` in ThroughputMonitor ([#19470](https://github.com/Lightning-AI/pytorch-lightning/pull/19470)) - Fixed support for Remote Stop and Remote Abort with NeptuneLogger ([#19130](https://github.com/Lightning-AI/pytorch-lightning/pull/19130)) - Fixed infinite recursion error in precision plugin graveyard ([#19542](https://github.com/Lightning-AI/pytorch-lightning/pull/19542)) @@ -158,72 +158,72 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Added -- Added `lightning.pytorch.callbacks.ThroughputMonitor` to track throughput and log it ([#18848](https://github.com/Lightning-AI/lightning/pull/18848)) -- The Trainer now restores the training mode set through `.train()` or `.eval()` on a submodule-level when switching from validation to training ([#18951](https://github.com/Lightning-AI/lightning/pull/18951)) -- Added support for meta-device initialization and materialization of 4-bit Bitsandbytes layers ([#19150](https://github.com/Lightning-AI/lightning/pull/19150)) -- Added `TransformerEnginePrecision(fallback_compute_dtype=)` to control the dtype of operations that don't support fp8 ([#19082](https://github.com/Lightning-AI/lightning/pull/19082)) -- Added the option `ModelCheckpoint(save_last='link')` to create a symbolic link for the 'last.ckpt' file ([#19191](https://github.com/Lightning-AI/lightning/pull/19191)) -- Added a utility function and CLI to consolidate FSDP sharded checkpoints into a single file ([#19213](https://github.com/Lightning-AI/lightning/pull/19213)) -- The TQDM progress bar now respects the env variable `TQDM_MINITERS` for setting the refresh rate ([#19381](https://github.com/Lightning-AI/lightning/pull/19381)) -- Added support for saving and loading stateful training DataLoaders ([#19361](https://github.com/Lightning-AI/lightning/pull/19361)) -- Added shortcut name `strategy='deepspeed_stage_1_offload'` to the strategy registry ([#19075](https://github.com/Lightning-AI/lightning/pull/19075)) -- Added support for non-strict state-dict loading in Trainer via the new `LightningModule.strict_loading = True | False` attribute ([#19404](https://github.com/Lightning-AI/lightning/pull/19404)) +- Added `lightning.pytorch.callbacks.ThroughputMonitor` to track throughput and log it ([#18848](https://github.com/Lightning-AI/pytorch-lightning/pull/18848)) +- The Trainer now restores the training mode set through `.train()` or `.eval()` on a submodule-level when switching from validation to training ([#18951](https://github.com/Lightning-AI/pytorch-lightning/pull/18951)) +- Added support for meta-device initialization and materialization of 4-bit Bitsandbytes layers ([#19150](https://github.com/Lightning-AI/pytorch-lightning/pull/19150)) +- Added `TransformerEnginePrecision(fallback_compute_dtype=)` to control the dtype of operations that don't support fp8 ([#19082](https://github.com/Lightning-AI/pytorch-lightning/pull/19082)) +- Added the option `ModelCheckpoint(save_last='link')` to create a symbolic link for the 'last.ckpt' file ([#19191](https://github.com/Lightning-AI/pytorch-lightning/pull/19191)) +- Added a utility function and CLI to consolidate FSDP sharded checkpoints into a single file ([#19213](https://github.com/Lightning-AI/pytorch-lightning/pull/19213)) +- The TQDM progress bar now respects the env variable `TQDM_MINITERS` for setting the refresh rate ([#19381](https://github.com/Lightning-AI/pytorch-lightning/pull/19381)) +- Added support for saving and loading stateful training DataLoaders ([#19361](https://github.com/Lightning-AI/pytorch-lightning/pull/19361)) +- Added shortcut name `strategy='deepspeed_stage_1_offload'` to the strategy registry ([#19075](https://github.com/Lightning-AI/pytorch-lightning/pull/19075)) +- Added support for non-strict state-dict loading in Trainer via the new `LightningModule.strict_loading = True | False` attribute ([#19404](https://github.com/Lightning-AI/pytorch-lightning/pull/19404)) ### Changed -- `seed_everything()` without passing in a seed no longer 
randomly selects a seed, and now defaults to `0` ([#18846](https://github.com/Lightning-AI/lightning/pull/18846)) -- The `LightningModule.on_{validation,test,predict}_model_{eval,train}` now only get called if they are overridden by the user ([#18951](https://github.com/Lightning-AI/lightning/pull/18951)) -- The `Trainer.fit()` loop no longer calls `LightningModule.train()` at the start; it now preserves the user's configuration of frozen layers ([#18951](https://github.com/Lightning-AI/lightning/pull/18951)) -- The `LightningModule.load_from_checkpoint()` function now calls `.configure_model()` on the model if it is overridden, to ensure all layers can be loaded from the checkpoint ([#19036](https://github.com/Lightning-AI/lightning/pull/19036)) +- `seed_everything()` without passing in a seed no longer randomly selects a seed, and now defaults to `0` ([#18846](https://github.com/Lightning-AI/pytorch-lightning/pull/18846)) +- The `LightningModule.on_{validation,test,predict}_model_{eval,train}` now only get called if they are overridden by the user ([#18951](https://github.com/Lightning-AI/pytorch-lightning/pull/18951)) +- The `Trainer.fit()` loop no longer calls `LightningModule.train()` at the start; it now preserves the user's configuration of frozen layers ([#18951](https://github.com/Lightning-AI/pytorch-lightning/pull/18951)) +- The `LightningModule.load_from_checkpoint()` function now calls `.configure_model()` on the model if it is overridden, to ensure all layers can be loaded from the checkpoint ([#19036](https://github.com/Lightning-AI/pytorch-lightning/pull/19036)) - Restored usage of `step` parameter when logging metrics with `NeptuneLogger` ([#19126](https://github.com/Lightning-AI/pytorch-lightning/pull/19126)) -- Changed the `TransformerEnginePrecision(dtype=)` argument to `weights_dtype` and made it required ([#19082](https://github.com/Lightning-AI/lightning/pull/19082)) -- The columns in the `metrics.csv` file produced by `CSVLogger` are now sorted alphabetically ([#19159](https://github.com/Lightning-AI/lightning/pull/19159)) -- Reverted back to creating a checkpoint copy when `ModelCheckpoint(save_last=True)` instead of creating a symbolic link ([#19191](https://github.com/Lightning-AI/lightning/pull/19191)) +- Changed the `TransformerEnginePrecision(dtype=)` argument to `weights_dtype` and made it required ([#19082](https://github.com/Lightning-AI/pytorch-lightning/pull/19082)) +- The columns in the `metrics.csv` file produced by `CSVLogger` are now sorted alphabetically ([#19159](https://github.com/Lightning-AI/pytorch-lightning/pull/19159)) +- Reverted to creating a checkpoint copy when `ModelCheckpoint(save_last=True)` instead of creating a symbolic link ([#19191](https://github.com/Lightning-AI/pytorch-lightning/pull/19191)) ### Deprecated -- Deprecated all precision plugin classes under `lightning.pytorch.plugins` with the suffix `Plugin` in the name ([#18840](https://github.com/Lightning-AI/lightning/pull/18840)) +- Deprecated all precision plugin classes under `lightning.pytorch.plugins` with the suffix `Plugin` in the name ([#18840](https://github.com/Lightning-AI/pytorch-lightning/pull/18840)) ### Removed -- Removed support for PyTorch 1.12 ([#19300](https://github.com/Lightning-AI/lightning/pull/19300)) +- Removed support for PyTorch 1.12 ([#19300](https://github.com/Lightning-AI/pytorch-lightning/pull/19300)) ### Fixed -- Fixed issue where the `precision="transformer-engine"` argument would not replace layers by default 
([#19082](https://github.com/Lightning-AI/lightning/pull/19082)) -- Fixed issue where layers created in `LightningModule.setup` or `LightningModule.configure_model` wouldn't get converted when using the Bitsandbytes or TransformerEngine plugins ([#19061](https://github.com/Lightning-AI/lightning/pull/19061)) -- Fixed the input validation logic in `FSDPStrategy` to accept a `device_mesh` ([#19392](https://github.com/Lightning-AI/lightning/pull/19392)) +- Fixed issue where the `precision="transformer-engine"` argument would not replace layers by default ([#19082](https://github.com/Lightning-AI/pytorch-lightning/pull/19082)) +- Fixed issue where layers created in `LightningModule.setup` or `LightningModule.configure_model` wouldn't get converted when using the Bitsandbytes or TransformerEngine plugins ([#19061](https://github.com/Lightning-AI/pytorch-lightning/pull/19061)) +- Fixed the input validation logic in `FSDPStrategy` to accept a `device_mesh` ([#19392](https://github.com/Lightning-AI/pytorch-lightning/pull/19392)) ## [2.1.4] - 2024-01-31 ### Fixed -- Fixed `Trainer` not expanding the `default_root_dir` if it has the `~` (home) prefix ([#19179](https://github.com/Lightning-AI/lightning/pull/19179)) -- Fixed warning for Dataloader if `num_workers=1` and CPU count is 1 ([#19224](https://github.com/Lightning-AI/lightning/pull/19224)) -- Fixed `WandbLogger.watch()` method annotation to accept `None` for the log parameter ([#19237](https://github.com/Lightning-AI/lightning/pull/19237)) -- Fixed an issue preventing the Trainer to run on CPU when the system's CUDA driver is outdated or broken ([#19234](https://github.com/Lightning-AI/lightning/pull/19234)) -- Fixed an issue with the ModelCheckpoint callback not saving relative symlinks with `ModelCheckpoint(save_last="link")` ([#19303](https://github.com/Lightning-AI/lightning/pull/19303)) -- Fixed issue where the `_restricted_classmethod_impl` would incorrectly raise a TypeError on inspection rather than on call ([#19332](https://github.com/Lightning-AI/lightning/pull/19332)) -- Fixed exporting `__version__` in `__init__` ([#19221](https://github.com/Lightning-AI/lightning/pull/19221)) +- Fixed `Trainer` not expanding the `default_root_dir` if it has the `~` (home) prefix ([#19179](https://github.com/Lightning-AI/pytorch-lightning/pull/19179)) +- Fixed warning for Dataloader if `num_workers=1` and CPU count is 1 ([#19224](https://github.com/Lightning-AI/pytorch-lightning/pull/19224)) +- Fixed `WandbLogger.watch()` method annotation to accept `None` for the log parameter ([#19237](https://github.com/Lightning-AI/pytorch-lightning/pull/19237)) +- Fixed an issue preventing the Trainer from running on CPU when the system's CUDA driver is outdated or broken ([#19234](https://github.com/Lightning-AI/pytorch-lightning/pull/19234)) +- Fixed an issue with the ModelCheckpoint callback not saving relative symlinks with `ModelCheckpoint(save_last="link")` ([#19303](https://github.com/Lightning-AI/pytorch-lightning/pull/19303)) +- Fixed issue where the `_restricted_classmethod_impl` would incorrectly raise a TypeError on inspection rather than on call ([#19332](https://github.com/Lightning-AI/pytorch-lightning/pull/19332)) +- Fixed exporting `__version__` in `__init__` ([#19221](https://github.com/Lightning-AI/pytorch-lightning/pull/19221)) ## [2.1.3] - 2023-12-21 ### Changed -- `LightningCLI` no longer allows setting a normal class instance as default. 
A `lazy_instance` can be used instead ([#18822](https://github.com/Lightning-AI/lightning/pull/18822)) +- `LightningCLI` no longer allows setting a normal class instance as default. A `lazy_instance` can be used instead ([#18822](https://github.com/Lightning-AI/pytorch-lightning/pull/18822)) ### Fixed -- Fixed checks for local file protocol due to fsspec changes in 2023.10.0 ([#19023](https://github.com/Lightning-AI/lightning/pull/19023)) -- Fixed automatic detection of 'last.ckpt' files to respect the extension when filtering ([#17072](https://github.com/Lightning-AI/lightning/pull/17072)) -- Fixed an issue where setting `CHECKPOINT_JOIN_CHAR` or `CHECKPOINT_EQUALS_CHAR` would only work on the `ModelCheckpoint` class but not on an instance ([#19054](https://github.com/Lightning-AI/lightning/pull/19054)) -- Fixed `ModelCheckpoint` not expanding the `dirpath` if it has the `~` (home) prefix ([#19058](https://github.com/Lightning-AI/lightning/pull/19058)) -- Fixed handling checkpoint dirpath suffix in NeptuneLogger ([#18863](https://github.com/Lightning-AI/lightning/pull/18863)) -- Fixed an edge case where `ModelCheckpoint` would alternate between versioned and unversioned filename ([#19064](https://github.com/Lightning-AI/lightning/pull/19064)) -- Fixed broadcast at initialization in `MPIEnvironment` ([#19074](https://github.com/Lightning-AI/lightning/pull/19074)) +- Fixed checks for local file protocol due to fsspec changes in 2023.10.0 ([#19023](https://github.com/Lightning-AI/pytorch-lightning/pull/19023)) +- Fixed automatic detection of 'last.ckpt' files to respect the extension when filtering ([#17072](https://github.com/Lightning-AI/pytorch-lightning/pull/17072)) +- Fixed an issue where setting `CHECKPOINT_JOIN_CHAR` or `CHECKPOINT_EQUALS_CHAR` would only work on the `ModelCheckpoint` class but not on an instance ([#19054](https://github.com/Lightning-AI/pytorch-lightning/pull/19054)) +- Fixed `ModelCheckpoint` not expanding the `dirpath` if it has the `~` (home) prefix ([#19058](https://github.com/Lightning-AI/pytorch-lightning/pull/19058)) +- Fixed handling checkpoint dirpath suffix in NeptuneLogger ([#18863](https://github.com/Lightning-AI/pytorch-lightning/pull/18863)) +- Fixed an edge case where `ModelCheckpoint` would alternate between versioned and unversioned filename ([#19064](https://github.com/Lightning-AI/pytorch-lightning/pull/19064)) +- Fixed broadcast at initialization in `MPIEnvironment` ([#19074](https://github.com/Lightning-AI/pytorch-lightning/pull/19074)) - Fixed the tensor conversion in `self.log` to respect the default dtype ([#19046](https://github.com/Lightning-AI/pytorch-lightning/issues/19046)) @@ -240,10 +240,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
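A minimal sketch of the `lazy_instance` pattern required by the [2.1.3] change above (assumes `jsonargparse` is installed; the logger default and `save_dir` are illustrative):

```python
from jsonargparse import lazy_instance
from lightning.pytorch.cli import LightningCLI
from lightning.pytorch.loggers import TensorBoardLogger

# A plain TensorBoardLogger(...) instance is no longer accepted as a default;
# `lazy_instance` defers construction until the CLI resolves the final config,
# so command-line overrides of the logger arguments still take effect.
cli = LightningCLI(
    trainer_defaults={"logger": lazy_instance(TensorBoardLogger, save_dir="logs")},
)
```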
### Fixed -- Fixed an issue when replacing an existing `last.ckpt` file with a symlink ([#18793](https://github.com/Lightning-AI/lightning/pull/18793)) +- Fixed an issue when replacing an existing `last.ckpt` file with a symlink ([#18793](https://github.com/Lightning-AI/pytorch-lightning/pull/18793)) - Fixed an issue when `BatchSizeFinder` `steps_per_trial` parameter ends up defining how many validation batches to run during the entire training ([#18394](https://github.com/Lightning-AI/pytorch-lightning/issues/18394)) - Fixed an issue saving the `last.ckpt` file when using `ModelCheckpoint` on a remote filesystem and no logger is used ([#18867](https://github.com/Lightning-AI/pytorch-lightning/issues/18867)) -- Refined the FSDP saving logic and error messaging when path exists ([#18884](https://github.com/Lightning-AI/lightning/pull/18884)) +- Refined the FSDP saving logic and error messaging when path exists ([#18884](https://github.com/Lightning-AI/pytorch-lightning/pull/18884)) - Fixed an issue parsing the version from folders that don't include a version number in `TensorBoardLogger` and `CSVLogger` ([#18897](https://github.com/Lightning-AI/pytorch-lightning/issues/18897)) @@ -251,334 +251,334 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added -- Added `metrics_format` attribute to `RichProgressBarTheme` class ([#18373](https://github.com/Lightning-AI/lightning/pull/18373)) -- Added `CHECKPOINT_EQUALS_CHAR` attribute to `ModelCheckpoint` class ([#17999](https://github.com/Lightning-AI/lightning/pull/17999)) -- Added `**summarize_kwargs` to `ModelSummary` and `RichModelSummary` callbacks ([#16788](https://github.com/Lightning-AI/lightning/pull/16788)) -- Added support for the `max_size_cycle|max_size|min_size` iteration modes during evaluation ([#17163](https://github.com/Lightning-AI/lightning/pull/17163)) -- Added support for the TPU-v4 architecture ([#17227](https://github.com/Lightning-AI/lightning/pull/17227)) -- Added support for XLA's new PJRT runtime ([#17352](https://github.com/Lightning-AI/lightning/pull/17352)) -- Check for invalid TPU device inputs ([#17227](https://github.com/Lightning-AI/lightning/pull/17227)) -- Added `XLAStrategy(sync_module_states=bool)` to control whether to broadcast the parameters to all devices ([#17522](https://github.com/Lightning-AI/lightning/pull/17522)) -- Added support for multiple optimizer parameter groups when using the FSDP strategy ([#17309](https://github.com/Lightning-AI/lightning/pull/17309)) -- Enabled saving the full model state dict when using the `FSDPStrategy` ([#16558](https://github.com/Lightning-AI/lightning/pull/16558)) -- Update `LightningDataModule.from_datasets` to support arbitrary iterables ([#17402](https://github.com/Lightning-AI/lightning/pull/17402)) -- Run the DDP wrapper in a CUDA stream ([#17334](https://github.com/Lightning-AI/lightning/pull/17334)) -- Added `SaveConfigCallback.save_config` to ease use cases such as saving the config to a logger ([#17475](https://github.com/Lightning-AI/lightning/pull/17475)) -- Enabled optional file versioning of model checkpoints ([#17320](https://github.com/Lightning-AI/lightning/pull/17320)) -- Added the process group timeout argument `FSDPStrategy(timeout=...)` for the FSDP strategy ([#17274](https://github.com/Lightning-AI/lightning/pull/17274)) -- Added `FSDPStrategy(activation_checkpointing_policy=...)` to customize the layer policy for automatic activation checkpointing (requires torch>=2.1) 
([#18045](https://github.com/Lightning-AI/lightning/pull/18045)) -- Added CLI option `--map-to-cpu` to the checkpoint upgrade script to enable converting GPU checkpoints on a CPU-only machine ([#17527](https://github.com/Lightning-AI/lightning/pull/17527)) -- Added non-layer param count to the model summary ([#17005](https://github.com/Lightning-AI/lightning/pull/17005)) -- Updated `LearningRateMonitor` to log monitored values to `trainer.callback_metrics` ([#17626](https://github.com/Lightning-AI/lightning/pull/17626)) -- Added `log_weight_decay` argument to `LearningRateMonitor` callback ([#18439](https://github.com/Lightning-AI/lightning/pull/18439)) -- Added `Trainer.print()` to print on local rank zero only ([#17980](https://github.com/Lightning-AI/lightning/pull/17980)) -- Added `Trainer.init_module()` context manager to instantiate large models efficiently directly on device, dtype ([#18004](https://github.com/Lightning-AI/lightning/pull/18004)) +- Added `metrics_format` attribute to `RichProgressBarTheme` class ([#18373](https://github.com/Lightning-AI/pytorch-lightning/pull/18373)) +- Added `CHECKPOINT_EQUALS_CHAR` attribute to `ModelCheckpoint` class ([#17999](https://github.com/Lightning-AI/pytorch-lightning/pull/17999)) +- Added `**summarize_kwargs` to `ModelSummary` and `RichModelSummary` callbacks ([#16788](https://github.com/Lightning-AI/pytorch-lightning/pull/16788)) +- Added support for the `max_size_cycle|max_size|min_size` iteration modes during evaluation ([#17163](https://github.com/Lightning-AI/pytorch-lightning/pull/17163)) +- Added support for the TPU-v4 architecture ([#17227](https://github.com/Lightning-AI/pytorch-lightning/pull/17227)) +- Added support for XLA's new PJRT runtime ([#17352](https://github.com/Lightning-AI/pytorch-lightning/pull/17352)) +- Check for invalid TPU device inputs ([#17227](https://github.com/Lightning-AI/pytorch-lightning/pull/17227)) +- Added `XLAStrategy(sync_module_states=bool)` to control whether to broadcast the parameters to all devices ([#17522](https://github.com/Lightning-AI/pytorch-lightning/pull/17522)) +- Added support for multiple optimizer parameter groups when using the FSDP strategy ([#17309](https://github.com/Lightning-AI/pytorch-lightning/pull/17309)) +- Enabled saving the full model state dict when using the `FSDPStrategy` ([#16558](https://github.com/Lightning-AI/pytorch-lightning/pull/16558)) +- Updated `LightningDataModule.from_datasets` to support arbitrary iterables ([#17402](https://github.com/Lightning-AI/pytorch-lightning/pull/17402)) +- Run the DDP wrapper in a CUDA stream ([#17334](https://github.com/Lightning-AI/pytorch-lightning/pull/17334)) +- Added `SaveConfigCallback.save_config` to ease use cases such as saving the config to a logger ([#17475](https://github.com/Lightning-AI/pytorch-lightning/pull/17475)) +- Enabled optional file versioning of model checkpoints ([#17320](https://github.com/Lightning-AI/pytorch-lightning/pull/17320)) +- Added the process group timeout argument `FSDPStrategy(timeout=...)` for the FSDP strategy ([#17274](https://github.com/Lightning-AI/pytorch-lightning/pull/17274)) +- Added `FSDPStrategy(activation_checkpointing_policy=...)` to customize the layer policy for automatic activation checkpointing (requires torch>=2.1) ([#18045](https://github.com/Lightning-AI/pytorch-lightning/pull/18045)) +- Added CLI option `--map-to-cpu` to the checkpoint upgrade script to enable converting GPU checkpoints on a CPU-only machine 
([#17527](https://github.com/Lightning-AI/pytorch-lightning/pull/17527)) +- Added non-layer param count to the model summary ([#17005](https://github.com/Lightning-AI/pytorch-lightning/pull/17005)) +- Updated `LearningRateMonitor` to log monitored values to `trainer.callback_metrics` ([#17626](https://github.com/Lightning-AI/pytorch-lightning/pull/17626)) +- Added `log_weight_decay` argument to `LearningRateMonitor` callback ([#18439](https://github.com/Lightning-AI/pytorch-lightning/pull/18439)) +- Added `Trainer.print()` to print on local rank zero only ([#17980](https://github.com/Lightning-AI/pytorch-lightning/pull/17980)) +- Added `Trainer.init_module()` context manager to instantiate large models efficiently directly on device, dtype ([#18004](https://github.com/Lightning-AI/pytorch-lightning/pull/18004)) * Creates the model parameters in the desired dtype (`torch.float32`, `torch.float64`) depending on the 'true' precision choice in `Trainer(precision='32-true'|'64-true')` -- Added the `LightningModule.configure_model()` hook to instantiate large models efficiently directly on device, dtype, and with sharding support ([#18004](https://github.com/Lightning-AI/lightning/pull/18004)) +- Added the `LightningModule.configure_model()` hook to instantiate large models efficiently directly on device, dtype, and with sharding support ([#18004](https://github.com/Lightning-AI/pytorch-lightning/pull/18004)) * Handles initialization for FSDP models before wrapping and the Zero stage 3 initialization for DeepSpeed before sharding -- Added support for meta-device initialization with `Trainer.init_module(empty_init=True)` in FSDP ([#18385](https://github.com/Lightning-AI/lightning/pull/18385)) -- Added `lightning.pytorch.plugins.PrecisionPlugin.module_init_context()` and `lightning.pytorch.strategies.Strategy.tensor_init_context()` context managers to control model and tensor instantiation ([#18004](https://github.com/Lightning-AI/lightning/pull/18004)) -- Automatically call `xla_model.mark_step()` before saving checkpoints with XLA ([#17882](https://github.com/Lightning-AI/lightning/pull/17882)) -- Added a callback for spike-detection ([#18014](https://github.com/Lightning-AI/lightning/pull/18014)) -- Added the ability to set the `torch.distributed.fsdp.ShardingStrategy` via string in `FSDPStrategy` ([#18087](https://github.com/Lightning-AI/lightning/pull/18087)) -- Improved error messages when attempting to load a DeepSpeed checkpoint at an invalid path ([#17795](https://github.com/Lightning-AI/lightning/pull/17795)) -- Allowed accessing rank information in the main process before processes are launched when using the `XLAStrategy` ([#18194](https://github.com/Lightning-AI/lightning/pull/18194)) -- Added support for true half-precision training via `Trainer(precision="16-true"|"bf16-true")` ([#18193](https://github.com/Lightning-AI/lightning/pull/18193), [#18217](https://github.com/Lightning-AI/lightning/pull/18217), [#18213](https://github.com/Lightning-AI/lightning/pull/18213), [#18219](https://github.com/Lightning-AI/lightning/pull/18219)) -- Added automatic process cleanup to avoid zombie child processes and stalls when exceptions are raised ([#18218](https://github.com/Lightning-AI/lightning/pull/18218)) -- Added validation of user input for `devices` and `num_nodes` when running with `SLURM` or `TorchElastic` ([#18292](https://github.com/Lightning-AI/lightning/pull/18292)) -- Added support for saving checkpoints with either full state-dict or sharded state dict via 
`FSDPStrategy(state_dict_type="full"|"sharded")` ([#18364](https://github.com/Lightning-AI/lightning/pull/18364)) -- Added support for loading sharded/distributed checkpoints in FSDP ([#18358](https://github.com/Lightning-AI/lightning/pull/18358)) -- Made the text delimiter in the rich progress bar configurable ([#18372](https://github.com/Lightning-AI/lightning/pull/18372)) -- Improved the error messaging and instructions when handling custom batch samplers in distributed settings ([#18402](https://github.com/Lightning-AI/lightning/pull/18402)) -- Added support for mixed 8-bit precision as `Trainer(precision="transformer-engine")` using [Nvidia's Transformer Engine](https://docs.nvidia.com/deeplearning/transformer-engine) ([#18459](https://github.com/Lightning-AI/lightning/pull/18459)) -- Added support for linear layer quantization with `Trainer(plugins=BitsandbytesPrecision())` using [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) ([#18655](https://github.com/Lightning-AI/lightning/pull/18655)) -- Added support for passing the process group to the `FSDPStrategy` ([#18583](https://github.com/Lightning-AI/lightning/pull/18583)) -- Enabled the default process group configuration for FSDP's hybrid sharding ([#18583](https://github.com/Lightning-AI/lightning/pull/18583)) -- Added `lightning.pytorch.utilities.suggested_max_num_workers` to assist with setting a good value in distributed settings ([#18591](https://github.com/Lightning-AI/lightning/pull/18591)) -- Improved the `num_workers` warning to give a more accurate upper limit on the `num_workers` suggestion ([#18591](https://github.com/Lightning-AI/lightning/pull/18591)) -- Added `lightning.pytorch.utilities.is_shared_filesystem` utility function to automatically check whether the filesystem is shared between machines ([#18586](https://github.com/Lightning-AI/lightning/pull/18586)) -- Added support for returning an object of type `Mapping` from `LightningModule.training_step()` ([#18657](https://github.com/Lightning-AI/lightning/pull/18657)) -- Added the hook `LightningModule.on_validation_model_zero_grad()` to allow overriding the behavior of zeroing the gradients before entering the validation loop ([#18710](https://github.com/Lightning-AI/lightning/pull/18710)) +- Added support for meta-device initialization with `Trainer.init_module(empty_init=True)` in FSDP ([#18385](https://github.com/Lightning-AI/pytorch-lightning/pull/18385)) +- Added `lightning.pytorch.plugins.PrecisionPlugin.module_init_context()` and `lightning.pytorch.strategies.Strategy.tensor_init_context()` context managers to control model and tensor instantiation ([#18004](https://github.com/Lightning-AI/pytorch-lightning/pull/18004)) +- Automatically call `xla_model.mark_step()` before saving checkpoints with XLA ([#17882](https://github.com/Lightning-AI/pytorch-lightning/pull/17882)) +- Added a callback for spike-detection ([#18014](https://github.com/Lightning-AI/pytorch-lightning/pull/18014)) +- Added the ability to set the `torch.distributed.fsdp.ShardingStrategy` via string in `FSDPStrategy` ([#18087](https://github.com/Lightning-AI/pytorch-lightning/pull/18087)) +- Improved error messages when attempting to load a DeepSpeed checkpoint at an invalid path ([#17795](https://github.com/Lightning-AI/pytorch-lightning/pull/17795)) +- Allowed accessing rank information in the main process before processes are launched when using the `XLAStrategy` ([#18194](https://github.com/Lightning-AI/pytorch-lightning/pull/18194)) +- Added support for true half-precision 
training via `Trainer(precision="16-true"|"bf16-true")` ([#18193](https://github.com/Lightning-AI/pytorch-lightning/pull/18193), [#18217](https://github.com/Lightning-AI/pytorch-lightning/pull/18217), [#18213](https://github.com/Lightning-AI/pytorch-lightning/pull/18213), [#18219](https://github.com/Lightning-AI/pytorch-lightning/pull/18219)) +- Added automatic process cleanup to avoid zombie child processes and stalls when exceptions are raised ([#18218](https://github.com/Lightning-AI/pytorch-lightning/pull/18218)) +- Added validation of user input for `devices` and `num_nodes` when running with `SLURM` or `TorchElastic` ([#18292](https://github.com/Lightning-AI/pytorch-lightning/pull/18292)) +- Added support for saving checkpoints with either full state-dict or sharded state dict via `FSDPStrategy(state_dict_type="full"|"sharded")` ([#18364](https://github.com/Lightning-AI/pytorch-lightning/pull/18364)) +- Added support for loading sharded/distributed checkpoints in FSDP ([#18358](https://github.com/Lightning-AI/pytorch-lightning/pull/18358)) +- Made the text delimiter in the rich progress bar configurable ([#18372](https://github.com/Lightning-AI/pytorch-lightning/pull/18372)) +- Improved the error messaging and instructions when handling custom batch samplers in distributed settings ([#18402](https://github.com/Lightning-AI/pytorch-lightning/pull/18402)) +- Added support for mixed 8-bit precision as `Trainer(precision="transformer-engine")` using [Nvidia's Transformer Engine](https://docs.nvidia.com/deeplearning/transformer-engine) ([#18459](https://github.com/Lightning-AI/pytorch-lightning/pull/18459)) +- Added support for linear layer quantization with `Trainer(plugins=BitsandbytesPrecision())` using [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) ([#18655](https://github.com/Lightning-AI/pytorch-lightning/pull/18655)) +- Added support for passing the process group to the `FSDPStrategy` ([#18583](https://github.com/Lightning-AI/pytorch-lightning/pull/18583)) +- Enabled the default process group configuration for FSDP's hybrid sharding ([#18583](https://github.com/Lightning-AI/pytorch-lightning/pull/18583)) +- Added `lightning.pytorch.utilities.suggested_max_num_workers` to assist with setting a good value in distributed settings ([#18591](https://github.com/Lightning-AI/pytorch-lightning/pull/18591)) +- Improved the `num_workers` warning to give a more accurate upper limit on the `num_workers` suggestion ([#18591](https://github.com/Lightning-AI/pytorch-lightning/pull/18591)) +- Added `lightning.pytorch.utilities.is_shared_filesystem` utility function to automatically check whether the filesystem is shared between machines ([#18586](https://github.com/Lightning-AI/pytorch-lightning/pull/18586)) +- Added support for returning an object of type `Mapping` from `LightningModule.training_step()` ([#18657](https://github.com/Lightning-AI/pytorch-lightning/pull/18657)) +- Added the hook `LightningModule.on_validation_model_zero_grad()` to allow overriding the behavior of zeroing the gradients before entering the validation loop ([#18710](https://github.com/Lightning-AI/pytorch-lightning/pull/18710)) ### Changed -- Changed default metric formatting from `round(..., 3)` to `".3f"` format string in `MetricsTextColumn` class ([#18483](https://github.com/Lightning-AI/lightning/pull/18483)) -- Removed the limitation to call `self.trainer.model.parameters()` in `LightningModule.configure_optimizers()` ([#17309](https://github.com/Lightning-AI/lightning/pull/17309)) -- 
`Trainer(accelerator="tpu", devices=[i])"` now selects the i-th TPU core (0-based, previously it was 1-based) ([#17227](https://github.com/Lightning-AI/lightning/pull/17227)) -- Allow using iterable-style datasets with TPUs ([#17331](https://github.com/Lightning-AI/lightning/pull/17331)) -- Increased the minimum XLA requirement to 1.13 ([#17368](https://github.com/Lightning-AI/lightning/pull/17368)) -- `self.log`ed tensors are now kept in the original device to reduce unnecessary host-to-device synchronizations ([#17334](https://github.com/Lightning-AI/lightning/pull/17334)) -- Made the run initialization in `WandbLogger` lazy to avoid creating artifacts when the CLI is used ([#17573](https://github.com/Lightning-AI/lightning/pull/17573)) -- Simplified redirection of `*_step` methods in strategies by removing the `_LightningModuleWrapperBase` wrapper module ([#17531](https://github.com/Lightning-AI/lightning/pull/17531)) -- Support kwargs input for LayerSummary ([#17709](https://github.com/Lightning-AI/lightning/pull/17709)) -- Dropped support for `wandb` versions older than 0.12.0 in `WandbLogger` ([#17876](https://github.com/Lightning-AI/lightning/pull/17876)) -- During `LightningModule.setup()`, the `self.device` now returns the device the module will be placed on instead of `cpu` ([#18021](https://github.com/Lightning-AI/lightning/pull/18021)) -- Increased the minimum supported `wandb` version for `WandbLogger` from 0.12.0 to 0.12.10 ([#18171](https://github.com/Lightning-AI/lightning/pull/18171)) -- The input tensors now get cast to the right precision type before transfer to the device ([#18264](https://github.com/Lightning-AI/lightning/pull/18264)) -- Improved the formatting of emitted warnings ([#18288](https://github.com/Lightning-AI/lightning/pull/18288)) -- Broadcast and reduction of tensors with XLA-based strategies now preserve the input's device ([#18275](https://github.com/Lightning-AI/lightning/pull/18275)) -- The `FSDPStrategy` now loads checkpoints after the `configure_model`/`configure_sharded_model` hook ([#18358](https://github.com/Lightning-AI/lightning/pull/18358)) -- The `FSDPStrategy.load_optimizer_state_dict` and `FSDPStrategy.load_model_state_dict` are a no-op now ([#18358](https://github.com/Lightning-AI/lightning/pull/18358)) -- The `Trainer.num_val_batches`, `Trainer.num_test_batches` and `Trainer.num_sanity_val_batches` now return a list of sizes per dataloader instead of a single integer ([#18441](https://github.com/Lightning-AI/lightning/pull/18441)) -- The `*_step(dataloader_iter)` flavor now no longer takes the `batch_idx` in the signature ([#18390](https://github.com/Lightning-AI/lightning/pull/18390)) -- Calling `next(dataloader_iter)` now returns a triplet `(batch, batch_idx, dataloader_idx)` ([#18390](https://github.com/Lightning-AI/lightning/pull/18390)) -- Calling `next(combined_loader)` now returns a triplet `(batch, batch_idx, dataloader_idx)` ([#18390](https://github.com/Lightning-AI/lightning/pull/18390)) -- Due to lack of reliability, Trainer now only runs on one GPU instead of all GPUs in a Jupyter notebook if `devices="auto"` (default) ([#18291](https://github.com/Lightning-AI/lightning/pull/18291)) -- Made the `batch_idx` argument optional in `validation_step`, `test_step` and `predict_step` to maintain consistency with `training_step` ([#18512](https://github.com/Lightning-AI/lightning/pull/18512)) -- The `TQDMProgressBar` now consistently shows it/s for the speed even when the iteration time becomes larger than one second 
([#18593](https://github.com/Lightning-AI/lightning/pull/18593)) -- The `LightningDataModule.load_from_checkpoint` and `LightningModule.load_from_checkpoint` methods now raise an error if they are called on an instance instead of the class ([#18432](https://github.com/Lightning-AI/lightning/pull/18432)) -- Enabled launching via `torchrun` in a SLURM environment; the `TorchElasticEnvironment` now gets chosen over the `SLURMEnvironment` if both are detected ([#18618](https://github.com/Lightning-AI/lightning/pull/18618)) -- If not set by the user, Lightning will set `OMP_NUM_THREADS` to `num_cpus / num_processes` when launching subprocesses (e.g. when DDP is used) to avoid system overload for CPU-intensive tasks ([#18677](https://github.com/Lightning-AI/lightning/pull/18677)) -- The `ModelCheckpoint` no longer deletes files under the save-top-k mechanism when resuming from a folder that is not the same as the current checkpoint folder ([#18750](https://github.com/Lightning-AI/lightning/pull/18750)) -- The `ModelCheckpoint` no longer deletes the file that was passed to `Trainer.fit(ckpt_path=...)` ([#18750](https://github.com/Lightning-AI/lightning/pull/18750)) -- Calling `trainer.fit()` twice now raises an error with strategies that spawn subprocesses through `multiprocessing` (ddp_spawn, xla) ([#18776](https://github.com/Lightning-AI/lightning/pull/18776)) -- The `ModelCheckpoint` now saves a symbolic link if `save_last=True` and `save_top_k != 0` ([#18748](https://github.com/Lightning-AI/lightning/pull/18748)) +- Changed default metric formatting from `round(..., 3)` to `".3f"` format string in `MetricsTextColumn` class ([#18483](https://github.com/Lightning-AI/pytorch-lightning/pull/18483)) +- Removed the limitation to call `self.trainer.model.parameters()` in `LightningModule.configure_optimizers()` ([#17309](https://github.com/Lightning-AI/pytorch-lightning/pull/17309)) +- `Trainer(accelerator="tpu", devices=[i])` now selects the i-th TPU core (0-based, previously it was 1-based) ([#17227](https://github.com/Lightning-AI/pytorch-lightning/pull/17227)) +- Allow using iterable-style datasets with TPUs ([#17331](https://github.com/Lightning-AI/pytorch-lightning/pull/17331)) +- Increased the minimum XLA requirement to 1.13 ([#17368](https://github.com/Lightning-AI/pytorch-lightning/pull/17368)) +- `self.log`ed tensors are now kept on the original device to reduce unnecessary host-to-device synchronizations ([#17334](https://github.com/Lightning-AI/pytorch-lightning/pull/17334)) +- Made the run initialization in `WandbLogger` lazy to avoid creating artifacts when the CLI is used ([#17573](https://github.com/Lightning-AI/pytorch-lightning/pull/17573)) +- Simplified redirection of `*_step` methods in strategies by removing the `_LightningModuleWrapperBase` wrapper module ([#17531](https://github.com/Lightning-AI/pytorch-lightning/pull/17531)) +- Support kwargs input for LayerSummary ([#17709](https://github.com/Lightning-AI/pytorch-lightning/pull/17709)) +- Dropped support for `wandb` versions older than 0.12.0 in `WandbLogger` ([#17876](https://github.com/Lightning-AI/pytorch-lightning/pull/17876)) +- During `LightningModule.setup()`, the `self.device` now returns the device the module will be placed on instead of `cpu` ([#18021](https://github.com/Lightning-AI/pytorch-lightning/pull/18021)) +- Increased the minimum supported `wandb` version for `WandbLogger` from 0.12.0 to 0.12.10 ([#18171](https://github.com/Lightning-AI/pytorch-lightning/pull/18171)) +- The input tensors now get cast to 
the right precision type before transfer to the device ([#18264](https://github.com/Lightning-AI/pytorch-lightning/pull/18264)) +- Improved the formatting of emitted warnings ([#18288](https://github.com/Lightning-AI/pytorch-lightning/pull/18288)) +- Broadcast and reduction of tensors with XLA-based strategies now preserve the input's device ([#18275](https://github.com/Lightning-AI/pytorch-lightning/pull/18275)) +- The `FSDPStrategy` now loads checkpoints after the `configure_model`/`configure_sharded_model` hook ([#18358](https://github.com/Lightning-AI/pytorch-lightning/pull/18358)) +- The `FSDPStrategy.load_optimizer_state_dict` and `FSDPStrategy.load_model_state_dict` are a no-op now ([#18358](https://github.com/Lightning-AI/pytorch-lightning/pull/18358)) +- The `Trainer.num_val_batches`, `Trainer.num_test_batches` and `Trainer.num_sanity_val_batches` now return a list of sizes per dataloader instead of a single integer ([#18441](https://github.com/Lightning-AI/pytorch-lightning/pull/18441)) +- The `*_step(dataloader_iter)` flavor now no longer takes the `batch_idx` in the signature ([#18390](https://github.com/Lightning-AI/pytorch-lightning/pull/18390)) +- Calling `next(dataloader_iter)` now returns a triplet `(batch, batch_idx, dataloader_idx)` ([#18390](https://github.com/Lightning-AI/pytorch-lightning/pull/18390)) +- Calling `next(combined_loader)` now returns a triplet `(batch, batch_idx, dataloader_idx)` ([#18390](https://github.com/Lightning-AI/pytorch-lightning/pull/18390)) +- Due to lack of reliability, Trainer now only runs on one GPU instead of all GPUs in a Jupyter notebook if `devices="auto"` (default) ([#18291](https://github.com/Lightning-AI/pytorch-lightning/pull/18291)) +- Made the `batch_idx` argument optional in `validation_step`, `test_step` and `predict_step` to maintain consistency with `training_step` ([#18512](https://github.com/Lightning-AI/pytorch-lightning/pull/18512)) +- The `TQDMProgressBar` now consistently shows it/s for the speed even when the iteration time becomes larger than one second ([#18593](https://github.com/Lightning-AI/pytorch-lightning/pull/18593)) +- The `LightningDataModule.load_from_checkpoint` and `LightningModule.load_from_checkpoint` methods now raise an error if they are called on an instance instead of the class ([#18432](https://github.com/Lightning-AI/pytorch-lightning/pull/18432)) +- Enabled launching via `torchrun` in a SLURM environment; the `TorchElasticEnvironment` now gets chosen over the `SLURMEnvironment` if both are detected ([#18618](https://github.com/Lightning-AI/pytorch-lightning/pull/18618)) +- If not set by the user, Lightning will set `OMP_NUM_THREADS` to `num_cpus / num_processes` when launching subprocesses (e.g. 
when DDP is used) to avoid system overload for CPU-intensive tasks ([#18677](https://github.com/Lightning-AI/pytorch-lightning/pull/18677)) +- The `ModelCheckpoint` no longer deletes files under the save-top-k mechanism when resuming from a folder that is not the same as the current checkpoint folder ([#18750](https://github.com/Lightning-AI/pytorch-lightning/pull/18750)) +- The `ModelCheckpoint` no longer deletes the file that was passed to `Trainer.fit(ckpt_path=...)` ([#18750](https://github.com/Lightning-AI/pytorch-lightning/pull/18750)) +- Calling `trainer.fit()` twice now raises an error with strategies that spawn subprocesses through `multiprocessing` (ddp_spawn, xla) ([#18776](https://github.com/Lightning-AI/pytorch-lightning/pull/18776)) +- The `ModelCheckpoint` now saves a symbolic link if `save_last=True` and `save_top_k != 0` ([#18748](https://github.com/Lightning-AI/pytorch-lightning/pull/18748)) ### Deprecated -- Deprecated the `SingleTPUStrategy` (`strategy="single_tpu"`) in favor of `SingleDeviceXLAStrategy` (`strategy="single_xla"`) ([#17383](https://github.com/Lightning-AI/lightning/pull/17383)) -- Deprecated the `TPUAccelerator` in favor of `XLAAccelerator` ([#17383](https://github.com/Lightning-AI/lightning/pull/17383)) -- Deprecated the `TPUPrecisionPlugin` in favor of `XLAPrecisionPlugin` ([#17383](https://github.com/Lightning-AI/lightning/pull/17383)) -- Deprecated the `TPUBf16PrecisionPlugin` in favor of `XLABf16PrecisionPlugin` ([#17383](https://github.com/Lightning-AI/lightning/pull/17383)) -- Deprecated the `Strategy.post_training_step` method ([#17531](https://github.com/Lightning-AI/lightning/pull/17531)) -- Deprecated the `LightningModule.configure_sharded_model` hook in favor of `LightningModule.configure_model` ([#18004](https://github.com/Lightning-AI/lightning/pull/18004)) -- Deprecated the `LightningDoublePrecisionModule` wrapper in favor of calling `Trainer.precision_plugin.convert_input()` ([#18209](https://github.com/Lightning-AI/lightning/pull/18209)) +- Deprecated the `SingleTPUStrategy` (`strategy="single_tpu"`) in favor of `SingleDeviceXLAStrategy` (`strategy="single_xla"`) ([#17383](https://github.com/Lightning-AI/pytorch-lightning/pull/17383)) +- Deprecated the `TPUAccelerator` in favor of `XLAAccelerator` ([#17383](https://github.com/Lightning-AI/pytorch-lightning/pull/17383)) +- Deprecated the `TPUPrecisionPlugin` in favor of `XLAPrecisionPlugin` ([#17383](https://github.com/Lightning-AI/pytorch-lightning/pull/17383)) +- Deprecated the `TPUBf16PrecisionPlugin` in favor of `XLABf16PrecisionPlugin` ([#17383](https://github.com/Lightning-AI/pytorch-lightning/pull/17383)) +- Deprecated the `Strategy.post_training_step` method ([#17531](https://github.com/Lightning-AI/pytorch-lightning/pull/17531)) +- Deprecated the `LightningModule.configure_sharded_model` hook in favor of `LightningModule.configure_model` ([#18004](https://github.com/Lightning-AI/pytorch-lightning/pull/18004)) +- Deprecated the `LightningDoublePrecisionModule` wrapper in favor of calling `Trainer.precision_plugin.convert_input()` ([#18209](https://github.com/Lightning-AI/pytorch-lightning/pull/18209)) ### Removed -- Removed the `XLAStrategy.is_distributed` property. It is always True ([#17381](https://github.com/Lightning-AI/lightning/pull/17381)) -- Removed the `SingleTPUStrategy.is_distributed` property. 
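As an illustration of the new `dataloader_iter` step signature described in the Changed entries above, here is a minimal sketch; the `LitModel` class and its `compute_loss` helper are hypothetical and not part of this release:

```python
import lightning.pytorch as pl


class LitModel(pl.LightningModule):  # hypothetical module, for illustration only
    def training_step(self, dataloader_iter):
        # `batch_idx` is no longer part of the signature; `next()` yields a triplet
        batch, batch_idx, dataloader_idx = next(dataloader_iter)
        loss = self.compute_loss(batch)  # assumed user-defined helper
        return loss
```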
It is always False ([#17381](https://github.com/Lightning-AI/lightning/pull/17381)) -- Removed experimental support for `torchdistx` due to a lack of project maintenance ([#17995](https://github.com/Lightning-AI/lightning/pull/17995)) -- Removed support for PyTorch 1.11 ([#18691](https://github.com/Lightning-AI/lightning/pull/18691)) +- Removed the `XLAStrategy.is_distributed` property. It is always True ([#17381](https://github.com/Lightning-AI/pytorch-lightning/pull/17381)) +- Removed the `SingleTPUStrategy.is_distributed` property. It is always False ([#17381](https://github.com/Lightning-AI/pytorch-lightning/pull/17381)) +- Removed experimental support for `torchdistx` due to a lack of project maintenance ([#17995](https://github.com/Lightning-AI/pytorch-lightning/pull/17995)) +- Removed support for PyTorch 1.11 ([#18691](https://github.com/Lightning-AI/pytorch-lightning/pull/18691)) ### Fixed -- Fixed an issue with reusing the same model across multiple trainer stages when using the `DeepSpeedStrategy` ([#17531](https://github.com/Lightning-AI/lightning/pull/17531)) -- Fixed the saving and loading of FSDP optimizer states ([#17819](https://github.com/Lightning-AI/lightning/pull/17819)) -- Fixed FSDP re-applying activation checkpointing when the user had manually applied it already ([#18006](https://github.com/Lightning-AI/lightning/pull/18006)) -- Fixed issue where unexpected exceptions would leave the default torch dtype modified when using true precision settings ([#18500](https://github.com/Lightning-AI/lightning/pull/18500)) -- Fixed issue where not including the `batch_idx` argument in the `training_step` would disable gradient accumulation ([#18619](https://github.com/Lightning-AI/lightning/pull/18619)) -- Fixed the replacement of callbacks returned in `LightningModule.configure_callbacks` when the callback was a subclass of an existing Trainer callback ([#18508](https://github.com/Lightning-AI/lightning/pull/18508)) -- Fixed `Trainer.log_dir` not returning the correct directory for the `CSVLogger` ([#18548](https://github.com/Lightning-AI/lightning/pull/18548)) -- Fixed redundant input-type casting in FSDP precision ([#18630](https://github.com/Lightning-AI/lightning/pull/18630)) -- Fixed numerical issues when reducing values in low precision with `self.log` ([#18686](https://github.com/Lightning-AI/lightning/pull/18686)) -- Fixed an issue that would cause the gradients to be erased if validation happened in the middle of a gradient accumulation phase ([#18710](https://github.com/Lightning-AI/lightning/pull/18710)) -- Fixed redundant file writes in `CSVLogger` ([#18567](https://github.com/Lightning-AI/lightning/pull/18567)) -- Fixed an issue that could lead to checkpoint files being deleted accidentally when resuming training ([#18750](https://github.com/Lightning-AI/lightning/pull/18750)) +- Fixed an issue with reusing the same model across multiple trainer stages when using the `DeepSpeedStrategy` ([#17531](https://github.com/Lightning-AI/pytorch-lightning/pull/17531)) +- Fixed the saving and loading of FSDP optimizer states ([#17819](https://github.com/Lightning-AI/pytorch-lightning/pull/17819)) +- Fixed FSDP re-applying activation checkpointing when the user had manually applied it already ([#18006](https://github.com/Lightning-AI/pytorch-lightning/pull/18006)) +- Fixed issue where unexpected exceptions would leave the default torch dtype modified when using true precision settings ([#18500](https://github.com/Lightning-AI/pytorch-lightning/pull/18500)) +- Fixed issue where 
not including the `batch_idx` argument in the `training_step` would disable gradient accumulation ([#18619](https://github.com/Lightning-AI/pytorch-lightning/pull/18619)) +- Fixed the replacement of callbacks returned in `LightningModule.configure_callbacks` when the callback was a subclass of an existing Trainer callback ([#18508](https://github.com/Lightning-AI/pytorch-lightning/pull/18508)) +- Fixed `Trainer.log_dir` not returning the correct directory for the `CSVLogger` ([#18548](https://github.com/Lightning-AI/pytorch-lightning/pull/18548)) +- Fixed redundant input-type casting in FSDP precision ([#18630](https://github.com/Lightning-AI/pytorch-lightning/pull/18630)) +- Fixed numerical issues when reducing values in low precision with `self.log` ([#18686](https://github.com/Lightning-AI/pytorch-lightning/pull/18686)) +- Fixed an issue that would cause the gradients to be erased if validation happened in the middle of a gradient accumulation phase ([#18710](https://github.com/Lightning-AI/pytorch-lightning/pull/18710)) +- Fixed redundant file writes in `CSVLogger` ([#18567](https://github.com/Lightning-AI/pytorch-lightning/pull/18567)) +- Fixed an issue that could lead to checkpoint files being deleted accidentally when resuming training ([#18750](https://github.com/Lightning-AI/pytorch-lightning/pull/18750)) ## [2.0.9] - 2023-09-14 ### Fixed -- Fixed an issue that wouldn't prevent the user to set the `log_model` parameter in `WandbLogger` via the LightningCLI ([#18458](https://github.com/Lightning-AI/lightning/pull/18458)) -- Fixed the display of `v_num` in the progress bar when running with `Trainer(fast_dev_run=True)` ([#18491](https://github.com/Lightning-AI/lightning/pull/18491)) -- Fixed `UnboundLocalError` when running with `python -O` ([#18496](https://github.com/Lightning-AI/lightning/pull/18496)) -- Fixed visual glitch with the TQDM progress bar leaving the validation bar incomplete before switching back to the training display ([#18503](https://github.com/Lightning-AI/lightning/pull/18503)) -- Fixed false positive warning about logging interval when running with `Trainer(fast_dev_run=True)` ([#18550](https://github.com/Lightning-AI/lightning/pull/18550)) +- Fixed an issue that wouldn't prevent the user to set the `log_model` parameter in `WandbLogger` via the LightningCLI ([#18458](https://github.com/Lightning-AI/pytorch-lightning/pull/18458)) +- Fixed the display of `v_num` in the progress bar when running with `Trainer(fast_dev_run=True)` ([#18491](https://github.com/Lightning-AI/pytorch-lightning/pull/18491)) +- Fixed `UnboundLocalError` when running with `python -O` ([#18496](https://github.com/Lightning-AI/pytorch-lightning/pull/18496)) +- Fixed visual glitch with the TQDM progress bar leaving the validation bar incomplete before switching back to the training display ([#18503](https://github.com/Lightning-AI/pytorch-lightning/pull/18503)) +- Fixed false positive warning about logging interval when running with `Trainer(fast_dev_run=True)` ([#18550](https://github.com/Lightning-AI/pytorch-lightning/pull/18550)) ## [2.0.8] - 2023-08-29 ### Changed -- On XLA, avoid setting the global rank before processes have been launched as this will initialize the PJRT computation client in the main process ([#16966](https://github.com/Lightning-AI/lightning/pull/16966)) -- Fix inefficiency in rich progress bar ([#18369](https://github.com/Lightning-AI/lightning/pull/18369)) +- On XLA, avoid setting the global rank before processes have been launched as this will initialize the PJRT 
computation client in the main process ([#16966](https://github.com/Lightning-AI/pytorch-lightning/pull/16966)) +- Fix inefficiency in rich progress bar ([#18369](https://github.com/Lightning-AI/pytorch-lightning/pull/18369)) ### Fixed -- Fixed FSDP full-precision `param_dtype` training (`16-mixed` and `bf16-mixed` configurations) to avoid FSDP assertion errors with PyTorch < 2.0 ([#18278](https://github.com/Lightning-AI/lightning/pull/18278)) -- Fixed an issue that prevented the use of custom logger classes without an `experiment` property defined ([#18093](https://github.com/Lightning-AI/lightning/pull/18093)) -- Fixed setting the tracking uri in `MLFlowLogger` for logging artifacts to the MLFlow server ([#18395](https://github.com/Lightning-AI/lightning/pull/18395)) -- Fixed redundant `iter()` call to dataloader when checking dataloading configuration ([#18415](https://github.com/Lightning-AI/lightning/pull/18415)) -- Fixed model parameters getting shared between processes when running with `strategy="ddp_spawn"` and `accelerator="cpu"`; this has a necessary memory impact, as parameters are replicated for each process now ([#18238](https://github.com/Lightning-AI/lightning/pull/18238)) -- Properly manage `fetcher.done` with `dataloader_iter` ([#18376](https://github.com/Lightning-AI/lightning/pull/18376)) +- Fixed FSDP full-precision `param_dtype` training (`16-mixed` and `bf16-mixed` configurations) to avoid FSDP assertion errors with PyTorch < 2.0 ([#18278](https://github.com/Lightning-AI/pytorch-lightning/pull/18278)) +- Fixed an issue that prevented the use of custom logger classes without an `experiment` property defined ([#18093](https://github.com/Lightning-AI/pytorch-lightning/pull/18093)) +- Fixed setting the tracking uri in `MLFlowLogger` for logging artifacts to the MLFlow server ([#18395](https://github.com/Lightning-AI/pytorch-lightning/pull/18395)) +- Fixed redundant `iter()` call to dataloader when checking dataloading configuration ([#18415](https://github.com/Lightning-AI/pytorch-lightning/pull/18415)) +- Fixed model parameters getting shared between processes when running with `strategy="ddp_spawn"` and `accelerator="cpu"`; this has a necessary memory impact, as parameters are replicated for each process now ([#18238](https://github.com/Lightning-AI/pytorch-lightning/pull/18238)) +- Properly manage `fetcher.done` with `dataloader_iter` ([#18376](https://github.com/Lightning-AI/pytorch-lightning/pull/18376)) ## [2.0.7] - 2023-08-14 ### Added -- Added `LightningOptimizer.refresh()` to update the `__dict__` in case the optimizer it wraps has changed its internal state ([#18280](https://github.com/Lightning-AI/lightning/pull/18280)) +- Added `LightningOptimizer.refresh()` to update the `__dict__` in case the optimizer it wraps has changed its internal state ([#18280](https://github.com/Lightning-AI/pytorch-lightning/pull/18280)) ### Changed -- Disabled the auto-detection of the Kubeflow environment ([#18137](https://github.com/Lightning-AI/lightning/pull/18137)) +- Disabled the auto-detection of the Kubeflow environment ([#18137](https://github.com/Lightning-AI/pytorch-lightning/pull/18137)) ### Fixed -- Fixed a `Missing folder` exception when using a Google Storage URL as a `default_root_dir` ([#18088](https://github.com/Lightning-AI/lightning/pull/18088)) -- Fixed an issue that would prevent the user to set the multiprocessing start method after importing lightning ([#18177](https://github.com/Lightning-AI/lightning/pull/18177)) -- Fixed the gradient unscaling logic if 
the training step skipped backward (by returning `None`) ([#18267](https://github.com/Lightning-AI/lightning/pull/18267))
-- Ensure that the closure running inside the optimizer step has gradients enabled, even if the optimizer step has it disabled ([#18268](https://github.com/Lightning-AI/lightning/pull/18268))
-- Fixed an issue that could cause the `LightningOptimizer` wrapper returned by `LightningModule.optimizers()` have different internal state than the optimizer it wraps ([#18280](https://github.com/Lightning-AI/lightning/pull/18280))
+- Fixed a `Missing folder` exception when using a Google Storage URL as a `default_root_dir` ([#18088](https://github.com/Lightning-AI/pytorch-lightning/pull/18088))
+- Fixed an issue that would prevent the user from setting the multiprocessing start method after importing lightning ([#18177](https://github.com/Lightning-AI/pytorch-lightning/pull/18177))
+- Fixed the gradient unscaling logic if the training step skipped backward (by returning `None`) ([#18267](https://github.com/Lightning-AI/pytorch-lightning/pull/18267))
+- Ensure that the closure running inside the optimizer step has gradients enabled, even if the optimizer step has it disabled ([#18268](https://github.com/Lightning-AI/pytorch-lightning/pull/18268))
+- Fixed an issue that could cause the `LightningOptimizer` wrapper returned by `LightningModule.optimizers()` to have different internal state than the optimizer it wraps ([#18280](https://github.com/Lightning-AI/pytorch-lightning/pull/18280))
## [2.0.6] - 2023-07-20
### Fixed
-- `LightningCLI` not saving correctly `seed_everything` when `run=True` and `seed_everything=True` ([#18056](https://github.com/Lightning-AI/lightning/pull/18056))
-- Fixed validation of non-PyTorch LR schedulers in manual optimization mode ([#18092](https://github.com/Lightning-AI/lightning/pull/18092))
-- Fixed an attribute error for `_FaultTolerantMode` when loading an old checkpoint that pickled the enum ([#18094](https://github.com/Lightning-AI/lightning/pull/18094))
+- Fixed `LightningCLI` not correctly saving `seed_everything` when `run=True` and `seed_everything=True` ([#18056](https://github.com/Lightning-AI/pytorch-lightning/pull/18056))
+- Fixed validation of non-PyTorch LR schedulers in manual optimization mode ([#18092](https://github.com/Lightning-AI/pytorch-lightning/pull/18092))
+- Fixed an attribute error for `_FaultTolerantMode` when loading an old checkpoint that pickled the enum ([#18094](https://github.com/Lightning-AI/pytorch-lightning/pull/18094))
## [2.0.5] - 2023-07-07
### Fixed
-- Fixed delayed creation of experiment metadata and checkpoint/log dir name when using `WandbLogger` ([#17818](https://github.com/Lightning-AI/lightning/pull/17818))
-- Fixed incorrect parsing of arguments when augmenting exception messages in DDP ([#17948](https://github.com/Lightning-AI/lightning/pull/17948))
-- Fixed an issue causing the `torch.set_float32_matmul_precision` info message to show multiple times ([#17960](https://github.com/Lightning-AI/lightning/pull/17960))
-- Added missing `map_location` argument for the `LightningDataModule.load_from_checkpoint` function ([#17950](https://github.com/Lightning-AI/lightning/pull/17950))
-- Fix support for `neptune-client` ([#17939](https://github.com/Lightning-AI/lightning/pull/17939))
+- Fixed delayed creation of experiment metadata and checkpoint/log dir name when using `WandbLogger` ([#17818](https://github.com/Lightning-AI/pytorch-lightning/pull/17818))
+- Fixed incorrect parsing of arguments when augmenting exception
messages in DDP ([#17948](https://github.com/Lightning-AI/pytorch-lightning/pull/17948)) +- Fixed an issue causing the `torch.set_float32_matmul_precision` info message to show multiple times ([#17960](https://github.com/Lightning-AI/pytorch-lightning/pull/17960)) +- Added missing `map_location` argument for the `LightningDataModule.load_from_checkpoint` function ([#17950](https://github.com/Lightning-AI/pytorch-lightning/pull/17950)) +- Fix support for `neptune-client` ([#17939](https://github.com/Lightning-AI/pytorch-lightning/pull/17939)) ## [2.0.4] - 2023-06-22 -- Added validation against misconfigured device selection when using the DeepSpeed strategy ([#17952](https://github.com/Lightning-AI/lightning/pull/17952)) +- Added validation against misconfigured device selection when using the DeepSpeed strategy ([#17952](https://github.com/Lightning-AI/pytorch-lightning/pull/17952)) ### Changed -- Changes to the `NeptuneLogger` ([#16761](https://github.com/Lightning-AI/lightning/pull/16761)): +- Changes to the `NeptuneLogger` ([#16761](https://github.com/Lightning-AI/pytorch-lightning/pull/16761)): * It now supports neptune-client 0.16.16 and neptune >=1.0, and we have replaced the `log()` method with `append()` and `extend()`. * It now accepts a namespace `Handler` as an alternative to `Run` for the `run` argument. This means that you can call it like `NeptuneLogger(run=run["some/namespace"])` to log everything to the `some/namespace/` location of the run. ### Fixed -- Fixed validation of parameters of `plugins.precision.MixedPrecisionPlugin` ([#17687](https://github.com/Lightning-AI/lightning/pull/17687)) -- Fixed deriving default map location in `LightningModule.load_from_checkpoint` when there is extra state ([#17812](https://github.com/Lightning-AI/lightning/pull/17812)) +- Fixed validation of parameters of `plugins.precision.MixedPrecisionPlugin` ([#17687](https://github.com/Lightning-AI/pytorch-lightning/pull/17687)) +- Fixed deriving default map location in `LightningModule.load_from_checkpoint` when there is extra state ([#17812](https://github.com/Lightning-AI/pytorch-lightning/pull/17812)) ## [2.0.3] - 2023-06-07 ### Changed -- Made type hints public ([#17100](https://github.com/Lightning-AI/lightning/pull/17100)) +- Made type hints public ([#17100](https://github.com/Lightning-AI/pytorch-lightning/pull/17100)) ### Fixed -- `CombinedLoader` only starts DataLoader workers when necessary when operating in sequential mode ([#17639](https://github.com/Lightning-AI/lightning/pull/17639)) -- Fixed a potential bug with uploading model checkpoints to Neptune.ai by uploading files from stream ([#17430](https://github.com/Lightning-AI/lightning/pull/17430)) -- Fixed signature inspection of decorated hooks ([#17507](https://github.com/Lightning-AI/lightning/pull/17507)) -- The `WandbLogger` no longer flattens dictionaries in the hyperparameters logged to the dashboard ([#17574](https://github.com/Lightning-AI/lightning/pull/17574)) -- Fixed computing the next version folder in `CSVLogger` ([#17139](https://github.com/Lightning-AI/lightning/pull/17139)) -- Fixed a formatting issue when the filename in `ModelCheckpoint` contained metrics that were substrings of each other ([#17610](https://github.com/Lightning-AI/lightning/pull/17610)) -- Fixed `WandbLogger` ignoring the `WANDB_PROJECT` environment variable ([#16222](https://github.com/Lightning-AI/lightning/pull/16222)) -- Fixed inconsistent settings for FSDP Precision ([#17670](https://github.com/Lightning-AI/lightning/pull/17670)) -- 
Fixed an edge case causing overlapping samples in DDP when no global seed is set ([#17713](https://github.com/Lightning-AI/lightning/pull/17713)) -- Fallback to module available check for mlflow ([#17467](https://github.com/Lightning-AI/lightning/pull/17467)) -- Fixed LR finder max val batches ([#17636](https://github.com/Lightning-AI/lightning/pull/17636)) -- Fixed multithreading checkpoint loading ([#17678](https://github.com/Lightning-AI/lightning/pull/17678)) +- `CombinedLoader` only starts DataLoader workers when necessary when operating in sequential mode ([#17639](https://github.com/Lightning-AI/pytorch-lightning/pull/17639)) +- Fixed a potential bug with uploading model checkpoints to Neptune.ai by uploading files from stream ([#17430](https://github.com/Lightning-AI/pytorch-lightning/pull/17430)) +- Fixed signature inspection of decorated hooks ([#17507](https://github.com/Lightning-AI/pytorch-lightning/pull/17507)) +- The `WandbLogger` no longer flattens dictionaries in the hyperparameters logged to the dashboard ([#17574](https://github.com/Lightning-AI/pytorch-lightning/pull/17574)) +- Fixed computing the next version folder in `CSVLogger` ([#17139](https://github.com/Lightning-AI/pytorch-lightning/pull/17139)) +- Fixed a formatting issue when the filename in `ModelCheckpoint` contained metrics that were substrings of each other ([#17610](https://github.com/Lightning-AI/pytorch-lightning/pull/17610)) +- Fixed `WandbLogger` ignoring the `WANDB_PROJECT` environment variable ([#16222](https://github.com/Lightning-AI/pytorch-lightning/pull/16222)) +- Fixed inconsistent settings for FSDP Precision ([#17670](https://github.com/Lightning-AI/pytorch-lightning/pull/17670)) +- Fixed an edge case causing overlapping samples in DDP when no global seed is set ([#17713](https://github.com/Lightning-AI/pytorch-lightning/pull/17713)) +- Fallback to module available check for mlflow ([#17467](https://github.com/Lightning-AI/pytorch-lightning/pull/17467)) +- Fixed LR finder max val batches ([#17636](https://github.com/Lightning-AI/pytorch-lightning/pull/17636)) +- Fixed multithreading checkpoint loading ([#17678](https://github.com/Lightning-AI/pytorch-lightning/pull/17678)) ## [2.0.2] - 2023-04-24 ### Fixed -- Fixed issue where `Model.load_from_checkpoint("checkpoint.ckpt", map_location=map_location)` would always return model on CPU ([#17308](https://github.com/Lightning-AI/lightning/pull/17308)) -- Fixed Sync module states during non-fit ([#17370](https://github.com/Lightning-AI/lightning/pull/17370)) -- Fixed an issue that caused `num_nodes` not to be set correctly for `FSDPStrategy` ([#17438](https://github.com/Lightning-AI/lightning/pull/17438)) +- Fixed issue where `Model.load_from_checkpoint("checkpoint.ckpt", map_location=map_location)` would always return model on CPU ([#17308](https://github.com/Lightning-AI/pytorch-lightning/pull/17308)) +- Fixed Sync module states during non-fit ([#17370](https://github.com/Lightning-AI/pytorch-lightning/pull/17370)) +- Fixed an issue that caused `num_nodes` not to be set correctly for `FSDPStrategy` ([#17438](https://github.com/Lightning-AI/pytorch-lightning/pull/17438)) ## [2.0.1] - 2023-03-30 ### Changed -- Pickling the `LightningModule` no longer pickles the `Trainer` ([#17133](https://github.com/Lightning-AI/lightning/pull/17133)) -- Generalized `Optimizer` validation to accommodate both FSDP 1.x and 2.x ([#16733](https://github.com/Lightning-AI/lightning/pull/16733)) -- Disable `torch.inference_mode` with `torch.compile` in PyTorch 2.0 
([#17215](https://github.com/Lightning-AI/lightning/pull/17215))
+- Pickling the `LightningModule` no longer pickles the `Trainer` ([#17133](https://github.com/Lightning-AI/pytorch-lightning/pull/17133))
+- Generalized `Optimizer` validation to accommodate both FSDP 1.x and 2.x ([#16733](https://github.com/Lightning-AI/pytorch-lightning/pull/16733))
+- Disable `torch.inference_mode` with `torch.compile` in PyTorch 2.0 ([#17215](https://github.com/Lightning-AI/pytorch-lightning/pull/17215))
### Fixed
-- Fixed issue where pickling the module instance would fail with a DataLoader error ([#17130](https://github.com/Lightning-AI/lightning/pull/17130))
-- Fixed WandbLogger not showing "best" aliases for model checkpoints when `ModelCheckpoint(save_top_k>0)` is used ([#17121](https://github.com/Lightning-AI/lightning/pull/17121))
-- Fixed the availability check for `rich` that prevented Lightning to be imported in Google Colab ([#17156](https://github.com/Lightning-AI/lightning/pull/17156))
-- Fixed parsing the precision config for inference in `DeepSpeedStrategy` ([#16973](https://github.com/Lightning-AI/lightning/pull/16973))
-- Fixed issue where `torch.compile` would fail when logging to WandB ([#17216](https://github.com/Lightning-AI/lightning/pull/17216))
-- Changed the `is_picklable` util function to handle the edge case that throws a `TypeError` ([#17270](https://github.com/Lightning-AI/lightning/pull/17270))
+- Fixed issue where pickling the module instance would fail with a DataLoader error ([#17130](https://github.com/Lightning-AI/pytorch-lightning/pull/17130))
+- Fixed WandbLogger not showing "best" aliases for model checkpoints when `ModelCheckpoint(save_top_k>0)` is used ([#17121](https://github.com/Lightning-AI/pytorch-lightning/pull/17121))
+- Fixed the availability check for `rich` that prevented Lightning from being imported in Google Colab ([#17156](https://github.com/Lightning-AI/pytorch-lightning/pull/17156))
+- Fixed parsing the precision config for inference in `DeepSpeedStrategy` ([#16973](https://github.com/Lightning-AI/pytorch-lightning/pull/16973))
+- Fixed issue where `torch.compile` would fail when logging to WandB ([#17216](https://github.com/Lightning-AI/pytorch-lightning/pull/17216))
+- Changed the `is_picklable` util function to handle the edge case that throws a `TypeError` ([#17270](https://github.com/Lightning-AI/pytorch-lightning/pull/17270))
## [2.0.0] - 2023-03-15
### Added
-- Added migration logic to warn about checkpoints with apex AMP state ([#16161](https://github.com/Lightning-AI/lightning/pull/16161))
-- Added the `Trainer.ckpt_path = ...` setter to statefully set the checkpoint path to load.
This can act as a replacement for the removed `Trainer(resume_from_checkpoint=...)` flag ([#16187](https://github.com/Lightning-AI/lightning/pull/16187)) -- Added an argument `include_cuda` in `pl.utilities.seed.isolate_rng` to disable managing `torch.cuda`'s rng ([#16423](https://github.com/Lightning-AI/lightning/pull/16423)) -- Added `Tuner.lr_find(attr_name=...)` to specify custom learning rate attribute names ([#16462](https://github.com/Lightning-AI/lightning/pull/16462)) -- Added an `OnExceptionCheckpoint` callback to save a checkpoint on exception ([#16512](https://github.com/Lightning-AI/lightning/pull/16512)) -- Added support for running the `MLFlowLogger` with the `mlflow-skinny` package ([16513](https://github.com/Lightning-AI/lightning/pull/16513)) -- Added a `Trainer.received_sigterm` property to check whether a SIGTERM signal was received ([#16501](https://github.com/Lightning-AI/lightning/pull/16501)) -- Added support for cascading a SIGTERM signal to launched processes after the launching process (rank 0) receives it ([#16525](https://github.com/Lightning-AI/lightning/pull/16525)) -- Added a `kill` method to launchers to kill all launched processes ([#16525](https://github.com/Lightning-AI/lightning/pull/16525)) -- Added suffix option to DDP strategy names to enable `find_unused_parameters=True`, for example `strategy="ddp_find_unused_parameters_true"` ([#16611](https://github.com/Lightning-AI/lightning/pull/16611)) -- Added a new method `Strategy.on_exception` to the strategy base interface ([#16646](https://github.com/Lightning-AI/lightning/pull/16646)) -- Added support for `predict_step(dataloader_iter, batch_index)` ([#16726](https://github.com/Lightning-AI/lightning/pull/16726)) -- Added support for arbitrary iterables as dataloaders ([#16726](https://github.com/Lightning-AI/lightning/pull/16726)) -- Added "sequential" mode support to `CombinedLoader` to consume multiple iterables in sequence ([#16743](https://github.com/Lightning-AI/lightning/pull/16743), [#16784](https://github.com/Lightning-AI/lightning/pull/16784)) -- Added "max_size" mode support to `CombinedLoader` to consume multiple iterables entirely without cycling ([#16939](https://github.com/Lightning-AI/lightning/pull/16939) -- Added a `Trainer(barebones=True)` argument where all features that may impact raw speed are disabled ([#16854](https://github.com/Lightning-AI/lightning/pull/16854)) -- Added support for writing logs remote file systems on `CSVLoggers`. ([#16880](https://github.com/Lightning-AI/lightning/pull/16880)) -- Added `DDPStrategy(start_method=...)` argument, defaulting to 'popen' ([#16809](https://github.com/Lightning-AI/lightning/pull/16809)) -- Added checks for whether the iterables used by the loops are valid ([#17007](https://github.com/Lightning-AI/lightning/pull/17007)) +- Added migration logic to warn about checkpoints with apex AMP state ([#16161](https://github.com/Lightning-AI/pytorch-lightning/pull/16161)) +- Added the `Trainer.ckpt_path = ...` setter to statefully set the checkpoint path to load. 
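A minimal usage sketch of the `Trainer.ckpt_path` setter just mentioned; the module import path and the checkpoint path below are placeholders, not part of the release:

```python
from lightning.pytorch import Trainer
from my_project import LitModel  # hypothetical user-defined LightningModule

model = LitModel()
trainer = Trainer(max_epochs=10)
# stateful alternative to the removed Trainer(resume_from_checkpoint=...) flag
trainer.ckpt_path = "checkpoints/last.ckpt"  # placeholder path
trainer.fit(model)  # resumes from the path set above
```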
This can act as a replacement for the removed `Trainer(resume_from_checkpoint=...)` flag ([#16187](https://github.com/Lightning-AI/pytorch-lightning/pull/16187))
+- Added an argument `include_cuda` in `pl.utilities.seed.isolate_rng` to disable managing `torch.cuda`'s rng ([#16423](https://github.com/Lightning-AI/pytorch-lightning/pull/16423))
+- Added `Tuner.lr_find(attr_name=...)` to specify custom learning rate attribute names ([#16462](https://github.com/Lightning-AI/pytorch-lightning/pull/16462))
+- Added an `OnExceptionCheckpoint` callback to save a checkpoint on exception ([#16512](https://github.com/Lightning-AI/pytorch-lightning/pull/16512))
+- Added support for running the `MLFlowLogger` with the `mlflow-skinny` package ([#16513](https://github.com/Lightning-AI/pytorch-lightning/pull/16513))
+- Added a `Trainer.received_sigterm` property to check whether a SIGTERM signal was received ([#16501](https://github.com/Lightning-AI/pytorch-lightning/pull/16501))
+- Added support for cascading a SIGTERM signal to launched processes after the launching process (rank 0) receives it ([#16525](https://github.com/Lightning-AI/pytorch-lightning/pull/16525))
+- Added a `kill` method to launchers to kill all launched processes ([#16525](https://github.com/Lightning-AI/pytorch-lightning/pull/16525))
+- Added suffix option to DDP strategy names to enable `find_unused_parameters=True`, for example `strategy="ddp_find_unused_parameters_true"` ([#16611](https://github.com/Lightning-AI/pytorch-lightning/pull/16611))
+- Added a new method `Strategy.on_exception` to the strategy base interface ([#16646](https://github.com/Lightning-AI/pytorch-lightning/pull/16646))
+- Added support for `predict_step(dataloader_iter, batch_index)` ([#16726](https://github.com/Lightning-AI/pytorch-lightning/pull/16726))
+- Added support for arbitrary iterables as dataloaders ([#16726](https://github.com/Lightning-AI/pytorch-lightning/pull/16726))
+- Added "sequential" mode support to `CombinedLoader` to consume multiple iterables in sequence ([#16743](https://github.com/Lightning-AI/pytorch-lightning/pull/16743), [#16784](https://github.com/Lightning-AI/pytorch-lightning/pull/16784))
+- Added "max_size" mode support to `CombinedLoader` to consume multiple iterables entirely without cycling ([#16939](https://github.com/Lightning-AI/pytorch-lightning/pull/16939))
+- Added a `Trainer(barebones=True)` argument where all features that may impact raw speed are disabled ([#16854](https://github.com/Lightning-AI/pytorch-lightning/pull/16854))
+- Added support for writing logs to remote file systems with `CSVLogger`.
([#16880](https://github.com/Lightning-AI/pytorch-lightning/pull/16880)) +- Added `DDPStrategy(start_method=...)` argument, defaulting to 'popen' ([#16809](https://github.com/Lightning-AI/pytorch-lightning/pull/16809)) +- Added checks for whether the iterables used by the loops are valid ([#17007](https://github.com/Lightning-AI/pytorch-lightning/pull/17007)) ### Changed -- The Trainer's signal handlers are now registered for `trainer.{validate,test,predict}` ([#17017](https://github.com/Lightning-AI/lightning/pull/17017)) -- Renamed `ProgressBarBase` to `ProgressBar` ([#17058](https://github.com/Lightning-AI/lightning/pull/17058)) -- The `Trainer` now chooses `accelerator="auto", strategy="auto", devices="auto"` as defaults ([#16847](https://github.com/Lightning-AI/lightning/pull/16847)) -- "Native" suffix removal ([#16490](https://github.com/Lightning-AI/lightning/pull/16490)) +- The Trainer's signal handlers are now registered for `trainer.{validate,test,predict}` ([#17017](https://github.com/Lightning-AI/pytorch-lightning/pull/17017)) +- Renamed `ProgressBarBase` to `ProgressBar` ([#17058](https://github.com/Lightning-AI/pytorch-lightning/pull/17058)) +- The `Trainer` now chooses `accelerator="auto", strategy="auto", devices="auto"` as defaults ([#16847](https://github.com/Lightning-AI/pytorch-lightning/pull/16847)) +- "Native" suffix removal ([#16490](https://github.com/Lightning-AI/pytorch-lightning/pull/16490)) * `strategy="fsdp_native"` is now `strategy="fsdp"` * `strategy="fsdp_native_full_shard_offload"` is now `strategy="fsdp_cpu_offload"` * `pl.strategies.fully_sharded_native.DDPFullyShardedNativeStrategy` is now `pl.strategies.fsdp.FSDPStrategy` * `pl.plugins.precision.fsdp_native_native_amp.FullyShardedNativeNativeMixedPrecisionPlugin` is now `pl.plugins.precision.fsdp.FSDPMixedPrecisionPlugin` * `pl.plugins.precision.native_amp` is now `pl.plugins.precision.amp` * `NativeSyncBatchNorm` is now `TorchSyncBatchNorm` -- Changed the default of `LearningRateFinder(update_attr=...)` and `Tuner.lr_find(update_attr=...)` to `True` ([#16462](https://github.com/Lightning-AI/lightning/pull/16462)) -- Renamed the `pl.utilities.exceptions.GracefulExitException` to `SIGTERMException` ([#16501](https://github.com/Lightning-AI/lightning/pull/16501)) -- The `Callback.on_train_epoch_end` hook now runs after the `LightningModule.on_train_epoch_end` hook for instances of `EarlyStopping` and `Checkpoint` callbacks ([#16567](https://github.com/Lightning-AI/lightning/pull/16567)) -- The `LightningModule.{un}toggle_optimizer` methods no longer accept a `optimizer_idx` argument to select the relevant optimizer. 
Instead, the optimizer object can be passed in directly ([#16560](https://github.com/Lightning-AI/lightning/pull/16560)) -- Manual optimization is now required for working with multiple optimizers ([#16539](https://github.com/Lightning-AI/lightning/pull/16539)) -- DDP's `find_unused_parameters` now defaults to `False` ([#16611](https://github.com/Lightning-AI/lightning/pull/16611)) -- The strategy selected by `accelerator="hpu"` now defaults to `find_unused_parameters=False` ([#16611](https://github.com/Lightning-AI/lightning/pull/16611)) -- The main progress bar displayed during training no longer includes the combined progress for validation ([#16695](https://github.com/Lightning-AI/lightning/pull/16695)) -- Renamed `TQDMProgressBar.main_progress_bar` to `TQDMProgressBar.train_progress_bar` ([#16695](https://github.com/Lightning-AI/lightning/pull/16695)) -- Marked the progress tracking classes as protected ([#17009](https://github.com/Lightning-AI/lightning/pull/17009)) -- Marked the `lightning.pytorch.trainer.configuration_validator.verify_loop_configurations` function as protected ([#17009](https://github.com/Lightning-AI/lightning/pull/17009)) -- Marked the `lightning.pytorch.utiltiies.distributed.register_ddp_comm_hook` function as protected ([#17009](https://github.com/Lightning-AI/lightning/pull/17009)) -- Marked `lightning.pytorch.utilities.supporters.CombinedDataset` as protected ([#16714](https://github.com/Lightning-AI/lightning/pull/16714)) -- Marked the `{Accelerator,Signal,Callback,Checkpoint,Data,Logger}Connector` classes as protected ([#17008](https://github.com/Lightning-AI/lightning/pull/17008)) -- Marked the `lightning.pytorch.trainer.connectors.signal_connector.HandlersCompose` class as protected ([#17008](https://github.com/Lightning-AI/lightning/pull/17008)) -- Disabled strict loading in multiprocessing launcher ("ddp_spawn", etc.) when loading weights back into the main process ([#16365](https://github.com/Lightning-AI/lightning/pull/16365)) -- Renamed `CombinedLoader.loaders` to `CombinedLoader.iterables` ([#16743](https://github.com/Lightning-AI/lightning/pull/16743)) -- Renamed `Trainer(replace_sampler_ddp=...)` to `Trainer(use_distributed_sampler=...)` ([#16829](https://github.com/Lightning-AI/lightning/pull/16829)) -- Moved the `CombinedLoader` class from `lightning.pytorch.trainer.supporters` to `lightning.pytorch.combined_loader` ([#16819](https://github.com/Lightning-AI/lightning/pull/16819)) -- The top-level loops now own the data sources and combined dataloaders ([#16726](https://github.com/Lightning-AI/lightning/pull/16726)) -- The `trainer.*_dataloader` properties now return what the user returned in their `LightningModule.*_dataloader()` hook ([#16726](https://github.com/Lightning-AI/lightning/pull/16726), [#16800](https://github.com/Lightning-AI/lightning/pull/16800)) -- The `dataloader_idx` argument is now optional for the `on_{validation,test,predict}_batch_{start,end}` hooks. 
Remove it or default it to 0 if you don't use multiple dataloaders ([#16753](https://github.com/Lightning-AI/lightning/pull/16753)) -- Renamed `TPUSpawnStrategy` to `XLAStrategy` ([#16781](https://github.com/Lightning-AI/lightning/pull/16781)) -- Renamed `strategy='tpu_spawn'` to `strategy='xla'` and `strategy='tpu_spawn_debug'` to `strategy='xla_debug'` ([#16781](https://github.com/Lightning-AI/lightning/pull/16781)) -- Changed arguments for precision settings (from [64|32|16|bf16] to ["64-true"|"32-true"|"16-mixed"|"bf16-mixed"]) ([#16783](https://github.com/Lightning-AI/lightning/pull/16783)) -- When using multiple devices, the strategy now defaults to "ddp" instead of "ddp_spawn" when none is set ([#16780](https://github.com/Lightning-AI/lightning/pull/16780)) -- The selection `Trainer(strategy="ddp_spawn", ...)` no longer falls back to "ddp" when a cluster environment gets detected ([#16780](https://github.com/Lightning-AI/lightning/pull/16780)) -- Predict's custom BatchSampler that tracks the batch indices no longer consumes the entire batch sampler at the beginning ([#16826](https://github.com/Lightning-AI/lightning/pull/16826)) -- Gradient norm tracking with `track_grad_norm` no longer rounds the norms to 4 digits, but instead logs them at full resolution ([#16877](https://github.com/Lightning-AI/lightning/pull/16877)) -- Merged the `DDPSpawnStrategy` into `DDPStrategy` ([#16809](https://github.com/Lightning-AI/lightning/pull/16809)) -- The `NeptuneLogger` now requires `neptune>=1.0.0` ([#16888](https://github.com/Lightning-AI/lightning/pull/16888)) -- Changed minimum supported version of `rich` from `10.14.0` to `12.13.0` ([#16798](https://github.com/Lightning-AI/lightning/pull/16798)) -- Removed the `lightning.pytorch.overrides.torch_distributed.broadcast_object_list` function ([#17011](https://github.com/Lightning-AI/lightning/pull/17011)) -- The `ServableModule` is now an abstract interface ([#17000](https://github.com/Lightning-AI/lightning/pull/17000)) -- The `psutil` package is now required for CPU monitoring ([#17010](https://github.com/Lightning-AI/lightning/pull/17010)) -- The Trainer no longer accepts positional arguments to ([#17022](https://github.com/Lightning-AI/lightning/pull/17022)) +- Changed the default of `LearningRateFinder(update_attr=...)` and `Tuner.lr_find(update_attr=...)` to `True` ([#16462](https://github.com/Lightning-AI/pytorch-lightning/pull/16462)) +- Renamed the `pl.utilities.exceptions.GracefulExitException` to `SIGTERMException` ([#16501](https://github.com/Lightning-AI/pytorch-lightning/pull/16501)) +- The `Callback.on_train_epoch_end` hook now runs after the `LightningModule.on_train_epoch_end` hook for instances of `EarlyStopping` and `Checkpoint` callbacks ([#16567](https://github.com/Lightning-AI/pytorch-lightning/pull/16567)) +- The `LightningModule.{un}toggle_optimizer` methods no longer accept a `optimizer_idx` argument to select the relevant optimizer. 
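A minimal sketch of the new `toggle_optimizer` call pattern, as shown below; the two-optimizer `LitGAN` module is hypothetical:

```python
import lightning.pytorch as pl


class LitGAN(pl.LightningModule):  # hypothetical two-optimizer module
    def __init__(self):
        super().__init__()
        self.automatic_optimization = False  # multiple optimizers require manual optimization

    def training_step(self, batch, batch_idx):
        opt_g, opt_d = self.optimizers()
        self.toggle_optimizer(opt_g)  # pass the optimizer object, not an index
        # ... generator update ...
        self.untoggle_optimizer(opt_g)
```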
Instead, the optimizer object can be passed in directly ([#16560](https://github.com/Lightning-AI/pytorch-lightning/pull/16560)) +- Manual optimization is now required for working with multiple optimizers ([#16539](https://github.com/Lightning-AI/pytorch-lightning/pull/16539)) +- DDP's `find_unused_parameters` now defaults to `False` ([#16611](https://github.com/Lightning-AI/pytorch-lightning/pull/16611)) +- The strategy selected by `accelerator="hpu"` now defaults to `find_unused_parameters=False` ([#16611](https://github.com/Lightning-AI/pytorch-lightning/pull/16611)) +- The main progress bar displayed during training no longer includes the combined progress for validation ([#16695](https://github.com/Lightning-AI/pytorch-lightning/pull/16695)) +- Renamed `TQDMProgressBar.main_progress_bar` to `TQDMProgressBar.train_progress_bar` ([#16695](https://github.com/Lightning-AI/pytorch-lightning/pull/16695)) +- Marked the progress tracking classes as protected ([#17009](https://github.com/Lightning-AI/pytorch-lightning/pull/17009)) +- Marked the `lightning.pytorch.trainer.configuration_validator.verify_loop_configurations` function as protected ([#17009](https://github.com/Lightning-AI/pytorch-lightning/pull/17009)) +- Marked the `lightning.pytorch.utiltiies.distributed.register_ddp_comm_hook` function as protected ([#17009](https://github.com/Lightning-AI/pytorch-lightning/pull/17009)) +- Marked `lightning.pytorch.utilities.supporters.CombinedDataset` as protected ([#16714](https://github.com/Lightning-AI/pytorch-lightning/pull/16714)) +- Marked the `{Accelerator,Signal,Callback,Checkpoint,Data,Logger}Connector` classes as protected ([#17008](https://github.com/Lightning-AI/pytorch-lightning/pull/17008)) +- Marked the `lightning.pytorch.trainer.connectors.signal_connector.HandlersCompose` class as protected ([#17008](https://github.com/Lightning-AI/pytorch-lightning/pull/17008)) +- Disabled strict loading in multiprocessing launcher ("ddp_spawn", etc.) when loading weights back into the main process ([#16365](https://github.com/Lightning-AI/pytorch-lightning/pull/16365)) +- Renamed `CombinedLoader.loaders` to `CombinedLoader.iterables` ([#16743](https://github.com/Lightning-AI/pytorch-lightning/pull/16743)) +- Renamed `Trainer(replace_sampler_ddp=...)` to `Trainer(use_distributed_sampler=...)` ([#16829](https://github.com/Lightning-AI/pytorch-lightning/pull/16829)) +- Moved the `CombinedLoader` class from `lightning.pytorch.trainer.supporters` to `lightning.pytorch.combined_loader` ([#16819](https://github.com/Lightning-AI/pytorch-lightning/pull/16819)) +- The top-level loops now own the data sources and combined dataloaders ([#16726](https://github.com/Lightning-AI/pytorch-lightning/pull/16726)) +- The `trainer.*_dataloader` properties now return what the user returned in their `LightningModule.*_dataloader()` hook ([#16726](https://github.com/Lightning-AI/pytorch-lightning/pull/16726), [#16800](https://github.com/Lightning-AI/pytorch-lightning/pull/16800)) +- The `dataloader_idx` argument is now optional for the `on_{validation,test,predict}_batch_{start,end}` hooks. 
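A minimal sketch of a hook written without the now-optional `dataloader_idx`; the callback class is hypothetical:

```python
import lightning.pytorch as pl


class SingleLoaderCallback(pl.Callback):  # hypothetical callback, for illustration
    # with a single validation dataloader, `dataloader_idx` can simply be omitted
    def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
        print(f"finished validation batch {batch_idx}")
```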
Remove it or default it to 0 if you don't use multiple dataloaders ([#16753](https://github.com/Lightning-AI/pytorch-lightning/pull/16753)) +- Renamed `TPUSpawnStrategy` to `XLAStrategy` ([#16781](https://github.com/Lightning-AI/pytorch-lightning/pull/16781)) +- Renamed `strategy='tpu_spawn'` to `strategy='xla'` and `strategy='tpu_spawn_debug'` to `strategy='xla_debug'` ([#16781](https://github.com/Lightning-AI/pytorch-lightning/pull/16781)) +- Changed arguments for precision settings (from [64|32|16|bf16] to ["64-true"|"32-true"|"16-mixed"|"bf16-mixed"]) ([#16783](https://github.com/Lightning-AI/pytorch-lightning/pull/16783)) +- When using multiple devices, the strategy now defaults to "ddp" instead of "ddp_spawn" when none is set ([#16780](https://github.com/Lightning-AI/pytorch-lightning/pull/16780)) +- The selection `Trainer(strategy="ddp_spawn", ...)` no longer falls back to "ddp" when a cluster environment gets detected ([#16780](https://github.com/Lightning-AI/pytorch-lightning/pull/16780)) +- Predict's custom BatchSampler that tracks the batch indices no longer consumes the entire batch sampler at the beginning ([#16826](https://github.com/Lightning-AI/pytorch-lightning/pull/16826)) +- Gradient norm tracking with `track_grad_norm` no longer rounds the norms to 4 digits, but instead logs them at full resolution ([#16877](https://github.com/Lightning-AI/pytorch-lightning/pull/16877)) +- Merged the `DDPSpawnStrategy` into `DDPStrategy` ([#16809](https://github.com/Lightning-AI/pytorch-lightning/pull/16809)) +- The `NeptuneLogger` now requires `neptune>=1.0.0` ([#16888](https://github.com/Lightning-AI/pytorch-lightning/pull/16888)) +- Changed minimum supported version of `rich` from `10.14.0` to `12.13.0` ([#16798](https://github.com/Lightning-AI/pytorch-lightning/pull/16798)) +- Removed the `lightning.pytorch.overrides.torch_distributed.broadcast_object_list` function ([#17011](https://github.com/Lightning-AI/pytorch-lightning/pull/17011)) +- The `ServableModule` is now an abstract interface ([#17000](https://github.com/Lightning-AI/pytorch-lightning/pull/17000)) +- The `psutil` package is now required for CPU monitoring ([#17010](https://github.com/Lightning-AI/pytorch-lightning/pull/17010)) +- The Trainer no longer accepts positional arguments to ([#17022](https://github.com/Lightning-AI/pytorch-lightning/pull/17022)) ### Removed -- Removed support for PyTorch 1.10 ([#16492](https://github.com/Lightning-AI/lightning/pull/16492)) -- Removed support for Python 3.7 ([#16579](https://github.com/Lightning-AI/lightning/pull/16579)) -- Removed the `pl.lite` module in favor of `lightning_fabric` ([#15953](https://github.com/Lightning-AI/lightning/pull/15953)) -- `nvidia/apex` removal ([#16149](https://github.com/Lightning-AI/lightning/pull/16149)) +- Removed support for PyTorch 1.10 ([#16492](https://github.com/Lightning-AI/pytorch-lightning/pull/16492)) +- Removed support for Python 3.7 ([#16579](https://github.com/Lightning-AI/pytorch-lightning/pull/16579)) +- Removed the `pl.lite` module in favor of `lightning_fabric` ([#15953](https://github.com/Lightning-AI/pytorch-lightning/pull/15953)) +- `nvidia/apex` removal ([#16149](https://github.com/Lightning-AI/pytorch-lightning/pull/16149)) * Removed `pl.plugins.NativeMixedPrecisionPlugin` in favor of `pl.plugins.MixedPrecisionPlugin` * Removed the `LightningModule.optimizer_step(using_native_amp=...)` argument * Removed the `Trainer(amp_backend=...)` argument @@ -587,94 +587,94 @@ The format is based on [Keep a 
Changelog](http://keepachangelog.com/en/1.0.0/). * Removed the `pl.plugins.ApexMixedPrecisionPlugin` class * Removed the `pl.utilities.enums.AMPType` enum * Removed the `DeepSpeedPrecisionPlugin(amp_type=..., amp_level=...)` arguments -- Removed `Trainer(strategy='horovod')` support ([#16150](https://github.com/Lightning-AI/lightning/pull/16150)) -- `FairScale` removal (in favor of PyTorch's FSDP implementation) ([#16400](https://github.com/Lightning-AI/lightning/pull/16400)) +- Removed `Trainer(strategy='horovod')` support ([#16150](https://github.com/Lightning-AI/pytorch-lightning/pull/16150)) +- `FairScale` removal (in favor of PyTorch's FSDP implementation) ([#16400](https://github.com/Lightning-AI/pytorch-lightning/pull/16400)) * Removed the `pl.overrides.fairscale.LightningShardedDataParallel` class * Removed the `pl.plugins.precision.fully_sharded_native_amp.FullyShardedNativeMixedPrecisionPlugin` class * Removed the `pl.plugins.precision.sharded_native_amp.ShardedNativeMixedPrecisionPlugin` class * Removed the `pl.strategies.fully_sharded.DDPFullyShardedStrategy` (fsdp) class * Removed the `pl.strategies.sharded.DDPShardedStrategy` (ddp_sharded) class * Removed the `pl.strategies.sharded_spawn.DDPSpawnShardedStrategy` (ddp_sharded_spawn) class -- Removed legacy device arguments in Trainer ([#16171](https://github.com/Lightning-AI/lightning/pull/16171)) +- Removed legacy device arguments in Trainer ([#16171](https://github.com/Lightning-AI/pytorch-lightning/pull/16171)) * Removed the `Trainer(gpus=...)` argument * Removed the `Trainer(tpu_cores=...)` argument * Removed the `Trainer(ipus=...)` argument * Removed the `Trainer(num_processes=...)` argument -- Removed the deprecated `pl.utilities.AllGatherGrad` class ([#16360](https://github.com/Lightning-AI/lightning/pull/16360)) -- Removed the deprecated `resume_from_checkpoint` Trainer argument ([#16167](https://github.com/Lightning-AI/lightning/pull/16167)) -- Removed the deprecated `pl.profiler` module ([#16359](https://github.com/Lightning-AI/lightning/pull/16359)) -- Removed deadlock detection / process reconciliation (`PL_RECONCILE_PROCESS=1`) ([#16204](https://github.com/Lightning-AI/lightning/pull/16204)) -- Removed the `{training,validation,test}_epoch_end` hooks which would retain step outputs in memory. Alternative implementations are suggested by implementing their `on_*_epoch_end` hooks instead ([#16520](https://github.com/Lightning-AI/lightning/pull/16520)) -- Removed the `outputs` argument from the `on_predict_epoch_end` hook. 
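For the `*_epoch_end` removal above, a minimal migration sketch that collects step outputs manually and aggregates them in `on_train_epoch_end`; the `LitClassifier` module and its `compute_loss` helper are hypothetical:

```python
import torch
import lightning.pytorch as pl


class LitClassifier(pl.LightningModule):  # hypothetical migration sketch
    def __init__(self):
        super().__init__()
        self._step_losses = []  # collect outputs manually; `training_epoch_end` is gone

    def training_step(self, batch, batch_idx):
        loss = self.compute_loss(batch)  # assumed user-defined helper
        self._step_losses.append(loss.detach())
        return loss

    def on_train_epoch_end(self):
        self.log("train_loss_epoch", torch.stack(self._step_losses).mean())
        self._step_losses.clear()  # release memory each epoch, unlike the old hook
```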
You can access them via `trainer.predict_loop.predictions` ([#16655](https://github.com/Lightning-AI/lightning/pull/16655)) -- Removed support for the experimental `PL_FAULT_TOLERANT_TRAINING` environment flag ([#16516](https://github.com/Lightning-AI/lightning/pull/16516), [#16533](https://github.com/Lightning-AI/lightning/pull/16533)) -- Removed the deprecated `LightningCLI` arguments ([#16380](https://github.com/Lightning-AI/lightning/pull/16380)) +- Removed the deprecated `pl.utilities.AllGatherGrad` class ([#16360](https://github.com/Lightning-AI/pytorch-lightning/pull/16360)) +- Removed the deprecated `resume_from_checkpoint` Trainer argument ([#16167](https://github.com/Lightning-AI/pytorch-lightning/pull/16167)) +- Removed the deprecated `pl.profiler` module ([#16359](https://github.com/Lightning-AI/pytorch-lightning/pull/16359)) +- Removed deadlock detection / process reconciliation (`PL_RECONCILE_PROCESS=1`) ([#16204](https://github.com/Lightning-AI/pytorch-lightning/pull/16204)) +- Removed the `{training,validation,test}_epoch_end` hooks which would retain step outputs in memory. Alternative implementations are suggested by implementing their `on_*_epoch_end` hooks instead ([#16520](https://github.com/Lightning-AI/pytorch-lightning/pull/16520)) +- Removed the `outputs` argument from the `on_predict_epoch_end` hook. You can access them via `trainer.predict_loop.predictions` ([#16655](https://github.com/Lightning-AI/pytorch-lightning/pull/16655)) +- Removed support for the experimental `PL_FAULT_TOLERANT_TRAINING` environment flag ([#16516](https://github.com/Lightning-AI/pytorch-lightning/pull/16516), [#16533](https://github.com/Lightning-AI/pytorch-lightning/pull/16533)) +- Removed the deprecated `LightningCLI` arguments ([#16380](https://github.com/Lightning-AI/pytorch-lightning/pull/16380)) * `save_config_filename` * `save_config_overwrite` * `save_config_multifile` * `description` * `env_prefix` * `env_parse` -- Removed the deprecated `pl.strategies.utils.on_colab_kaggle` function ([#16437](https://github.com/Lightning-AI/lightning/pull/16437)) +- Removed the deprecated `pl.strategies.utils.on_colab_kaggle` function ([#16437](https://github.com/Lightning-AI/pytorch-lightning/pull/16437)) - Removed the deprecated code in: - * `pl.core.mixins` ([#16424](https://github.com/Lightning-AI/lightning/pull/16424)) - * `pl.utilities.distributed` ([#16390](https://github.com/Lightning-AI/lightning/pull/16390)) - * `pl.utilities.apply_func` ([#16413](https://github.com/Lightning-AI/lightning/pull/16413)) - * `pl.utilities.xla_device` ([#16404](https://github.com/Lightning-AI/lightning/pull/16404)) - * `pl.utilities.data` ([#16440](https://github.com/Lightning-AI/lightning/pull/16440)) - * `pl.utilities.device_parser` ([#16412](https://github.com/Lightning-AI/lightning/pull/16412)) - * `pl.utilities.optimizer` ([#16439](https://github.com/Lightning-AI/lightning/pull/16439)) - * `pl.utilities.seed` ([#16422](https://github.com/Lightning-AI/lightning/pull/16422)) - * `pl.utilities.cloud_io` ([#16438](https://github.com/Lightning-AI/lightning/pull/16438)) -- Removed the deprecated `Accelerator.setup_environment` method ([#16436](https://github.com/Lightning-AI/lightning/pull/16436)) -- Mark the `forward_module` argument as required ([#16386](https://github.com/Lightning-AI/lightning/pull/16386)) + * `pl.core.mixins` ([#16424](https://github.com/Lightning-AI/pytorch-lightning/pull/16424)) + * `pl.utilities.distributed` ([#16390](https://github.com/Lightning-AI/pytorch-lightning/pull/16390)) + 
* `pl.utilities.apply_func` ([#16413](https://github.com/Lightning-AI/pytorch-lightning/pull/16413)) + * `pl.utilities.xla_device` ([#16404](https://github.com/Lightning-AI/pytorch-lightning/pull/16404)) + * `pl.utilities.data` ([#16440](https://github.com/Lightning-AI/pytorch-lightning/pull/16440)) + * `pl.utilities.device_parser` ([#16412](https://github.com/Lightning-AI/pytorch-lightning/pull/16412)) + * `pl.utilities.optimizer` ([#16439](https://github.com/Lightning-AI/pytorch-lightning/pull/16439)) + * `pl.utilities.seed` ([#16422](https://github.com/Lightning-AI/pytorch-lightning/pull/16422)) + * `pl.utilities.cloud_io` ([#16438](https://github.com/Lightning-AI/pytorch-lightning/pull/16438)) +- Removed the deprecated `Accelerator.setup_environment` method ([#16436](https://github.com/Lightning-AI/pytorch-lightning/pull/16436)) +- Mark the `forward_module` argument as required ([#16386](https://github.com/Lightning-AI/pytorch-lightning/pull/16386)) * Removed the deprecated `pl_module` argument from the distributed module wrappers * Removed the deprecated `pl.overrides.base.unwrap_lightning_module` function * Removed the `pl.overrides.distributed.LightningDistributedModule` class * Removed the deprecated `pl.overrides.fairscale.unwrap_lightning_module_sharded` function * Removed the `pl.overrides.fairscale.LightningDistributedModule` class -- Removed the deprecated automatic GPU selection ([#16184](https://github.com/Lightning-AI/lightning/pull/16184)) +- Removed the deprecated automatic GPU selection ([#16184](https://github.com/Lightning-AI/pytorch-lightning/pull/16184)) * Removed the `Trainer(auto_select_gpus=...)` argument * Removed the `pl.tuner.auto_gpu_select.{pick_single_gpu,pick_multiple_gpus}` functions - Removed support for loop customization - * Removed `Loop.replace()` ([#16361](https://github.com/Lightning-AI/lightning/pull/16361)) - * Removed `Loop.connect()` ([#16384](https://github.com/Lightning-AI/lightning/pull/16384)) - * Removed the `trainer.{fit,validate,test,predict}_loop` properties ([#16384](https://github.com/Lightning-AI/lightning/pull/16384)) - * Removed the default `Loop.run()` implementation ([#16384](https://github.com/Lightning-AI/lightning/pull/16384)) - * The loop classes are now marked as protected ([#16445](https://github.com/Lightning-AI/lightning/pull/16445)) - * The fetching classes are now marked as protected ([#16664](https://github.com/Lightning-AI/lightning/pull/16664)) -- The `lightning.pytorch.overrides.distributed.IndexBatchSamplerWrapper` class is now marked as protected ([#16826](https://github.com/Lightning-AI/lightning/pull/16826)) -- Removed the `DataLoaderLoop`, `EvaluationEpochLoop`, and `PredictionEpochLoop` classes ([#16726](https://github.com/Lightning-AI/lightning/pull/16726)) -- Removed `trainer.reset_*_dataloader()` methods in favor of `Loop.setup_data()` for the top-level loops ([#16726](https://github.com/Lightning-AI/lightning/pull/16726)) -- Removed special support for truncated backpropagation through time (TBPTT) ([#16172](https://github.com/Lightning-AI/lightning/pull/16172)) + * Removed `Loop.replace()` ([#16361](https://github.com/Lightning-AI/pytorch-lightning/pull/16361)) + * Removed `Loop.connect()` ([#16384](https://github.com/Lightning-AI/pytorch-lightning/pull/16384)) + * Removed the `trainer.{fit,validate,test,predict}_loop` properties ([#16384](https://github.com/Lightning-AI/pytorch-lightning/pull/16384)) + * Removed the default `Loop.run()` implementation 
([#16384](https://github.com/Lightning-AI/pytorch-lightning/pull/16384)) + * The loop classes are now marked as protected ([#16445](https://github.com/Lightning-AI/pytorch-lightning/pull/16445)) + * The fetching classes are now marked as protected ([#16664](https://github.com/Lightning-AI/pytorch-lightning/pull/16664)) +- The `lightning.pytorch.overrides.distributed.IndexBatchSamplerWrapper` class is now marked as protected ([#16826](https://github.com/Lightning-AI/pytorch-lightning/pull/16826)) +- Removed the `DataLoaderLoop`, `EvaluationEpochLoop`, and `PredictionEpochLoop` classes ([#16726](https://github.com/Lightning-AI/pytorch-lightning/pull/16726)) +- Removed `trainer.reset_*_dataloader()` methods in favor of `Loop.setup_data()` for the top-level loops ([#16726](https://github.com/Lightning-AI/pytorch-lightning/pull/16726)) +- Removed special support for truncated backpropagation through time (TBPTT) ([#16172](https://github.com/Lightning-AI/pytorch-lightning/pull/16172)) * Removed the `LightningModule.truncated_bptt_steps` attribute * Removed the `LightningModule.tbptt_split_batch` hook * The `LightningModule.training_step` no longer accepts a `hiddens` argument * Removed the `pl.loops.batch.TrainingBatchLoop` * Removed the `FitLoop.split_idx` property * Removed the `LoggerConnector.on_train_split_start` method -- Removed the experimental `PL_INTER_BATCH_PARALLELISM` environment flag ([#16355](https://github.com/Lightning-AI/lightning/pull/16355)) -- Removed the `Trainer(move_metrics_to_cpu=True)` argument ([#16358](https://github.com/Lightning-AI/lightning/pull/16358)) -- Removed the `LightningModule.precision` attribute ([#16203](https://github.com/Lightning-AI/lightning/pull/16203)) -- Removed the automatic addition of a moving average of the `training_step` loss in the progress bar. Use `self.log("loss", ..., prog_bar=True)` instead. ([#16192](https://github.com/Lightning-AI/lightning/pull/16192)) -- Removed support for passing a dictionary value to `self.log()` ([#16389](https://github.com/Lightning-AI/lightning/pull/16389)) -- Removed `Trainer.model` setter ([#16462](https://github.com/Lightning-AI/lightning/pull/16462)) -- Removed the argument `Trainer(multiple_trainloader_mode=...)`. 
You can use `CombinedLoader(..., mode=...)` directly now ([#16800](https://github.com/Lightning-AI/lightning/pull/16800)) -- Removed the unused `lightning.pytorch.utilities.finite_checks.print_nan_gradients` function ([#16682](https://github.com/Lightning-AI/lightning/pull/16682)) -- Removed the unused `lightning.pytorch.utilities.finite_checks.detect_nan_parameters` function ([#16682](https://github.com/Lightning-AI/lightning/pull/16682)) -- Removed the unused `lightning.pytorch.utilities.parsing.flatten_dict` function ([#16744](https://github.com/Lightning-AI/lightning/pull/16744)) -- Removed the unused `lightning.pytorch.utilities.metrics.metrics_to_scalars` function ([#16681](https://github.com/Lightning-AI/lightning/pull/16681)) -- Removed the unused `lightning.pytorch.utilities.supporters.{SharedCycleIteratorState,CombinedLoaderIterator}` classes ([#16714](https://github.com/Lightning-AI/lightning/pull/16714)) +- Removed the experimental `PL_INTER_BATCH_PARALLELISM` environment flag ([#16355](https://github.com/Lightning-AI/pytorch-lightning/pull/16355)) +- Removed the `Trainer(move_metrics_to_cpu=True)` argument ([#16358](https://github.com/Lightning-AI/pytorch-lightning/pull/16358)) +- Removed the `LightningModule.precision` attribute ([#16203](https://github.com/Lightning-AI/pytorch-lightning/pull/16203)) +- Removed the automatic addition of a moving average of the `training_step` loss in the progress bar. Use `self.log("loss", ..., prog_bar=True)` instead. ([#16192](https://github.com/Lightning-AI/pytorch-lightning/pull/16192)) +- Removed support for passing a dictionary value to `self.log()` ([#16389](https://github.com/Lightning-AI/pytorch-lightning/pull/16389)) +- Removed `Trainer.model` setter ([#16462](https://github.com/Lightning-AI/pytorch-lightning/pull/16462)) +- Removed the argument `Trainer(multiple_trainloader_mode=...)`. 
You can use `CombinedLoader(..., mode=...)` directly now ([#16800](https://github.com/Lightning-AI/pytorch-lightning/pull/16800)) +- Removed the unused `lightning.pytorch.utilities.finite_checks.print_nan_gradients` function ([#16682](https://github.com/Lightning-AI/pytorch-lightning/pull/16682)) +- Removed the unused `lightning.pytorch.utilities.finite_checks.detect_nan_parameters` function ([#16682](https://github.com/Lightning-AI/pytorch-lightning/pull/16682)) +- Removed the unused `lightning.pytorch.utilities.parsing.flatten_dict` function ([#16744](https://github.com/Lightning-AI/pytorch-lightning/pull/16744)) +- Removed the unused `lightning.pytorch.utilities.metrics.metrics_to_scalars` function ([#16681](https://github.com/Lightning-AI/pytorch-lightning/pull/16681)) +- Removed the unused `lightning.pytorch.utilities.supporters.{SharedCycleIteratorState,CombinedLoaderIterator}` classes ([#16714](https://github.com/Lightning-AI/pytorch-lightning/pull/16714)) - Tuner removal - * Removed the deprecated `trainer.tuning` property ([#16379](https://github.com/Lightning-AI/lightning/pull/16379)) - * Removed the deprecated `TrainerFn.TUNING` and `RunningStage.TUNING` enums ([#16379](https://github.com/Lightning-AI/lightning/pull/16379)) - * Removed `Trainer.tune()` in favor of `Tuner(trainer).{lr_find,scale_batch_size}` ([#16462](https://github.com/Lightning-AI/lightning/pull/16462)) - * Removed `Trainer(auto_scale_batch_size=...)` in favor of `Tuner(trainer).scale_batch_size()` ([#16462](https://github.com/Lightning-AI/lightning/pull/16462)) - * Removed `Trainer(auto_lr_find=...)` in favor of `Tuner(trainer).lr_find()` ([#16462](https://github.com/Lightning-AI/lightning/pull/16462)) -- Removed the `on_tpu` argument from `LightningModule.optimizer_step` hook ([#16537](https://github.com/Lightning-AI/lightning/pull/16537)) -- Removed the `using_lbfgs` argument from `LightningModule.optimizer_step` hook ([#16538](https://github.com/Lightning-AI/lightning/pull/16538)) -- Removed the `Trainer.data_parallel` property. Use `isinstance(trainer.strategy, ParallelStrategy)` instead ([#16703](https://github.com/Lightning-AI/lightning/pull/16703)) -- Removed the `Trainer.prediction_writer_callbacks` property ([#16759](https://github.com/Lightning-AI/lightning/pull/16759)) -- Removed support for multiple optimizers in automatic optimization mode ([#16539](https://github.com/Lightning-AI/lightning/pull/16539)) + * Removed the deprecated `trainer.tuning` property ([#16379](https://github.com/Lightning-AI/pytorch-lightning/pull/16379)) + * Removed the deprecated `TrainerFn.TUNING` and `RunningStage.TUNING` enums ([#16379](https://github.com/Lightning-AI/pytorch-lightning/pull/16379)) + * Removed `Trainer.tune()` in favor of `Tuner(trainer).{lr_find,scale_batch_size}` ([#16462](https://github.com/Lightning-AI/pytorch-lightning/pull/16462)) + * Removed `Trainer(auto_scale_batch_size=...)` in favor of `Tuner(trainer).scale_batch_size()` ([#16462](https://github.com/Lightning-AI/pytorch-lightning/pull/16462)) + * Removed `Trainer(auto_lr_find=...)` in favor of `Tuner(trainer).lr_find()` ([#16462](https://github.com/Lightning-AI/pytorch-lightning/pull/16462)) +- Removed the `on_tpu` argument from `LightningModule.optimizer_step` hook ([#16537](https://github.com/Lightning-AI/pytorch-lightning/pull/16537)) +- Removed the `using_lbfgs` argument from `LightningModule.optimizer_step` hook ([#16538](https://github.com/Lightning-AI/pytorch-lightning/pull/16538)) +- Removed the `Trainer.data_parallel` property. 
Use `isinstance(trainer.strategy, ParallelStrategy)` instead ([#16703](https://github.com/Lightning-AI/pytorch-lightning/pull/16703)) +- Removed the `Trainer.prediction_writer_callbacks` property ([#16759](https://github.com/Lightning-AI/pytorch-lightning/pull/16759)) +- Removed support for multiple optimizers in automatic optimization mode ([#16539](https://github.com/Lightning-AI/pytorch-lightning/pull/16539)) * Removed `opt_idx` argument from `BaseFinetuning.finetune_function` callback method * Removed `opt_idx` argument from `Callback.on_before_optimizer_step` callback method * Removed `optimizer_idx` as an optional argument in `LightningModule.training_step` @@ -689,118 +689,118 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). * Removed `optimizer_idx` argument from `PrecisionPlugin.{optimizer_step,backward}` and all of its overrides in subclasses * Removed `optimizer_idx` argument from `Strategy.{optimizer_step,backward}` and all of its overrides in subclasses * Removed `Trainer.optimizer_frequencies` attribute -- Removed `Strategy.dispatch` ([#16618](https://github.com/Lightning-AI/lightning/pull/16618)) -- Removed `PrecisionPlugin.dispatch` ([#16618](https://github.com/Lightning-AI/lightning/pull/16618)) -- Removed legacy argparse utilities ([#16708](https://github.com/Lightning-AI/lightning/pull/16708)) +- Removed `Strategy.dispatch` ([#16618](https://github.com/Lightning-AI/pytorch-lightning/pull/16618)) +- Removed `PrecisionPlugin.dispatch` ([#16618](https://github.com/Lightning-AI/pytorch-lightning/pull/16618)) +- Removed legacy argparse utilities ([#16708](https://github.com/Lightning-AI/pytorch-lightning/pull/16708)) * Removed `LightningDataModule` methods: `add_argparse_args()`, `from_argparse_args()`, `parse_argparser()`, `get_init_arguments_and_types()` * Removed class methods from Trainer: `default_attributes()`, `from_argparse_args()`, `parse_argparser()`, `match_env_arguments()`, `add_argparse_args()` * Removed functions from `lightning.pytorch.utilities.argparse`: `from_argparse_args()`, `parse_argparser()`, `parse_env_variables()`, `get_init_arguments_and_types()`, `add_argparse_args()` * Removed functions from `lightning.pytorch.utilities.parsing`: `import str_to_bool()`, `str_to_bool_or_int()`, `str_to_bool_or_str()` -- Removed support for passing a scheduling dictionary to `Trainer(accumulate_grad_batches=...)` ([#16729](https://github.com/Lightning-AI/lightning/pull/16729)) -- Removed support for `DataParallel` (`strategy='dp'`) and the `LightningParallelModule`-Wrapper, ([#16748](https://github.com/Lightning-AI/lightning/pull/16748)) -- Removed the unused `lightning.pytorch.utilities.supporters.{SharedCycleIteratorState,CombinedLoaderIterator}` classes ([#16714](https://github.com/Lightning-AI/lightning/pull/16714)) -- Removed `ProgressBarBase.{train_batch_idx,val_batch_idx,test_batch_idx,predict_batch_idx}` properties ([#16760](https://github.com/Lightning-AI/lightning/pull/16760)) -- Removed the `fit_loop.{min,max}_steps` setters ([#16803](https://github.com/Lightning-AI/lightning/pull/16803)) -- Removed the `Trainer(track_grad_norm=...)` argument ([#16745](https://github.com/Lightning-AI/lightning/pull/16745)) -- Removed the `LightningModule.log_grad_norm()` hook method ([#16745](https://github.com/Lightning-AI/lightning/pull/16745)) -- Removed the `QuantizationAwareTraining` callback ([#16750](https://github.com/Lightning-AI/lightning/pull/16750)) -- Removed the `ColossalAIStrategy` and `ColossalAIPrecisionPlugin` in favor of 
the new [lightning-colossalai](https://github.com/Lightning-AI/lightning-colossalai) package ([#16757](https://github.com/Lightning-AI/lightning/pull/16757), [#16778](https://github.com/Lightning-AI/lightning/pull/16778))
-- Removed the `training_step_end`, `validation_step_end`, and `test_step_end` hooks from the `LightningModule` in favor of the `*_batch_end` hooks ([#16791](https://github.com/Lightning-AI/lightning/pull/16791))
-- Removed the `lightning.pytorch.strategies.DDPSpawnStrategy` in favor of `DDPStrategy(start_method='spawn')` (merged both classes) ([#16809](https://github.com/Lightning-AI/lightning/pull/16809))
-- Removed registration of `ShardedTensor` state dict hooks in `LightningModule.__init__` with `torch>=2.1` ([#16892](https://github.com/Lightning-AI/lightning/pull/16892))
-- Removed the `lightning.pytorch.core.saving.ModelIO` class interface ([#16999](https://github.com/Lightning-AI/lightning/pull/16999))
-- Removed the unused `lightning.pytorch.utilities.memory.get_model_size_mb` function ([#17001](https://github.com/Lightning-AI/lightning/pull/17001))
+- Removed support for passing a scheduling dictionary to `Trainer(accumulate_grad_batches=...)` ([#16729](https://github.com/Lightning-AI/pytorch-lightning/pull/16729))
+- Removed support for `DataParallel` (`strategy='dp'`) and the `LightningParallelModule` wrapper ([#16748](https://github.com/Lightning-AI/pytorch-lightning/pull/16748))
+- Removed the unused `lightning.pytorch.utilities.supporters.{SharedCycleIteratorState,CombinedLoaderIterator}` classes ([#16714](https://github.com/Lightning-AI/pytorch-lightning/pull/16714))
+- Removed `ProgressBarBase.{train_batch_idx,val_batch_idx,test_batch_idx,predict_batch_idx}` properties ([#16760](https://github.com/Lightning-AI/pytorch-lightning/pull/16760))
+- Removed the `fit_loop.{min,max}_steps` setters ([#16803](https://github.com/Lightning-AI/pytorch-lightning/pull/16803))
+- Removed the `Trainer(track_grad_norm=...)` argument ([#16745](https://github.com/Lightning-AI/pytorch-lightning/pull/16745))
+- Removed the `LightningModule.log_grad_norm()` hook method ([#16745](https://github.com/Lightning-AI/pytorch-lightning/pull/16745))
+- Removed the `QuantizationAwareTraining` callback ([#16750](https://github.com/Lightning-AI/pytorch-lightning/pull/16750))
+- Removed the `ColossalAIStrategy` and `ColossalAIPrecisionPlugin` in favor of the new [lightning-colossalai](https://github.com/Lightning-AI/lightning-colossalai) package ([#16757](https://github.com/Lightning-AI/pytorch-lightning/pull/16757), [#16778](https://github.com/Lightning-AI/pytorch-lightning/pull/16778))
+- Removed the `training_step_end`, `validation_step_end`, and `test_step_end` hooks from the `LightningModule` in favor of the `*_batch_end` hooks ([#16791](https://github.com/Lightning-AI/pytorch-lightning/pull/16791))
+- Removed the `lightning.pytorch.strategies.DDPSpawnStrategy` in favor of `DDPStrategy(start_method='spawn')` (merged both classes) ([#16809](https://github.com/Lightning-AI/pytorch-lightning/pull/16809))
+- Removed registration of `ShardedTensor` state dict hooks in `LightningModule.__init__` with `torch>=2.1` ([#16892](https://github.com/Lightning-AI/pytorch-lightning/pull/16892))
+- Removed the `lightning.pytorch.core.saving.ModelIO` class interface ([#16999](https://github.com/Lightning-AI/pytorch-lightning/pull/16999))
+- Removed the unused `lightning.pytorch.utilities.memory.get_model_size_mb` function ([#17001](https://github.com/Lightning-AI/pytorch-lightning/pull/17001))

### 
Fixed -- Fixed an issue where `DistributedSampler.set_epoch` wasn't getting called during `trainer.predict` ([#16785](https://github.com/Lightning-AI/lightning/pull/16785), [#16826](https://github.com/Lightning-AI/lightning/pull/16826)) +- Fixed an issue where `DistributedSampler.set_epoch` wasn't getting called during `trainer.predict` ([#16785](https://github.com/Lightning-AI/pytorch-lightning/pull/16785), [#16826](https://github.com/Lightning-AI/pytorch-lightning/pull/16826)) -- Fixed an issue with comparing torch versions when using a version of torch built from source ([#17030](https://github.com/Lightning-AI/lightning/pull/17030)) +- Fixed an issue with comparing torch versions when using a version of torch built from source ([#17030](https://github.com/Lightning-AI/pytorch-lightning/pull/17030)) -- Improved the error message for installing tensorboard or tensorboardx ([#17053](https://github.com/Lightning-AI/lightning/pull/17053)) +- Improved the error message for installing tensorboard or tensorboardx ([#17053](https://github.com/Lightning-AI/pytorch-lightning/pull/17053)) ## [1.9.4] - 2023-03-01 ### Added -- Added `Fabric(strategy="auto")` support. It will choose DDP over DDP-spawn, contrary to `strategy=None` (default) ([#16916](https://github.com/Lightning-AI/lightning/pull/16916)) +- Added `Fabric(strategy="auto")` support. It will choose DDP over DDP-spawn, contrary to `strategy=None` (default) ([#16916](https://github.com/Lightning-AI/pytorch-lightning/pull/16916)) ### Fixed -- Fixed DDP spawn hang on TPU Pods ([#16844](https://github.com/Lightning-AI/lightning/pull/16844)) -- Fixed edge cases in parsing device ids using NVML ([#16795](https://github.com/Lightning-AI/lightning/pull/16795)) -- Fixed backwards compatibility for `lightning.pytorch.utilities.parsing.get_init_args` ([#16851](https://github.com/Lightning-AI/lightning/pull/16851)) +- Fixed DDP spawn hang on TPU Pods ([#16844](https://github.com/Lightning-AI/pytorch-lightning/pull/16844)) +- Fixed edge cases in parsing device ids using NVML ([#16795](https://github.com/Lightning-AI/pytorch-lightning/pull/16795)) +- Fixed backwards compatibility for `lightning.pytorch.utilities.parsing.get_init_args` ([#16851](https://github.com/Lightning-AI/pytorch-lightning/pull/16851)) ## [1.9.3] - 2023-02-21 ### Fixed -- Fixed an issue causing a wrong environment plugin to be selected when `accelerator=tpu` and `devices > 1` ([#16806](https://github.com/Lightning-AI/lightning/pull/16806)) +- Fixed an issue causing a wrong environment plugin to be selected when `accelerator=tpu` and `devices > 1` ([#16806](https://github.com/Lightning-AI/pytorch-lightning/pull/16806)) ## [1.9.2] - 2023-02-15 ### Fixed -- Fixed an attribute error and improved input validation for invalid strategy types being passed to Trainer ([#16693](https://github.com/Lightning-AI/lightning/pull/16693)) -- Fixed early stopping triggering extra validation runs after reaching `min_epochs` or `min_steps` ([#16719](https://github.com/Lightning-AI/lightning/pull/16719)) +- Fixed an attribute error and improved input validation for invalid strategy types being passed to Trainer ([#16693](https://github.com/Lightning-AI/pytorch-lightning/pull/16693)) +- Fixed early stopping triggering extra validation runs after reaching `min_epochs` or `min_steps` ([#16719](https://github.com/Lightning-AI/pytorch-lightning/pull/16719)) ## [1.9.1] - 2023-02-10 ### Fixed -- Fixed an unintended limitation for calling `save_hyperparameters` on mixin classes that don't subclass 
`LightningModule`/`LightningDataModule` ([#16369](https://github.com/Lightning-AI/lightning/pull/16369))
-- Fixed an issue with `MLFlowLogger` logging the wrong keys with `.log_hyperparams()` ([#16418](https://github.com/Lightning-AI/lightning/pull/16418))
-- Fixed logging more than 100 parameters with `MLFlowLogger` and long values are truncated ([#16451](https://github.com/Lightning-AI/lightning/pull/16451))
-- Fixed strict availability check for `torch_xla` requirement ([#16476](https://github.com/Lightning-AI/lightning/pull/16476))
-- Fixed an issue where PL would wrap DataLoaders with XLA's MpDeviceLoader more than once ([#16571](https://github.com/Lightning-AI/lightning/pull/16571))
-- Fixed the batch_sampler reference for DataLoaders wrapped with XLA's MpDeviceLoader ([#16571](https://github.com/Lightning-AI/lightning/pull/16571))
-- Fixed an import error when `torch.distributed` is not available ([#16658](https://github.com/Lightning-AI/lightning/pull/16658))
+- Fixed an unintended limitation for calling `save_hyperparameters` on mixin classes that don't subclass `LightningModule`/`LightningDataModule` ([#16369](https://github.com/Lightning-AI/pytorch-lightning/pull/16369))
+- Fixed an issue with `MLFlowLogger` logging the wrong keys with `.log_hyperparams()` ([#16418](https://github.com/Lightning-AI/pytorch-lightning/pull/16418))
+- Fixed logging more than 100 parameters with `MLFlowLogger` and truncation of long values ([#16451](https://github.com/Lightning-AI/pytorch-lightning/pull/16451))
+- Fixed strict availability check for `torch_xla` requirement ([#16476](https://github.com/Lightning-AI/pytorch-lightning/pull/16476))
+- Fixed an issue where PL would wrap DataLoaders with XLA's MpDeviceLoader more than once ([#16571](https://github.com/Lightning-AI/pytorch-lightning/pull/16571))
+- Fixed the `batch_sampler` reference for DataLoaders wrapped with XLA's MpDeviceLoader ([#16571](https://github.com/Lightning-AI/pytorch-lightning/pull/16571))
+- Fixed an import error when `torch.distributed` is not available ([#16658](https://github.com/Lightning-AI/pytorch-lightning/pull/16658))

## [1.9.0] - 2023-01-17

### Added

-- Added support for native logging of `MetricCollection` with enabled compute groups ([#15580](https://github.com/Lightning-AI/lightning/pull/15580))
-- Added support for custom artifact names in `pl.loggers.WandbLogger` ([#16173](https://github.com/Lightning-AI/lightning/pull/16173))
-- Added support for DDP with `LRFinder` ([#15304](https://github.com/Lightning-AI/lightning/pull/15304))
-- Added utilities to migrate checkpoints from one Lightning version to another ([#15237](https://github.com/Lightning-AI/lightning/pull/15237))
-- Added support to upgrade all checkpoints in a folder using the `pl.utilities.upgrade_checkpoint` script ([#15333](https://github.com/Lightning-AI/lightning/pull/15333))
-- Add an axes argument `ax` to the `.lr_find().plot()` to enable writing to a user-defined axes in a matplotlib figure ([#15652](https://github.com/Lightning-AI/lightning/pull/15652))
-- Added `log_model` parameter to `MLFlowLogger` ([#9187](https://github.com/Lightning-AI/lightning/pull/9187))
-- Added a check to validate that wrapped FSDP models are used while initializing optimizers ([#15301](https://github.com/Lightning-AI/lightning/pull/15301))
-- Added a warning when `self.log(..., logger=True)` is called without a configured logger ([#15814](https://github.com/Lightning-AI/lightning/pull/15814))
-- Added support for colossalai 0.1.11 
([#15888](https://github.com/Lightning-AI/lightning/pull/15888))
-- Added `LightningCLI` support for optimizer and learning schedulers via callable type dependency injection ([#15869](https://github.com/Lightning-AI/lightning/pull/15869))
-- Added support for activation checkpointing for the `DDPFullyShardedNativeStrategy` strategy ([#15826](https://github.com/Lightning-AI/lightning/pull/15826))
-- Added the option to set `DDPFullyShardedNativeStrategy(cpu_offload=True|False)` via bool instead of needing to pass a configuration object ([#15832](https://github.com/Lightning-AI/lightning/pull/15832))
-- Added info message for Ampere CUDA GPU users to enable tf32 matmul precision ([#16037](https://github.com/Lightning-AI/lightning/pull/16037))
-- Added support for returning optimizer-like classes in `LightningModule.configure_optimizers` ([#16189](https://github.com/Lightning-AI/lightning/pull/16189))
+- Added support for native logging of `MetricCollection` with enabled compute groups ([#15580](https://github.com/Lightning-AI/pytorch-lightning/pull/15580))
+- Added support for custom artifact names in `pl.loggers.WandbLogger` ([#16173](https://github.com/Lightning-AI/pytorch-lightning/pull/16173))
+- Added support for DDP with `LRFinder` ([#15304](https://github.com/Lightning-AI/pytorch-lightning/pull/15304))
+- Added utilities to migrate checkpoints from one Lightning version to another ([#15237](https://github.com/Lightning-AI/pytorch-lightning/pull/15237))
+- Added support to upgrade all checkpoints in a folder using the `pl.utilities.upgrade_checkpoint` script ([#15333](https://github.com/Lightning-AI/pytorch-lightning/pull/15333))
+- Add an axes argument `ax` to the `.lr_find().plot()` to enable writing to a user-defined axes in a matplotlib figure ([#15652](https://github.com/Lightning-AI/pytorch-lightning/pull/15652))
+- Added `log_model` parameter to `MLFlowLogger` ([#9187](https://github.com/Lightning-AI/pytorch-lightning/pull/9187))
+- Added a check to validate that wrapped FSDP models are used while initializing optimizers ([#15301](https://github.com/Lightning-AI/pytorch-lightning/pull/15301))
+- Added a warning when `self.log(..., logger=True)` is called without a configured logger ([#15814](https://github.com/Lightning-AI/pytorch-lightning/pull/15814))
+- Added support for colossalai 0.1.11 ([#15888](https://github.com/Lightning-AI/pytorch-lightning/pull/15888))
+- Added `LightningCLI` support for optimizers and learning rate schedulers via callable type dependency injection ([#15869](https://github.com/Lightning-AI/pytorch-lightning/pull/15869))
+- Added support for activation checkpointing for the `DDPFullyShardedNativeStrategy` strategy ([#15826](https://github.com/Lightning-AI/pytorch-lightning/pull/15826))
+- Added the option to set `DDPFullyShardedNativeStrategy(cpu_offload=True|False)` via bool instead of needing to pass a configuration object ([#15832](https://github.com/Lightning-AI/pytorch-lightning/pull/15832))
+- Added an info message for Ampere CUDA GPU users to enable tf32 matmul precision ([#16037](https://github.com/Lightning-AI/pytorch-lightning/pull/16037))
+- Added support for returning optimizer-like classes in `LightningModule.configure_optimizers` ([#16189](https://github.com/Lightning-AI/pytorch-lightning/pull/16189))

### Changed

-- Drop PyTorch 1.9 support ([#15347](https://github.com/Lightning-AI/lightning/pull/15347))
-- Switch from `tensorboard` to `tensorboardx` in `TensorBoardLogger` ([#15728](https://github.com/Lightning-AI/lightning/pull/15728))
-- From 
now on, Lightning Trainer and `LightningModule.load_from_checkpoint` automatically upgrade the loaded checkpoint if it was produced in an old version of Lightning ([#15237](https://github.com/Lightning-AI/lightning/pull/15237))
-- `Trainer.{validate,test,predict}(ckpt_path=...)` no longer restores the `Trainer.global_step` and `trainer.current_epoch` value from the checkpoints - From now on, only `Trainer.fit` will restore this value ([#15532](https://github.com/Lightning-AI/lightning/pull/15532))
-- The `ModelCheckpoint.save_on_train_epoch_end` attribute is now computed dynamically every epoch, accounting for changes to the validation dataloaders ([#15300](https://github.com/Lightning-AI/lightning/pull/15300))
-- The Trainer now raises an error if it is given multiple stateful callbacks of the same time with colliding state keys ([#15634](https://github.com/Lightning-AI/lightning/pull/15634))
-- `MLFlowLogger` now logs hyperparameters and metrics in batched API calls ([#15915](https://github.com/Lightning-AI/lightning/pull/15915))
-- Overriding the `on_train_batch_{start,end}` hooks in conjunction with taking a `dataloader_iter` in the `training_step` no longer errors out and instead shows a warning ([#16062](https://github.com/Lightning-AI/lightning/pull/16062))
-- Move `tensorboardX` to extra dependencies. Use the `CSVLogger` by default ([#16349](https://github.com/Lightning-AI/lightning/pull/16349))
+- Drop PyTorch 1.9 support ([#15347](https://github.com/Lightning-AI/pytorch-lightning/pull/15347))
+- Switch from `tensorboard` to `tensorboardx` in `TensorBoardLogger` ([#15728](https://github.com/Lightning-AI/pytorch-lightning/pull/15728))
+- From now on, Lightning Trainer and `LightningModule.load_from_checkpoint` automatically upgrade the loaded checkpoint if it was produced in an old version of Lightning ([#15237](https://github.com/Lightning-AI/pytorch-lightning/pull/15237))
+- `Trainer.{validate,test,predict}(ckpt_path=...)` no longer restores the `Trainer.global_step` and `trainer.current_epoch` values from the checkpoints. From now on, only `Trainer.fit` will restore these values ([#15532](https://github.com/Lightning-AI/pytorch-lightning/pull/15532))
+- The `ModelCheckpoint.save_on_train_epoch_end` attribute is now computed dynamically every epoch, accounting for changes to the validation dataloaders ([#15300](https://github.com/Lightning-AI/pytorch-lightning/pull/15300))
+- The Trainer now raises an error if it is given multiple stateful callbacks of the same type with colliding state keys ([#15634](https://github.com/Lightning-AI/pytorch-lightning/pull/15634))
+- `MLFlowLogger` now logs hyperparameters and metrics in batched API calls ([#15915](https://github.com/Lightning-AI/pytorch-lightning/pull/15915))
+- Overriding the `on_train_batch_{start,end}` hooks in conjunction with taking a `dataloader_iter` in the `training_step` no longer errors out and instead shows a warning ([#16062](https://github.com/Lightning-AI/pytorch-lightning/pull/16062))
+- Move `tensorboardX` to extra dependencies. 
Use the `CSVLogger` by default ([#16349](https://github.com/Lightning-AI/pytorch-lightning/pull/16349)) ### Deprecated -- Deprecated `description`, `env_prefix` and `env_parse` parameters in `LightningCLI.__init__` in favour of giving them through `parser_kwargs` ([#15651](https://github.com/Lightning-AI/lightning/pull/15651)) -- Deprecated `pl.profiler` in favor of `pl.profilers` ([#16059](https://github.com/Lightning-AI/lightning/pull/16059)) -- Deprecated `Trainer(auto_select_gpus=...)` in favor of `pl.accelerators.find_usable_cuda_devices` ([#16147](https://github.com/Lightning-AI/lightning/pull/16147)) -- Deprecated `pl.tuner.auto_gpu_select.{pick_single_gpu,pick_multiple_gpus}` in favor of `pl.accelerators.find_usable_cuda_devices` ([#16147](https://github.com/Lightning-AI/lightning/pull/16147)) -- `nvidia/apex` deprecation ([#16039](https://github.com/Lightning-AI/lightning/pull/16039)) +- Deprecated `description`, `env_prefix` and `env_parse` parameters in `LightningCLI.__init__` in favour of giving them through `parser_kwargs` ([#15651](https://github.com/Lightning-AI/pytorch-lightning/pull/15651)) +- Deprecated `pl.profiler` in favor of `pl.profilers` ([#16059](https://github.com/Lightning-AI/pytorch-lightning/pull/16059)) +- Deprecated `Trainer(auto_select_gpus=...)` in favor of `pl.accelerators.find_usable_cuda_devices` ([#16147](https://github.com/Lightning-AI/pytorch-lightning/pull/16147)) +- Deprecated `pl.tuner.auto_gpu_select.{pick_single_gpu,pick_multiple_gpus}` in favor of `pl.accelerators.find_usable_cuda_devices` ([#16147](https://github.com/Lightning-AI/pytorch-lightning/pull/16147)) +- `nvidia/apex` deprecation ([#16039](https://github.com/Lightning-AI/pytorch-lightning/pull/16039)) * Deprecated `pl.plugins.NativeMixedPrecisionPlugin` in favor of `pl.plugins.MixedPrecisionPlugin` * Deprecated the `LightningModule.optimizer_step(using_native_amp=...)` argument * Deprecated the `Trainer(amp_backend=...)` argument @@ -809,11 +809,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). * Deprecated the `pl.plugins.ApexMixedPrecisionPlugin` class * Deprecates the `pl.utilities.enums.AMPType` enum * Deprecates the `DeepSpeedPrecisionPlugin(amp_type=..., amp_level=...)` arguments -- `horovod` deprecation ([#16141](https://github.com/Lightning-AI/lightning/pull/16141)) +- `horovod` deprecation ([#16141](https://github.com/Lightning-AI/pytorch-lightning/pull/16141)) * Deprecated `Trainer(strategy="horovod")` * Deprecated the `HorovodStrategy` class -- Deprecated `pl.lite.LightningLite` in favor of `lightning.fabric.Fabric` ([#16314](https://github.com/Lightning-AI/lightning/pull/16314)) -- `FairScale` deprecation (in favor of PyTorch's FSDP implementation) ([#16353](https://github.com/Lightning-AI/lightning/pull/16353)) +- Deprecated `pl.lite.LightningLite` in favor of `lightning.fabric.Fabric` ([#16314](https://github.com/Lightning-AI/pytorch-lightning/pull/16314)) +- `FairScale` deprecation (in favor of PyTorch's FSDP implementation) ([#16353](https://github.com/Lightning-AI/pytorch-lightning/pull/16353)) * Deprecated the `pl.overrides.fairscale.LightningShardedDataParallel` class * Deprecated the `pl.plugins.precision.fully_sharded_native_amp.FullyShardedNativeMixedPrecisionPlugin` class * Deprecated the `pl.plugins.precision.sharded_native_amp.ShardedNativeMixedPrecisionPlugin` class @@ -824,31 +824,31 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
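The `pl.lite.LightningLite` deprecation above maps essentially one-to-one onto the `lightning.fabric.Fabric` API. The following is a minimal sketch of the replacement, not taken from this changelog; the toy linear model, SGD settings, and random data are placeholders:

```python
# Minimal sketch of the Fabric API that replaces `pl.lite.LightningLite`.
# The linear model, optimizer settings, and random data are placeholders.
import torch
from torch.utils.data import DataLoader, TensorDataset
from lightning.fabric import Fabric

fabric = Fabric(accelerator="cpu", devices=1)  # previously: LightningLite(...)
fabric.launch()

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
model, optimizer = fabric.setup(model, optimizer)  # wraps model and optimizer

dataset = TensorDataset(torch.randn(32, 4), torch.randn(32, 1))
dataloader = fabric.setup_dataloaders(DataLoader(dataset, batch_size=8))

for x, y in dataloader:
    optimizer.zero_grad()
    loss = torch.nn.functional.mse_loss(model(x), y)
    fabric.backward(loss)  # replaces loss.backward()
    optimizer.step()
```

The only mandatory substitution in the loop itself is `fabric.backward(loss)`, which lets the strategy and precision plugins hook into the backward pass.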
### Removed -- Removed deprecated `pl.utilities.memory.get_gpu_memory_map` in favor of `pl.accelerators.cuda.get_nvidia_gpu_stats` ([#15617](https://github.com/Lightning-AI/lightning/pull/15617)) -- Temporarily removed support for Hydra multi-run ([#15737](https://github.com/Lightning-AI/lightning/pull/15737)) -- Removed deprecated `pl.profiler.base.AbstractProfiler` in favor of `pl.profilers.profiler.Profiler` ([#15637](https://github.com/Lightning-AI/lightning/pull/15637)) -- Removed deprecated `pl.profiler.base.BaseProfiler` in favor of `pl.profilers.profiler.Profiler` ([#15637](https://github.com/Lightning-AI/lightning/pull/15637)) -- Removed deprecated code in `pl.utilities.meta` ([#16038](https://github.com/Lightning-AI/lightning/pull/16038)) -- Removed the deprecated `LightningDeepSpeedModule` ([#16041](https://github.com/Lightning-AI/lightning/pull/16041)) -- Removed the deprecated `pl.accelerators.GPUAccelerator` in favor of `pl.accelerators.CUDAAccelerator` ([#16050](https://github.com/Lightning-AI/lightning/pull/16050)) -- Removed the deprecated `pl.profiler.*` classes in favor of `pl.profilers` ([#16059](https://github.com/Lightning-AI/lightning/pull/16059)) -- Removed the deprecated `pl.utilities.cli` module in favor of `pl.cli` ([#16116](https://github.com/Lightning-AI/lightning/pull/16116)) -- Removed the deprecated `pl.loggers.base` module in favor of `pl.loggers.logger` ([#16120](https://github.com/Lightning-AI/lightning/pull/16120)) -- Removed the deprecated `pl.loops.base` module in favor of `pl.loops.loop` ([#16142](https://github.com/Lightning-AI/lightning/pull/16142)) -- Removed the deprecated `pl.core.lightning` module in favor of `pl.core.module` ([#16318](https://github.com/Lightning-AI/lightning/pull/16318)) -- Removed the deprecated `pl.callbacks.base` module in favor of `pl.callbacks.callback` ([#16319](https://github.com/Lightning-AI/lightning/pull/16319)) -- Removed the deprecated `Trainer.reset_train_val_dataloaders()` in favor of `Trainer.reset_{train,val}_dataloader` ([#16131](https://github.com/Lightning-AI/lightning/pull/16131)) -- Removed support for `LightningCLI(seed_everything_default=None)` ([#16131](https://github.com/Lightning-AI/lightning/pull/16131)) -- Removed support in LightningLite for FairScale's sharded training (`strategy='ddp_sharded'|'ddp_sharded_spawn'`). 
Use Fully-Sharded Data Parallel instead (`strategy='fsdp'`) ([#16329](https://github.com/Lightning-AI/lightning/pull/16329)) +- Removed deprecated `pl.utilities.memory.get_gpu_memory_map` in favor of `pl.accelerators.cuda.get_nvidia_gpu_stats` ([#15617](https://github.com/Lightning-AI/pytorch-lightning/pull/15617)) +- Temporarily removed support for Hydra multi-run ([#15737](https://github.com/Lightning-AI/pytorch-lightning/pull/15737)) +- Removed deprecated `pl.profiler.base.AbstractProfiler` in favor of `pl.profilers.profiler.Profiler` ([#15637](https://github.com/Lightning-AI/pytorch-lightning/pull/15637)) +- Removed deprecated `pl.profiler.base.BaseProfiler` in favor of `pl.profilers.profiler.Profiler` ([#15637](https://github.com/Lightning-AI/pytorch-lightning/pull/15637)) +- Removed deprecated code in `pl.utilities.meta` ([#16038](https://github.com/Lightning-AI/pytorch-lightning/pull/16038)) +- Removed the deprecated `LightningDeepSpeedModule` ([#16041](https://github.com/Lightning-AI/pytorch-lightning/pull/16041)) +- Removed the deprecated `pl.accelerators.GPUAccelerator` in favor of `pl.accelerators.CUDAAccelerator` ([#16050](https://github.com/Lightning-AI/pytorch-lightning/pull/16050)) +- Removed the deprecated `pl.profiler.*` classes in favor of `pl.profilers` ([#16059](https://github.com/Lightning-AI/pytorch-lightning/pull/16059)) +- Removed the deprecated `pl.utilities.cli` module in favor of `pl.cli` ([#16116](https://github.com/Lightning-AI/pytorch-lightning/pull/16116)) +- Removed the deprecated `pl.loggers.base` module in favor of `pl.loggers.logger` ([#16120](https://github.com/Lightning-AI/pytorch-lightning/pull/16120)) +- Removed the deprecated `pl.loops.base` module in favor of `pl.loops.loop` ([#16142](https://github.com/Lightning-AI/pytorch-lightning/pull/16142)) +- Removed the deprecated `pl.core.lightning` module in favor of `pl.core.module` ([#16318](https://github.com/Lightning-AI/pytorch-lightning/pull/16318)) +- Removed the deprecated `pl.callbacks.base` module in favor of `pl.callbacks.callback` ([#16319](https://github.com/Lightning-AI/pytorch-lightning/pull/16319)) +- Removed the deprecated `Trainer.reset_train_val_dataloaders()` in favor of `Trainer.reset_{train,val}_dataloader` ([#16131](https://github.com/Lightning-AI/pytorch-lightning/pull/16131)) +- Removed support for `LightningCLI(seed_everything_default=None)` ([#16131](https://github.com/Lightning-AI/pytorch-lightning/pull/16131)) +- Removed support in LightningLite for FairScale's sharded training (`strategy='ddp_sharded'|'ddp_sharded_spawn'`). 
Use Fully-Sharded Data Parallel instead (`strategy='fsdp'`) ([#16329](https://github.com/Lightning-AI/pytorch-lightning/pull/16329)) ### Fixed -- Enhanced `reduce_boolean_decision` to accommodate `any`-analogous semantics expected by the `EarlyStopping` callback ([#15253](https://github.com/Lightning-AI/lightning/pull/15253)) -- Fixed the incorrect optimizer step synchronization when running across multiple TPU devices ([#16020](https://github.com/Lightning-AI/lightning/pull/16020)) -- Fixed a type error when dividing the chunk size in the ColossalAI strategy ([#16212](https://github.com/Lightning-AI/lightning/pull/16212)) -- Fixed bug where the ``interval`` key of the scheduler would be ignored during manual optimization, making the LearningRateMonitor callback fail to log the learning rate ([#16308](https://github.com/Lightning-AI/lightning/pull/16308)) -- Fixed an issue with `MLFlowLogger` not finalizing correctly when status code 'finished' was passed ([#16340](https://github.com/Lightning-AI/lightning/pull/16340)) +- Enhanced `reduce_boolean_decision` to accommodate `any`-analogous semantics expected by the `EarlyStopping` callback ([#15253](https://github.com/Lightning-AI/pytorch-lightning/pull/15253)) +- Fixed the incorrect optimizer step synchronization when running across multiple TPU devices ([#16020](https://github.com/Lightning-AI/pytorch-lightning/pull/16020)) +- Fixed a type error when dividing the chunk size in the ColossalAI strategy ([#16212](https://github.com/Lightning-AI/pytorch-lightning/pull/16212)) +- Fixed bug where the ``interval`` key of the scheduler would be ignored during manual optimization, making the LearningRateMonitor callback fail to log the learning rate ([#16308](https://github.com/Lightning-AI/pytorch-lightning/pull/16308)) +- Fixed an issue with `MLFlowLogger` not finalizing correctly when status code 'finished' was passed ([#16340](https://github.com/Lightning-AI/pytorch-lightning/pull/16340)) ## [1.8.6] - 2022-12-21 @@ -858,7 +858,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ## [1.8.5] - 2022-12-15 -- Add function to remove checkpoint to allow override for extended classes ([#16067](https://github.com/Lightning-AI/lightning/pull/16067)) +- Add function to remove checkpoint to allow override for extended classes ([#16067](https://github.com/Lightning-AI/pytorch-lightning/pull/16067)) ## [1.8.4] - 2022-12-08 @@ -866,33 +866,33 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
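The scheduler `interval` fix in the 1.9.0 Fixed entries above concerns configurations of the following shape. This is a hedged sketch with a hypothetical manually-optimized module, not code from the changelog:

```python
# Sketch of the scheduler configuration the 1.9.0 `interval` fix concerns.
# `ManualOptModule` is a hypothetical example module, not part of the changelog.
import torch
import lightning.pytorch as pl

class ManualOptModule(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.automatic_optimization = False  # manual optimization
        self.layer = torch.nn.Linear(4, 1)

    def training_step(self, batch, batch_idx):
        opt = self.optimizers()
        opt.zero_grad()
        loss = self.layer(batch).sum()
        self.manual_backward(loss)
        opt.step()
        self.lr_schedulers().step()  # scheduler is stepped manually here

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=0.1)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
        # the "interval" key is what the LearningRateMonitor fix above is about
        return {
            "optimizer": optimizer,
            "lr_scheduler": {"scheduler": scheduler, "interval": "step"},
        }
```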
### Changed

- Direct support for compiled models (
-  [#15922](https://github.com/Lightning-AI/lightning/pull/15922),
-  [#15957](https://github.com/Lightning-AI/lightning/pull/15957)
+  [#15922](https://github.com/Lightning-AI/pytorch-lightning/pull/15922),
+  [#15957](https://github.com/Lightning-AI/pytorch-lightning/pull/15957)
  )

### Fixed

-- Fixed issue with unsupported torch.inference_mode() on hpu backends ([#15918](https://github.com/Lightning-AI/lightning/pull/15918))
-- Fixed LRScheduler import for PyTorch 2.0 ([#15940](https://github.com/Lightning-AI/lightning/pull/15940))
-- Fixed `fit_loop.restarting` to be `False` for lr finder ([#15620](https://github.com/Lightning-AI/lightning/pull/15620))
-- Fixed `torch.jit.script`-ing a LightningModule causing an unintended error message about deprecated `use_amp` property ([#15947](https://github.com/Lightning-AI/lightning/pull/15947))
-- Fixed the `XLAProfiler` not recording anything due to mismatching of action names ([#15885](https://github.com/Lightning-AI/lightning/pull/15885))
+- Fixed an issue with unsupported `torch.inference_mode()` on HPU backends ([#15918](https://github.com/Lightning-AI/pytorch-lightning/pull/15918))
+- Fixed LRScheduler import for PyTorch 2.0 ([#15940](https://github.com/Lightning-AI/pytorch-lightning/pull/15940))
+- Fixed `fit_loop.restarting` to be `False` for lr finder ([#15620](https://github.com/Lightning-AI/pytorch-lightning/pull/15620))
+- Fixed `torch.jit.script`-ing a LightningModule causing an unintended error message about deprecated `use_amp` property ([#15947](https://github.com/Lightning-AI/pytorch-lightning/pull/15947))
+- Fixed the `XLAProfiler` not recording anything due to mismatching of action names ([#15885](https://github.com/Lightning-AI/pytorch-lightning/pull/15885))

## [1.8.3] - 2022-11-22

### Changed

-- Temporarily removed support for Hydra multi-run ([#15737](https://github.com/Lightning-AI/lightning/pull/15737))
-- Switch from `tensorboard` to `tensorboardx` in `TensorBoardLogger` ([#15728](https://github.com/Lightning-AI/lightning/pull/15728))
+- Temporarily removed support for Hydra multi-run ([#15737](https://github.com/Lightning-AI/pytorch-lightning/pull/15737))
+- Switch from `tensorboard` to `tensorboardx` in `TensorBoardLogger` ([#15728](https://github.com/Lightning-AI/pytorch-lightning/pull/15728))

## [1.8.2] - 2022-11-17

### Fixed

-- Make sure save_dir can be empty str ([#15638](https://github.com/Lightning-AI/lightning/pull/15638))
-- Fixed the automatic fallback from `Trainer(strategy="ddp_spawn", ...)` to `Trainer(strategy="ddp", ...)` when on an LSF cluster ([#15103](https://github.com/Lightning-AI/lightning/pull/15103))
+- Make sure `save_dir` can be an empty string ([#15638](https://github.com/Lightning-AI/pytorch-lightning/pull/15638))
+- Fixed the automatic fallback from `Trainer(strategy="ddp_spawn", ...)` to `Trainer(strategy="ddp", ...)` when on an LSF cluster ([#15103](https://github.com/Lightning-AI/pytorch-lightning/pull/15103))

@@ -900,104 +900,104 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
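The 1.8.4 "Direct support for compiled models" entries above refer to passing a module through `torch.compile` before handing it to the Trainer. A minimal sketch, assuming PyTorch 2.x; the module and random dataset are placeholders:

```python
# Sketch of handing a torch.compile-d module to the Trainer (PyTorch 2.x).
# `TinyModule` and the random dataset are placeholders for illustration.
import torch
import lightning.pytorch as pl
from torch.utils.data import DataLoader, TensorDataset

class TinyModule(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return torch.nn.functional.mse_loss(self.layer(x), y)

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)

model = torch.compile(TinyModule())  # compile first, then hand to the Trainer
data = DataLoader(TensorDataset(torch.randn(32, 4), torch.randn(32, 1)), batch_size=8)
trainer = pl.Trainer(max_epochs=1, logger=False, enable_checkpointing=False)
trainer.fit(model, data)
```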
### Added

-- Added back the accidentally removed `pl.utilities.distributed.rank_zero_only` function ([#15536](https://github.com/Lightning-AI/lightning/pull/15536))
+- Added back the accidentally removed `pl.utilities.distributed.rank_zero_only` function ([#15536](https://github.com/Lightning-AI/pytorch-lightning/pull/15536))

### Deprecated

-- Deprecated `pl.utilities.distributed.rank_zero_only` in favor of `pl.utilities.rank_zero_only` ([#15536](https://github.com/Lightning-AI/lightning/pull/15536))
+- Deprecated `pl.utilities.distributed.rank_zero_only` in favor of `pl.utilities.rank_zero_only` ([#15536](https://github.com/Lightning-AI/pytorch-lightning/pull/15536))

### Fixed

-- Fixed `TensorBoardLogger` not validating the input array type when logging the model graph ([#15323](https://github.com/Lightning-AI/lightning/pull/15323))
-- Fixed an attribute error in `ColossalAIStrategy` at import time when `torch.distributed` is not available ([#15535](https://github.com/Lightning-AI/lightning/pull/15535))
-- Fixed an issue when calling `fs.listdir` with file URI instead of path in `CheckpointConnector` ([#15413](https://github.com/Lightning-AI/lightning/pull/15413))
-- Fixed an issue with the `BaseFinetuning` callback not setting the `track_running_stats` attribute for batch normaliztion layers ([#15063](https://github.com/Lightning-AI/lightning/pull/15063))
-- Fixed an issue with `WandbLogger(log_model=True|'all)` raising an error and not being able to serialize tensors in the metadata ([#15544](https://github.com/Lightning-AI/lightning/pull/15544))
-- Fixed the gradient unscaling logic when using `Trainer(precision=16)` and fused optimizers such as `Adam(..., fused=True)` ([#15544](https://github.com/Lightning-AI/lightning/pull/15544))
-- Fixed model state transfer in multiprocessing launcher when running multi-node ([#15567](https://github.com/Lightning-AI/lightning/pull/15567))
-- Fixed manual optimization raising `AttributeError` with Bagua Strategy ([#12534](https://github.com/Lightning-AI/lightning/pull/12534))
-- Fixed the import of `pytorch_lightning` causing a warning 'Redirects are currently not supported in Windows or MacOs' ([#15610](https://github.com/Lightning-AI/lightning/pull/15610))
+- Fixed `TensorBoardLogger` not validating the input array type when logging the model graph ([#15323](https://github.com/Lightning-AI/pytorch-lightning/pull/15323))
+- Fixed an attribute error in `ColossalAIStrategy` at import time when `torch.distributed` is not available ([#15535](https://github.com/Lightning-AI/pytorch-lightning/pull/15535))
+- Fixed an issue when calling `fs.listdir` with file URI instead of path in `CheckpointConnector` ([#15413](https://github.com/Lightning-AI/pytorch-lightning/pull/15413))
+- Fixed an issue with the `BaseFinetuning` callback not setting the `track_running_stats` attribute for batch normalization layers ([#15063](https://github.com/Lightning-AI/pytorch-lightning/pull/15063))
+- Fixed an issue with `WandbLogger(log_model=True|'all')` raising an error and not being able to serialize tensors in the metadata ([#15544](https://github.com/Lightning-AI/pytorch-lightning/pull/15544))
+- Fixed the gradient unscaling logic when using `Trainer(precision=16)` and fused optimizers such as `Adam(..., fused=True)` ([#15544](https://github.com/Lightning-AI/pytorch-lightning/pull/15544))
+- Fixed model state transfer in multiprocessing launcher when running multi-node ([#15567](https://github.com/Lightning-AI/pytorch-lightning/pull/15567))
+- Fixed manual optimization 
raising `AttributeError` with Bagua Strategy ([#12534](https://github.com/Lightning-AI/pytorch-lightning/pull/12534)) +- Fixed the import of `pytorch_lightning` causing a warning 'Redirects are currently not supported in Windows or MacOs' ([#15610](https://github.com/Lightning-AI/pytorch-lightning/pull/15610)) ## [1.8.0] - 2022-11-01 ### Added -- Added support for requeueing slurm array jobs ([#15040](https://github.com/Lightning-AI/lightning/pull/15040)) -- Added native AMP support for `ddp_fork` (and associated alias strategies) with CUDA GPUs ([#14983](https://github.com/Lightning-AI/lightning/pull/14983)) -- Added `BatchSizeFinder` callback ([#11089](https://github.com/Lightning-AI/lightning/pull/11089)) -- Added `LearningRateFinder` callback ([#13802](https://github.com/Lightning-AI/lightning/pull/13802)) -- Tuner now supports a new `method` argument which will determine when to run the `BatchSizeFinder`: one of `fit`, `validate`, `test` or `predict` ([#11089](https://github.com/Lightning-AI/lightning/pull/11089)) -- Added prefix to log message in `seed_everything` with rank info ([#14031](https://github.com/Lightning-AI/lightning/pull/14031)) -- Added support for auto wrapping for `DDPFullyShardedNativeStrategy` ([#14252](https://github.com/Lightning-AI/lightning/pull/14252)) -- Added support for passing extra init-parameters to the `LightningDataModule.from_datasets` ([#14185](https://github.com/Lightning-AI/lightning/pull/14185)) -- Added support for saving sharded optimizer state dict outside of `DDPShardedStrategy` ([#14208](https://github.com/Lightning-AI/lightning/pull/14208)) -- Added support for auto wrapping for `DDPFullyShardedStrategy` ([#14383](https://github.com/Lightning-AI/lightning/pull/14383)) +- Added support for requeueing slurm array jobs ([#15040](https://github.com/Lightning-AI/pytorch-lightning/pull/15040)) +- Added native AMP support for `ddp_fork` (and associated alias strategies) with CUDA GPUs ([#14983](https://github.com/Lightning-AI/pytorch-lightning/pull/14983)) +- Added `BatchSizeFinder` callback ([#11089](https://github.com/Lightning-AI/pytorch-lightning/pull/11089)) +- Added `LearningRateFinder` callback ([#13802](https://github.com/Lightning-AI/pytorch-lightning/pull/13802)) +- Tuner now supports a new `method` argument which will determine when to run the `BatchSizeFinder`: one of `fit`, `validate`, `test` or `predict` ([#11089](https://github.com/Lightning-AI/pytorch-lightning/pull/11089)) +- Added prefix to log message in `seed_everything` with rank info ([#14031](https://github.com/Lightning-AI/pytorch-lightning/pull/14031)) +- Added support for auto wrapping for `DDPFullyShardedNativeStrategy` ([#14252](https://github.com/Lightning-AI/pytorch-lightning/pull/14252)) +- Added support for passing extra init-parameters to the `LightningDataModule.from_datasets` ([#14185](https://github.com/Lightning-AI/pytorch-lightning/pull/14185)) +- Added support for saving sharded optimizer state dict outside of `DDPShardedStrategy` ([#14208](https://github.com/Lightning-AI/pytorch-lightning/pull/14208)) +- Added support for auto wrapping for `DDPFullyShardedStrategy` ([#14383](https://github.com/Lightning-AI/pytorch-lightning/pull/14383)) - Integrate the `lightning_utilities` package ( - [#14475](https://github.com/Lightning-AI/lightning/pull/14475), - [#14537](https://github.com/Lightning-AI/lightning/pull/14537), - [#14556](https://github.com/Lightning-AI/lightning/pull/14556), - [#14558](https://github.com/Lightning-AI/lightning/pull/14558), - 
[#14575](https://github.com/Lightning-AI/lightning/pull/14575), - [#14620](https://github.com/Lightning-AI/lightning/pull/14620)) -- Added `args` parameter to `LightningCLI` to ease running from within Python ([#14596](https://github.com/Lightning-AI/lightning/pull/14596)) -- Added `WandbLogger.download_artifact` and `WandbLogger.use_artifact` for managing artifacts with Weights and Biases ([#14551](https://github.com/Lightning-AI/lightning/pull/14551)) -- Added an option to configure the signal SLURM sends when a job is preempted or requeued ([#14626](https://github.com/Lightning-AI/lightning/pull/14626)) -- Added a warning when the model passed to `LightningLite.setup()` does not have all parameters on the same device ([#14822](https://github.com/Lightning-AI/lightning/pull/14822)) -- The `CometLogger` now flags the Comet Experiments as being created from Lightning for analytics purposes ([#14906](https://github.com/Lightning-AI/lightning/pull/14906)) -- Introduce `ckpt_path="hpc"` keyword for checkpoint loading ([#14911](https://github.com/Lightning-AI/lightning/pull/14911)) -- Added a more descriptive error message when attempting to fork processes with pre-initialized CUDA context ([#14709](https://github.com/Lightning-AI/lightning/pull/14709)) -- Added support for custom parameters in subclasses of `SaveConfigCallback` ([#14998](https://github.com/Lightning-AI/lightning/pull/14998)) -- Added `inference_mode` flag to Trainer to let users enable/disable inference mode during evaluation ([#15034](https://github.com/Lightning-AI/lightning/pull/15034)) -- Added `LightningLite.no_backward_sync` for control over efficient gradient accumulation with distributed strategies ([#14966](https://github.com/Lightning-AI/lightning/pull/14966)) -- Added a sanity check that scripts are executed with the `srun` command in SLURM and that environment variables are not conflicting ([#15011](https://github.com/Lightning-AI/lightning/pull/15011)) -- Added an error message when attempting to launch processes with `python -i` and an interactive-incompatible strategy ([#15293](https://github.com/Lightning-AI/lightning/pull/15293)) + [#14475](https://github.com/Lightning-AI/pytorch-lightning/pull/14475), + [#14537](https://github.com/Lightning-AI/pytorch-lightning/pull/14537), + [#14556](https://github.com/Lightning-AI/pytorch-lightning/pull/14556), + [#14558](https://github.com/Lightning-AI/pytorch-lightning/pull/14558), + [#14575](https://github.com/Lightning-AI/pytorch-lightning/pull/14575), + [#14620](https://github.com/Lightning-AI/pytorch-lightning/pull/14620)) +- Added `args` parameter to `LightningCLI` to ease running from within Python ([#14596](https://github.com/Lightning-AI/pytorch-lightning/pull/14596)) +- Added `WandbLogger.download_artifact` and `WandbLogger.use_artifact` for managing artifacts with Weights and Biases ([#14551](https://github.com/Lightning-AI/pytorch-lightning/pull/14551)) +- Added an option to configure the signal SLURM sends when a job is preempted or requeued ([#14626](https://github.com/Lightning-AI/pytorch-lightning/pull/14626)) +- Added a warning when the model passed to `LightningLite.setup()` does not have all parameters on the same device ([#14822](https://github.com/Lightning-AI/pytorch-lightning/pull/14822)) +- The `CometLogger` now flags the Comet Experiments as being created from Lightning for analytics purposes ([#14906](https://github.com/Lightning-AI/pytorch-lightning/pull/14906)) +- Introduce `ckpt_path="hpc"` keyword for checkpoint loading 
([#14911](https://github.com/Lightning-AI/pytorch-lightning/pull/14911)) +- Added a more descriptive error message when attempting to fork processes with pre-initialized CUDA context ([#14709](https://github.com/Lightning-AI/pytorch-lightning/pull/14709)) +- Added support for custom parameters in subclasses of `SaveConfigCallback` ([#14998](https://github.com/Lightning-AI/pytorch-lightning/pull/14998)) +- Added `inference_mode` flag to Trainer to let users enable/disable inference mode during evaluation ([#15034](https://github.com/Lightning-AI/pytorch-lightning/pull/15034)) +- Added `LightningLite.no_backward_sync` for control over efficient gradient accumulation with distributed strategies ([#14966](https://github.com/Lightning-AI/pytorch-lightning/pull/14966)) +- Added a sanity check that scripts are executed with the `srun` command in SLURM and that environment variables are not conflicting ([#15011](https://github.com/Lightning-AI/pytorch-lightning/pull/15011)) +- Added an error message when attempting to launch processes with `python -i` and an interactive-incompatible strategy ([#15293](https://github.com/Lightning-AI/pytorch-lightning/pull/15293)) ### Changed -- The `Trainer.{fit,validate,test,predict,tune}` methods now raise a useful error message if the input is not a `LightningModule` ([#13892](https://github.com/Lightning-AI/lightning/pull/13892)) -- Raised a `MisconfigurationException` if batch transfer hooks are overridden with `IPUAccelerator` ([#13961](https://github.com/Lightning-AI/lightning/pull/13961)) -- Replaced the unwrapping logic in strategies with direct access to unwrapped `LightningModule` ([#13738](https://github.com/Lightning-AI/lightning/pull/13738)) -- Enabled `on_before_batch_transfer` for `DPStrategy` and `IPUAccelerator` ([#14023](https://github.com/Lightning-AI/lightning/pull/14023)) -- When resuming training with Apex enabled, the `Trainer` will now raise an error ([#14341](https://github.com/Lightning-AI/lightning/pull/14341)) -- Included `torch.cuda` rng state to the aggregate `_collect_rng_states()` and `_set_rng_states()` ([#14384](https://github.com/Lightning-AI/lightning/pull/14384)) -- Changed `trainer.should_stop` to not stop in between an epoch and run until `min_steps/min_epochs` only ([#13890](https://github.com/Lightning-AI/lightning/pull/13890)) -- The `pyDeprecate` dependency is no longer installed ([#14472](https://github.com/Lightning-AI/lightning/pull/14472)) -- When using multiple loggers, by default checkpoints and profiler output now get saved to the log dir of the first logger in the list ([#14325](https://github.com/Lightning-AI/lightning/pull/14325)) -- In Lightning Lite, state-dict access to the module wrapper now gets passed through to the original module reference ([#14629](https://github.com/Lightning-AI/lightning/pull/14629)) -- Removed fall-back to `LightningEnvironment` when number of SLURM tasks does not correspond to number of processes in Trainer ([#14300](https://github.com/Lightning-AI/lightning/pull/14300)) -- Aligned DDP and DDPSpawn strategies in setting up the environment ([#11073](https://github.com/Lightning-AI/lightning/pull/11073)) -- Integrated the Lite Precision plugins into the PL Precision plugins - the base class in PL now extends the `lightning_lite.precision.Precision` base class ([#14798](https://github.com/Lightning-AI/lightning/pull/14798)) +- The `Trainer.{fit,validate,test,predict,tune}` methods now raise a useful error message if the input is not a `LightningModule` 
([#13892](https://github.com/Lightning-AI/pytorch-lightning/pull/13892)) +- Raised a `MisconfigurationException` if batch transfer hooks are overridden with `IPUAccelerator` ([#13961](https://github.com/Lightning-AI/pytorch-lightning/pull/13961)) +- Replaced the unwrapping logic in strategies with direct access to unwrapped `LightningModule` ([#13738](https://github.com/Lightning-AI/pytorch-lightning/pull/13738)) +- Enabled `on_before_batch_transfer` for `DPStrategy` and `IPUAccelerator` ([#14023](https://github.com/Lightning-AI/pytorch-lightning/pull/14023)) +- When resuming training with Apex enabled, the `Trainer` will now raise an error ([#14341](https://github.com/Lightning-AI/pytorch-lightning/pull/14341)) +- Included `torch.cuda` rng state to the aggregate `_collect_rng_states()` and `_set_rng_states()` ([#14384](https://github.com/Lightning-AI/pytorch-lightning/pull/14384)) +- Changed `trainer.should_stop` to not stop in between an epoch and run until `min_steps/min_epochs` only ([#13890](https://github.com/Lightning-AI/pytorch-lightning/pull/13890)) +- The `pyDeprecate` dependency is no longer installed ([#14472](https://github.com/Lightning-AI/pytorch-lightning/pull/14472)) +- When using multiple loggers, by default checkpoints and profiler output now get saved to the log dir of the first logger in the list ([#14325](https://github.com/Lightning-AI/pytorch-lightning/pull/14325)) +- In Lightning Lite, state-dict access to the module wrapper now gets passed through to the original module reference ([#14629](https://github.com/Lightning-AI/pytorch-lightning/pull/14629)) +- Removed fall-back to `LightningEnvironment` when number of SLURM tasks does not correspond to number of processes in Trainer ([#14300](https://github.com/Lightning-AI/pytorch-lightning/pull/14300)) +- Aligned DDP and DDPSpawn strategies in setting up the environment ([#11073](https://github.com/Lightning-AI/pytorch-lightning/pull/11073)) +- Integrated the Lite Precision plugins into the PL Precision plugins - the base class in PL now extends the `lightning_lite.precision.Precision` base class ([#14798](https://github.com/Lightning-AI/pytorch-lightning/pull/14798)) * The `PrecisionPlugin.backward` signature changed: The `closure_loss` argument was renamed to `tensor` * The `PrecisionPlugin.{pre_,post_}backward` signature changed: The `closure_loss` argument was renamed to `tensor` and moved as the first argument * The `PrecisionPlugin.optimizer_step` signature changed: The `model`, `optimizer_idx` and `closure` arguments need to be passed as keyword arguments now -- Trainer queries the CUDA devices through NVML if available to avoid initializing CUDA before forking, which eliminates the need for the `PL_DISABLE_FORK` environment variable introduced in v1.7.4 ([#14631](https://github.com/Lightning-AI/lightning/pull/14631)) -- The `MLFlowLogger.finalize()` now sets the status to `FAILED` when an exception occurred in `Trainer`, and sets the status to `FINISHED` on successful completion ([#12292](https://github.com/Lightning-AI/lightning/pull/12292)) -- It is no longer needed to call `model.double()` when using `precision=64` in Lightning Lite ([#14827](https://github.com/Lightning-AI/lightning/pull/14827)) -- HPC checkpoints are now loaded automatically only in slurm environment when no specific value for `ckpt_path` has been set ([#14911](https://github.com/Lightning-AI/lightning/pull/14911)) -- The `Callback.on_load_checkpoint` now gets the full checkpoint dictionary and the `callback_state` argument was renamed 
`checkpoint` ([#14835](https://github.com/Lightning-AI/lightning/pull/14835))
-- Moved the warning about saving nn.Module in `save_hyperparameters()` to before the deepcopy ([#15132](https://github.com/Lightning-AI/lightning/pull/15132))
-- To avoid issues with forking processes, from PyTorch 1.13 and higher, Lightning will directly use the PyTorch NVML-based check for `torch.cuda.device_count` and from PyTorch 2.0 and higher, Lightning will configure PyTorch to use a NVML-based check for `torch.cuda.is_available`. ([#15110](https://github.com/Lightning-AI/lightning/pull/15110), [#15133](https://github.com/Lightning-AI/lightning/pull/15133))
-- The `NeptuneLogger` now uses `neptune.init_run` instead of the deprecated `neptune.init` to initialize a run ([#15393](https://github.com/Lightning-AI/lightning/pull/15393))
+- Trainer queries the CUDA devices through NVML if available to avoid initializing CUDA before forking, which eliminates the need for the `PL_DISABLE_FORK` environment variable introduced in v1.7.4 ([#14631](https://github.com/Lightning-AI/pytorch-lightning/pull/14631))
+- The `MLFlowLogger.finalize()` now sets the status to `FAILED` when an exception occurred in `Trainer`, and sets the status to `FINISHED` on successful completion ([#12292](https://github.com/Lightning-AI/pytorch-lightning/pull/12292))
+- It is no longer needed to call `model.double()` when using `precision=64` in Lightning Lite ([#14827](https://github.com/Lightning-AI/pytorch-lightning/pull/14827))
+- HPC checkpoints are now loaded automatically only in a SLURM environment when no specific value for `ckpt_path` has been set ([#14911](https://github.com/Lightning-AI/pytorch-lightning/pull/14911))
+- The `Callback.on_load_checkpoint` now gets the full checkpoint dictionary and the `callback_state` argument was renamed to `checkpoint` ([#14835](https://github.com/Lightning-AI/pytorch-lightning/pull/14835))
+- Moved the warning about saving nn.Module in `save_hyperparameters()` to before the deepcopy ([#15132](https://github.com/Lightning-AI/pytorch-lightning/pull/15132))
+- To avoid issues with forking processes, from PyTorch 1.13 and higher, Lightning will directly use the PyTorch NVML-based check for `torch.cuda.device_count` and from PyTorch 2.0 and higher, Lightning will configure PyTorch to use an NVML-based check for `torch.cuda.is_available`. 
([#15110](https://github.com/Lightning-AI/pytorch-lightning/pull/15110), [#15133](https://github.com/Lightning-AI/pytorch-lightning/pull/15133))
+- The `NeptuneLogger` now uses `neptune.init_run` instead of the deprecated `neptune.init` to initialize a run ([#15393](https://github.com/Lightning-AI/pytorch-lightning/pull/15393))

### Deprecated

-- Deprecated `LightningDeepSpeedModule` ([#14000](https://github.com/Lightning-AI/lightning/pull/14000))
-- Deprecated `amp_level` from `Trainer` in favour of passing it explicitly via precision plugin ([#13898](https://github.com/Lightning-AI/lightning/pull/13898))
-- Deprecated the calls to `pl.utiltiies.meta` functions in favor of built-in https://github.com/pytorch/torchdistx support ([#13868](https://github.com/Lightning-AI/lightning/pull/13868))
-- Deprecated the `unwrap_lightning_module` and `unwrap_lightning_module_sharded` utility functions in favor of accessing the unwrapped `LightningModule` on the strategy directly ([#13738](https://github.com/Lightning-AI/lightning/pull/13738))
-- Deprecated the `pl_module` argument in `LightningParallelModule`, `LightningDistributedModule`, `LightningShardedDataParallel`, `LightningBaguaModule` and `LightningDeepSpeedModule` wrapper classes ([#13738](https://github.com/Lightning-AI/lightning/pull/13738))
-- Deprecated the `on_colab_kaggle` function ([#14247](https://github.com/Lightning-AI/lightning/pull/14247))
-- Deprecated the internal `pl.core.mixins.DeviceDtypeModuleMixin` class ([#14511](https://github.com/Lightning-AI/lightning/pull/14511), [#14548](https://github.com/Lightning-AI/lightning/pull/14548))
-- Deprecated all functions in `pl.utilities.xla_device` ([#14514](https://github.com/Lightning-AI/lightning/pull/14514), [#14550](https://github.com/Lightning-AI/lightning/pull/14550))
+- Deprecated `LightningDeepSpeedModule` ([#14000](https://github.com/Lightning-AI/pytorch-lightning/pull/14000))
+- Deprecated `amp_level` from `Trainer` in favour of passing it explicitly via precision plugin ([#13898](https://github.com/Lightning-AI/pytorch-lightning/pull/13898))
+- Deprecated the calls to `pl.utilities.meta` functions in favor of built-in https://github.com/pytorch/torchdistx support ([#13868](https://github.com/Lightning-AI/pytorch-lightning/pull/13868))
+- Deprecated the `unwrap_lightning_module` and `unwrap_lightning_module_sharded` utility functions in favor of accessing the unwrapped `LightningModule` on the strategy directly ([#13738](https://github.com/Lightning-AI/pytorch-lightning/pull/13738))
+- Deprecated the `pl_module` argument in `LightningParallelModule`, `LightningDistributedModule`, `LightningShardedDataParallel`, `LightningBaguaModule` and `LightningDeepSpeedModule` wrapper classes ([#13738](https://github.com/Lightning-AI/pytorch-lightning/pull/13738))
+- Deprecated the `on_colab_kaggle` function ([#14247](https://github.com/Lightning-AI/pytorch-lightning/pull/14247))
+- Deprecated the internal `pl.core.mixins.DeviceDtypeModuleMixin` class ([#14511](https://github.com/Lightning-AI/pytorch-lightning/pull/14511), [#14548](https://github.com/Lightning-AI/pytorch-lightning/pull/14548))
+- Deprecated all functions in `pl.utilities.xla_device` ([#14514](https://github.com/Lightning-AI/pytorch-lightning/pull/14514), [#14550](https://github.com/Lightning-AI/pytorch-lightning/pull/14550))
  * Deprecated the internal `inner_f` function
  * Deprecated the internal `pl_multi_process` function
  * Deprecated the internal `XLADeviceUtils.xla_available` staticmethod
  * Deprecated the 
`XLADeviceUtils.tpu_device_exists` staticmethod in favor of `pl.accelerators.TPUAccelerator.is_available()` -- Deprecated `pl.utilities.distributed.tpu_distributed` in favor of `lightning_lite.accelerators.tpu.tpu_distributed` ([#14550](https://github.com/Lightning-AI/lightning/pull/14550)) -- Deprecated all functions in `pl.utilities.cloud_io` in favor of `lightning_lite.utilities.cloud_io` ([#14515](https://github.com/Lightning-AI/lightning/pull/14515)) -- Deprecated the functions in `pl.utilities.apply_func` in favor of `lightning_utilities.core.apply_func` ([#14516](https://github.com/Lightning-AI/lightning/pull/14516), [#14537](https://github.com/Lightning-AI/lightning/pull/14537)) -- Deprecated all functions in `pl.utilities.device_parser` ([#14492](https://github.com/Lightning-AI/lightning/pull/14492), [#14753](https://github.com/Lightning-AI/lightning/pull/14753)) +- Deprecated `pl.utilities.distributed.tpu_distributed` in favor of `lightning_lite.accelerators.tpu.tpu_distributed` ([#14550](https://github.com/Lightning-AI/pytorch-lightning/pull/14550)) +- Deprecated all functions in `pl.utilities.cloud_io` in favor of `lightning_lite.utilities.cloud_io` ([#14515](https://github.com/Lightning-AI/pytorch-lightning/pull/14515)) +- Deprecated the functions in `pl.utilities.apply_func` in favor of `lightning_utilities.core.apply_func` ([#14516](https://github.com/Lightning-AI/pytorch-lightning/pull/14516), [#14537](https://github.com/Lightning-AI/pytorch-lightning/pull/14537)) +- Deprecated all functions in `pl.utilities.device_parser` ([#14492](https://github.com/Lightning-AI/pytorch-lightning/pull/14492), [#14753](https://github.com/Lightning-AI/pytorch-lightning/pull/14753)) * Deprecated the `pl.utilities.device_parser.determine_root_gpu_device` in favor of `lightning_lite.utilities.device_parser.determine_root_gpu_device` * Deprecated the `pl.utilities.device_parser.parse_gpu_ids` in favor of `lightning_lite.utilities.device_parser.parse_gpu_ids` * Deprecated the `pl.utilities.device_parser.is_cuda_available` in favor of `lightning_lite.accelerators.cuda.is_cuda_available` @@ -1005,40 +1005,40 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). * Deprecated the `pl.utilities.device_parser.parse_cpu_cores` in favor of `lightning_lite.accelerators.cpu.parse_cpu_cores` * Deprecated the `pl.utilities.device_parser.parse_tpu_cores` in favor of `lightning_lite.accelerators.tpu.parse_tpu_cores` * Deprecated the `pl.utilities.device_parser.parse_hpus` in favor of `pl.accelerators.hpu.parse_hpus` -- Deprecated duplicate `SaveConfigCallback` parameters in `LightningCLI.__init__`: `save_config_kwargs`, `save_config_overwrite` and `save_config_multifile`. New `save_config_kwargs` parameter should be used instead ([#14998](https://github.com/Lightning-AI/lightning/pull/14998)) -- Deprecated `TrainerFn.TUNING`, `RunningStage.TUNING` and `trainer.tuning` property ([#15100](https://github.com/Lightning-AI/lightning/pull/15100)) -- Deprecated custom `pl.utilities.distributed.AllGatherGrad` implementation in favor of PyTorch's ([#15364](https://github.com/Lightning-AI/lightning/pull/15364)) +- Deprecated duplicate `SaveConfigCallback` parameters in `LightningCLI.__init__`: `save_config_kwargs`, `save_config_overwrite` and `save_config_multifile`. 
New `save_config_kwargs` parameter should be used instead ([#14998](https://github.com/Lightning-AI/pytorch-lightning/pull/14998)) +- Deprecated `TrainerFn.TUNING`, `RunningStage.TUNING` and `trainer.tuning` property ([#15100](https://github.com/Lightning-AI/pytorch-lightning/pull/15100)) +- Deprecated custom `pl.utilities.distributed.AllGatherGrad` implementation in favor of PyTorch's ([#15364](https://github.com/Lightning-AI/pytorch-lightning/pull/15364)) ### Removed -- Removed the deprecated `Trainer.training_type_plugin` property in favor of `Trainer.strategy` ([#14011](https://github.com/Lightning-AI/lightning/pull/14011)) -- Removed all deprecated training type plugins ([#14011](https://github.com/Lightning-AI/lightning/pull/14011)) -- Removed the deprecated `DDP2Strategy` ([#14026](https://github.com/Lightning-AI/lightning/pull/14026)) -- Removed the deprecated `DistributedType` and `DeviceType` enum classes ([#14045](https://github.com/Lightning-AI/lightning/pull/14045)) -- Removed deprecated support for passing the `rank_zero_warn` warning category positionally ([#14470](https://github.com/Lightning-AI/lightning/pull/14470)) -- Removed the legacy and unused `Trainer.get_deprecated_arg_names()` ([#14415](https://github.com/Lightning-AI/lightning/pull/14415)) -- Removed the deprecated `on_train_batch_end(outputs)` format when multiple optimizers are used and TBPTT is enabled ([#14373](https://github.com/Lightning-AI/lightning/pull/14373)) -- Removed the deprecated `training_epoch_end(outputs)` format when multiple optimizers are used and TBPTT is enabled ([#14373](https://github.com/Lightning-AI/lightning/pull/14373)) -- Removed the experimental `pl.utiltiies.meta` functions in favor of built-in https://github.com/pytorch/torchdistx support ([#13868](https://github.com/Lightning-AI/lightning/pull/13868)) -- Removed the deprecated `LoggerCollection`; `Trainer.logger` and `LightningModule.logger` now returns the first logger when more than one gets passed to the Trainer ([#14283](https://github.com/Lightning-AI/lightning/pull/14283)) -- Removed the deprecated the `trainer.lr_schedulers` ([#14408](https://github.com/Lightning-AI/lightning/pull/14408)) -- Removed the deprecated `LightningModule.{on_hpc_load,on_hpc_save}` hooks in favor of the general purpose hooks `LightningModule.{on_load_checkpoint,on_save_checkpoint}` ([#14315](https://github.com/Lightning-AI/lightning/pull/14315)) -- Removed deprecated support for old torchtext versions ([#14375](https://github.com/Lightning-AI/lightning/pull/14375)) -- Removed deprecated support for the old `neptune-client` API in the `NeptuneLogger` ([#14727](https://github.com/Lightning-AI/lightning/pull/14727)) -- Removed the deprecated `weights_save_path` Trainer argumnent and `Trainer.weights_save_path` property ([#14424](https://github.com/Lightning-AI/lightning/pull/14424)) -- Removed the deprecated ([#14471](https://github.com/Lightning-AI/lightning/pull/14471)) +- Removed the deprecated `Trainer.training_type_plugin` property in favor of `Trainer.strategy` ([#14011](https://github.com/Lightning-AI/pytorch-lightning/pull/14011)) +- Removed all deprecated training type plugins ([#14011](https://github.com/Lightning-AI/pytorch-lightning/pull/14011)) +- Removed the deprecated `DDP2Strategy` ([#14026](https://github.com/Lightning-AI/pytorch-lightning/pull/14026)) +- Removed the deprecated `DistributedType` and `DeviceType` enum classes ([#14045](https://github.com/Lightning-AI/pytorch-lightning/pull/14045)) +- Removed deprecated support for 
passing the `rank_zero_warn` warning category positionally ([#14470](https://github.com/Lightning-AI/pytorch-lightning/pull/14470))
+- Removed the legacy and unused `Trainer.get_deprecated_arg_names()` ([#14415](https://github.com/Lightning-AI/pytorch-lightning/pull/14415))
+- Removed the deprecated `on_train_batch_end(outputs)` format when multiple optimizers are used and TBPTT is enabled ([#14373](https://github.com/Lightning-AI/pytorch-lightning/pull/14373))
+- Removed the deprecated `training_epoch_end(outputs)` format when multiple optimizers are used and TBPTT is enabled ([#14373](https://github.com/Lightning-AI/pytorch-lightning/pull/14373))
+- Removed the experimental `pl.utilities.meta` functions in favor of built-in https://github.com/pytorch/torchdistx support ([#13868](https://github.com/Lightning-AI/pytorch-lightning/pull/13868))
+- Removed the deprecated `LoggerCollection`; `Trainer.logger` and `LightningModule.logger` now return the first logger when more than one gets passed to the Trainer ([#14283](https://github.com/Lightning-AI/pytorch-lightning/pull/14283))
+- Removed the deprecated `trainer.lr_schedulers` ([#14408](https://github.com/Lightning-AI/pytorch-lightning/pull/14408))
+- Removed the deprecated `LightningModule.{on_hpc_load,on_hpc_save}` hooks in favor of the general purpose hooks `LightningModule.{on_load_checkpoint,on_save_checkpoint}` ([#14315](https://github.com/Lightning-AI/pytorch-lightning/pull/14315))
+- Removed deprecated support for old torchtext versions ([#14375](https://github.com/Lightning-AI/pytorch-lightning/pull/14375))
+- Removed deprecated support for the old `neptune-client` API in the `NeptuneLogger` ([#14727](https://github.com/Lightning-AI/pytorch-lightning/pull/14727))
+- Removed the deprecated `weights_save_path` Trainer argument and `Trainer.weights_save_path` property ([#14424](https://github.com/Lightning-AI/pytorch-lightning/pull/14424))
+- Removed the following deprecated utilities ([#14471](https://github.com/Lightning-AI/pytorch-lightning/pull/14471))
  * `pl.utilities.distributed.rank_zero_only` in favor of `pl.utilities.rank_zero.rank_zero_only`
  * `pl.utilities.distributed.rank_zero_debug` in favor of `pl.utilities.rank_zero.rank_zero_debug`
  * `pl.utilities.distributed.rank_zero_info` in favor of `pl.utilities.rank_zero.rank_zero_info`
  * `pl.utilities.warnings.rank_zero_warn` in favor of `pl.utilities.rank_zero.rank_zero_warn`
  * `pl.utilities.warnings.rank_zero_deprecation` in favor of `pl.utilities.rank_zero.rank_zero_deprecation`
  * `pl.utilities.warnings.LightningDeprecationWarning` in favor of `pl.utilities.rank_zero.LightningDeprecationWarning`
-- Removed deprecated `Trainer.num_processes` attribute in favour of `Trainer.num_devices` ([#14423](https://github.com/Lightning-AI/lightning/pull/14423))
-- Removed the deprecated `Trainer.data_parallel_device_ids` hook in favour of `Trainer.device_ids` ([#14422](https://github.com/Lightning-AI/lightning/pull/14422))
-- Removed the deprecated class `TrainerCallbackHookMixin` ([#14401](https://github.com/Lightning-AI/lightning/pull/14401))
-- Removed the deprecated `BaseProfiler` and `AbstractProfiler` classes ([#14404](https://github.com/Lightning-AI/lightning/pull/14404))
-- Removed the deprecated way to set the distributed backend via the environment variable `PL_TORCH_DISTRIBUTED_BACKEND`, in favor of setting the `process_group_backend` in the strategy constructor ([#14693](https://github.com/Lightning-AI/lightning/pull/14693))
-- Removed deprecated callback hooks 
([#14834](https://github.com/Lightning-AI/lightning/pull/14834)) +- Removed deprecated `Trainer.num_processes` attribute in favour of `Trainer.num_devices` ([#14423](https://github.com/Lightning-AI/pytorch-lightning/pull/14423)) +- Removed the deprecated `Trainer.data_parallel_device_ids` hook in favour of `Trainer.device_ids` ([#14422](https://github.com/Lightning-AI/pytorch-lightning/pull/14422)) +- Removed the deprecated class `TrainerCallbackHookMixin` ([#14401](https://github.com/Lightning-AI/pytorch-lightning/pull/14401)) +- Removed the deprecated `BaseProfiler` and `AbstractProfiler` classes ([#14404](https://github.com/Lightning-AI/pytorch-lightning/pull/14404)) +- Removed the deprecated way to set the distributed backend via the environment variable `PL_TORCH_DISTRIBUTED_BACKEND`, in favor of setting the `process_group_backend` in the strategy constructor ([#14693](https://github.com/Lightning-AI/pytorch-lightning/pull/14693)) +- Removed deprecated callback hooks ([#14834](https://github.com/Lightning-AI/pytorch-lightning/pull/14834)) * `Callback.on_configure_sharded_model` in favor of `Callback.setup` * `Callback.on_before_accelerator_backend_setup` in favor of `Callback.setup` * `Callback.on_batch_start` in favor of `Callback.on_train_batch_start` @@ -1046,2895 +1046,2895 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). * `Callback.on_epoch_start` in favor of `Callback.on_{train,validation,test}_epoch_start` * `Callback.on_epoch_end` in favor of `Callback.on_{train,validation,test}_epoch_end` * `Callback.on_pretrain_routine_{start,end}` in favor of `Callback.on_fit_start` -- Removed the deprecated device attributes `Trainer.{devices,gpus,num_gpus,ipus,tpu_cores}` in favor of the accelerator-agnostic `Trainer.num_devices` ([#14829](https://github.com/Lightning-AI/lightning/pull/14829)) -- Removed the deprecated `LightningIPUModule` ([#14830](https://github.com/Lightning-AI/lightning/pull/14830)) -- Removed the deprecated `Logger.agg_and_log_metrics` hook in favour of `Logger.log_metrics` and the `agg_key_funcs` and `agg_default_func` arguments. 
([#14840](https://github.com/Lightning-AI/lightning/pull/14840)) -- Removed the deprecated precision plugin checkpoint hooks `PrecisionPlugin.on_load_checkpoint` and `PrecisionPlugin.on_save_checkpoint` ([#14833](https://github.com/Lightning-AI/lightning/pull/14833)) -- Removed the deprecated `Trainer.root_gpu` attribute in favor of `Trainer.strategy.root_device` ([#14829](https://github.com/Lightning-AI/lightning/pull/14829)) -- Removed the deprecated `Trainer.use_amp` and `LightningModule.use_amp` attributes ([#14832](https://github.com/Lightning-AI/lightning/pull/14832)) -- Removed the deprecated callback hooks `Callback.on_init_start` and `Callback.on_init_end` ([#14867](https://github.com/Lightning-AI/lightning/pull/14867)) -- Removed the deprecated `Trainer.run_stage` in favor of `Trainer.{fit,validate,test,predict}` ([#14870](https://github.com/Lightning-AI/lightning/pull/14870)) -- Removed the deprecated `SimpleProfiler.profile_iterable` and `AdvancedProfiler.profile_iterable` attributes ([#14864](https://github.com/Lightning-AI/lightning/pull/14864)) -- Removed the deprecated `Trainer.verbose_evaluate` ([#14884](https://github.com/Lightning-AI/lightning/pull/14884)) -- Removed the deprecated `Trainer.should_rank_save_checkpoint` ([#14885](https://github.com/Lightning-AI/lightning/pull/14885)) -- Removed the deprecated `TrainerOptimizersMixin` ([#14887](https://github.com/Lightning-AI/lightning/pull/14887)) -- Removed the deprecated `Trainer.lightning_optimizers` ([#14889](https://github.com/Lightning-AI/lightning/pull/14889)) -- Removed the deprecated `TrainerDataLoadingMixin` ([#14888](https://github.com/Lightning-AI/lightning/pull/14888)) -- Removed the deprecated `Trainer.call_hook` in favor of `Trainer._call_callback_hooks`, `Trainer._call_lightning_module_hook`, `Trainer._call_ttp_hook`, and `Trainer._call_accelerator_hook` ([#14869](https://github.com/Lightning-AI/lightning/pull/14869)) -- Removed the deprecated `Trainer.{validated,tested,predicted}_ckpt_path` ([#14897](https://github.com/Lightning-AI/lightning/pull/14897)) -- Removed the deprecated `device_stats_monitor_prefix_metric_keys` ([#14890](https://github.com/Lightning-AI/lightning/pull/14890)) -- Removed the deprecated `LightningDataModule.on_save/load_checkpoint` hooks ([#14909](https://github.com/Lightning-AI/lightning/pull/14909)) -- Removed support for returning a value in `Callback.on_save_checkpoint` in favor of implementing `Callback.state_dict` ([#14835](https://github.com/Lightning-AI/lightning/pull/14835)) +- Removed the deprecated device attributes `Trainer.{devices,gpus,num_gpus,ipus,tpu_cores}` in favor of the accelerator-agnostic `Trainer.num_devices` ([#14829](https://github.com/Lightning-AI/pytorch-lightning/pull/14829)) +- Removed the deprecated `LightningIPUModule` ([#14830](https://github.com/Lightning-AI/pytorch-lightning/pull/14830)) +- Removed the deprecated `Logger.agg_and_log_metrics` hook in favour of `Logger.log_metrics` and the `agg_key_funcs` and `agg_default_func` arguments. 
([#14840](https://github.com/Lightning-AI/pytorch-lightning/pull/14840)) +- Removed the deprecated precision plugin checkpoint hooks `PrecisionPlugin.on_load_checkpoint` and `PrecisionPlugin.on_save_checkpoint` ([#14833](https://github.com/Lightning-AI/pytorch-lightning/pull/14833)) +- Removed the deprecated `Trainer.root_gpu` attribute in favor of `Trainer.strategy.root_device` ([#14829](https://github.com/Lightning-AI/pytorch-lightning/pull/14829)) +- Removed the deprecated `Trainer.use_amp` and `LightningModule.use_amp` attributes ([#14832](https://github.com/Lightning-AI/pytorch-lightning/pull/14832)) +- Removed the deprecated callback hooks `Callback.on_init_start` and `Callback.on_init_end` ([#14867](https://github.com/Lightning-AI/pytorch-lightning/pull/14867)) +- Removed the deprecated `Trainer.run_stage` in favor of `Trainer.{fit,validate,test,predict}` ([#14870](https://github.com/Lightning-AI/pytorch-lightning/pull/14870)) +- Removed the deprecated `SimpleProfiler.profile_iterable` and `AdvancedProfiler.profile_iterable` attributes ([#14864](https://github.com/Lightning-AI/pytorch-lightning/pull/14864)) +- Removed the deprecated `Trainer.verbose_evaluate` ([#14884](https://github.com/Lightning-AI/pytorch-lightning/pull/14884)) +- Removed the deprecated `Trainer.should_rank_save_checkpoint` ([#14885](https://github.com/Lightning-AI/pytorch-lightning/pull/14885)) +- Removed the deprecated `TrainerOptimizersMixin` ([#14887](https://github.com/Lightning-AI/pytorch-lightning/pull/14887)) +- Removed the deprecated `Trainer.lightning_optimizers` ([#14889](https://github.com/Lightning-AI/pytorch-lightning/pull/14889)) +- Removed the deprecated `TrainerDataLoadingMixin` ([#14888](https://github.com/Lightning-AI/pytorch-lightning/pull/14888)) +- Removed the deprecated `Trainer.call_hook` in favor of `Trainer._call_callback_hooks`, `Trainer._call_lightning_module_hook`, `Trainer._call_ttp_hook`, and `Trainer._call_accelerator_hook` ([#14869](https://github.com/Lightning-AI/pytorch-lightning/pull/14869)) +- Removed the deprecated `Trainer.{validated,tested,predicted}_ckpt_path` ([#14897](https://github.com/Lightning-AI/pytorch-lightning/pull/14897)) +- Removed the deprecated `device_stats_monitor_prefix_metric_keys` ([#14890](https://github.com/Lightning-AI/pytorch-lightning/pull/14890)) +- Removed the deprecated `LightningDataModule.on_save/load_checkpoint` hooks ([#14909](https://github.com/Lightning-AI/pytorch-lightning/pull/14909)) +- Removed support for returning a value in `Callback.on_save_checkpoint` in favor of implementing `Callback.state_dict` ([#14835](https://github.com/Lightning-AI/pytorch-lightning/pull/14835)) ### Fixed -- Fixed an issue with `LightningLite.setup()` not setting the `.device` attribute correctly on the returned wrapper ([#14822](https://github.com/Lightning-AI/lightning/pull/14822)) -- Fixed an attribute error when running the tuner together with the `StochasticWeightAveraging` callback ([#14836](https://github.com/Lightning-AI/lightning/pull/14836)) -- Fixed MissingFieldException in offline mode for the `NeptuneLogger()` ([#14919](https://github.com/Lightning-AI/lightning/pull/14919)) -- Fixed wandb `save_dir` is overridden by `None` `dir` when using CLI ([#14878](https://github.com/Lightning-AI/lightning/pull/14878)) -- Fixed a missing call to `LightningDataModule.load_state_dict` hook while restoring checkpoint using `LightningDataModule.load_from_checkpoint` ([#14883](https://github.com/Lightning-AI/lightning/pull/14883)) -- Fixed torchscript error with 
containers of LightningModules ([#14904](https://github.com/Lightning-AI/lightning/pull/14904)) -- Fixed reloading of the last checkpoint on run restart ([#14907](https://github.com/Lightning-AI/lightning/pull/14907)) -- `SaveConfigCallback` instances should only save the config once to allow having the `overwrite=False` safeguard when using `LightningCLI(..., run=False)` ([#14927](https://github.com/Lightning-AI/lightning/pull/14927)) -- Fixed an issue with terminating the trainer profiler when a `StopIteration` exception is raised while using an `IterableDataset` ([#14940](https://github.com/Lightning-AI/lightning/pull/14945)) -- Do not update on-plateau schedulers when reloading from an end-of-epoch checkpoint ([#14702](https://github.com/Lightning-AI/lightning/pull/14702)) -- Fixed `Trainer` support for PyTorch built without distributed support ([#14971](https://github.com/Lightning-AI/lightning/pull/14971)) -- Fixed batch normalization statistics calculation in `StochasticWeightAveraging` callback ([#14866](https://github.com/Lightning-AI/lightning/pull/14866)) -- Avoided initializing optimizers during deepspeed inference ([#14944](https://github.com/Lightning-AI/lightning/pull/14944)) -- Fixed `LightningCLI` parse_env and description in subcommands ([#15138](https://github.com/Lightning-AI/lightning/pull/15138)) -- Fixed an exception that would occur when creating a `multiprocessing.Pool` after importing Lightning ([#15292](https://github.com/Lightning-AI/lightning/pull/15292)) -- Fixed a pickling error when using `RichProgressBar` together with checkpointing ([#15319](https://github.com/Lightning-AI/lightning/pull/15319)) -- Fixed the `RichProgressBar` crashing when used with distributed strategies ([#15376](https://github.com/Lightning-AI/lightning/pull/15376)) -- Fixed an issue with `RichProgressBar` not resetting the internal state for the sanity check progress ([#15377](https://github.com/Lightning-AI/lightning/pull/15377)) -- Fixed an issue with DataLoader re-instantiation when the attribute is an array and the default value of the corresponding argument changed ([#15409](https://github.com/Lightning-AI/lightning/pull/15409)) +- Fixed an issue with `LightningLite.setup()` not setting the `.device` attribute correctly on the returned wrapper ([#14822](https://github.com/Lightning-AI/pytorch-lightning/pull/14822)) +- Fixed an attribute error when running the tuner together with the `StochasticWeightAveraging` callback ([#14836](https://github.com/Lightning-AI/pytorch-lightning/pull/14836)) +- Fixed MissingFieldException in offline mode for the `NeptuneLogger()` ([#14919](https://github.com/Lightning-AI/pytorch-lightning/pull/14919)) +- Fixed wandb `save_dir` is overridden by `None` `dir` when using CLI ([#14878](https://github.com/Lightning-AI/pytorch-lightning/pull/14878)) +- Fixed a missing call to `LightningDataModule.load_state_dict` hook while restoring checkpoint using `LightningDataModule.load_from_checkpoint` ([#14883](https://github.com/Lightning-AI/pytorch-lightning/pull/14883)) +- Fixed torchscript error with containers of LightningModules ([#14904](https://github.com/Lightning-AI/pytorch-lightning/pull/14904)) +- Fixed reloading of the last checkpoint on run restart ([#14907](https://github.com/Lightning-AI/pytorch-lightning/pull/14907)) +- `SaveConfigCallback` instances should only save the config once to allow having the `overwrite=False` safeguard when using `LightningCLI(..., run=False)` ([#14927](https://github.com/Lightning-AI/pytorch-lightning/pull/14927)) +- 
Fixed an issue with terminating the trainer profiler when a `StopIteration` exception is raised while using an `IterableDataset` ([#14940](https://github.com/Lightning-AI/pytorch-lightning/pull/14945)) +- Do not update on-plateau schedulers when reloading from an end-of-epoch checkpoint ([#14702](https://github.com/Lightning-AI/pytorch-lightning/pull/14702)) +- Fixed `Trainer` support for PyTorch built without distributed support ([#14971](https://github.com/Lightning-AI/pytorch-lightning/pull/14971)) +- Fixed batch normalization statistics calculation in `StochasticWeightAveraging` callback ([#14866](https://github.com/Lightning-AI/pytorch-lightning/pull/14866)) +- Avoided initializing optimizers during deepspeed inference ([#14944](https://github.com/Lightning-AI/pytorch-lightning/pull/14944)) +- Fixed `LightningCLI` parse_env and description in subcommands ([#15138](https://github.com/Lightning-AI/pytorch-lightning/pull/15138)) +- Fixed an exception that would occur when creating a `multiprocessing.Pool` after importing Lightning ([#15292](https://github.com/Lightning-AI/pytorch-lightning/pull/15292)) +- Fixed a pickling error when using `RichProgressBar` together with checkpointing ([#15319](https://github.com/Lightning-AI/pytorch-lightning/pull/15319)) +- Fixed the `RichProgressBar` crashing when used with distributed strategies ([#15376](https://github.com/Lightning-AI/pytorch-lightning/pull/15376)) +- Fixed an issue with `RichProgressBar` not resetting the internal state for the sanity check progress ([#15377](https://github.com/Lightning-AI/pytorch-lightning/pull/15377)) +- Fixed an issue with DataLoader re-instantiation when the attribute is an array and the default value of the corresponding argument changed ([#15409](https://github.com/Lightning-AI/pytorch-lightning/pull/15409)) ## [1.7.7] - 2022-09-22 ### Fixed -- Fixed the availability check for the neptune-client package ([#14714](https://github.com/Lightning-AI/lightning/pull/14714)) -- Break HPU Graphs into two parts (forward + backward as one and optimizer as another) for better performance ([#14656](https://github.com/Lightning-AI/lightning/pull/14656)) -- Fixed torchscript error with ensembles of LightningModules ([#14657](https://github.com/Lightning-AI/lightning/pull/14657), [#14724](https://github.com/Lightning-AI/lightning/pull/14724)) -- Fixed an issue with `TensorBoardLogger.finalize` creating a new experiment when none was created during the Trainer's execution ([#14762](https://github.com/Lightning-AI/lightning/pull/14762)) -- Fixed `TypeError` on import when `torch.distributed` is not available ([#14809](https://github.com/Lightning-AI/lightning/pull/14809)) +- Fixed the availability check for the neptune-client package ([#14714](https://github.com/Lightning-AI/pytorch-lightning/pull/14714)) +- Break HPU Graphs into two parts (forward + backward as one and optimizer as another) for better performance ([#14656](https://github.com/Lightning-AI/pytorch-lightning/pull/14656)) +- Fixed torchscript error with ensembles of LightningModules ([#14657](https://github.com/Lightning-AI/pytorch-lightning/pull/14657), [#14724](https://github.com/Lightning-AI/pytorch-lightning/pull/14724)) +- Fixed an issue with `TensorBoardLogger.finalize` creating a new experiment when none was created during the Trainer's execution ([#14762](https://github.com/Lightning-AI/pytorch-lightning/pull/14762)) +- Fixed `TypeError` on import when `torch.distributed` is not available 
([#14809](https://github.com/Lightning-AI/pytorch-lightning/pull/14809)) ## [1.7.6] - 2022-09-13 ### Changed -- Improved the error messaging when passing `Trainer.method(model, x_dataloader=None)` with no module-method implementations available ([#14614](https://github.com/Lightning-AI/lightning/pull/14614)) +- Improved the error messaging when passing `Trainer.method(model, x_dataloader=None)` with no module-method implementations available ([#14614](https://github.com/Lightning-AI/pytorch-lightning/pull/14614)) ### Fixed -- Reset the dataloaders on OOM failure in batch size finder to use the last successful batch size ([#14372](https://github.com/Lightning-AI/lightning/pull/14372)) -- Fixed an issue to keep downscaling the batch size in case there hasn't been even a single successful optimal batch size with `mode="power"` ([#14372](https://github.com/Lightning-AI/lightning/pull/14372)) -- Fixed an issue where `self.log`-ing a tensor would create a user warning from PyTorch about cloning tensors ([#14599](https://github.com/Lightning-AI/lightning/pull/14599)) -- Fixed compatibility when `torch.distributed` is not available ([#14454](https://github.com/Lightning-AI/lightning/pull/14454)) +- Reset the dataloaders on OOM failure in batch size finder to use the last successful batch size ([#14372](https://github.com/Lightning-AI/pytorch-lightning/pull/14372)) +- Fixed an issue to keep downscaling the batch size in case there hasn't been even a single successful optimal batch size with `mode="power"` ([#14372](https://github.com/Lightning-AI/pytorch-lightning/pull/14372)) +- Fixed an issue where `self.log`-ing a tensor would create a user warning from PyTorch about cloning tensors ([#14599](https://github.com/Lightning-AI/pytorch-lightning/pull/14599)) +- Fixed compatibility when `torch.distributed` is not available ([#14454](https://github.com/Lightning-AI/pytorch-lightning/pull/14454)) ## [1.7.5] - 2022-09-06 ### Fixed -- Squeezed tensor values when logging with `LightningModule.log` ([#14489](https://github.com/Lightning-AI/lightning/pull/14489)) -- Fixed `WandbLogger` `save_dir` is not set after creation ([#14326](https://github.com/Lightning-AI/lightning/pull/14326)) -- Fixed `Trainer.estimated_stepping_batches` when maximum number of epochs is not set ([#14317](https://github.com/Lightning-AI/lightning/pull/14317)) +- Squeezed tensor values when logging with `LightningModule.log` ([#14489](https://github.com/Lightning-AI/pytorch-lightning/pull/14489)) +- Fixed `WandbLogger` `save_dir` is not set after creation ([#14326](https://github.com/Lightning-AI/pytorch-lightning/pull/14326)) +- Fixed `Trainer.estimated_stepping_batches` when maximum number of epochs is not set ([#14317](https://github.com/Lightning-AI/pytorch-lightning/pull/14317)) ## [1.7.4] - 2022-08-31 ### Added -- Added an environment variable `PL_DISABLE_FORK` that can be used to disable all forking in the Trainer ([#14319](https://github.com/Lightning-AI/lightning/pull/14319)) +- Added an environment variable `PL_DISABLE_FORK` that can be used to disable all forking in the Trainer ([#14319](https://github.com/Lightning-AI/pytorch-lightning/pull/14319)) ### Fixed -- Fixed `LightningDataModule` hparams parsing ([#12806](https://github.com/Lightning-AI/lightning/pull/12806)) -- Reset epoch progress with batch size scaler ([#13846](https://github.com/Lightning-AI/lightning/pull/13846)) -- Fixed restoring the trainer after using `lr_find()` so that the correct LR schedule is used for the actual training 
([#14113](https://github.com/Lightning-AI/lightning/pull/14113)) -- Fixed incorrect values after transferring data to an MPS device ([#14368](https://github.com/Lightning-AI/lightning/pull/14368)) +- Fixed `LightningDataModule` hparams parsing ([#12806](https://github.com/Lightning-AI/pytorch-lightning/pull/12806)) +- Reset epoch progress with batch size scaler ([#13846](https://github.com/Lightning-AI/pytorch-lightning/pull/13846)) +- Fixed restoring the trainer after using `lr_find()` so that the correct LR schedule is used for the actual training ([#14113](https://github.com/Lightning-AI/pytorch-lightning/pull/14113)) +- Fixed incorrect values after transferring data to an MPS device ([#14368](https://github.com/Lightning-AI/pytorch-lightning/pull/14368)) ## [1.7.3] - 2022-08-25 ### Fixed -- Fixed an assertion error when using a `ReduceOnPlateau` scheduler with the Horovod strategy ([#14215](https://github.com/Lightning-AI/lightning/pull/14215)) -- Fixed an `AttributeError` when accessing `LightningModule.logger` and the Trainer has multiple loggers ([#14234](https://github.com/Lightning-AI/lightning/pull/14234)) -- Added back support for `log`ging in the `configure_gradient_clipping` hook after unintended removal in v1.7.2 ([#14298](https://github.com/Lightning-AI/lightning/pull/14298)) -- Fixed wrong num padding for `RichProgressBar` ([#14296](https://github.com/Lightning-AI/lightning/pull/14296)) -- Fixed an issue to avoid the impact of sanity check on `reload_dataloaders_every_n_epochs` for validation ([#13964](https://github.com/Lightning-AI/lightning/pull/13964)) +- Fixed an assertion error when using a `ReduceOnPlateau` scheduler with the Horovod strategy ([#14215](https://github.com/Lightning-AI/pytorch-lightning/pull/14215)) +- Fixed an `AttributeError` when accessing `LightningModule.logger` and the Trainer has multiple loggers ([#14234](https://github.com/Lightning-AI/pytorch-lightning/pull/14234)) +- Added back support for `log`ging in the `configure_gradient_clipping` hook after unintended removal in v1.7.2 ([#14298](https://github.com/Lightning-AI/pytorch-lightning/pull/14298)) +- Fixed wrong num padding for `RichProgressBar` ([#14296](https://github.com/Lightning-AI/pytorch-lightning/pull/14296)) +- Fixed an issue to avoid the impact of sanity check on `reload_dataloaders_every_n_epochs` for validation ([#13964](https://github.com/Lightning-AI/pytorch-lightning/pull/13964)) ## [1.7.2] - 2022-08-17 ### Added -- Added `FullyShardedNativeNativeMixedPrecisionPlugin` to handle precision for `DDPFullyShardedNativeStrategy` ([#14092](https://github.com/Lightning-AI/lightning/pull/14092)) -- Added profiling to these hooks: `on_before_batch_transfer`, `transfer_batch_to_device`, `on_after_batch_transfer`, `configure_gradient_clipping`, `clip_gradients` ([#14069](https://github.com/Lightning-AI/lightning/pull/14069)) +- Added `FullyShardedNativeNativeMixedPrecisionPlugin` to handle precision for `DDPFullyShardedNativeStrategy` ([#14092](https://github.com/Lightning-AI/pytorch-lightning/pull/14092)) +- Added profiling to these hooks: `on_before_batch_transfer`, `transfer_batch_to_device`, `on_after_batch_transfer`, `configure_gradient_clipping`, `clip_gradients` ([#14069](https://github.com/Lightning-AI/pytorch-lightning/pull/14069)) ### Changed -- The `WandbLogger.name` property no longer returns the name of the experiment, and instead returns the project's name ([#14145](https://github.com/Lightning-AI/lightning/pull/14145)) -- The default project name in `WandbLogger` is now 
"lightning_logs" ([#14145](https://github.com/Lightning-AI/lightning/pull/14145)) -- Updated compatibility for LightningLite to run with the latest DeepSpeed 0.7.0 ([13967](https://github.com/Lightning-AI/lightning/pull/13967)) +- The `WandbLogger.name` property no longer returns the name of the experiment, and instead returns the project's name ([#14145](https://github.com/Lightning-AI/pytorch-lightning/pull/14145)) +- The default project name in `WandbLogger` is now "lightning_logs" ([#14145](https://github.com/Lightning-AI/pytorch-lightning/pull/14145)) +- Updated compatibility for LightningLite to run with the latest DeepSpeed 0.7.0 ([13967](https://github.com/Lightning-AI/pytorch-lightning/pull/13967)) ### Fixed -- Fixed a bug that caused spurious `AttributeError` when multiple `DataLoader` classes are imported ([#14117](https://github.com/Lightning-AI/lightning/pull/14117)) -- Fixed epoch-end logging results not being reset after the end of the epoch ([#14061](https://github.com/Lightning-AI/lightning/pull/14061)) -- Fixed resuming from a checkpoint when using Stochastic Weight Averaging (SWA) ([#9938](https://github.com/Lightning-AI/lightning/pull/9938)) -- Fixed the device placement when `LightningModule.cuda()` gets called without specifying a device index and the current cuda device was not 0 ([#14128](https://github.com/Lightning-AI/lightning/pull/14128)) -- Avoided false positive warning about using `sync_dist` when using torchmetrics ([#14143](https://github.com/Lightning-AI/lightning/pull/14143)) -- Avoid `metadata.entry_points` deprecation warning on Python 3.10 ([#14052](https://github.com/Lightning-AI/lightning/pull/14052)) -- Fixed epoch-end logging results not being reset after the end of the epoch ([#14061](https://github.com/Lightning-AI/lightning/pull/14061)) -- Avoid raising the sampler warning if num_replicas=1 ([#14097](https://github.com/Lightning-AI/lightning/pull/14097)) -- Fixed saving hyperparameters in a composition where the parent class is not a `LightningModule` or `LightningDataModule` ([#14151](https://github.com/Lightning-AI/lightning/pull/14151)) -- Avoided requiring the FairScale package to use precision with the fsdp native strategy ([#14092](https://github.com/Lightning-AI/lightning/pull/14092)) -- Fixed an issue in which the default name for a run in `WandbLogger` would be set to the project name instead of a randomly generated string ([#14145](https://github.com/Lightning-AI/lightning/pull/14145)) -- Fixed not preserving set attributes on `DataLoader` and `BatchSampler` when instantiated inside `*_dataloader` hooks ([#14212](https://github.com/Lightning-AI/lightning/pull/14212)) +- Fixed a bug that caused spurious `AttributeError` when multiple `DataLoader` classes are imported ([#14117](https://github.com/Lightning-AI/pytorch-lightning/pull/14117)) +- Fixed epoch-end logging results not being reset after the end of the epoch ([#14061](https://github.com/Lightning-AI/pytorch-lightning/pull/14061)) +- Fixed resuming from a checkpoint when using Stochastic Weight Averaging (SWA) ([#9938](https://github.com/Lightning-AI/pytorch-lightning/pull/9938)) +- Fixed the device placement when `LightningModule.cuda()` gets called without specifying a device index and the current cuda device was not 0 ([#14128](https://github.com/Lightning-AI/pytorch-lightning/pull/14128)) +- Avoided false positive warning about using `sync_dist` when using torchmetrics ([#14143](https://github.com/Lightning-AI/pytorch-lightning/pull/14143)) +- Avoid `metadata.entry_points` 
deprecation warning on Python 3.10 ([#14052](https://github.com/Lightning-AI/pytorch-lightning/pull/14052))
+- Avoid raising the sampler warning if num_replicas=1 ([#14097](https://github.com/Lightning-AI/pytorch-lightning/pull/14097))
+- Fixed saving hyperparameters in a composition where the parent class is not a `LightningModule` or `LightningDataModule` ([#14151](https://github.com/Lightning-AI/pytorch-lightning/pull/14151))
+- Avoided requiring the FairScale package to use precision with the fsdp native strategy ([#14092](https://github.com/Lightning-AI/pytorch-lightning/pull/14092))
+- Fixed an issue in which the default name for a run in `WandbLogger` would be set to the project name instead of a randomly generated string ([#14145](https://github.com/Lightning-AI/pytorch-lightning/pull/14145))
+- Fixed not preserving set attributes on `DataLoader` and `BatchSampler` when instantiated inside `*_dataloader` hooks ([#14212](https://github.com/Lightning-AI/pytorch-lightning/pull/14212))

## [1.7.1] - 2022-08-09

### Fixed

-- Casted only floating point tensors to fp16 with IPUs ([#13983](https://github.com/Lightning-AI/lightning/pull/13983))
-- Casted tensors to fp16 before moving them to device with `DeepSpeedStrategy` ([#14000](https://github.com/Lightning-AI/lightning/pull/14000))
-- Fixed the `NeptuneLogger` dependency being unrecognized ([#13988](https://github.com/Lightning-AI/lightning/pull/13988))
-- Fixed an issue where users would be warned about unset `max_epochs` even when `fast_dev_run` was set ([#13262](https://github.com/Lightning-AI/lightning/pull/13262))
-- Fixed MPS device being unrecognized ([#13992](https://github.com/Lightning-AI/lightning/pull/13992))
-- Fixed incorrect `precision="mixed"` being used with `DeepSpeedStrategy` and `IPUStrategy` ([#14041](https://github.com/Lightning-AI/lightning/pull/14041))
-- Fixed dtype inference during gradient norm computation ([#14051](https://github.com/Lightning-AI/lightning/pull/14051))
-- Fixed a bug that caused `ddp_find_unused_parameters` to be set `False`, whereas the intended default is `True` ([#14095](https://github.com/Lightning-AI/lightning/pull/14095))
+- Casted only floating point tensors to fp16 with IPUs ([#13983](https://github.com/Lightning-AI/pytorch-lightning/pull/13983))
+- Casted tensors to fp16 before moving them to device with `DeepSpeedStrategy` ([#14000](https://github.com/Lightning-AI/pytorch-lightning/pull/14000))
+- Fixed the `NeptuneLogger` dependency being unrecognized ([#13988](https://github.com/Lightning-AI/pytorch-lightning/pull/13988))
+- Fixed an issue where users would be warned about unset `max_epochs` even when `fast_dev_run` was set ([#13262](https://github.com/Lightning-AI/pytorch-lightning/pull/13262))
+- Fixed MPS device being unrecognized ([#13992](https://github.com/Lightning-AI/pytorch-lightning/pull/13992))
+- Fixed incorrect `precision="mixed"` being used with `DeepSpeedStrategy` and `IPUStrategy` ([#14041](https://github.com/Lightning-AI/pytorch-lightning/pull/14041))
+- Fixed dtype inference during gradient norm computation ([#14051](https://github.com/Lightning-AI/pytorch-lightning/pull/14051))
+- Fixed a bug that caused `ddp_find_unused_parameters` to be set `False`, whereas the intended default is `True` ([#14095](https://github.com/Lightning-AI/pytorch-lightning/pull/14095))

## [1.7.0] - 2022-08-02

### Added

-- Added 
``ServableModule`` and its associated callback called ``ServableModuleValidator`` to ensure the model can served ([#13614](https://github.com/Lightning-AI/lightning/pull/13614))
-- Converted validation loop config warnings to `PossibleUserWarning` ([#13377](https://github.com/Lightning-AI/lightning/pull/13377))
-- Added a flag named `log_rank_zero_only` to `EarlyStopping` to disable logging to non-zero rank processes ([#13233](https://github.com/Lightning-AI/lightning/pull/13233))
-- Added support for reloading the last checkpoint saved by passing `ckpt_path="last"` ([#12816](https://github.com/Lightning-AI/lightning/pull/12816))
-- Added `LightningDataModule.load_from_checkpoint` to support loading datamodules directly from checkpoint ([#12550](https://github.com/Lightning-AI/lightning/pull/12550))
-- Added a friendly error message when attempting to call `Trainer.save_checkpoint()` without a model attached ([#12772](https://github.com/Lightning-AI/lightning/pull/12772))
-- Added a friendly error message when attempting to use `DeepSpeedStrategy` on unsupported accelerators ([#12699](https://github.com/Lightning-AI/lightning/pull/12699))
-- Enabled `torch.inference_mode` for evaluation and prediction ([#12715](https://github.com/Lightning-AI/lightning/pull/12715))
-- Added support for setting `val_check_interval` to a value higher than the amount of training batches when `check_val_every_n_epoch=None` ([#11993](https://github.com/Lightning-AI/lightning/pull/11993))
-- Include the `pytorch_lightning` version as a header in the CLI config files ([#12532](https://github.com/Lightning-AI/lightning/pull/12532))
-- Added support for `Callback` registration through entry points ([#12739](https://github.com/Lightning-AI/lightning/pull/12739))
-- Added support for `Trainer(deterministic="warn")` to warn instead of fail when a non-deterministic operation is encountered ([#12588](https://github.com/Lightning-AI/lightning/pull/12588))
-- Added profiling to the loops' dataloader `__next__` calls ([#12124](https://github.com/Lightning-AI/lightning/pull/12124))
+- Added ``ServableModule`` and its associated callback called ``ServableModuleValidator`` to ensure the model can be served ([#13614](https://github.com/Lightning-AI/pytorch-lightning/pull/13614))
+- Converted validation loop config warnings to `PossibleUserWarning` ([#13377](https://github.com/Lightning-AI/pytorch-lightning/pull/13377))
+- Added a flag named `log_rank_zero_only` to `EarlyStopping` to disable logging to non-zero rank processes ([#13233](https://github.com/Lightning-AI/pytorch-lightning/pull/13233))
+- Added support for reloading the last checkpoint saved by passing `ckpt_path="last"` ([#12816](https://github.com/Lightning-AI/pytorch-lightning/pull/12816))
+- Added `LightningDataModule.load_from_checkpoint` to support loading datamodules directly from checkpoint ([#12550](https://github.com/Lightning-AI/pytorch-lightning/pull/12550))
+- Added a friendly error message when attempting to call `Trainer.save_checkpoint()` without a model attached ([#12772](https://github.com/Lightning-AI/pytorch-lightning/pull/12772))
+- Added a friendly error message when attempting to use `DeepSpeedStrategy` on unsupported accelerators ([#12699](https://github.com/Lightning-AI/pytorch-lightning/pull/12699))
+- Enabled `torch.inference_mode` for evaluation and prediction ([#12715](https://github.com/Lightning-AI/pytorch-lightning/pull/12715))
+- Added support for setting `val_check_interval` to a value higher than the amount of training batches when 
`check_val_every_n_epoch=None` ([#11993](https://github.com/Lightning-AI/pytorch-lightning/pull/11993)) +- Include the `pytorch_lightning` version as a header in the CLI config files ([#12532](https://github.com/Lightning-AI/pytorch-lightning/pull/12532)) +- Added support for `Callback` registration through entry points ([#12739](https://github.com/Lightning-AI/pytorch-lightning/pull/12739)) +- Added support for `Trainer(deterministic="warn")` to warn instead of fail when a non-deterministic operation is encountered ([#12588](https://github.com/Lightning-AI/pytorch-lightning/pull/12588)) +- Added profiling to the loops' dataloader `__next__` calls ([#12124](https://github.com/Lightning-AI/pytorch-lightning/pull/12124)) - Hivemind Strategy - * Added `CollaborativeStrategy` ([#12842](https://github.com/Lightning-AI/lightning/pull/12842)) - * Renamed `CollaborativeStrategy` to `HivemindStrategy` ([#13388](https://github.com/Lightning-AI/lightning/pull/13388)) - * Removed unnecessary endpoint logic, renamed `collaborative` to `hivemind` ([#13392](https://github.com/Lightning-AI/lightning/pull/13392)) -- Include a version suffix for new "last" checkpoints of later runs in the same directory ([#12902](https://github.com/Lightning-AI/lightning/pull/12902)) -- Show a better error message when a Metric that does not return a Tensor is logged ([#13164](https://github.com/Lightning-AI/lightning/pull/13164)) -- Added missing `predict_dataset` argument in `LightningDataModule.from_datasets` to create predict dataloaders ([#12942](https://github.com/Lightning-AI/lightning/pull/12942)) -- Added class name prefix to metrics logged by `DeviceStatsMonitor` ([#12228](https://github.com/Lightning-AI/lightning/pull/12228)) -- Automatically wrap custom samplers under a distributed environment by using `DistributedSamplerWrapper` ([#12959](https://github.com/Lightning-AI/lightning/pull/12959)) -- Added profiling of `LightningDataModule` hooks ([#12971](https://github.com/Lightning-AI/lightning/pull/12971)) -- Added Native FSDP Strategy ([#12447](https://github.com/Lightning-AI/lightning/pull/12447)) -- Added breaking of lazy graph across training, validation, test and predict steps when training with habana accelerators to ensure better performance ([#12938](https://github.com/Lightning-AI/lightning/pull/12938)) -- Added `Checkpoint` class to inherit from ([#13024](https://github.com/Lightning-AI/lightning/pull/13024)) -- Added CPU metric tracking to `DeviceStatsMonitor` ([#11795](https://github.com/Lightning-AI/lightning/pull/11795)) -- Added `teardown()` method to `Accelerator` ([#11935](https://github.com/Lightning-AI/lightning/pull/11935)) -- Added support for using custom Trainers that don't include callbacks using the CLI ([#13138](https://github.com/Lightning-AI/lightning/pull/13138)) -- Added a `timeout` argument to `DDPStrategy` and `DDPSpawnStrategy`. 
([#13244](https://github.com/Lightning-AI/lightning/pull/13244), [#13383](https://github.com/Lightning-AI/lightning/pull/13383)) -- Added `XLAEnvironment` cluster environment plugin ([#11330](https://github.com/Lightning-AI/lightning/pull/11330)) -- Added logging messages to notify when `FitLoop` stopping conditions are met ([#9749](https://github.com/Lightning-AI/lightning/pull/9749)) -- Added support for calling unknown methods with `DummyLogger` ([#13224](https://github.com/Lightning-AI/lightning/pull/13224) -- Added support for recursively setting the `Trainer` reference for ensembles of `LightningModule`s ([#13638](https://github.com/Lightning-AI/lightning/pull/13638) -- Added Apple Silicon Support via `MPSAccelerator` ([#13123](https://github.com/Lightning-AI/lightning/pull/13123)) -- Added support for DDP Fork ([#13405](https://github.com/Lightning-AI/lightning/pull/13405)) -- Added support for async checkpointing ([#13658](https://github.com/Lightning-AI/lightning/pull/13658)) -- Added support for HPU Device stats monitor ([#13819](https://github.com/Lightning-AI/lightning/pull/13819)) + * Added `CollaborativeStrategy` ([#12842](https://github.com/Lightning-AI/pytorch-lightning/pull/12842)) + * Renamed `CollaborativeStrategy` to `HivemindStrategy` ([#13388](https://github.com/Lightning-AI/pytorch-lightning/pull/13388)) + * Removed unnecessary endpoint logic, renamed `collaborative` to `hivemind` ([#13392](https://github.com/Lightning-AI/pytorch-lightning/pull/13392)) +- Include a version suffix for new "last" checkpoints of later runs in the same directory ([#12902](https://github.com/Lightning-AI/pytorch-lightning/pull/12902)) +- Show a better error message when a Metric that does not return a Tensor is logged ([#13164](https://github.com/Lightning-AI/pytorch-lightning/pull/13164)) +- Added missing `predict_dataset` argument in `LightningDataModule.from_datasets` to create predict dataloaders ([#12942](https://github.com/Lightning-AI/pytorch-lightning/pull/12942)) +- Added class name prefix to metrics logged by `DeviceStatsMonitor` ([#12228](https://github.com/Lightning-AI/pytorch-lightning/pull/12228)) +- Automatically wrap custom samplers under a distributed environment by using `DistributedSamplerWrapper` ([#12959](https://github.com/Lightning-AI/pytorch-lightning/pull/12959)) +- Added profiling of `LightningDataModule` hooks ([#12971](https://github.com/Lightning-AI/pytorch-lightning/pull/12971)) +- Added Native FSDP Strategy ([#12447](https://github.com/Lightning-AI/pytorch-lightning/pull/12447)) +- Added breaking of lazy graph across training, validation, test and predict steps when training with habana accelerators to ensure better performance ([#12938](https://github.com/Lightning-AI/pytorch-lightning/pull/12938)) +- Added `Checkpoint` class to inherit from ([#13024](https://github.com/Lightning-AI/pytorch-lightning/pull/13024)) +- Added CPU metric tracking to `DeviceStatsMonitor` ([#11795](https://github.com/Lightning-AI/pytorch-lightning/pull/11795)) +- Added `teardown()` method to `Accelerator` ([#11935](https://github.com/Lightning-AI/pytorch-lightning/pull/11935)) +- Added support for using custom Trainers that don't include callbacks using the CLI ([#13138](https://github.com/Lightning-AI/pytorch-lightning/pull/13138)) +- Added a `timeout` argument to `DDPStrategy` and `DDPSpawnStrategy`. 
([#13244](https://github.com/Lightning-AI/pytorch-lightning/pull/13244), [#13383](https://github.com/Lightning-AI/pytorch-lightning/pull/13383)) +- Added `XLAEnvironment` cluster environment plugin ([#11330](https://github.com/Lightning-AI/pytorch-lightning/pull/11330)) +- Added logging messages to notify when `FitLoop` stopping conditions are met ([#9749](https://github.com/Lightning-AI/pytorch-lightning/pull/9749)) +- Added support for calling unknown methods with `DummyLogger` ([#13224](https://github.com/Lightning-AI/pytorch-lightning/pull/13224)) +- Added support for recursively setting the `Trainer` reference for ensembles of `LightningModule`s ([#13638](https://github.com/Lightning-AI/pytorch-lightning/pull/13638)) +- Added Apple Silicon Support via `MPSAccelerator` ([#13123](https://github.com/Lightning-AI/pytorch-lightning/pull/13123)) +- Added support for DDP Fork ([#13405](https://github.com/Lightning-AI/pytorch-lightning/pull/13405)) +- Added support for async checkpointing ([#13658](https://github.com/Lightning-AI/pytorch-lightning/pull/13658)) +- Added support for HPU Device stats monitor ([#13819](https://github.com/Lightning-AI/pytorch-lightning/pull/13819)) ### Changed -- `accelerator="gpu"` now automatically selects an available GPU backend (CUDA and MPS currently) ([#13642](https://github.com/Lightning-AI/lightning/pull/13642)) -- Enable validation during overfitting ([#12527](https://github.com/Lightning-AI/lightning/pull/12527)) -- Added dataclass support to `extract_batch_size` ([#12573](https://github.com/Lightning-AI/lightning/pull/12573)) -- Changed checkpoints save path in the case of one logger and user-provided weights_save_path from `weights_save_path/name/version/checkpoints` to `weights_save_path/checkpoints` ([#12372](https://github.com/Lightning-AI/lightning/pull/12372)) -- Changed checkpoints save path in the case of multiple loggers and user-provided weights_save_path from `weights_save_path/name1_name2/version1_version2/checkpoints` to `weights_save_path/checkpoints` ([#12372](https://github.com/Lightning-AI/lightning/pull/12372)) -- Marked `swa_lrs` argument in `StochasticWeightAveraging` callback as required ([#12556](https://github.com/Lightning-AI/lightning/pull/12556)) -- `LightningCLI`'s shorthand notation changed to use jsonargparse native feature ([#12614](https://github.com/Lightning-AI/lightning/pull/12614)) -- `LightningCLI` changed to use jsonargparse native support for list append ([#13129](https://github.com/Lightning-AI/lightning/pull/13129)) -- Changed `seed_everything_default` argument in the `LightningCLI` to type `Union[bool, int]`. If set to `True` a seed is automatically generated for the parser argument `--seed_everything`. ([#12822](https://github.com/Lightning-AI/lightning/pull/12822), [#13110](https://github.com/Lightning-AI/lightning/pull/13110)) -- Make positional arguments required for classes passed into the `add_argparse_args` function.
([#12504](https://github.com/Lightning-AI/lightning/pull/12504)) -- Raise an error if there are insufficient training batches when using a float value of `limit_train_batches` ([#12885](https://github.com/Lightning-AI/lightning/pull/12885)) -- `DataLoader` instantiated inside a `*_dataloader` hook will not set the passed arguments as attributes anymore ([#12981](https://github.com/Lightning-AI/lightning/pull/12981)) -- When a multi-element tensor is logged, an error is now raised instead of silently taking the mean of all elements ([#13164](https://github.com/Lightning-AI/lightning/pull/13164)) -- The `WandbLogger` will now use the run name in the logs folder if it is provided, and otherwise the project name ([#12604](https://github.com/Lightning-AI/lightning/pull/12604)) -- Enabled using any Sampler in distributed environment in Lite ([#13646](https://github.com/Lightning-AI/lightning/pull/13646)) -- Raised a warning instead of forcing `sync_dist=True` on epoch end ([13364](https://github.com/Lightning-AI/lightning/pull/13364)) -- Updated `val_check_interval`(int) to consider total train batches processed instead of `_batches_that_stepped` for validation check during training ([#12832](https://github.com/Lightning-AI/lightning/pull/12832) -- Updated Habana Accelerator's `auto_device_count`, `is_available` & `get_device_name` methods based on the latest torch habana package ([#13423](https://github.com/Lightning-AI/lightning/pull/13423)) -- Disallowed using `BatchSampler` when running on multiple IPUs ([#13854](https://github.com/Lightning-AI/lightning/pull/13854)) +- `accelerator="gpu"` now automatically selects an available GPU backend (CUDA and MPS currently) ([#13642](https://github.com/Lightning-AI/pytorch-lightning/pull/13642)) +- Enable validation during overfitting ([#12527](https://github.com/Lightning-AI/pytorch-lightning/pull/12527)) +- Added dataclass support to `extract_batch_size` ([#12573](https://github.com/Lightning-AI/pytorch-lightning/pull/12573)) +- Changed checkpoints save path in the case of one logger and a user-provided `weights_save_path` from `weights_save_path/name/version/checkpoints` to `weights_save_path/checkpoints` ([#12372](https://github.com/Lightning-AI/pytorch-lightning/pull/12372)) +- Changed checkpoints save path in the case of multiple loggers and a user-provided `weights_save_path` from `weights_save_path/name1_name2/version1_version2/checkpoints` to `weights_save_path/checkpoints` ([#12372](https://github.com/Lightning-AI/pytorch-lightning/pull/12372)) +- Marked `swa_lrs` argument in `StochasticWeightAveraging` callback as required ([#12556](https://github.com/Lightning-AI/pytorch-lightning/pull/12556)) +- `LightningCLI`'s shorthand notation changed to use jsonargparse native feature ([#12614](https://github.com/Lightning-AI/pytorch-lightning/pull/12614)) +- `LightningCLI` changed to use jsonargparse native support for list append ([#13129](https://github.com/Lightning-AI/pytorch-lightning/pull/13129)) +- Changed `seed_everything_default` argument in the `LightningCLI` to type `Union[bool, int]`. If set to `True`, a seed is automatically generated for the parser argument `--seed_everything`. ([#12822](https://github.com/Lightning-AI/pytorch-lightning/pull/12822), [#13110](https://github.com/Lightning-AI/pytorch-lightning/pull/13110)) +- Make positional arguments required for classes passed into the `add_argparse_args` function.
([#12504](https://github.com/Lightning-AI/pytorch-lightning/pull/12504)) +- Raise an error if there are insufficient training batches when using a float value of `limit_train_batches` ([#12885](https://github.com/Lightning-AI/pytorch-lightning/pull/12885)) +- `DataLoader` instantiated inside a `*_dataloader` hook will not set the passed arguments as attributes anymore ([#12981](https://github.com/Lightning-AI/pytorch-lightning/pull/12981)) +- When a multi-element tensor is logged, an error is now raised instead of silently taking the mean of all elements ([#13164](https://github.com/Lightning-AI/pytorch-lightning/pull/13164)) +- The `WandbLogger` will now use the run name in the logs folder if it is provided, and otherwise the project name ([#12604](https://github.com/Lightning-AI/pytorch-lightning/pull/12604)) +- Enabled using any Sampler in distributed environment in Lite ([#13646](https://github.com/Lightning-AI/pytorch-lightning/pull/13646)) +- Raised a warning instead of forcing `sync_dist=True` on epoch end ([#13364](https://github.com/Lightning-AI/pytorch-lightning/pull/13364)) +- Updated `val_check_interval` (int) to consider total train batches processed instead of `_batches_that_stepped` for validation check during training ([#12832](https://github.com/Lightning-AI/pytorch-lightning/pull/12832)) +- Updated Habana Accelerator's `auto_device_count`, `is_available` & `get_device_name` methods based on the latest torch habana package ([#13423](https://github.com/Lightning-AI/pytorch-lightning/pull/13423)) +- Disallowed using `BatchSampler` when running on multiple IPUs ([#13854](https://github.com/Lightning-AI/pytorch-lightning/pull/13854)) ### Deprecated -- Deprecated `pl.accelerators.gpu.GPUAccelerator` in favor of `pl.accelerators.cuda.CUDAAccelerator` ([#13636](https://github.com/Lightning-AI/lightning/pull/13636)) -- Deprecated `pl.loggers.base.LightningLoggerBase` in favor of `pl.loggers.logger.Logger`, and deprecated `pl.loggers.base` in favor of `pl.loggers.logger` ([#120148](https://github.com/Lightning-AI/lightning/pull/12014)) -- Deprecated `pl.callbacks.base.Callback` in favor of `pl.callbacks.callback.Callback` ([#13031](https://github.com/Lightning-AI/lightning/pull/13031)) -- Deprecated `num_processes`, `gpus`, `tpu_cores,` and `ipus` from the `Trainer` constructor in favor of using the `accelerator` and `devices` arguments ([#11040](https://github.com/Lightning-AI/lightning/pull/11040)) -- Deprecated setting `LightningCLI(seed_everything_default=None)` in favor of `False` ([#12804](https://github.com/Lightning-AI/lightning/pull/12804)).
-- Deprecated `pl.core.lightning.LightningModule` in favor of `pl.core.module.LightningModule` ([#12740](https://github.com/Lightning-AI/lightning/pull/12740)) -- Deprecated `pl.loops.base.Loop` in favor of `pl.loops.loop.Loop` ([#13043](https://github.com/Lightning-AI/lightning/pull/13043)) -- Deprecated `Trainer.reset_train_val_dataloaders()` in favor of `Trainer.reset_{train,val}_dataloader` ([#12184](https://github.com/Lightning-AI/lightning/pull/12184)) -- Deprecated LightningCLI's registries in favor of importing the respective package ([#13221](https://github.com/Lightning-AI/lightning/pull/13221)) -- Deprecated public utilities in `pl.utilities.cli.LightningCLI` in favor of equivalent copies in `pl.cli.LightningCLI` ([#13767](https://github.com/Lightning-AI/lightning/pull/13767)) -- Deprecated `pl.profiler.*` in favor of `pl.profilers` ([#12308](https://github.com/Lightning-AI/lightning/pull/12308)) +- Deprecated `pl.accelerators.gpu.GPUAccelerator` in favor of `pl.accelerators.cuda.CUDAAccelerator` ([#13636](https://github.com/Lightning-AI/pytorch-lightning/pull/13636)) +- Deprecated `pl.loggers.base.LightningLoggerBase` in favor of `pl.loggers.logger.Logger`, and deprecated `pl.loggers.base` in favor of `pl.loggers.logger` ([#12014](https://github.com/Lightning-AI/pytorch-lightning/pull/12014)) +- Deprecated `pl.callbacks.base.Callback` in favor of `pl.callbacks.callback.Callback` ([#13031](https://github.com/Lightning-AI/pytorch-lightning/pull/13031)) +- Deprecated `num_processes`, `gpus`, `tpu_cores`, and `ipus` from the `Trainer` constructor in favor of using the `accelerator` and `devices` arguments ([#11040](https://github.com/Lightning-AI/pytorch-lightning/pull/11040)) +- Deprecated setting `LightningCLI(seed_everything_default=None)` in favor of `False` ([#12804](https://github.com/Lightning-AI/pytorch-lightning/pull/12804)).
+- Deprecated `pl.core.lightning.LightningModule` in favor of `pl.core.module.LightningModule` ([#12740](https://github.com/Lightning-AI/pytorch-lightning/pull/12740)) +- Deprecated `pl.loops.base.Loop` in favor of `pl.loops.loop.Loop` ([#13043](https://github.com/Lightning-AI/pytorch-lightning/pull/13043)) +- Deprecated `Trainer.reset_train_val_dataloaders()` in favor of `Trainer.reset_{train,val}_dataloader` ([#12184](https://github.com/Lightning-AI/pytorch-lightning/pull/12184)) +- Deprecated LightningCLI's registries in favor of importing the respective package ([#13221](https://github.com/Lightning-AI/pytorch-lightning/pull/13221)) +- Deprecated public utilities in `pl.utilities.cli.LightningCLI` in favor of equivalent copies in `pl.cli.LightningCLI` ([#13767](https://github.com/Lightning-AI/pytorch-lightning/pull/13767)) +- Deprecated `pl.profiler.*` in favor of `pl.profilers` ([#12308](https://github.com/Lightning-AI/pytorch-lightning/pull/12308)) ### Removed -- Removed deprecated `IndexBatchSamplerWrapper.batch_indices` ([#13565](https://github.com/Lightning-AI/lightning/pull/13565)) -- Removed the deprecated `LightningModule.add_to_queue` and `LightningModule.get_from_queue` method ([#13600](https://github.com/Lightning-AI/lightning/pull/13600)) -- Removed deprecated `pl.core.decorators.parameter_validation` from `decorators` ([#13514](https://github.com/Lightning-AI/lightning/pull/13514)) -- Removed the deprecated `Logger.close` method ([#13149](https://github.com/Lightning-AI/lightning/pull/13149)) -- Removed the deprecated `weights_summary` argument from the `Trainer` constructor ([#13070](https://github.com/Lightning-AI/lightning/pull/13070)) -- Removed the deprecated `flush_logs_every_n_steps` argument from the `Trainer` constructor ([#13074](https://github.com/Lightning-AI/lightning/pull/13074)) -- Removed the deprecated `process_position` argument from the `Trainer` constructor ([13071](https://github.com/Lightning-AI/lightning/pull/13071)) -- Removed the deprecated `checkpoint_callback` argument from the `Trainer` constructor ([#13027](https://github.com/Lightning-AI/lightning/pull/13027)) -- Removed the deprecated `on_{train,val,test,predict}_dataloader` hooks from the `LightningModule` and `LightningDataModule` ([#13033](https://github.com/Lightning-AI/lightning/pull/13033)) -- Removed the deprecated `TestTubeLogger` ([#12859](https://github.com/Lightning-AI/lightning/pull/12859)) -- Removed the deprecated `pl.core.memory.LayerSummary` and `pl.core.memory.ModelSummary` ([#12593](https://github.com/Lightning-AI/lightning/pull/12593)) -- Removed the deprecated `summarize` method from the `LightningModule` ([#12559](https://github.com/Lightning-AI/lightning/pull/12559)) -- Removed the deprecated `model_size` property from the `LightningModule` class ([#12641](https://github.com/Lightning-AI/lightning/pull/12641)) -- Removed the deprecated `stochastic_weight_avg` argument from the `Trainer` constructor ([#12535](https://github.com/Lightning-AI/lightning/pull/12535)) -- Removed the deprecated `progress_bar_refresh_rate` argument from the `Trainer` constructor ([#12514](https://github.com/Lightning-AI/lightning/pull/12514)) -- Removed the deprecated `prepare_data_per_node` argument from the `Trainer` constructor ([#12536](https://github.com/Lightning-AI/lightning/pull/12536)) -- Removed the deprecated `pl.core.memory.{get_gpu_memory_map,get_memory_profile}` ([#12659](https://github.com/Lightning-AI/lightning/pull/12659)) -- Removed the deprecated `terminate_on_nan` argument 
from the `Trainer` constructor ([#12553](https://github.com/Lightning-AI/lightning/pull/12553)) -- Removed the deprecated `XLAStatsMonitor` callback ([#12688](https://github.com/Lightning-AI/lightning/pull/12688)) -- Remove deprecated `pl.callbacks.progress.progress` ([#12658](https://github.com/Lightning-AI/lightning/pull/12658)) -- Removed the deprecated `dim` and `size` arguments from the `LightningDataModule` constructor([#12780](https://github.com/Lightning-AI/lightning/pull/12780)) -- Removed the deprecated `train_transforms` argument from the `LightningDataModule` constructor([#12662](https://github.com/Lightning-AI/lightning/pull/12662)) -- Removed the deprecated `log_gpu_memory` argument from the `Trainer` constructor ([#12657](https://github.com/Lightning-AI/lightning/pull/12657)) -- Removed the deprecated automatic logging of GPU stats by the logger connector ([#12657](https://github.com/Lightning-AI/lightning/pull/12657)) -- Removed deprecated `GPUStatsMonitor` callback ([#12554](https://github.com/Lightning-AI/lightning/pull/12554)) -- Removed support for passing strategy names or strategy instances to the accelerator Trainer argument ([#12696](https://github.com/Lightning-AI/lightning/pull/12696)) -- Removed support for passing strategy names or strategy instances to the plugins Trainer argument ([#12700](https://github.com/Lightning-AI/lightning/pull/12700)) -- Removed the deprecated `val_transforms` argument from the `LightningDataModule` constructor ([#12763](https://github.com/Lightning-AI/lightning/pull/12763)) -- Removed the deprecated `test_transforms` argument from the `LightningDataModule` constructor ([#12773](https://github.com/Lightning-AI/lightning/pull/12773)) -- Removed deprecated `Trainer(max_steps=None)` ([#13591](https://github.com/Lightning-AI/lightning/pull/13591)) -- Removed deprecated `dataloader_idx` argument from `on_train_batch_start/end` hooks `Callback` and `LightningModule` ([#12769](https://github.com/Lightning-AI/lightning/pull/12769), [#12977](https://github.com/Lightning-AI/lightning/pull/12977)) -- Removed deprecated `get_progress_bar_dict` property from `LightningModule` ([#12839](https://github.com/Lightning-AI/lightning/pull/12839)) -- Removed sanity check for multi-optimizer support with habana backends ([#13217](https://github.com/Lightning-AI/lightning/pull/13217)) -- Removed the need to explicitly load habana module ([#13338](https://github.com/Lightning-AI/lightning/pull/13338)) -- Removed the deprecated `Strategy.post_dispatch()` hook ([#13461](https://github.com/Lightning-AI/lightning/pull/13461)) -- Removed deprecated `pl.callbacks.lr_monitor.LearningRateMonitor.lr_sch_names` ([#13353](https://github.com/Lightning-AI/lightning/pull/13353)) -- Removed deprecated `Trainer.slurm_job_id` in favor of `SLURMEnvironment.job_id` ([#13459](https://github.com/Lightning-AI/lightning/pull/13459)) -- Removed support for the `DDP2Strategy` ([#12705](https://github.com/Lightning-AI/lightning/pull/12705)) -- Removed deprecated `LightningDistributed` ([#13549](https://github.com/Lightning-AI/lightning/pull/13549)) -- Removed deprecated ClusterEnvironment properties `master_address` and `master_port` in favor of `main_address` and `main_port` ([#13458](https://github.com/Lightning-AI/lightning/pull/13458)) -- Removed deprecated ClusterEnvironment methods `KubeflowEnvironment.is_using_kubelfow()`, `LSFEnvironment.is_using_lsf()` and `TorchElasticEnvironment.is_using_torchelastic()` in favor of the `detect()` method 
([#13458](https://github.com/Lightning-AI/lightning/pull/13458)) -- Removed deprecated `Callback.on_keyboard_interrupt` ([#13438](https://github.com/Lightning-AI/lightning/pull/13438)) -- Removed deprecated `LightningModule.on_post_move_to_device` ([#13548](https://github.com/Lightning-AI/lightning/pull/13548)) -- Removed `TPUSpawnStrategy.{tpu_local_core_rank,tpu_global_core_rank}` attributes in favor of `TPUSpawnStrategy.{local_rank,global_rank}` ([#11163](https://github.com/Lightning-AI/lightning/pull/11163)) -- Removed `SingleTPUStrategy.{tpu_local_core_rank,tpu_global_core_rank}` attributes in favor of `SingleTPUStrategy.{local_rank,global_rank}`([#11163](https://github.com/Lightning-AI/lightning/pull/11163)) +- Removed deprecated `IndexBatchSamplerWrapper.batch_indices` ([#13565](https://github.com/Lightning-AI/pytorch-lightning/pull/13565)) +- Removed the deprecated `LightningModule.add_to_queue` and `LightningModule.get_from_queue` methods ([#13600](https://github.com/Lightning-AI/pytorch-lightning/pull/13600)) +- Removed deprecated `pl.core.decorators.parameter_validation` from `decorators` ([#13514](https://github.com/Lightning-AI/pytorch-lightning/pull/13514)) +- Removed the deprecated `Logger.close` method ([#13149](https://github.com/Lightning-AI/pytorch-lightning/pull/13149)) +- Removed the deprecated `weights_summary` argument from the `Trainer` constructor ([#13070](https://github.com/Lightning-AI/pytorch-lightning/pull/13070)) +- Removed the deprecated `flush_logs_every_n_steps` argument from the `Trainer` constructor ([#13074](https://github.com/Lightning-AI/pytorch-lightning/pull/13074)) +- Removed the deprecated `process_position` argument from the `Trainer` constructor ([#13071](https://github.com/Lightning-AI/pytorch-lightning/pull/13071)) +- Removed the deprecated `checkpoint_callback` argument from the `Trainer` constructor ([#13027](https://github.com/Lightning-AI/pytorch-lightning/pull/13027)) +- Removed the deprecated `on_{train,val,test,predict}_dataloader` hooks from the `LightningModule` and `LightningDataModule` ([#13033](https://github.com/Lightning-AI/pytorch-lightning/pull/13033)) +- Removed the deprecated `TestTubeLogger` ([#12859](https://github.com/Lightning-AI/pytorch-lightning/pull/12859)) +- Removed the deprecated `pl.core.memory.LayerSummary` and `pl.core.memory.ModelSummary` ([#12593](https://github.com/Lightning-AI/pytorch-lightning/pull/12593)) +- Removed the deprecated `summarize` method from the `LightningModule` ([#12559](https://github.com/Lightning-AI/pytorch-lightning/pull/12559)) +- Removed the deprecated `model_size` property from the `LightningModule` class ([#12641](https://github.com/Lightning-AI/pytorch-lightning/pull/12641)) +- Removed the deprecated `stochastic_weight_avg` argument from the `Trainer` constructor ([#12535](https://github.com/Lightning-AI/pytorch-lightning/pull/12535)) +- Removed the deprecated `progress_bar_refresh_rate` argument from the `Trainer` constructor ([#12514](https://github.com/Lightning-AI/pytorch-lightning/pull/12514)) +- Removed the deprecated `prepare_data_per_node` argument from the `Trainer` constructor ([#12536](https://github.com/Lightning-AI/pytorch-lightning/pull/12536)) +- Removed the deprecated `pl.core.memory.{get_gpu_memory_map,get_memory_profile}` ([#12659](https://github.com/Lightning-AI/pytorch-lightning/pull/12659)) +- Removed the deprecated `terminate_on_nan` argument from the `Trainer` constructor ([#12553](https://github.com/Lightning-AI/pytorch-lightning/pull/12553)) +- Removed the
deprecated `XLAStatsMonitor` callback ([#12688](https://github.com/Lightning-AI/pytorch-lightning/pull/12688)) +- Removed deprecated `pl.callbacks.progress.progress` ([#12658](https://github.com/Lightning-AI/pytorch-lightning/pull/12658)) +- Removed the deprecated `dim` and `size` arguments from the `LightningDataModule` constructor ([#12780](https://github.com/Lightning-AI/pytorch-lightning/pull/12780)) +- Removed the deprecated `train_transforms` argument from the `LightningDataModule` constructor ([#12662](https://github.com/Lightning-AI/pytorch-lightning/pull/12662)) +- Removed the deprecated `log_gpu_memory` argument from the `Trainer` constructor ([#12657](https://github.com/Lightning-AI/pytorch-lightning/pull/12657)) +- Removed the deprecated automatic logging of GPU stats by the logger connector ([#12657](https://github.com/Lightning-AI/pytorch-lightning/pull/12657)) +- Removed deprecated `GPUStatsMonitor` callback ([#12554](https://github.com/Lightning-AI/pytorch-lightning/pull/12554)) +- Removed support for passing strategy names or strategy instances to the accelerator Trainer argument ([#12696](https://github.com/Lightning-AI/pytorch-lightning/pull/12696)) +- Removed support for passing strategy names or strategy instances to the plugins Trainer argument ([#12700](https://github.com/Lightning-AI/pytorch-lightning/pull/12700)) +- Removed the deprecated `val_transforms` argument from the `LightningDataModule` constructor ([#12763](https://github.com/Lightning-AI/pytorch-lightning/pull/12763)) +- Removed the deprecated `test_transforms` argument from the `LightningDataModule` constructor ([#12773](https://github.com/Lightning-AI/pytorch-lightning/pull/12773)) +- Removed deprecated `Trainer(max_steps=None)` ([#13591](https://github.com/Lightning-AI/pytorch-lightning/pull/13591)) +- Removed deprecated `dataloader_idx` argument from `on_train_batch_start/end` hooks in `Callback` and `LightningModule` ([#12769](https://github.com/Lightning-AI/pytorch-lightning/pull/12769), [#12977](https://github.com/Lightning-AI/pytorch-lightning/pull/12977)) +- Removed deprecated `get_progress_bar_dict` property from `LightningModule` ([#12839](https://github.com/Lightning-AI/pytorch-lightning/pull/12839)) +- Removed sanity check for multi-optimizer support with habana backends ([#13217](https://github.com/Lightning-AI/pytorch-lightning/pull/13217)) +- Removed the need to explicitly load habana module ([#13338](https://github.com/Lightning-AI/pytorch-lightning/pull/13338)) +- Removed the deprecated `Strategy.post_dispatch()` hook ([#13461](https://github.com/Lightning-AI/pytorch-lightning/pull/13461)) +- Removed deprecated `pl.callbacks.lr_monitor.LearningRateMonitor.lr_sch_names` ([#13353](https://github.com/Lightning-AI/pytorch-lightning/pull/13353)) +- Removed deprecated `Trainer.slurm_job_id` in favor of `SLURMEnvironment.job_id` ([#13459](https://github.com/Lightning-AI/pytorch-lightning/pull/13459)) +- Removed support for the `DDP2Strategy` ([#12705](https://github.com/Lightning-AI/pytorch-lightning/pull/12705)) +- Removed deprecated `LightningDistributed` ([#13549](https://github.com/Lightning-AI/pytorch-lightning/pull/13549)) +- Removed deprecated ClusterEnvironment properties `master_address` and `master_port` in favor of `main_address` and `main_port` ([#13458](https://github.com/Lightning-AI/pytorch-lightning/pull/13458)) +- Removed deprecated ClusterEnvironment methods `KubeflowEnvironment.is_using_kubeflow()`, `LSFEnvironment.is_using_lsf()` and
`TorchElasticEnvironment.is_using_torchelastic()` in favor of the `detect()` method ([#13458](https://github.com/Lightning-AI/pytorch-lightning/pull/13458)) +- Removed deprecated `Callback.on_keyboard_interrupt` ([#13438](https://github.com/Lightning-AI/pytorch-lightning/pull/13438)) +- Removed deprecated `LightningModule.on_post_move_to_device` ([#13548](https://github.com/Lightning-AI/pytorch-lightning/pull/13548)) +- Removed `TPUSpawnStrategy.{tpu_local_core_rank,tpu_global_core_rank}` attributes in favor of `TPUSpawnStrategy.{local_rank,global_rank}` ([#11163](https://github.com/Lightning-AI/pytorch-lightning/pull/11163)) +- Removed `SingleTPUStrategy.{tpu_local_core_rank,tpu_global_core_rank}` attributes in favor of `SingleTPUStrategy.{local_rank,global_rank}` ([#11163](https://github.com/Lightning-AI/pytorch-lightning/pull/11163)) ### Fixed -- Improved support for custom `DataLoader`s when instantiated in `*_dataloader` hook ([#12981](https://github.com/Lightning-AI/lightning/pull/12981)) -- Allowed custom `BatchSampler`s when instantiated in `*_dataloader` hook [#13640](https://github.com/Lightning-AI/lightning/pull/13640)) -- Fixed an issue with unsupported torch.inference_mode() on hpu backends by making it use no_grad ([#13014](https://github.com/Lightning-AI/lightning/pull/13014)) -- The model wrapper returned by `LightningLite.setup()` now properly supports pass-through when looking up attributes ([#12597](https://github.com/Lightning-AI/lightning/pull/12597)) -- Fixed issue where the CLI fails with certain torch objects ([#13153](https://github.com/Lightning-AI/lightning/pull/13153)) -- Fixed ``LightningCLI`` signature parameter resolving for some lightning classes ([#13283](https://github.com/Lightning-AI/lightning/pull/13283)) -- Fixed Model Summary when using DeepSpeed Stage 3 ([#13427](https://github.com/Lightning-AI/lightning/pull/13427)) -- Fixed `pl.utilities.distributed.gather_all_tensors` to handle tensors of different dimensions ([#12630](https://github.com/Lightning-AI/lightning/pull/12630)) -- Fixed the input validation for the accelerator Trainer argument when passed as a string ([#13417](https://github.com/Lightning-AI/lightning/pull/13417)) -- Fixed `Trainer.predict(return_predictions=False)` to track prediction's batch_indices ([#13629](https://github.com/Lightning-AI/lightning/pull/13629)) -- Fixed and issue that prevented setting a custom `CheckpointIO` plugin with strategies ([#13785](https://github.com/Lightning-AI/lightning/pull/13785)) -- Fixed main progress bar counter when `val_check_interval=int` and `check_val_every_n_epoch=None` ([#12832](https://github.com/Lightning-AI/lightning/pull/12832) -- Improved support for custom `ReduceLROnPlateau` scheduler if `reduce_on_plateau` is set by the user in scheduler config ([#13838](https://github.com/Lightning-AI/lightning/pull/13838)) -- Used `global_step` while restoring logging step for old checkpoints ([#13645](https://github.com/Lightning-AI/lightning/pull/13645)) -- When training with `precision=16` on IPU, the cast has been moved off the IPU onto the host, making the copies from host to IPU cheaper ([#13880](https://github.com/Lightning-AI/lightning/pull/13880)) -- Fixed error handling in learning rate finder when not enough data points are available to give a good suggestion ([#13845](https://github.com/Lightning-AI/lightning/pull/13845)) -- Fixed an issue that caused the learning rate finder to set the model's learning rate to None when no suggestion was possible
([#13845](https://github.com/Lightning-AI/lightning/pull/13845)) -- Fixed an issue causing deterministic algorithms and other globals to get reset in spawned processes ([#13921](https://github.com/Lightning-AI/lightning/pull/13921)) -- Fixed default `amp_level` for `DeepSpeedPrecisionPlugin` to `O2` ([#13897](https://github.com/Lightning-AI/lightning/pull/13897)) -- Fixed Python 3.10 compatibility for truncated back-propagation through time (TBPTT) ([#13973](https://github.com/Lightning-AI/lightning/pull/13973)) -- Fixed `TQDMProgressBar` reset and update to show correct time estimation (2/2) ([#13962](https://github.com/Lightning-AI/lightning/pull/13962)) +- Improved support for custom `DataLoader`s when instantiated in `*_dataloader` hook ([#12981](https://github.com/Lightning-AI/pytorch-lightning/pull/12981)) +- Allowed custom `BatchSampler`s when instantiated in `*_dataloader` hook ([#13640](https://github.com/Lightning-AI/pytorch-lightning/pull/13640)) +- Fixed an issue with unsupported `torch.inference_mode()` on hpu backends by making it use `no_grad` ([#13014](https://github.com/Lightning-AI/pytorch-lightning/pull/13014)) +- The model wrapper returned by `LightningLite.setup()` now properly supports pass-through when looking up attributes ([#12597](https://github.com/Lightning-AI/pytorch-lightning/pull/12597)) +- Fixed an issue where the CLI fails with certain torch objects ([#13153](https://github.com/Lightning-AI/pytorch-lightning/pull/13153)) +- Fixed `LightningCLI` signature parameter resolving for some Lightning classes ([#13283](https://github.com/Lightning-AI/pytorch-lightning/pull/13283)) +- Fixed Model Summary when using DeepSpeed Stage 3 ([#13427](https://github.com/Lightning-AI/pytorch-lightning/pull/13427)) +- Fixed `pl.utilities.distributed.gather_all_tensors` to handle tensors of different dimensions ([#12630](https://github.com/Lightning-AI/pytorch-lightning/pull/12630)) +- Fixed the input validation for the accelerator Trainer argument when passed as a string ([#13417](https://github.com/Lightning-AI/pytorch-lightning/pull/13417)) +- Fixed `Trainer.predict(return_predictions=False)` to track the prediction's `batch_indices` ([#13629](https://github.com/Lightning-AI/pytorch-lightning/pull/13629)) +- Fixed an issue that prevented setting a custom `CheckpointIO` plugin with strategies ([#13785](https://github.com/Lightning-AI/pytorch-lightning/pull/13785)) +- Fixed main progress bar counter when `val_check_interval=int` and `check_val_every_n_epoch=None` ([#12832](https://github.com/Lightning-AI/pytorch-lightning/pull/12832)) +- Improved support for custom `ReduceLROnPlateau` scheduler if `reduce_on_plateau` is set by the user in scheduler config ([#13838](https://github.com/Lightning-AI/pytorch-lightning/pull/13838)) +- Used `global_step` while restoring logging step for old checkpoints ([#13645](https://github.com/Lightning-AI/pytorch-lightning/pull/13645)) +- When training with `precision=16` on IPU, the cast has been moved off the IPU onto the host, making the copies from host to IPU cheaper ([#13880](https://github.com/Lightning-AI/pytorch-lightning/pull/13880)) +- Fixed error handling in learning rate finder when not enough data points are available to give a good suggestion ([#13845](https://github.com/Lightning-AI/pytorch-lightning/pull/13845)) +- Fixed an issue that caused the learning rate finder to set the model's learning rate to None when no suggestion was possible ([#13845](https://github.com/Lightning-AI/pytorch-lightning/pull/13845)) +- Fixed an issue causing
deterministic algorithms and other globals to get reset in spawned processes ([#13921](https://github.com/Lightning-AI/pytorch-lightning/pull/13921)) +- Fixed default `amp_level` for `DeepSpeedPrecisionPlugin` to `O2` ([#13897](https://github.com/Lightning-AI/pytorch-lightning/pull/13897)) +- Fixed Python 3.10 compatibility for truncated back-propagation through time (TBPTT) ([#13973](https://github.com/Lightning-AI/pytorch-lightning/pull/13973)) +- Fixed `TQDMProgressBar` reset and update to show correct time estimation (2/2) ([#13962](https://github.com/Lightning-AI/pytorch-lightning/pull/13962)) ## [1.6.5] - 2022-07-13 ### Fixed -- Fixed `estimated_stepping_batches` requiring distributed comms in `configure_optimizers` for the `DeepSpeedStrategy` ([#13350](https://github.com/Lightning-AI/lightning/pull/13350)) -- Fixed bug with Python version check that prevented use with development versions of Python ([#13420](https://github.com/Lightning-AI/lightning/pull/13420)) -- The loops now call `.set_epoch()` also on batch samplers if the dataloader has one wrapped in a distributed sampler ([#13396](https://github.com/Lightning-AI/lightning/pull/13396)) -- Fixed the restoration of log step during restart ([#13467](https://github.com/Lightning-AI/lightning/pull/13467)) +- Fixed `estimated_stepping_batches` requiring distributed comms in `configure_optimizers` for the `DeepSpeedStrategy` ([#13350](https://github.com/Lightning-AI/pytorch-lightning/pull/13350)) +- Fixed a bug with the Python version check that prevented use with development versions of Python ([#13420](https://github.com/Lightning-AI/pytorch-lightning/pull/13420)) +- The loops now also call `.set_epoch()` on batch samplers if the dataloader has one wrapped in a distributed sampler ([#13396](https://github.com/Lightning-AI/pytorch-lightning/pull/13396)) +- Fixed the restoration of log step during restart ([#13467](https://github.com/Lightning-AI/pytorch-lightning/pull/13467)) ## [1.6.4] - 2022-06-01 ### Added -- Added all DDP params to be exposed through hpu parallel strategy ([#13067](https://github.com/Lightning-AI/lightning/pull/13067)) +- Added all DDP params to be exposed through the hpu parallel strategy ([#13067](https://github.com/Lightning-AI/pytorch-lightning/pull/13067)) ### Changed -- Keep `torch.backends.cudnn.benchmark=False` by default (unlike in v1.6.{0-3}) after speed and memory problems depending on the data used. Please consider tuning `Trainer(benchmark)` manually.
([#13154](https://github.com/Lightning-AI/pytorch-lightning/pull/13154)) +- Prevent modification of `torch.backends.cudnn.benchmark` when `Trainer(benchmark=...)` is not set ([#13154](https://github.com/Lightning-AI/pytorch-lightning/pull/13154)) ### Fixed -- Fixed an issue causing zero-division error for empty dataloaders ([#12885](https://github.com/Lightning-AI/lightning/pull/12885)) -- Fixed mismatching default values for the types of some arguments in the DeepSpeed and Fully-Sharded strategies which made the CLI unable to use them ([#12989](https://github.com/Lightning-AI/lightning/pull/12989)) -- Avoid redundant callback restore warning while tuning ([#13026](https://github.com/Lightning-AI/lightning/pull/13026)) -- Fixed `Trainer(precision=64)` during evaluation which now uses the wrapped precision module ([#12983](https://github.com/Lightning-AI/lightning/pull/12983)) -- Fixed an issue to use wrapped `LightningModule` for evaluation during `trainer.fit` for `BaguaStrategy` ([#12983](https://github.com/Lightning-AI/lightning/pull/12983)) -- Fixed an issue wrt unnecessary usage of habana mixed precision package for fp32 types ([#13028](https://github.com/Lightning-AI/lightning/pull/13028)) -- Fixed the number of references of `LightningModule` so it can be deleted ([#12897](https://github.com/Lightning-AI/lightning/pull/12897)) -- Fixed `materialize_module` setting a module's child recursively ([#12870](https://github.com/Lightning-AI/lightning/pull/12870)) -- Fixed issue where the CLI could not pass a `Profiler` to the `Trainer` ([#13084](https://github.com/Lightning-AI/lightning/pull/13084)) -- Fixed torchelastic detection with non-distributed installations ([#13142](https://github.com/Lightning-AI/lightning/pull/13142)) -- Fixed logging's step values when multiple dataloaders are used during evaluation ([#12184](https://github.com/Lightning-AI/lightning/pull/12184)) -- Fixed epoch logging on train epoch end ([#13025](https://github.com/Lightning-AI/lightning/pull/13025)) -- Fixed `DDPStrategy` and `DDPSpawnStrategy` to initialize optimizers only after moving the module to the device ([#11952](https://github.com/Lightning-AI/lightning/pull/11952)) +- Fixed an issue causing zero-division error for empty dataloaders ([#12885](https://github.com/Lightning-AI/pytorch-lightning/pull/12885)) +- Fixed mismatching default values for the types of some arguments in the DeepSpeed and Fully-Sharded strategies which made the CLI unable to use them ([#12989](https://github.com/Lightning-AI/pytorch-lightning/pull/12989)) +- Avoid redundant callback restore warning while tuning ([#13026](https://github.com/Lightning-AI/pytorch-lightning/pull/13026)) +- Fixed `Trainer(precision=64)` during evaluation which now uses the wrapped precision module ([#12983](https://github.com/Lightning-AI/pytorch-lightning/pull/12983)) +- Fixed an issue to use wrapped `LightningModule` for evaluation during `trainer.fit` for `BaguaStrategy` ([#12983](https://github.com/Lightning-AI/pytorch-lightning/pull/12983)) +- Fixed an issue wrt unnecessary usage of habana mixed precision package for fp32 types ([#13028](https://github.com/Lightning-AI/pytorch-lightning/pull/13028)) +- Fixed the number of references of `LightningModule` so it can be deleted ([#12897](https://github.com/Lightning-AI/pytorch-lightning/pull/12897)) +- Fixed `materialize_module` setting a module's child recursively ([#12870](https://github.com/Lightning-AI/pytorch-lightning/pull/12870)) +- Fixed issue where the CLI could not pass a `Profiler` to the 
`Trainer` ([#13084](https://github.com/Lightning-AI/pytorch-lightning/pull/13084)) +- Fixed torchelastic detection with non-distributed installations ([#13142](https://github.com/Lightning-AI/pytorch-lightning/pull/13142)) +- Fixed logging's step values when multiple dataloaders are used during evaluation ([#12184](https://github.com/Lightning-AI/pytorch-lightning/pull/12184)) +- Fixed epoch logging on train epoch end ([#13025](https://github.com/Lightning-AI/pytorch-lightning/pull/13025)) +- Fixed `DDPStrategy` and `DDPSpawnStrategy` to initialize optimizers only after moving the module to the device ([#11952](https://github.com/Lightning-AI/pytorch-lightning/pull/11952)) ## [1.6.3] - 2022-05-03 ### Fixed -- Use only a single instance of `rich.console.Console` throughout codebase ([#12886](https://github.com/Lightning-AI/lightning/pull/12886)) -- Fixed an issue to ensure all the checkpoint states are saved in a common filepath with `DeepspeedStrategy` ([#12887](https://github.com/Lightning-AI/lightning/pull/12887)) -- Fixed `trainer.logger` deprecation message ([#12671](https://github.com/Lightning-AI/lightning/pull/12671)) -- Fixed an issue where sharded grad scaler is passed in when using BF16 with the `ShardedStrategy` ([#12915](https://github.com/Lightning-AI/lightning/pull/12915)) -- Fixed an issue wrt recursive invocation of DDP configuration in hpu parallel plugin ([#12912](https://github.com/Lightning-AI/lightning/pull/12912)) -- Fixed printing of ragged dictionaries in `Trainer.validate` and `Trainer.test` ([#12857](https://github.com/Lightning-AI/lightning/pull/12857)) -- Fixed threading support for legacy loading of checkpoints ([#12814](https://github.com/Lightning-AI/lightning/pull/12814)) -- Fixed pickling of `KFoldLoop` ([#12441](https://github.com/Lightning-AI/lightning/pull/12441)) -- Stopped `optimizer_zero_grad` from being called after IPU execution ([#12913](https://github.com/Lightning-AI/lightning/pull/12913)) -- Fixed `fuse_modules` to be qat-aware for `torch>=1.11` ([#12891](https://github.com/Lightning-AI/lightning/pull/12891)) -- Enforced eval shuffle warning only for default samplers in DataLoader ([#12653](https://github.com/Lightning-AI/lightning/pull/12653)) -- Enable mixed precision in `DDPFullyShardedStrategy` when `precision=16` ([#12965](https://github.com/Lightning-AI/lightning/pull/12965)) -- Fixed `TQDMProgressBar` reset and update to show correct time estimation (1/2) ([#12889](https://github.com/Lightning-AI/lightning/pull/12889)) -- Fixed fit loop restart logic to enable resume using the checkpoint ([#12821](https://github.com/Lightning-AI/lightning/pull/12821)) +- Use only a single instance of `rich.console.Console` throughout codebase ([#12886](https://github.com/Lightning-AI/pytorch-lightning/pull/12886)) +- Fixed an issue to ensure all the checkpoint states are saved in a common filepath with `DeepspeedStrategy` ([#12887](https://github.com/Lightning-AI/pytorch-lightning/pull/12887)) +- Fixed `trainer.logger` deprecation message ([#12671](https://github.com/Lightning-AI/pytorch-lightning/pull/12671)) +- Fixed an issue where sharded grad scaler is passed in when using BF16 with the `ShardedStrategy` ([#12915](https://github.com/Lightning-AI/pytorch-lightning/pull/12915)) +- Fixed an issue wrt recursive invocation of DDP configuration in hpu parallel plugin ([#12912](https://github.com/Lightning-AI/pytorch-lightning/pull/12912)) +- Fixed printing of ragged dictionaries in `Trainer.validate` and `Trainer.test` 
([#12857](https://github.com/Lightning-AI/pytorch-lightning/pull/12857)) +- Fixed threading support for legacy loading of checkpoints ([#12814](https://github.com/Lightning-AI/pytorch-lightning/pull/12814)) +- Fixed pickling of `KFoldLoop` ([#12441](https://github.com/Lightning-AI/pytorch-lightning/pull/12441)) +- Stopped `optimizer_zero_grad` from being called after IPU execution ([#12913](https://github.com/Lightning-AI/pytorch-lightning/pull/12913)) +- Fixed `fuse_modules` to be qat-aware for `torch>=1.11` ([#12891](https://github.com/Lightning-AI/pytorch-lightning/pull/12891)) +- Enforced eval shuffle warning only for default samplers in DataLoader ([#12653](https://github.com/Lightning-AI/pytorch-lightning/pull/12653)) +- Enable mixed precision in `DDPFullyShardedStrategy` when `precision=16` ([#12965](https://github.com/Lightning-AI/pytorch-lightning/pull/12965)) +- Fixed `TQDMProgressBar` reset and update to show correct time estimation (1/2) ([#12889](https://github.com/Lightning-AI/pytorch-lightning/pull/12889)) +- Fixed fit loop restart logic to enable resume using the checkpoint ([#12821](https://github.com/Lightning-AI/pytorch-lightning/pull/12821)) ## [1.6.2] - 2022-04-27 ### Fixed -- Fixed `ImportError` when `torch.distributed` is not available. ([#12794](https://github.com/Lightning-AI/lightning/pull/12794)) -- When using custom DataLoaders in LightningDataModule, multiple inheritance is resolved properly ([#12716](https://github.com/Lightning-AI/lightning/pull/12716)) -- Fixed encoding issues on terminals that do not support unicode characters ([#12828](https://github.com/Lightning-AI/lightning/pull/12828)) -- Fixed support for `ModelCheckpoint` monitors with dots ([#12783](https://github.com/Lightning-AI/lightning/pull/12783)) +- Fixed `ImportError` when `torch.distributed` is not available. 
([#12794](https://github.com/Lightning-AI/pytorch-lightning/pull/12794)) +- When using custom `DataLoader`s in a `LightningDataModule`, multiple inheritance is resolved properly ([#12716](https://github.com/Lightning-AI/pytorch-lightning/pull/12716)) +- Fixed encoding issues on terminals that do not support Unicode characters ([#12828](https://github.com/Lightning-AI/pytorch-lightning/pull/12828)) +- Fixed support for `ModelCheckpoint` monitors with dots ([#12783](https://github.com/Lightning-AI/pytorch-lightning/pull/12783)) ## [1.6.1] - 2022-04-13 ### Changed -- Support `strategy` argument being case insensitive ([#12528](https://github.com/Lightning-AI/lightning/pull/12528)) +- Support `strategy` argument being case insensitive ([#12528](https://github.com/Lightning-AI/pytorch-lightning/pull/12528)) ### Fixed -- Run main progress bar updates independent of val progress bar updates in `TQDMProgressBar` ([#12563](https://github.com/Lightning-AI/lightning/pull/12563)) -- Avoid calling `average_parameters` multiple times per optimizer step ([#12452](https://github.com/Lightning-AI/lightning/pull/12452)) -- Properly pass some Logger's parent's arguments to `super().__init__()` ([#12609](https://github.com/Lightning-AI/lightning/pull/12609)) -- Fixed an issue where incorrect type warnings appear when the overridden `LightningLite.run` method accepts user-defined arguments ([#12629](https://github.com/Lightning-AI/lightning/pull/12629)) -- Fixed `rank_zero_only` decorator in LSF environments ([#12587](https://github.com/Lightning-AI/lightning/pull/12587)) -- Don't raise a warning when `nn.Module` is not saved under hparams ([#12669](https://github.com/Lightning-AI/lightning/pull/12669)) -- Raise `MisconfigurationException` when the accelerator is available but the user passes invalid `([]/0/"0")` values to the `devices` flag ([#12708](https://github.com/Lightning-AI/lightning/pull/12708)) -- Support `auto_select_gpus` with the accelerator and devices API ([#12608](https://github.com/Lightning-AI/lightning/pull/12608)) +- Run main progress bar updates independently of val progress bar updates in `TQDMProgressBar` ([#12563](https://github.com/Lightning-AI/pytorch-lightning/pull/12563)) +- Avoid calling `average_parameters` multiple times per optimizer step ([#12452](https://github.com/Lightning-AI/pytorch-lightning/pull/12452)) +- Properly pass some Logger's parent's arguments to `super().__init__()` ([#12609](https://github.com/Lightning-AI/pytorch-lightning/pull/12609)) +- Fixed an issue where incorrect type warnings appear when the overridden `LightningLite.run` method accepts user-defined arguments ([#12629](https://github.com/Lightning-AI/pytorch-lightning/pull/12629)) +- Fixed `rank_zero_only` decorator in LSF environments ([#12587](https://github.com/Lightning-AI/pytorch-lightning/pull/12587)) +- Don't raise a warning when `nn.Module` is not saved under hparams ([#12669](https://github.com/Lightning-AI/pytorch-lightning/pull/12669)) +- Raise `MisconfigurationException` when the accelerator is available but the user passes invalid `([]/0/"0")` values to the `devices` flag ([#12708](https://github.com/Lightning-AI/pytorch-lightning/pull/12708)) +- Support `auto_select_gpus` with the accelerator and devices API ([#12608](https://github.com/Lightning-AI/pytorch-lightning/pull/12608)) ## [1.6.0] - 2022-03-29 ### Added -- Allow logging to an existing run ID in MLflow with `MLFlowLogger` ([#12290](https://github.com/Lightning-AI/lightning/pull/12290)) -- Enable gradient accumulation using Horovod's
`backward_passes_per_step` ([#11911](https://github.com/Lightning-AI/lightning/pull/11911)) -- Add new `DETAIL` log level to provide useful logs for improving monitoring and debugging of batch jobs ([#11008](https://github.com/Lightning-AI/lightning/pull/11008)) -- Added a flag `SLURMEnvironment(auto_requeue=True|False)` to control whether Lightning handles the requeuing ([#10601](https://github.com/Lightning-AI/lightning/pull/10601)) +- Allow logging to an existing run ID in MLflow with `MLFlowLogger` ([#12290](https://github.com/Lightning-AI/pytorch-lightning/pull/12290)) +- Enable gradient accumulation using Horovod's `backward_passes_per_step` ([#11911](https://github.com/Lightning-AI/pytorch-lightning/pull/11911)) +- Add new `DETAIL` log level to provide useful logs for improving monitoring and debugging of batch jobs ([#11008](https://github.com/Lightning-AI/pytorch-lightning/pull/11008)) +- Added a flag `SLURMEnvironment(auto_requeue=True|False)` to control whether Lightning handles the requeuing ([#10601](https://github.com/Lightning-AI/pytorch-lightning/pull/10601)) - Fault Tolerant Manual - * Add `_Stateful` protocol to detect if classes are stateful ([#10646](https://github.com/Lightning-AI/lightning/pull/10646)) - * Add `_FaultTolerantMode` enum used to track different supported fault tolerant modes ([#10645](https://github.com/Lightning-AI/lightning/pull/10645)) - * Add a `_rotate_worker_indices` utility to reload the state according the latest worker ([#10647](https://github.com/Lightning-AI/lightning/pull/10647)) - * Add stateful workers ([#10674](https://github.com/Lightning-AI/lightning/pull/10674)) - * Add an utility to collect the states across processes ([#10639](https://github.com/Lightning-AI/lightning/pull/10639)) - * Add logic to reload the states across data loading components ([#10699](https://github.com/Lightning-AI/lightning/pull/10699)) - * Cleanup some fault tolerant utilities ([#10703](https://github.com/Lightning-AI/lightning/pull/10703)) - * Enable Fault Tolerant Manual Training ([#10707](https://github.com/Lightning-AI/lightning/pull/10707)) - * Broadcast the `_terminate_gracefully` to all processes and add support for DDP ([#10638](https://github.com/Lightning-AI/lightning/pull/10638)) -- Added support for re-instantiation of custom (subclasses of) `DataLoaders` returned in the `*_dataloader()` methods, i.e., automatic replacement of samplers now works with custom types of `DataLoader` ([#10680](https://github.com/Lightning-AI/lightning/pull/10680)) -- Added a function to validate if fault tolerant training is supported. 
([#10465](https://github.com/Lightning-AI/lightning/pull/10465)) -- Added a private callback to manage the creation and deletion of fault-tolerance checkpoints ([#11862](https://github.com/Lightning-AI/lightning/pull/11862)) -- Show a better error message when a custom `DataLoader` implementation is not well implemented and we need to reconstruct it ([#10719](https://github.com/Lightning-AI/lightning/pull/10719)) -- Show a better error message when frozen dataclass is used as a batch ([#10927](https://github.com/Lightning-AI/lightning/pull/10927)) -- Save the `Loop`'s state by default in the checkpoint ([#10784](https://github.com/Lightning-AI/lightning/pull/10784)) -- Added `Loop.replace` to easily switch one loop for another ([#10324](https://github.com/Lightning-AI/lightning/pull/10324)) -- Added support for `--lr_scheduler=ReduceLROnPlateau` to the `LightningCLI` ([#10860](https://github.com/Lightning-AI/lightning/pull/10860)) -- Added `LightningCLI.configure_optimizers` to override the `configure_optimizers` return value ([#10860](https://github.com/Lightning-AI/lightning/pull/10860)) -- Added `LightningCLI(auto_registry)` flag to register all subclasses of the registerable components automatically ([#12108](https://github.com/Lightning-AI/lightning/pull/12108)) -- Added a warning that shows when `max_epochs` in the `Trainer` is not set ([#10700](https://github.com/Lightning-AI/lightning/pull/10700)) -- Added support for returning a single Callback from `LightningModule.configure_callbacks` without wrapping it into a list ([#11060](https://github.com/Lightning-AI/lightning/pull/11060)) -- Added `console_kwargs` for `RichProgressBar` to initialize inner Console ([#10875](https://github.com/Lightning-AI/lightning/pull/10875)) -- Added support for shorthand notation to instantiate loggers with the `LightningCLI` ([#11533](https://github.com/Lightning-AI/lightning/pull/11533)) -- Added a `LOGGER_REGISTRY` instance to register custom loggers to the `LightningCLI` ([#11533](https://github.com/Lightning-AI/lightning/pull/11533)) -- Added info message when the `Trainer` arguments `limit_*_batches`, `overfit_batches`, or `val_check_interval` are set to `1` or `1.0` ([#11950](https://github.com/Lightning-AI/lightning/pull/11950)) -- Added a `PrecisionPlugin.teardown` method ([#10990](https://github.com/Lightning-AI/lightning/pull/10990)) -- Added `LightningModule.lr_scheduler_step` ([#10249](https://github.com/Lightning-AI/lightning/pull/10249)) -- Added support for no pre-fetching to `DataFetcher` ([#11606](https://github.com/Lightning-AI/lightning/pull/11606)) -- Added support for optimizer step progress tracking with manual optimization ([#11848](https://github.com/Lightning-AI/lightning/pull/11848)) -- Return the output of the `optimizer.step`. 
This can be useful for `LightningLite` users, manual optimization users, or users overriding `LightningModule.optimizer_step` ([#11711](https://github.com/Lightning-AI/lightning/pull/11711)) -- Teardown the active loop and strategy on exception ([#11620](https://github.com/Lightning-AI/lightning/pull/11620)) -- Added a `MisconfigurationException` if user provided `opt_idx` in scheduler config doesn't match with actual optimizer index of its respective optimizer ([#11247](https://github.com/Lightning-AI/lightning/pull/11247)) -- Added a `loggers` property to `Trainer` which returns a list of loggers provided by the user ([#11683](https://github.com/Lightning-AI/lightning/pull/11683)) -- Added a `loggers` property to `LightningModule` which retrieves the `loggers` property from `Trainer` ([#11683](https://github.com/Lightning-AI/lightning/pull/11683)) -- Added support for DDP when using a `CombinedLoader` for the training data ([#11648](https://github.com/Lightning-AI/lightning/pull/11648)) -- Added a warning when using `DistributedSampler` during validation/testing ([#11479](https://github.com/Lightning-AI/lightning/pull/11479)) -- Added support for `Bagua` training strategy ([#11146](https://github.com/Lightning-AI/lightning/pull/11146)) -- Added support for manually returning a `poptorch.DataLoader` in a `*_dataloader` hook ([#12116](https://github.com/Lightning-AI/lightning/pull/12116)) -- Added `rank_zero` module to centralize utilities ([#11747](https://github.com/Lightning-AI/lightning/pull/11747)) -- Added a `_Stateful` support for `LightningDataModule` ([#11637](https://github.com/Lightning-AI/lightning/pull/11637)) -- Added `_Stateful` support for `PrecisionPlugin` ([#11638](https://github.com/Lightning-AI/lightning/pull/11638)) -- Added `Accelerator.is_available` to check device availability ([#11797](https://github.com/Lightning-AI/lightning/pull/11797)) -- Enabled static type-checking on the signature of `Trainer` ([#11888](https://github.com/Lightning-AI/lightning/pull/11888)) -- Added utility functions for moving optimizers to devices ([#11758](https://github.com/Lightning-AI/lightning/pull/11758)) -- Added a warning when saving an instance of `nn.Module` with `save_hyperparameters()` ([#12068](https://github.com/Lightning-AI/lightning/pull/12068)) -- Added `estimated_stepping_batches` property to `Trainer` ([#11599](https://github.com/Lightning-AI/lightning/pull/11599)) -- Added support for pluggable Accelerators ([#12030](https://github.com/Lightning-AI/lightning/pull/12030)) -- Added profiling for `on_load_checkpoint`/`on_save_checkpoint` callback and LightningModule hooks ([#12149](https://github.com/Lightning-AI/lightning/pull/12149)) -- Added `LayerSync` and `NativeSyncBatchNorm` plugins ([#11754](https://github.com/Lightning-AI/lightning/pull/11754)) -- Added optional `storage_options` argument to `Trainer.save_checkpoint()` to pass to custom `CheckpointIO` implementations ([#11891](https://github.com/Lightning-AI/lightning/pull/11891)) -- Added support to explicitly specify the process group backend for parallel strategies ([#11745](https://github.com/Lightning-AI/lightning/pull/11745)) -- Added `device_ids` and `num_devices` property to `Trainer` ([#12151](https://github.com/Lightning-AI/lightning/pull/12151)) -- Added `Callback.state_dict()` and `Callback.load_state_dict()` methods ([#12232](https://github.com/Lightning-AI/lightning/pull/12232)) -- Added `AcceleratorRegistry` ([#12180](https://github.com/Lightning-AI/lightning/pull/12180)) -- Added support for Habana 
Accelerator (HPU) ([#11808](https://github.com/Lightning-AI/lightning/pull/11808)) -- Added support for dataclasses in `apply_to_collections` ([#11889](https://github.com/Lightning-AI/lightning/pull/11889)) + * Add `_Stateful` protocol to detect if classes are stateful ([#10646](https://github.com/Lightning-AI/pytorch-lightning/pull/10646)) + * Add `_FaultTolerantMode` enum used to track different supported fault tolerant modes ([#10645](https://github.com/Lightning-AI/pytorch-lightning/pull/10645)) + * Add a `_rotate_worker_indices` utility to reload the state according to the latest worker ([#10647](https://github.com/Lightning-AI/pytorch-lightning/pull/10647)) + * Add stateful workers ([#10674](https://github.com/Lightning-AI/pytorch-lightning/pull/10674)) + * Add a utility to collect the states across processes ([#10639](https://github.com/Lightning-AI/pytorch-lightning/pull/10639)) + * Add logic to reload the states across data loading components ([#10699](https://github.com/Lightning-AI/pytorch-lightning/pull/10699)) + * Cleanup some fault tolerant utilities ([#10703](https://github.com/Lightning-AI/pytorch-lightning/pull/10703)) + * Enable Fault Tolerant Manual Training ([#10707](https://github.com/Lightning-AI/pytorch-lightning/pull/10707)) + * Broadcast the `_terminate_gracefully` to all processes and add support for DDP ([#10638](https://github.com/Lightning-AI/pytorch-lightning/pull/10638)) +- Added support for re-instantiation of custom (subclasses of) `DataLoaders` returned in the `*_dataloader()` methods, i.e., automatic replacement of samplers now works with custom types of `DataLoader` ([#10680](https://github.com/Lightning-AI/pytorch-lightning/pull/10680)) +- Added a function to validate if fault tolerant training is supported. ([#10465](https://github.com/Lightning-AI/pytorch-lightning/pull/10465)) +- Added a private callback to manage the creation and deletion of fault-tolerance checkpoints ([#11862](https://github.com/Lightning-AI/pytorch-lightning/pull/11862)) +- Show a better error message when a custom `DataLoader` implementation is not well implemented and we need to reconstruct it ([#10719](https://github.com/Lightning-AI/pytorch-lightning/pull/10719)) +- Show a better error message when a frozen dataclass is used as a batch ([#10927](https://github.com/Lightning-AI/pytorch-lightning/pull/10927)) +- Save the `Loop`'s state by default in the checkpoint ([#10784](https://github.com/Lightning-AI/pytorch-lightning/pull/10784)) +- Added `Loop.replace` to easily switch one loop for another ([#10324](https://github.com/Lightning-AI/pytorch-lightning/pull/10324)) +- Added support for `--lr_scheduler=ReduceLROnPlateau` to the `LightningCLI` ([#10860](https://github.com/Lightning-AI/pytorch-lightning/pull/10860)) +- Added `LightningCLI.configure_optimizers` to override the `configure_optimizers` return value ([#10860](https://github.com/Lightning-AI/pytorch-lightning/pull/10860)) +- Added `LightningCLI(auto_registry)` flag to register all subclasses of the registerable components automatically ([#12108](https://github.com/Lightning-AI/pytorch-lightning/pull/12108)) +- Added a warning that shows when `max_epochs` in the `Trainer` is not set ([#10700](https://github.com/Lightning-AI/pytorch-lightning/pull/10700)) +- Added support for returning a single `Callback` from `LightningModule.configure_callbacks` without wrapping it into a list ([#11060](https://github.com/Lightning-AI/pytorch-lightning/pull/11060)) +- Added `console_kwargs` for `RichProgressBar` to initialize inner Console
+- Added support for shorthand notation to instantiate loggers with the `LightningCLI` ([#11533](https://github.com/Lightning-AI/pytorch-lightning/pull/11533))
+- Added a `LOGGER_REGISTRY` instance to register custom loggers to the `LightningCLI` ([#11533](https://github.com/Lightning-AI/pytorch-lightning/pull/11533))
+- Added info message when the `Trainer` arguments `limit_*_batches`, `overfit_batches`, or `val_check_interval` are set to `1` or `1.0` ([#11950](https://github.com/Lightning-AI/pytorch-lightning/pull/11950))
+- Added a `PrecisionPlugin.teardown` method ([#10990](https://github.com/Lightning-AI/pytorch-lightning/pull/10990))
+- Added `LightningModule.lr_scheduler_step` ([#10249](https://github.com/Lightning-AI/pytorch-lightning/pull/10249))
+- Added support for no pre-fetching to `DataFetcher` ([#11606](https://github.com/Lightning-AI/pytorch-lightning/pull/11606))
+- Added support for optimizer step progress tracking with manual optimization ([#11848](https://github.com/Lightning-AI/pytorch-lightning/pull/11848))
+- Return the output of `optimizer.step`. This can be useful for `LightningLite` users, manual optimization users, or users overriding `LightningModule.optimizer_step` ([#11711](https://github.com/Lightning-AI/pytorch-lightning/pull/11711))
+- Teardown the active loop and strategy on exception ([#11620](https://github.com/Lightning-AI/pytorch-lightning/pull/11620))
+- Added a `MisconfigurationException` if the user-provided `opt_idx` in the scheduler config doesn't match the actual optimizer index of its respective optimizer ([#11247](https://github.com/Lightning-AI/pytorch-lightning/pull/11247))
+- Added a `loggers` property to `Trainer` which returns a list of loggers provided by the user ([#11683](https://github.com/Lightning-AI/pytorch-lightning/pull/11683))
+- Added a `loggers` property to `LightningModule` which retrieves the `loggers` property from `Trainer` ([#11683](https://github.com/Lightning-AI/pytorch-lightning/pull/11683))
+- Added support for DDP when using a `CombinedLoader` for the training data ([#11648](https://github.com/Lightning-AI/pytorch-lightning/pull/11648))
+- Added a warning when using `DistributedSampler` during validation/testing ([#11479](https://github.com/Lightning-AI/pytorch-lightning/pull/11479))
+- Added support for `Bagua` training strategy ([#11146](https://github.com/Lightning-AI/pytorch-lightning/pull/11146))
+- Added support for manually returning a `poptorch.DataLoader` in a `*_dataloader` hook ([#12116](https://github.com/Lightning-AI/pytorch-lightning/pull/12116))
+- Added `rank_zero` module to centralize utilities ([#11747](https://github.com/Lightning-AI/pytorch-lightning/pull/11747))
+- Added `_Stateful` support for `LightningDataModule` ([#11637](https://github.com/Lightning-AI/pytorch-lightning/pull/11637))
+- Added `_Stateful` support for `PrecisionPlugin` ([#11638](https://github.com/Lightning-AI/pytorch-lightning/pull/11638))
+- Added `Accelerator.is_available` to check device availability ([#11797](https://github.com/Lightning-AI/pytorch-lightning/pull/11797))
+- Enabled static type-checking on the signature of `Trainer` ([#11888](https://github.com/Lightning-AI/pytorch-lightning/pull/11888))
+- Added utility functions for moving optimizers to devices ([#11758](https://github.com/Lightning-AI/pytorch-lightning/pull/11758))
+- Added a warning when saving an instance of `nn.Module` with `save_hyperparameters()` ([#12068](https://github.com/Lightning-AI/pytorch-lightning/pull/12068))
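Regarding the `save_hyperparameters()` warning just above, the usual remedy is to exclude module-typed constructor arguments from the saved hyperparameters. A minimal sketch; the model class and argument names are made up:

```python
import torch
import pytorch_lightning as pl

class LitClassifier(pl.LightningModule):
    def __init__(self, backbone: torch.nn.Module, lr: float = 1e-3):
        super().__init__()
        # storing a whole nn.Module as a "hyperparameter" triggers the new
        # warning; keep only the scalar settings in hparams instead
        self.save_hyperparameters(ignore=["backbone"])
        self.backbone = backbone
```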
+- Added `estimated_stepping_batches` property to `Trainer` ([#11599](https://github.com/Lightning-AI/pytorch-lightning/pull/11599))
+- Added support for pluggable Accelerators ([#12030](https://github.com/Lightning-AI/pytorch-lightning/pull/12030))
+- Added profiling for `on_load_checkpoint`/`on_save_checkpoint` callback and LightningModule hooks ([#12149](https://github.com/Lightning-AI/pytorch-lightning/pull/12149))
+- Added `LayerSync` and `NativeSyncBatchNorm` plugins ([#11754](https://github.com/Lightning-AI/pytorch-lightning/pull/11754))
+- Added optional `storage_options` argument to `Trainer.save_checkpoint()` to pass to custom `CheckpointIO` implementations ([#11891](https://github.com/Lightning-AI/pytorch-lightning/pull/11891))
+- Added support to explicitly specify the process group backend for parallel strategies ([#11745](https://github.com/Lightning-AI/pytorch-lightning/pull/11745))
+- Added `device_ids` and `num_devices` properties to `Trainer` ([#12151](https://github.com/Lightning-AI/pytorch-lightning/pull/12151))
+- Added `Callback.state_dict()` and `Callback.load_state_dict()` methods ([#12232](https://github.com/Lightning-AI/pytorch-lightning/pull/12232)) (see the sketch at the end of this list)
+- Added `AcceleratorRegistry` ([#12180](https://github.com/Lightning-AI/pytorch-lightning/pull/12180))
+- Added support for Habana Accelerator (HPU) ([#11808](https://github.com/Lightning-AI/pytorch-lightning/pull/11808))
+- Added support for dataclasses in `apply_to_collections` ([#11889](https://github.com/Lightning-AI/pytorch-lightning/pull/11889))
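To close out this section, a sketch of the new `Callback.state_dict`/`load_state_dict` pair referenced above. The counting callback is a made-up example, and the hook signature follows the 1.6-era API:

```python
import pytorch_lightning as pl

class BatchCounter(pl.Callback):
    """Made-up callback whose progress survives a checkpoint round-trip."""

    def __init__(self):
        self.batches_seen = 0

    def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
        self.batches_seen += 1

    def state_dict(self):
        # persisted into the checkpoint by the Trainer
        return {"batches_seen": self.batches_seen}

    def load_state_dict(self, state_dict):
        self.batches_seen = state_dict["batches_seen"]
```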
### Changed

-- Drop PyTorch 1.7 support ([#12191](https://github.com/Lightning-AI/lightning/pull/12191)), ([#12432](https://github.com/Lightning-AI/lightning/pull/12432))
-- Make `benchmark` flag optional and set its value based on the deterministic flag ([#11944](https://github.com/Lightning-AI/lightning/pull/11944))
-- Implemented a new native and rich format in `_print_results` method of the `EvaluationLoop` ([#11332](https://github.com/Lightning-AI/lightning/pull/11332))
-- Do not print an empty table at the end of the `EvaluationLoop` ([#12427](https://github.com/Lightning-AI/lightning/pull/12427))
-- Set the `prog_bar` flag to False in `LightningModule.log_grad_norm` ([#11472](https://github.com/Lightning-AI/lightning/pull/11472))
-- Raised exception in `init_dist_connection()` when torch distributed is not available ([#10418](https://github.com/Lightning-AI/lightning/pull/10418))
-- The `monitor` argument in the `EarlyStopping` callback is no longer optional ([#10328](https://github.com/Lightning-AI/lightning/pull/10328))
-- Do not fail if batch size could not be inferred for logging when using DeepSpeed ([#10438](https://github.com/Lightning-AI/lightning/pull/10438))
-- Raised `MisconfigurationException` when `enable_progress_bar=False` and a progress bar instance has been passed in the callback list ([#10520](https://github.com/Lightning-AI/lightning/pull/10520))
-- Moved `trainer.connectors.env_vars_connector._defaults_from_env_vars` to `utilities.argsparse._defaults_from_env_vars` ([#10501](https://github.com/Lightning-AI/lightning/pull/10501))
-- Changes in `LightningCLI` required for the new major release of jsonargparse v4.0.0 ([#10426](https://github.com/Lightning-AI/lightning/pull/10426))
-- Renamed `refresh_rate_per_second` parameter to `refresh_rate` for `RichProgressBar` signature ([#10497](https://github.com/Lightning-AI/lightning/pull/10497))
-- Moved ownership of the `PrecisionPlugin` into `TrainingTypePlugin` and updated all references ([#10570](https://github.com/Lightning-AI/lightning/pull/10570))
-- Fault Tolerant relies on `signal.SIGTERM` to gracefully exit instead of `signal.SIGUSR1` ([#10605](https://github.com/Lightning-AI/lightning/pull/10605))
-- `Loop.restarting=...` now sets the value recursively for all subloops ([#11442](https://github.com/Lightning-AI/lightning/pull/11442))
-- Raised an error if the `batch_size` cannot be inferred from the current batch if it contained a string or was a custom batch object ([#10541](https://github.com/Lightning-AI/lightning/pull/10541))
-- The validation loop is now disabled when `overfit_batches > 0` is set in the Trainer ([#9709](https://github.com/Lightning-AI/lightning/pull/9709))
-- Moved optimizer related logics from `Accelerator` to `TrainingTypePlugin` ([#10596](https://github.com/Lightning-AI/lightning/pull/10596))
-- Moved ownership of the lightning optimizers from the `Trainer` to the `Strategy` ([#11444](https://github.com/Lightning-AI/lightning/pull/11444))
-- Moved ownership of the data fetchers from the DataConnector to the Loops ([#11621](https://github.com/Lightning-AI/lightning/pull/11621))
-- Moved `batch_to_device` method from `Accelerator` to `TrainingTypePlugin` ([#10649](https://github.com/Lightning-AI/lightning/pull/10649))
-- The `DDPSpawnPlugin` no longer overrides the `post_dispatch` plugin hook ([#10034](https://github.com/Lightning-AI/lightning/pull/10034))
-- Integrate the progress bar implementation with progress tracking ([#11213](https://github.com/Lightning-AI/lightning/pull/11213))
-- The `LightningModule.{add_to_queue,get_from_queue}` hooks no longer get a `torch.multiprocessing.SimpleQueue` and instead receive a list based queue ([#10034](https://github.com/Lightning-AI/lightning/pull/10034))
-- Changed `training_step`, `validation_step`, `test_step` and `predict_step` method signatures in `Accelerator` and updated input from caller side ([#10908](https://github.com/Lightning-AI/lightning/pull/10908))
-- Changed the name of the temporary checkpoint that the `DDPSpawnPlugin` and related plugins save ([#10934](https://github.com/Lightning-AI/lightning/pull/10934))
-- `LoggerCollection` returns only unique logger names and versions ([#10976](https://github.com/Lightning-AI/lightning/pull/10976))
-- Redesigned process creation for spawn-based plugins (`DDPSpawnPlugin`, `TPUSpawnPlugin`, etc.) ([#10896](https://github.com/Lightning-AI/lightning/pull/10896))
+- Drop PyTorch 1.7 support ([#12191](https://github.com/Lightning-AI/pytorch-lightning/pull/12191)), ([#12432](https://github.com/Lightning-AI/pytorch-lightning/pull/12432))
+- Make `benchmark` flag optional and set its value based on the deterministic flag ([#11944](https://github.com/Lightning-AI/pytorch-lightning/pull/11944))
+- Implemented a new native and rich format in `_print_results` method of the `EvaluationLoop` ([#11332](https://github.com/Lightning-AI/pytorch-lightning/pull/11332))
+- Do not print an empty table at the end of the `EvaluationLoop` ([#12427](https://github.com/Lightning-AI/pytorch-lightning/pull/12427))
+- Set the `prog_bar` flag to False in `LightningModule.log_grad_norm` ([#11472](https://github.com/Lightning-AI/pytorch-lightning/pull/11472))
+- Raised exception in `init_dist_connection()` when torch distributed is not available ([#10418](https://github.com/Lightning-AI/pytorch-lightning/pull/10418))
+- The `monitor` argument in the `EarlyStopping` callback is no longer optional ([#10328](https://github.com/Lightning-AI/pytorch-lightning/pull/10328))
+- Do not fail if batch size could not be inferred for logging when using DeepSpeed ([#10438](https://github.com/Lightning-AI/pytorch-lightning/pull/10438))
+- Raised `MisconfigurationException` when `enable_progress_bar=False` and a progress bar instance has been passed in the callback list ([#10520](https://github.com/Lightning-AI/pytorch-lightning/pull/10520))
+- Moved `trainer.connectors.env_vars_connector._defaults_from_env_vars` to `utilities.argsparse._defaults_from_env_vars` ([#10501](https://github.com/Lightning-AI/pytorch-lightning/pull/10501))
+- Changes in `LightningCLI` required for the new major release of jsonargparse v4.0.0 ([#10426](https://github.com/Lightning-AI/pytorch-lightning/pull/10426))
+- Renamed `refresh_rate_per_second` parameter to `refresh_rate` for `RichProgressBar` signature ([#10497](https://github.com/Lightning-AI/pytorch-lightning/pull/10497))
+- Moved ownership of the `PrecisionPlugin` into `TrainingTypePlugin` and updated all references ([#10570](https://github.com/Lightning-AI/pytorch-lightning/pull/10570))
+- Fault Tolerant relies on `signal.SIGTERM` to gracefully exit instead of `signal.SIGUSR1` ([#10605](https://github.com/Lightning-AI/pytorch-lightning/pull/10605))
+- `Loop.restarting=...` now sets the value recursively for all subloops ([#11442](https://github.com/Lightning-AI/pytorch-lightning/pull/11442))
+- Raised an error if the `batch_size` cannot be inferred from the current batch when it contains a string or is a custom batch object ([#10541](https://github.com/Lightning-AI/pytorch-lightning/pull/10541))
+- The validation loop is now disabled when `overfit_batches > 0` is set in the Trainer ([#9709](https://github.com/Lightning-AI/pytorch-lightning/pull/9709))
+- Moved optimizer-related logic from `Accelerator` to `TrainingTypePlugin` ([#10596](https://github.com/Lightning-AI/pytorch-lightning/pull/10596))
+- Moved ownership of the lightning optimizers from the `Trainer` to the `Strategy` ([#11444](https://github.com/Lightning-AI/pytorch-lightning/pull/11444))
+- Moved ownership of the data fetchers from the DataConnector to the Loops ([#11621](https://github.com/Lightning-AI/pytorch-lightning/pull/11621))
+- Moved `batch_to_device` method from `Accelerator` to `TrainingTypePlugin` ([#10649](https://github.com/Lightning-AI/pytorch-lightning/pull/10649))
+- The `DDPSpawnPlugin` no longer overrides the
`post_dispatch` plugin hook ([#10034](https://github.com/Lightning-AI/pytorch-lightning/pull/10034)) +- Integrate the progress bar implementation with progress tracking ([#11213](https://github.com/Lightning-AI/pytorch-lightning/pull/11213)) +- The `LightningModule.{add_to_queue,get_from_queue}` hooks no longer get a `torch.multiprocessing.SimpleQueue` and instead receive a list based queue ([#10034](https://github.com/Lightning-AI/pytorch-lightning/pull/10034)) +- Changed `training_step`, `validation_step`, `test_step` and `predict_step` method signatures in `Accelerator` and updated input from caller side ([#10908](https://github.com/Lightning-AI/pytorch-lightning/pull/10908)) +- Changed the name of the temporary checkpoint that the `DDPSpawnPlugin` and related plugins save ([#10934](https://github.com/Lightning-AI/pytorch-lightning/pull/10934)) +- `LoggerCollection` returns only unique logger names and versions ([#10976](https://github.com/Lightning-AI/pytorch-lightning/pull/10976)) +- Redesigned process creation for spawn-based plugins (`DDPSpawnPlugin`, `TPUSpawnPlugin`, etc.) ([#10896](https://github.com/Lightning-AI/pytorch-lightning/pull/10896)) * All spawn-based plugins now spawn processes immediately upon calling `Trainer.{fit,validate,test,predict}` * The hooks/callbacks `prepare_data`, `setup`, `configure_sharded_model` and `teardown` now run under initialized process group for spawn-based plugins just like their non-spawn counterparts * Some configuration errors that were previously raised as `MisconfigurationException`s will now be raised as `ProcessRaisedException` (torch>=1.8) or as `Exception` (torch<1.8) - * Removed the `TrainingTypePlugin.pre_dispatch()` method and merged it with `TrainingTypePlugin.setup()` ([#11137](https://github.com/Lightning-AI/lightning/pull/11137)) -- Changed profiler to index and display the names of the hooks with a new pattern []. 
([#11026](https://github.com/Lightning-AI/lightning/pull/11026)) -- Changed `batch_to_device` entry in profiling from stage-specific to generic, to match profiling of other hooks ([#11031](https://github.com/Lightning-AI/lightning/pull/11031)) -- Changed the info message for finalizing ddp-spawn worker processes to a debug-level message ([#10864](https://github.com/Lightning-AI/lightning/pull/10864)) -- Removed duplicated file extension when uploading model checkpoints with `NeptuneLogger` ([#11015](https://github.com/Lightning-AI/lightning/pull/11015)) -- Removed `__getstate__` and `__setstate__` of `RichProgressBar` ([#11100](https://github.com/Lightning-AI/lightning/pull/11100)) -- The `DDPPlugin` and `DDPSpawnPlugin` and their subclasses now remove the `SyncBatchNorm` wrappers in `teardown()` to enable proper support at inference after fitting ([#11078](https://github.com/Lightning-AI/lightning/pull/11078)) -- Moved ownership of the `Accelerator` instance to the `TrainingTypePlugin`; all training-type plugins now take an optional parameter `accelerator` ([#11022](https://github.com/Lightning-AI/lightning/pull/11022)) -- Renamed the `TrainingTypePlugin` to `Strategy` ([#11120](https://github.com/Lightning-AI/lightning/pull/11120)) - * Renamed the `ParallelPlugin` to `ParallelStrategy` ([#11123](https://github.com/Lightning-AI/lightning/pull/11123)) - * Renamed the `DataParallelPlugin` to `DataParallelStrategy` ([#11183](https://github.com/Lightning-AI/lightning/pull/11183)) - * Renamed the `DDPPlugin` to `DDPStrategy` ([#11142](https://github.com/Lightning-AI/lightning/pull/11142)) - * Renamed the `DDP2Plugin` to `DDP2Strategy` ([#11185](https://github.com/Lightning-AI/lightning/pull/11185)) - * Renamed the `DDPShardedPlugin` to `DDPShardedStrategy` ([#11186](https://github.com/Lightning-AI/lightning/pull/11186)) - * Renamed the `DDPFullyShardedPlugin` to `DDPFullyShardedStrategy` ([#11143](https://github.com/Lightning-AI/lightning/pull/11143)) - * Renamed the `DDPSpawnPlugin` to `DDPSpawnStrategy` ([#11145](https://github.com/Lightning-AI/lightning/pull/11145)) - * Renamed the `DDPSpawnShardedPlugin` to `DDPSpawnShardedStrategy` ([#11210](https://github.com/Lightning-AI/lightning/pull/11210)) - * Renamed the `DeepSpeedPlugin` to `DeepSpeedStrategy` ([#11194](https://github.com/Lightning-AI/lightning/pull/11194)) - * Renamed the `HorovodPlugin` to `HorovodStrategy` ([#11195](https://github.com/Lightning-AI/lightning/pull/11195)) - * Renamed the `TPUSpawnPlugin` to `TPUSpawnStrategy` ([#11190](https://github.com/Lightning-AI/lightning/pull/11190)) - * Renamed the `IPUPlugin` to `IPUStrategy` ([#11193](https://github.com/Lightning-AI/lightning/pull/11193)) - * Renamed the `SingleDevicePlugin` to `SingleDeviceStrategy` ([#11182](https://github.com/Lightning-AI/lightning/pull/11182)) - * Renamed the `SingleTPUPlugin` to `SingleTPUStrategy` ([#11182](https://github.com/Lightning-AI/lightning/pull/11182)) - * Renamed the `TrainingTypePluginsRegistry` to `StrategyRegistry` ([#11233](https://github.com/Lightning-AI/lightning/pull/11233)) -- Marked the `ResultCollection`, `ResultMetric`, and `ResultMetricCollection` classes as protected ([#11130](https://github.com/Lightning-AI/lightning/pull/11130)) -- Marked `trainer.checkpoint_connector` as protected ([#11550](https://github.com/Lightning-AI/lightning/pull/11550)) -- The epoch start/end hooks are now called by the `FitLoop` instead of the `TrainingEpochLoop` ([#11201](https://github.com/Lightning-AI/lightning/pull/11201)) -- DeepSpeed does 
not require lightning module zero 3 partitioning ([#10655](https://github.com/Lightning-AI/lightning/pull/10655)) -- Moved `Strategy` classes to the `strategies` directory ([#11226](https://github.com/Lightning-AI/lightning/pull/11226)) -- Renamed `training_type_plugin` file to `strategy` ([#11239](https://github.com/Lightning-AI/lightning/pull/11239)) -- Changed `DeviceStatsMonitor` to group metrics based on the logger's `group_separator` ([#11254](https://github.com/Lightning-AI/lightning/pull/11254)) -- Raised `UserWarning` if evaluation is triggered with `best` ckpt and trainer is configured with multiple checkpoint callbacks ([#11274](https://github.com/Lightning-AI/lightning/pull/11274)) -- `Trainer.logged_metrics` now always contains scalar tensors, even when a Python scalar was logged ([#11270](https://github.com/Lightning-AI/lightning/pull/11270)) -- The tuner now uses the checkpoint connector to copy and restore its state ([#11518](https://github.com/Lightning-AI/lightning/pull/11518)) -- Changed `MisconfigurationException` to `ModuleNotFoundError` when `rich` isn't available ([#11360](https://github.com/Lightning-AI/lightning/pull/11360)) -- The `trainer.current_epoch` value is now increased by 1 during and after `on_train_end` ([#8578](https://github.com/Lightning-AI/lightning/pull/8578)) -- The `trainer.global_step` value now accounts for multiple optimizers and TBPTT splits ([#11805](https://github.com/Lightning-AI/lightning/pull/11805)) -- The `trainer.global_step` value is now increased right after the `optimizer.step()` call which will impact users who access it during an intra-training validation hook ([#11805](https://github.com/Lightning-AI/lightning/pull/11805)) -- The filename of checkpoints created with `ModelCheckpoint(filename='{step}')` is different compared to previous versions. 
A checkpoint saved after 1 step will be named `step=1.ckpt` instead of `step=0.ckpt` ([#11805](https://github.com/Lightning-AI/lightning/pull/11805)) -- Inherit from `ABC` for `Accelerator`: Users need to implement `auto_device_count` ([#11521](https://github.com/Lightning-AI/lightning/pull/11521)) -- Changed `parallel_devices` property in `ParallelStrategy` to be lazy initialized ([#11572](https://github.com/Lightning-AI/lightning/pull/11572)) -- Updated `TQDMProgressBar` to run a separate progress bar for each eval dataloader ([#11657](https://github.com/Lightning-AI/lightning/pull/11657)) -- Sorted `SimpleProfiler(extended=False)` summary based on mean duration for each hook ([#11671](https://github.com/Lightning-AI/lightning/pull/11671)) -- Avoid enforcing `shuffle=False` for eval dataloaders ([#11575](https://github.com/Lightning-AI/lightning/pull/11575)) -- When using DP (data-parallel), Lightning will no longer automatically reduce all tensors returned in training_step; it will only reduce the loss unless `training_step_end` is overridden ([#11594](https://github.com/Lightning-AI/lightning/pull/11594)) -- When using DP (data-parallel), the `training_epoch_end` hook will no longer receive reduced outputs from `training_step` and instead get the full tensor of results from all GPUs ([#11594](https://github.com/Lightning-AI/lightning/pull/11594)) -- Changed default logger name to `lightning_logs` for consistency ([#11762](https://github.com/Lightning-AI/lightning/pull/11762)) -- Rewrote `accelerator_connector` ([#11448](https://github.com/Lightning-AI/lightning/pull/11448)) -- When manual optimization is used with DDP, we no longer force `find_unused_parameters=True` ([#12425](https://github.com/Lightning-AI/lightning/pull/12425)) -- Disable loading dataloades if corresponding `limit_batches=0` ([#11576](https://github.com/Lightning-AI/lightning/pull/11576)) -- Removed `is_global_zero` check in `training_epoch_loop` before `logger.save`. If you have a custom logger that implements `save` the Trainer will now call `save` on all ranks by default. 
To change this behavior add `@rank_zero_only` to your `save` implementation ([#12134](https://github.com/Lightning-AI/lightning/pull/12134)) -- Disabled tuner with distributed strategies ([#12179](https://github.com/Lightning-AI/lightning/pull/12179)) -- Marked `trainer.logger_connector` as protected ([#12195](https://github.com/Lightning-AI/lightning/pull/12195)) -- Move `Strategy.process_dataloader` function call from `fit/evaluation/predict_loop.py` to `data_connector.py` ([#12251](https://github.com/Lightning-AI/lightning/pull/12251)) -- `ModelCheckpoint(save_last=True, every_n_epochs=N)` now saves a "last" checkpoint every epoch (disregarding `every_n_epochs`) instead of only once at the end of training ([#12418](https://github.com/Lightning-AI/lightning/pull/12418)) -- The strategies that support `sync_batchnorm` now only apply it when fitting ([#11919](https://github.com/Lightning-AI/lightning/pull/11919)) -- Avoided fallback on CPU if no devices are provided for other accelerators ([#12410](https://github.com/Lightning-AI/lightning/pull/12410)) -- Modified `supporters.py` so that in the accumulator element (for loss) is created directly on the device ([#12430](https://github.com/Lightning-AI/lightning/pull/12430)) -- Removed `EarlyStopping.on_save_checkpoint` and `EarlyStopping.on_load_checkpoint` in favor of `EarlyStopping.state_dict` and `EarlyStopping.load_state_dict` ([#11887](https://github.com/Lightning-AI/lightning/pull/11887)) -- Removed `BaseFinetuning.on_save_checkpoint` and `BaseFinetuning.on_load_checkpoint` in favor of `BaseFinetuning.state_dict` and `BaseFinetuning.load_state_dict` ([#11887](https://github.com/Lightning-AI/lightning/pull/11887)) -- Removed `BackboneFinetuning.on_save_checkpoint` and `BackboneFinetuning.on_load_checkpoint` in favor of `BackboneFinetuning.state_dict` and `BackboneFinetuning.load_state_dict` ([#11887](https://github.com/Lightning-AI/lightning/pull/11887)) -- Removed `ModelCheckpoint.on_save_checkpoint` and `ModelCheckpoint.on_load_checkpoint` in favor of `ModelCheckpoint.state_dict` and `ModelCheckpoint.load_state_dict` ([#11887](https://github.com/Lightning-AI/lightning/pull/11887)) -- Removed `Timer.on_save_checkpoint` and `Timer.on_load_checkpoint` in favor of `Timer.state_dict` and `Timer.load_state_dict` ([#11887](https://github.com/Lightning-AI/lightning/pull/11887)) -- Replaced PostLocalSGDOptimizer with a dedicated model averaging component ([#12378](https://github.com/Lightning-AI/lightning/pull/12378)) + * Removed the `TrainingTypePlugin.pre_dispatch()` method and merged it with `TrainingTypePlugin.setup()` ([#11137](https://github.com/Lightning-AI/pytorch-lightning/pull/11137)) +- Changed profiler to index and display the names of the hooks with a new pattern []. 
([#11026](https://github.com/Lightning-AI/pytorch-lightning/pull/11026)) +- Changed `batch_to_device` entry in profiling from stage-specific to generic, to match profiling of other hooks ([#11031](https://github.com/Lightning-AI/pytorch-lightning/pull/11031)) +- Changed the info message for finalizing ddp-spawn worker processes to a debug-level message ([#10864](https://github.com/Lightning-AI/pytorch-lightning/pull/10864)) +- Removed duplicated file extension when uploading model checkpoints with `NeptuneLogger` ([#11015](https://github.com/Lightning-AI/pytorch-lightning/pull/11015)) +- Removed `__getstate__` and `__setstate__` of `RichProgressBar` ([#11100](https://github.com/Lightning-AI/pytorch-lightning/pull/11100)) +- The `DDPPlugin` and `DDPSpawnPlugin` and their subclasses now remove the `SyncBatchNorm` wrappers in `teardown()` to enable proper support at inference after fitting ([#11078](https://github.com/Lightning-AI/pytorch-lightning/pull/11078)) +- Moved ownership of the `Accelerator` instance to the `TrainingTypePlugin`; all training-type plugins now take an optional parameter `accelerator` ([#11022](https://github.com/Lightning-AI/pytorch-lightning/pull/11022)) +- Renamed the `TrainingTypePlugin` to `Strategy` ([#11120](https://github.com/Lightning-AI/pytorch-lightning/pull/11120)) + * Renamed the `ParallelPlugin` to `ParallelStrategy` ([#11123](https://github.com/Lightning-AI/pytorch-lightning/pull/11123)) + * Renamed the `DataParallelPlugin` to `DataParallelStrategy` ([#11183](https://github.com/Lightning-AI/pytorch-lightning/pull/11183)) + * Renamed the `DDPPlugin` to `DDPStrategy` ([#11142](https://github.com/Lightning-AI/pytorch-lightning/pull/11142)) + * Renamed the `DDP2Plugin` to `DDP2Strategy` ([#11185](https://github.com/Lightning-AI/pytorch-lightning/pull/11185)) + * Renamed the `DDPShardedPlugin` to `DDPShardedStrategy` ([#11186](https://github.com/Lightning-AI/pytorch-lightning/pull/11186)) + * Renamed the `DDPFullyShardedPlugin` to `DDPFullyShardedStrategy` ([#11143](https://github.com/Lightning-AI/pytorch-lightning/pull/11143)) + * Renamed the `DDPSpawnPlugin` to `DDPSpawnStrategy` ([#11145](https://github.com/Lightning-AI/pytorch-lightning/pull/11145)) + * Renamed the `DDPSpawnShardedPlugin` to `DDPSpawnShardedStrategy` ([#11210](https://github.com/Lightning-AI/pytorch-lightning/pull/11210)) + * Renamed the `DeepSpeedPlugin` to `DeepSpeedStrategy` ([#11194](https://github.com/Lightning-AI/pytorch-lightning/pull/11194)) + * Renamed the `HorovodPlugin` to `HorovodStrategy` ([#11195](https://github.com/Lightning-AI/pytorch-lightning/pull/11195)) + * Renamed the `TPUSpawnPlugin` to `TPUSpawnStrategy` ([#11190](https://github.com/Lightning-AI/pytorch-lightning/pull/11190)) + * Renamed the `IPUPlugin` to `IPUStrategy` ([#11193](https://github.com/Lightning-AI/pytorch-lightning/pull/11193)) + * Renamed the `SingleDevicePlugin` to `SingleDeviceStrategy` ([#11182](https://github.com/Lightning-AI/pytorch-lightning/pull/11182)) + * Renamed the `SingleTPUPlugin` to `SingleTPUStrategy` ([#11182](https://github.com/Lightning-AI/pytorch-lightning/pull/11182)) + * Renamed the `TrainingTypePluginsRegistry` to `StrategyRegistry` ([#11233](https://github.com/Lightning-AI/pytorch-lightning/pull/11233)) +- Marked the `ResultCollection`, `ResultMetric`, and `ResultMetricCollection` classes as protected ([#11130](https://github.com/Lightning-AI/pytorch-lightning/pull/11130)) +- Marked `trainer.checkpoint_connector` as protected 
([#11550](https://github.com/Lightning-AI/pytorch-lightning/pull/11550)) +- The epoch start/end hooks are now called by the `FitLoop` instead of the `TrainingEpochLoop` ([#11201](https://github.com/Lightning-AI/pytorch-lightning/pull/11201)) +- DeepSpeed does not require lightning module zero 3 partitioning ([#10655](https://github.com/Lightning-AI/pytorch-lightning/pull/10655)) +- Moved `Strategy` classes to the `strategies` directory ([#11226](https://github.com/Lightning-AI/pytorch-lightning/pull/11226)) +- Renamed `training_type_plugin` file to `strategy` ([#11239](https://github.com/Lightning-AI/pytorch-lightning/pull/11239)) +- Changed `DeviceStatsMonitor` to group metrics based on the logger's `group_separator` ([#11254](https://github.com/Lightning-AI/pytorch-lightning/pull/11254)) +- Raised `UserWarning` if evaluation is triggered with `best` ckpt and trainer is configured with multiple checkpoint callbacks ([#11274](https://github.com/Lightning-AI/pytorch-lightning/pull/11274)) +- `Trainer.logged_metrics` now always contains scalar tensors, even when a Python scalar was logged ([#11270](https://github.com/Lightning-AI/pytorch-lightning/pull/11270)) +- The tuner now uses the checkpoint connector to copy and restore its state ([#11518](https://github.com/Lightning-AI/pytorch-lightning/pull/11518)) +- Changed `MisconfigurationException` to `ModuleNotFoundError` when `rich` isn't available ([#11360](https://github.com/Lightning-AI/pytorch-lightning/pull/11360)) +- The `trainer.current_epoch` value is now increased by 1 during and after `on_train_end` ([#8578](https://github.com/Lightning-AI/pytorch-lightning/pull/8578)) +- The `trainer.global_step` value now accounts for multiple optimizers and TBPTT splits ([#11805](https://github.com/Lightning-AI/pytorch-lightning/pull/11805)) +- The `trainer.global_step` value is now increased right after the `optimizer.step()` call which will impact users who access it during an intra-training validation hook ([#11805](https://github.com/Lightning-AI/pytorch-lightning/pull/11805)) +- The filename of checkpoints created with `ModelCheckpoint(filename='{step}')` is different compared to previous versions. 
A checkpoint saved after 1 step will be named `step=1.ckpt` instead of `step=0.ckpt` ([#11805](https://github.com/Lightning-AI/pytorch-lightning/pull/11805))
+- Inherit from `ABC` for `Accelerator`: users need to implement `auto_device_count` ([#11521](https://github.com/Lightning-AI/pytorch-lightning/pull/11521))
+- Changed `parallel_devices` property in `ParallelStrategy` to be lazy initialized ([#11572](https://github.com/Lightning-AI/pytorch-lightning/pull/11572))
+- Updated `TQDMProgressBar` to run a separate progress bar for each eval dataloader ([#11657](https://github.com/Lightning-AI/pytorch-lightning/pull/11657))
+- Sorted `SimpleProfiler(extended=False)` summary based on mean duration for each hook ([#11671](https://github.com/Lightning-AI/pytorch-lightning/pull/11671))
+- Avoid enforcing `shuffle=False` for eval dataloaders ([#11575](https://github.com/Lightning-AI/pytorch-lightning/pull/11575))
+- When using DP (data-parallel), Lightning will no longer automatically reduce all tensors returned in `training_step`; it will only reduce the loss unless `training_step_end` is overridden ([#11594](https://github.com/Lightning-AI/pytorch-lightning/pull/11594))
+- When using DP (data-parallel), the `training_epoch_end` hook will no longer receive reduced outputs from `training_step` and instead get the full tensor of results from all GPUs ([#11594](https://github.com/Lightning-AI/pytorch-lightning/pull/11594))
+- Changed default logger name to `lightning_logs` for consistency ([#11762](https://github.com/Lightning-AI/pytorch-lightning/pull/11762))
+- Rewrote `accelerator_connector` ([#11448](https://github.com/Lightning-AI/pytorch-lightning/pull/11448))
+- When manual optimization is used with DDP, we no longer force `find_unused_parameters=True` ([#12425](https://github.com/Lightning-AI/pytorch-lightning/pull/12425))
+- Disable loading dataloaders if the corresponding `limit_batches=0` ([#11576](https://github.com/Lightning-AI/pytorch-lightning/pull/11576))
+- Removed `is_global_zero` check in `training_epoch_loop` before `logger.save`. If you have a custom logger that implements `save`, the Trainer will now call `save` on all ranks by default. To change this behavior, add `@rank_zero_only` to your `save` implementation ([#12134](https://github.com/Lightning-AI/pytorch-lightning/pull/12134))
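For the `logger.save` change directly above, a hedged sketch of restoring rank-zero-only saving. The logger is a bare-bones stub (the non-`save` methods exist only to satisfy the abstract base class), and the import path is the `rank_zero` module introduced by #11747:

```python
from pytorch_lightning.loggers import LightningLoggerBase
from pytorch_lightning.utilities.rank_zero import rank_zero_only

class StubLogger(LightningLoggerBase):
    """Bare-bones logger stub; only save() matters for this example."""

    @property
    def name(self):
        return "stub"

    @property
    def version(self):
        return "0"

    @property
    def experiment(self):
        return None

    def log_hyperparams(self, params):
        pass

    def log_metrics(self, metrics, step=None):
        pass

    @rank_zero_only
    def save(self):
        # without the decorator, save() now runs on every rank (#12134)
        pass
```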
+- Disabled tuner with distributed strategies ([#12179](https://github.com/Lightning-AI/pytorch-lightning/pull/12179))
+- Marked `trainer.logger_connector` as protected ([#12195](https://github.com/Lightning-AI/pytorch-lightning/pull/12195))
+- Moved `Strategy.process_dataloader` function call from `fit/evaluation/predict_loop.py` to `data_connector.py` ([#12251](https://github.com/Lightning-AI/pytorch-lightning/pull/12251))
+- `ModelCheckpoint(save_last=True, every_n_epochs=N)` now saves a "last" checkpoint every epoch (disregarding `every_n_epochs`) instead of only once at the end of training ([#12418](https://github.com/Lightning-AI/pytorch-lightning/pull/12418))
+- The strategies that support `sync_batchnorm` now only apply it when fitting ([#11919](https://github.com/Lightning-AI/pytorch-lightning/pull/11919))
+- Avoided fallback on CPU if no devices are provided for other accelerators ([#12410](https://github.com/Lightning-AI/pytorch-lightning/pull/12410))
+- Modified `supporters.py` so that the accumulator element (for loss) is created directly on the device ([#12430](https://github.com/Lightning-AI/pytorch-lightning/pull/12430))
+- Removed `EarlyStopping.on_save_checkpoint` and `EarlyStopping.on_load_checkpoint` in favor of `EarlyStopping.state_dict` and `EarlyStopping.load_state_dict` ([#11887](https://github.com/Lightning-AI/pytorch-lightning/pull/11887))
+- Removed `BaseFinetuning.on_save_checkpoint` and `BaseFinetuning.on_load_checkpoint` in favor of `BaseFinetuning.state_dict` and `BaseFinetuning.load_state_dict` ([#11887](https://github.com/Lightning-AI/pytorch-lightning/pull/11887))
+- Removed `BackboneFinetuning.on_save_checkpoint` and `BackboneFinetuning.on_load_checkpoint` in favor of `BackboneFinetuning.state_dict` and `BackboneFinetuning.load_state_dict` ([#11887](https://github.com/Lightning-AI/pytorch-lightning/pull/11887))
+- Removed `ModelCheckpoint.on_save_checkpoint` and `ModelCheckpoint.on_load_checkpoint` in favor of `ModelCheckpoint.state_dict` and `ModelCheckpoint.load_state_dict` ([#11887](https://github.com/Lightning-AI/pytorch-lightning/pull/11887))
+- Removed `Timer.on_save_checkpoint` and `Timer.on_load_checkpoint` in favor of `Timer.state_dict` and `Timer.load_state_dict` ([#11887](https://github.com/Lightning-AI/pytorch-lightning/pull/11887))
+- Replaced PostLocalSGDOptimizer with a dedicated model averaging component ([#12378](https://github.com/Lightning-AI/pytorch-lightning/pull/12378))

### Deprecated

-- Deprecated `training_type_plugin` property in favor of `strategy` in `Trainer` and updated the references ([#11141](https://github.com/Lightning-AI/lightning/pull/11141))
-- Deprecated `Trainer.{validated,tested,predicted}_ckpt_path` and replaced with read-only property `Trainer.ckpt_path` set when checkpoints loaded via `Trainer.{fit,validate,test,predict}` ([#11696](https://github.com/Lightning-AI/lightning/pull/11696))
-- Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/Lightning-AI/lightning/pull/10103))
-- Deprecated `DistributedType` in favor of `_StrategyType` ([#10505](https://github.com/Lightning-AI/lightning/pull/10505))
-- Deprecated the `precision_plugin` constructor argument from `Accelerator` ([#10570](https://github.com/Lightning-AI/lightning/pull/10570))
-- Deprecated `DeviceType` in favor of
`_AcceleratorType` ([#10503](https://github.com/Lightning-AI/lightning/pull/10503)) -- Deprecated the property `Trainer.slurm_job_id` in favor of the new `SLURMEnvironment.job_id()` method ([#10622](https://github.com/Lightning-AI/lightning/pull/10622)) -- Deprecated the access to the attribute `IndexBatchSamplerWrapper.batch_indices` in favor of `IndexBatchSamplerWrapper.seen_batch_indices` ([#10870](https://github.com/Lightning-AI/lightning/pull/10870)) -- Deprecated `on_init_start` and `on_init_end` callback hooks ([#10940](https://github.com/Lightning-AI/lightning/pull/10940)) -- Deprecated `Trainer.call_hook` in favor of `Trainer._call_callback_hooks`, `Trainer._call_lightning_module_hook`, `Trainer._call_ttp_hook`, and `Trainer._call_accelerator_hook` ([#10979](https://github.com/Lightning-AI/lightning/pull/10979)) -- Deprecated `TrainingTypePlugin.post_dispatch` in favor of `TrainingTypePlugin.teardown` ([#10939](https://github.com/Lightning-AI/lightning/pull/10939)) -- Deprecated `ModelIO.on_hpc_{save/load}` in favor of `CheckpointHooks.on_{save/load}_checkpoint` ([#10911](https://github.com/Lightning-AI/lightning/pull/10911)) -- Deprecated `Trainer.run_stage` in favor of `Trainer.{fit,validate,test,predict}` ([#11000](https://github.com/Lightning-AI/lightning/pull/11000)) -- Deprecated `Trainer.lr_schedulers` in favor of `Trainer.lr_scheduler_configs` which returns a list of dataclasses instead of dictionaries ([#11443](https://github.com/Lightning-AI/lightning/pull/11443)) -- Deprecated `Trainer.verbose_evaluate` in favor of `EvaluationLoop(verbose=...)` ([#10931](https://github.com/Lightning-AI/lightning/pull/10931)) -- Deprecated `Trainer.should_rank_save_checkpoint` Trainer property ([#11068](https://github.com/Lightning-AI/lightning/pull/11068)) -- Deprecated `Trainer.lightning_optimizers` ([#11444](https://github.com/Lightning-AI/lightning/pull/11444)) -- Deprecated `TrainerOptimizersMixin` and moved functionality to `core/optimizer.py`([#11155](https://github.com/Lightning-AI/lightning/pull/11155)) -- Deprecated the `on_train_batch_end(outputs)` format when multiple optimizers are used and TBPTT is enabled ([#12182](https://github.com/Lightning-AI/lightning/pull/12182)) -- Deprecated the `training_epoch_end(outputs)` format when multiple optimizers are used and TBPTT is enabled ([#12182](https://github.com/Lightning-AI/lightning/pull/12182)) -- Deprecated `TrainerCallbackHookMixin` ([#11148](https://github.com/Lightning-AI/lightning/pull/11148)) -- Deprecated `TrainerDataLoadingMixin` and moved functionality to `Trainer` and `DataConnector` ([#11282](https://github.com/Lightning-AI/lightning/pull/11282)) -- Deprecated function `pl.callbacks.device_stats_monitor.prefix_metric_keys` ([#11254](https://github.com/Lightning-AI/lightning/pull/11254)) -- Deprecated `Callback.on_epoch_start` hook in favour of `Callback.on_{train/val/test}_epoch_start` ([#11578](https://github.com/Lightning-AI/lightning/pull/11578)) -- Deprecated `Callback.on_epoch_end` hook in favour of `Callback.on_{train/val/test}_epoch_end` ([#11578](https://github.com/Lightning-AI/lightning/pull/11578)) -- Deprecated `LightningModule.on_epoch_start` hook in favor of `LightningModule.on_{train/val/test}_epoch_start` ([#11578](https://github.com/Lightning-AI/lightning/pull/11578)) -- Deprecated `LightningModule.on_epoch_end` hook in favor of `LightningModule.on_{train/val/test}_epoch_end` ([#11578](https://github.com/Lightning-AI/lightning/pull/11578)) -- Deprecated `on_before_accelerator_backend_setup` callback 
hook in favour of `setup` ([#11568](https://github.com/Lightning-AI/lightning/pull/11568)) -- Deprecated `on_batch_start` and `on_batch_end` callback hooks in favor of `on_train_batch_start` and `on_train_batch_end` ([#11577](https://github.com/Lightning-AI/lightning/pull/11577)) -- Deprecated `on_configure_sharded_model` callback hook in favor of `setup` ([#11627](https://github.com/Lightning-AI/lightning/pull/11627)) -- Deprecated `pl.utilities.distributed.rank_zero_only` in favor of `pl.utilities.rank_zero.rank_zero_only` ([#11747](https://github.com/Lightning-AI/lightning/pull/11747)) -- Deprecated `pl.utilities.distributed.rank_zero_debug` in favor of `pl.utilities.rank_zero.rank_zero_debug` ([#11747](https://github.com/Lightning-AI/lightning/pull/11747)) -- Deprecated `pl.utilities.distributed.rank_zero_info` in favor of `pl.utilities.rank_zero.rank_zero_info` ([#11747](https://github.com/Lightning-AI/lightning/pull/11747)) -- Deprecated `pl.utilities.warnings.rank_zero_warn` in favor of `pl.utilities.rank_zero.rank_zero_warn` ([#11747](https://github.com/Lightning-AI/lightning/pull/11747)) -- Deprecated `pl.utilities.warnings.rank_zero_deprecation` in favor of `pl.utilities.rank_zero.rank_zero_deprecation` ([#11747](https://github.com/Lightning-AI/lightning/pull/11747)) -- Deprecated `pl.utilities.warnings.LightningDeprecationWarning` in favor of `pl.utilities.rank_zero.LightningDeprecationWarning` ([#11747](https://github.com/Lightning-AI/lightning/pull/11747)) -- Deprecated `on_pretrain_routine_start` and `on_pretrain_routine_end` callback hooks in favor of `on_fit_start` ([#11794](https://github.com/Lightning-AI/lightning/pull/11794)) -- Deprecated `LightningModule.on_pretrain_routine_start` and `LightningModule.on_pretrain_routine_end` hooks in favor of `on_fit_start` ([#12122](https://github.com/Lightning-AI/lightning/pull/12122)) -- Deprecated `agg_key_funcs` and `agg_default_func` parameters from `LightningLoggerBase` ([#11871](https://github.com/Lightning-AI/lightning/pull/11871)) -- Deprecated `LightningLoggerBase.update_agg_funcs` ([#11871](https://github.com/Lightning-AI/lightning/pull/11871)) -- Deprecated `LightningLoggerBase.agg_and_log_metrics` in favor of `LightningLoggerBase.log_metrics` ([#11832](https://github.com/Lightning-AI/lightning/pull/11832)) -- Deprecated passing `weights_save_path` to the `Trainer` constructor in favor of adding the `ModelCheckpoint` callback with `dirpath` directly to the list of callbacks ([#12084](https://github.com/Lightning-AI/lightning/pull/12084)) -- Deprecated `pl.profiler.AbstractProfiler` in favor of `pl.profiler.Profiler` ([#12106](https://github.com/Lightning-AI/lightning/pull/12106)) -- Deprecated `pl.profiler.BaseProfiler` in favor of `pl.profiler.Profiler` ([#12150](https://github.com/Lightning-AI/lightning/pull/12150)) -- Deprecated `BaseProfiler.profile_iterable` ([#12102](https://github.com/Lightning-AI/lightning/pull/12102)) -- Deprecated `LoggerCollection` in favor of `trainer.loggers` ([#12147](https://github.com/Lightning-AI/lightning/pull/12147)) -- Deprecated `PrecisionPlugin.on_{save,load}_checkpoint` in favor of `PrecisionPlugin.{state_dict,load_state_dict}` ([#11978](https://github.com/Lightning-AI/lightning/pull/11978)) -- Deprecated `LightningDataModule.on_save/load_checkpoint` in favor of `state_dict/load_state_dict` ([#11893](https://github.com/Lightning-AI/lightning/pull/11893)) -- Deprecated `Trainer.use_amp` in favor of `Trainer.amp_backend` ([#12312](https://github.com/Lightning-AI/lightning/pull/12312)) 
-- Deprecated `LightningModule.use_amp` in favor of `Trainer.amp_backend` ([#12315](https://github.com/Lightning-AI/lightning/pull/12315)) -- Deprecated specifying the process group backend through the environment variable `PL_TORCH_DISTRIBUTED_BACKEND` ([#11745](https://github.com/Lightning-AI/lightning/pull/11745)) -- Deprecated `ParallelPlugin.torch_distributed_backend` in favor of `DDPStrategy.process_group_backend` property ([#11745](https://github.com/Lightning-AI/lightning/pull/11745)) -- Deprecated `ModelCheckpoint.save_checkpoint` in favor of `Trainer.save_checkpoint` ([#12456](https://github.com/Lightning-AI/lightning/pull/12456)) -- Deprecated `Trainer.devices` in favor of `Trainer.num_devices` and `Trainer.device_ids` ([#12151](https://github.com/Lightning-AI/lightning/pull/12151)) -- Deprecated `Trainer.root_gpu` in favor of `Trainer.strategy.root_device.index` when GPU is used ([#12262](https://github.com/Lightning-AI/lightning/pull/12262)) -- Deprecated `Trainer.num_gpus` in favor of `Trainer.num_devices` when GPU is used ([#12384](https://github.com/Lightning-AI/lightning/pull/12384)) -- Deprecated `Trainer.ipus` in favor of `Trainer.num_devices` when IPU is used ([#12386](https://github.com/Lightning-AI/lightning/pull/12386)) -- Deprecated `Trainer.num_processes` in favor of `Trainer.num_devices` ([#12388](https://github.com/Lightning-AI/lightning/pull/12388)) -- Deprecated `Trainer.data_parallel_device_ids` in favor of `Trainer.device_ids` ([#12072](https://github.com/Lightning-AI/lightning/pull/12072)) -- Deprecated returning state from `Callback.on_save_checkpoint` in favor of returning state in `Callback.state_dict` for checkpointing ([#11887](https://github.com/Lightning-AI/lightning/pull/11887)) -- Deprecated passing only the callback state to `Callback.on_load_checkpoint(callback_state)` in favor of passing the callback state to `Callback.load_state_dict` and in 1.8, passing the entire checkpoint dictionary to `Callback.on_load_checkpoint(checkpoint)` ([#11887](https://github.com/Lightning-AI/lightning/pull/11887)) -- Deprecated `Trainer.gpus` in favor of `Trainer.device_ids` or `Trainer.num_devices` ([#12436](https://github.com/Lightning-AI/lightning/pull/12436)) -- Deprecated `Trainer.tpu_cores` in favor of `Trainer.num_devices` ([#12437](https://github.com/Lightning-AI/lightning/pull/12437)) +- Deprecated `training_type_plugin` property in favor of `strategy` in `Trainer` and updated the references ([#11141](https://github.com/Lightning-AI/pytorch-lightning/pull/11141)) +- Deprecated `Trainer.{validated,tested,predicted}_ckpt_path` and replaced with read-only property `Trainer.ckpt_path` set when checkpoints loaded via `Trainer.{fit,validate,test,predict}` ([#11696](https://github.com/Lightning-AI/pytorch-lightning/pull/11696)) +- Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/Lightning-AI/pytorch-lightning/pull/10103)) +- Deprecated `DistributedType` in favor of `_StrategyType` ([#10505](https://github.com/Lightning-AI/pytorch-lightning/pull/10505)) +- Deprecated the `precision_plugin` constructor argument from `Accelerator` ([#10570](https://github.com/Lightning-AI/pytorch-lightning/pull/10570)) +- Deprecated `DeviceType` in favor of `_AcceleratorType` ([#10503](https://github.com/Lightning-AI/pytorch-lightning/pull/10503)) +- Deprecated the property `Trainer.slurm_job_id` in favor of the new `SLURMEnvironment.job_id()` method 
([#10622](https://github.com/Lightning-AI/pytorch-lightning/pull/10622))
+- Deprecated the access to the attribute `IndexBatchSamplerWrapper.batch_indices` in favor of `IndexBatchSamplerWrapper.seen_batch_indices` ([#10870](https://github.com/Lightning-AI/pytorch-lightning/pull/10870))
+- Deprecated `on_init_start` and `on_init_end` callback hooks ([#10940](https://github.com/Lightning-AI/pytorch-lightning/pull/10940))
+- Deprecated `Trainer.call_hook` in favor of `Trainer._call_callback_hooks`, `Trainer._call_lightning_module_hook`, `Trainer._call_ttp_hook`, and `Trainer._call_accelerator_hook` ([#10979](https://github.com/Lightning-AI/pytorch-lightning/pull/10979))
+- Deprecated `TrainingTypePlugin.post_dispatch` in favor of `TrainingTypePlugin.teardown` ([#10939](https://github.com/Lightning-AI/pytorch-lightning/pull/10939))
+- Deprecated `ModelIO.on_hpc_{save/load}` in favor of `CheckpointHooks.on_{save/load}_checkpoint` ([#10911](https://github.com/Lightning-AI/pytorch-lightning/pull/10911))
+- Deprecated `Trainer.run_stage` in favor of `Trainer.{fit,validate,test,predict}` ([#11000](https://github.com/Lightning-AI/pytorch-lightning/pull/11000))
+- Deprecated `Trainer.lr_schedulers` in favor of `Trainer.lr_scheduler_configs` which returns a list of dataclasses instead of dictionaries ([#11443](https://github.com/Lightning-AI/pytorch-lightning/pull/11443))
+- Deprecated `Trainer.verbose_evaluate` in favor of `EvaluationLoop(verbose=...)` ([#10931](https://github.com/Lightning-AI/pytorch-lightning/pull/10931))
+- Deprecated `Trainer.should_rank_save_checkpoint` Trainer property ([#11068](https://github.com/Lightning-AI/pytorch-lightning/pull/11068))
+- Deprecated `Trainer.lightning_optimizers` ([#11444](https://github.com/Lightning-AI/pytorch-lightning/pull/11444))
+- Deprecated `TrainerOptimizersMixin` and moved functionality to `core/optimizer.py` ([#11155](https://github.com/Lightning-AI/pytorch-lightning/pull/11155))
+- Deprecated the `on_train_batch_end(outputs)` format when multiple optimizers are used and TBPTT is enabled ([#12182](https://github.com/Lightning-AI/pytorch-lightning/pull/12182))
+- Deprecated the `training_epoch_end(outputs)` format when multiple optimizers are used and TBPTT is enabled ([#12182](https://github.com/Lightning-AI/pytorch-lightning/pull/12182))
+- Deprecated `TrainerCallbackHookMixin` ([#11148](https://github.com/Lightning-AI/pytorch-lightning/pull/11148))
+- Deprecated `TrainerDataLoadingMixin` and moved functionality to `Trainer` and `DataConnector` ([#11282](https://github.com/Lightning-AI/pytorch-lightning/pull/11282))
+- Deprecated function `pl.callbacks.device_stats_monitor.prefix_metric_keys` ([#11254](https://github.com/Lightning-AI/pytorch-lightning/pull/11254))
+- Deprecated `Callback.on_epoch_start` hook in favor of `Callback.on_{train/val/test}_epoch_start` ([#11578](https://github.com/Lightning-AI/pytorch-lightning/pull/11578))
+- Deprecated `Callback.on_epoch_end` hook in favor of `Callback.on_{train/val/test}_epoch_end` ([#11578](https://github.com/Lightning-AI/pytorch-lightning/pull/11578))
+- Deprecated `LightningModule.on_epoch_start` hook in favor of `LightningModule.on_{train/val/test}_epoch_start` ([#11578](https://github.com/Lightning-AI/pytorch-lightning/pull/11578))
+- Deprecated `LightningModule.on_epoch_end` hook in favor of `LightningModule.on_{train/val/test}_epoch_end` ([#11578](https://github.com/Lightning-AI/pytorch-lightning/pull/11578))
+- Deprecated `on_before_accelerator_backend_setup` callback hook in favor of `setup` ([#11568](https://github.com/Lightning-AI/pytorch-lightning/pull/11568))
+- Deprecated `on_batch_start` and `on_batch_end` callback hooks in favor of `on_train_batch_start` and `on_train_batch_end` ([#11577](https://github.com/Lightning-AI/pytorch-lightning/pull/11577))
+- Deprecated `on_configure_sharded_model` callback hook in favor of `setup` ([#11627](https://github.com/Lightning-AI/pytorch-lightning/pull/11627))
+- Deprecated `pl.utilities.distributed.rank_zero_only` in favor of `pl.utilities.rank_zero.rank_zero_only` ([#11747](https://github.com/Lightning-AI/pytorch-lightning/pull/11747))
+- Deprecated `pl.utilities.distributed.rank_zero_debug` in favor of `pl.utilities.rank_zero.rank_zero_debug` ([#11747](https://github.com/Lightning-AI/pytorch-lightning/pull/11747))
+- Deprecated `pl.utilities.distributed.rank_zero_info` in favor of `pl.utilities.rank_zero.rank_zero_info` ([#11747](https://github.com/Lightning-AI/pytorch-lightning/pull/11747))
+- Deprecated `pl.utilities.warnings.rank_zero_warn` in favor of `pl.utilities.rank_zero.rank_zero_warn` ([#11747](https://github.com/Lightning-AI/pytorch-lightning/pull/11747))
+- Deprecated `pl.utilities.warnings.rank_zero_deprecation` in favor of `pl.utilities.rank_zero.rank_zero_deprecation` ([#11747](https://github.com/Lightning-AI/pytorch-lightning/pull/11747))
+- Deprecated `pl.utilities.warnings.LightningDeprecationWarning` in favor of `pl.utilities.rank_zero.LightningDeprecationWarning` ([#11747](https://github.com/Lightning-AI/pytorch-lightning/pull/11747))
+- Deprecated `on_pretrain_routine_start` and `on_pretrain_routine_end` callback hooks in favor of `on_fit_start` ([#11794](https://github.com/Lightning-AI/pytorch-lightning/pull/11794))
+- Deprecated `LightningModule.on_pretrain_routine_start` and `LightningModule.on_pretrain_routine_end` hooks in favor of `on_fit_start` ([#12122](https://github.com/Lightning-AI/pytorch-lightning/pull/12122))
+- Deprecated `agg_key_funcs` and `agg_default_func` parameters from `LightningLoggerBase` ([#11871](https://github.com/Lightning-AI/pytorch-lightning/pull/11871))
+- Deprecated `LightningLoggerBase.update_agg_funcs` ([#11871](https://github.com/Lightning-AI/pytorch-lightning/pull/11871))
+- Deprecated `LightningLoggerBase.agg_and_log_metrics` in favor of `LightningLoggerBase.log_metrics` ([#11832](https://github.com/Lightning-AI/pytorch-lightning/pull/11832))
+- Deprecated passing `weights_save_path` to the `Trainer` constructor in favor of adding the `ModelCheckpoint` callback with `dirpath` directly to the list of callbacks ([#12084](https://github.com/Lightning-AI/pytorch-lightning/pull/12084))
+- Deprecated `pl.profiler.AbstractProfiler` in favor of `pl.profiler.Profiler` ([#12106](https://github.com/Lightning-AI/pytorch-lightning/pull/12106))
+- Deprecated `pl.profiler.BaseProfiler` in favor of `pl.profiler.Profiler` ([#12150](https://github.com/Lightning-AI/pytorch-lightning/pull/12150))
+- Deprecated `BaseProfiler.profile_iterable` ([#12102](https://github.com/Lightning-AI/pytorch-lightning/pull/12102))
+- Deprecated `LoggerCollection` in favor of `trainer.loggers` ([#12147](https://github.com/Lightning-AI/pytorch-lightning/pull/12147))
+- Deprecated `PrecisionPlugin.on_{save,load}_checkpoint` in favor of `PrecisionPlugin.{state_dict,load_state_dict}` ([#11978](https://github.com/Lightning-AI/pytorch-lightning/pull/11978))
+- Deprecated `LightningDataModule.on_save/load_checkpoint` in favor of `state_dict/load_state_dict` ([#11893](https://github.com/Lightning-AI/pytorch-lightning/pull/11893))
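The `LightningDataModule` deprecation above mirrors the callback migration. A minimal sketch; the fold counter is a made-up stateful attribute:

```python
import pytorch_lightning as pl

class FoldedDataModule(pl.LightningDataModule):
    def __init__(self):
        super().__init__()
        self.current_fold = 0

    # previously this state went through on_save_checkpoint(checkpoint)
    # and on_load_checkpoint(checkpoint), now deprecated
    def state_dict(self):
        return {"current_fold": self.current_fold}

    def load_state_dict(self, state_dict):
        self.current_fold = state_dict["current_fold"]
```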
+- Deprecated `Trainer.use_amp` in favor of `Trainer.amp_backend` ([#12312](https://github.com/Lightning-AI/pytorch-lightning/pull/12312))
+- Deprecated `LightningModule.use_amp` in favor of `Trainer.amp_backend` ([#12315](https://github.com/Lightning-AI/pytorch-lightning/pull/12315))
+- Deprecated specifying the process group backend through the environment variable `PL_TORCH_DISTRIBUTED_BACKEND` ([#11745](https://github.com/Lightning-AI/pytorch-lightning/pull/11745))
+- Deprecated `ParallelPlugin.torch_distributed_backend` in favor of `DDPStrategy.process_group_backend` property ([#11745](https://github.com/Lightning-AI/pytorch-lightning/pull/11745))
+- Deprecated `ModelCheckpoint.save_checkpoint` in favor of `Trainer.save_checkpoint` ([#12456](https://github.com/Lightning-AI/pytorch-lightning/pull/12456))
+- Deprecated `Trainer.devices` in favor of `Trainer.num_devices` and `Trainer.device_ids` ([#12151](https://github.com/Lightning-AI/pytorch-lightning/pull/12151))
+- Deprecated `Trainer.root_gpu` in favor of `Trainer.strategy.root_device.index` when GPU is used ([#12262](https://github.com/Lightning-AI/pytorch-lightning/pull/12262))
+- Deprecated `Trainer.num_gpus` in favor of `Trainer.num_devices` when GPU is used ([#12384](https://github.com/Lightning-AI/pytorch-lightning/pull/12384))
+- Deprecated `Trainer.ipus` in favor of `Trainer.num_devices` when IPU is used ([#12386](https://github.com/Lightning-AI/pytorch-lightning/pull/12386))
+- Deprecated `Trainer.num_processes` in favor of `Trainer.num_devices` ([#12388](https://github.com/Lightning-AI/pytorch-lightning/pull/12388))
+- Deprecated `Trainer.data_parallel_device_ids` in favor of `Trainer.device_ids` ([#12072](https://github.com/Lightning-AI/pytorch-lightning/pull/12072))
+- Deprecated returning state from `Callback.on_save_checkpoint` in favor of returning state in `Callback.state_dict` for checkpointing ([#11887](https://github.com/Lightning-AI/pytorch-lightning/pull/11887))
+- Deprecated passing only the callback state to `Callback.on_load_checkpoint(callback_state)` in favor of passing the callback state to `Callback.load_state_dict` and in 1.8, passing the entire checkpoint dictionary to `Callback.on_load_checkpoint(checkpoint)` ([#11887](https://github.com/Lightning-AI/pytorch-lightning/pull/11887))
+- Deprecated `Trainer.gpus` in favor of `Trainer.device_ids` or `Trainer.num_devices` ([#12436](https://github.com/Lightning-AI/pytorch-lightning/pull/12436))
+- Deprecated `Trainer.tpu_cores` in favor of `Trainer.num_devices` ([#12437](https://github.com/Lightning-AI/pytorch-lightning/pull/12437))

### Removed

-- Removed deprecated parameter `method` in `pl.utilities.model_helpers.is_overridden` ([#10507](https://github.com/Lightning-AI/lightning/pull/10507))
-- Remove deprecated method `ClusterEnvironment.creates_children` ([#10339](https://github.com/Lightning-AI/lightning/pull/10339))
-- Removed deprecated `TrainerModelHooksMixin.is_function_implemented` and `TrainerModelHooksMixin.has_arg` ([#10322](https://github.com/Lightning-AI/lightning/pull/10322))
-- Removed deprecated `pl.utilities.device_dtype_mixin.DeviceDtypeModuleMixin` in favor of `pl.core.mixins.device_dtype_mixin.DeviceDtypeModuleMixin` ([#10442](https://github.com/Lightning-AI/lightning/pull/10442))
-- Removed deprecated `LightningModule.loaded_optimizer_states_dict` property ([#10346](https://github.com/Lightning-AI/lightning/pull/10346))
-- Removed deprecated `Trainer.fit(train_dataloader=)`, `Trainer.validate(val_dataloaders=)`, and `Trainer.test(test_dataloader=)` ([#10325](https://github.com/Lightning-AI/lightning/pull/10325))
 ### Removed
-- Removed deprecated parameter `method` in `pl.utilities.model_helpers.is_overridden` ([#10507](https://github.com/Lightning-AI/lightning/pull/10507))
-- Remove deprecated method `ClusterEnvironment.creates_children` ([#10339](https://github.com/Lightning-AI/lightning/pull/10339))
-- Removed deprecated `TrainerModelHooksMixin.is_function_implemented` and `TrainerModelHooksMixin.has_arg` ([#10322](https://github.com/Lightning-AI/lightning/pull/10322))
-- Removed deprecated `pl.utilities.device_dtype_mixin.DeviceDtypeModuleMixin` in favor of `pl.core.mixins.device_dtype_mixin.DeviceDtypeModuleMixin` ([#10442](https://github.com/Lightning-AI/lightning/pull/10442))
-- Removed deprecated `LightningModule.loaded_optimizer_states_dict` property ([#10346](https://github.com/Lightning-AI/lightning/pull/10346))
-- Removed deprecated `Trainer.fit(train_dataloader=)`, `Trainer.validate(val_dataloaders=)`, and `Trainer.test(test_dataloader=)` ([#10325](https://github.com/Lightning-AI/lightning/pull/10325))
-- Removed deprecated `has_prepared_data`, `has_setup_fit`, `has_setup_validate`, `has_setup_test`, `has_setup_predict`, `has_teardown_fit`, `has_teardown_validate`, `has_teardown_test` and `has_teardown_predict` datamodule lifecycle properties ([#10350](https://github.com/Lightning-AI/lightning/pull/10350))
-- Removed deprecated `every_n_val_epochs` parameter of ModelCheckpoint ([#10366](https://github.com/Lightning-AI/lightning/pull/10366))
-- Removed deprecated `import pl.profiler.profilers` in favor of `import pl.profiler` ([#10443](https://github.com/Lightning-AI/lightning/pull/10443))
-- Removed deprecated property `configure_slurm_dpp` from accelerator connector ([#10370](https://github.com/Lightning-AI/lightning/pull/10370))
-- Removed deprecated arguments `num_nodes` and `sync_batchnorm` from `DDPPlugin`, `DDPSpawnPlugin`, `DeepSpeedPlugin` ([#10357](https://github.com/Lightning-AI/lightning/pull/10357))
-- Removed deprecated property `is_slurm_managing_tasks` from AcceleratorConnector ([#10353](https://github.com/Lightning-AI/lightning/pull/10353))
-- Removed deprecated `LightningModule.log(tbptt_reduce_fx, tbptt_reduce_token, sync_dist_op)` ([#10423](https://github.com/Lightning-AI/lightning/pull/10423))
-- Removed deprecated `Plugin.task_idx` ([#10441](https://github.com/Lightning-AI/lightning/pull/10441))
-- Removed deprecated method `master_params` from PrecisionPlugin ([#10372](https://github.com/Lightning-AI/lightning/pull/10372))
-- Removed the automatic detachment of "extras" returned from `training_step`. For example, `return {'loss': ..., 'foo': foo.detach()}` will now be necessary if `foo` has gradients which you do not want to store ([#10424](https://github.com/Lightning-AI/lightning/pull/10424))
+- Removed deprecated parameter `method` in `pl.utilities.model_helpers.is_overridden` ([#10507](https://github.com/Lightning-AI/pytorch-lightning/pull/10507))
+- Removed deprecated method `ClusterEnvironment.creates_children` ([#10339](https://github.com/Lightning-AI/pytorch-lightning/pull/10339))
+- Removed deprecated `TrainerModelHooksMixin.is_function_implemented` and `TrainerModelHooksMixin.has_arg` ([#10322](https://github.com/Lightning-AI/pytorch-lightning/pull/10322))
+- Removed deprecated `pl.utilities.device_dtype_mixin.DeviceDtypeModuleMixin` in favor of `pl.core.mixins.device_dtype_mixin.DeviceDtypeModuleMixin` ([#10442](https://github.com/Lightning-AI/pytorch-lightning/pull/10442))
+- Removed deprecated `LightningModule.loaded_optimizer_states_dict` property ([#10346](https://github.com/Lightning-AI/pytorch-lightning/pull/10346))
+- Removed deprecated `Trainer.fit(train_dataloader=)`, `Trainer.validate(val_dataloaders=)`, and `Trainer.test(test_dataloader=)` ([#10325](https://github.com/Lightning-AI/pytorch-lightning/pull/10325))
+- Removed deprecated `has_prepared_data`, `has_setup_fit`, `has_setup_validate`, `has_setup_test`, `has_setup_predict`, `has_teardown_fit`, `has_teardown_validate`, `has_teardown_test` and `has_teardown_predict` datamodule lifecycle properties ([#10350](https://github.com/Lightning-AI/pytorch-lightning/pull/10350))
+- Removed deprecated `every_n_val_epochs` parameter of `ModelCheckpoint` ([#10366](https://github.com/Lightning-AI/pytorch-lightning/pull/10366))
+- Removed deprecated `import pl.profiler.profilers` in favor of `import pl.profiler` ([#10443](https://github.com/Lightning-AI/pytorch-lightning/pull/10443))
+- Removed deprecated property `configure_slurm_ddp` from accelerator connector ([#10370](https://github.com/Lightning-AI/pytorch-lightning/pull/10370))
+- Removed deprecated arguments `num_nodes` and `sync_batchnorm` from `DDPPlugin`, `DDPSpawnPlugin`, `DeepSpeedPlugin` ([#10357](https://github.com/Lightning-AI/pytorch-lightning/pull/10357))
+- Removed deprecated property `is_slurm_managing_tasks` from `AcceleratorConnector` ([#10353](https://github.com/Lightning-AI/pytorch-lightning/pull/10353))
+- Removed deprecated `LightningModule.log(tbptt_reduce_fx, tbptt_reduce_token, sync_dist_op)` ([#10423](https://github.com/Lightning-AI/pytorch-lightning/pull/10423))
+- Removed deprecated `Plugin.task_idx` ([#10441](https://github.com/Lightning-AI/pytorch-lightning/pull/10441))
+- Removed deprecated method `master_params` from `PrecisionPlugin` ([#10372](https://github.com/Lightning-AI/pytorch-lightning/pull/10372))
+- Removed the automatic detachment of "extras" returned from `training_step`. For example, `return {'loss': ..., 'foo': foo.detach()}` will now be necessary if `foo` has gradients which you do not want to store ([#10424](https://github.com/Lightning-AI/pytorch-lightning/pull/10424))
 - Removed deprecated passthrough methods and properties from `Accelerator` base class:
- * ([#10403](https://github.com/Lightning-AI/lightning/pull/10403))
- * ([#10448](https://github.com/Lightning-AI/lightning/pull/10448))
-- Removed deprecated signature for `transfer_batch_to_device` hook. The new argument `dataloader_idx` is now required ([#10480](https://github.com/Lightning-AI/lightning/pull/10480))
-- Removed deprecated `utilities.distributed.rank_zero_{warn/deprecation}` ([#10451](https://github.com/Lightning-AI/lightning/pull/10451))
-- Removed deprecated `mode` argument from `ModelSummary` class ([#10449](https://github.com/Lightning-AI/lightning/pull/10449))
-- Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/Lightning-AI/lightning/pull/10482))
-- Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/Lightning-AI/lightning/pull/10482))
-- Removed deprecated `disable_validation` property from Trainer ([#10450](https://github.com/Lightning-AI/lightning/pull/10450))
-- Removed deprecated `CheckpointConnector.hpc_load` property in favor of `CheckpointConnector.restore` ([#10525](https://github.com/Lightning-AI/lightning/pull/10525))
-- Removed deprecated `reload_dataloaders_every_epoch` from `Trainer` in favour of `reload_dataloaders_every_n_epochs` ([#10481](https://github.com/Lightning-AI/lightning/pull/10481))
-- Removed the `precision_plugin` attribute from `Accelerator` in favor of its equivalent attribute `precision_plugin` in the `TrainingTypePlugin` ([#10570](https://github.com/Lightning-AI/lightning/pull/10570))
-- Removed `DeepSpeedPlugin.{precision,amp_type,amp_level}` properties ([#10657](https://github.com/Lightning-AI/lightning/pull/10657))
-- Removed patching of `on_before_batch_transfer`, `transfer_batch_to_device` and `on_after_batch_transfer` hooks in `LightningModule` ([#10603](https://github.com/Lightning-AI/lightning/pull/10603))
-- Removed argument `return_result` from the `DDPSpawnPlugin.spawn()` method ([#10867](https://github.com/Lightning-AI/lightning/pull/10867))
-- Removed the property `TrainingTypePlugin.results` and corresponding properties in subclasses ([#10034](https://github.com/Lightning-AI/lightning/pull/10034))
-- Removed the `mp_queue` attribute from `DDPSpawnPlugin` and `TPUSpawnPlugin` ([#10034](https://github.com/Lightning-AI/lightning/pull/10034))
-- Removed unnecessary `_move_optimizer_state` method overrides from `TPUSpawnPlugin` and `SingleTPUPlugin` ([#10849](https://github.com/Lightning-AI/lightning/pull/10849))
-- Removed `should_rank_save_checkpoint` property from `TrainingTypePlugin` ([#11070](https://github.com/Lightning-AI/lightning/pull/11070))
-- Removed `model_sharded_context` method from `Accelerator` ([#10886](https://github.com/Lightning-AI/lightning/pull/10886))
-- Removed method `pre_dispatch` from the `PrecisionPlugin` ([#10887](https://github.com/Lightning-AI/lightning/pull/10887))
-- Removed method `setup_optimizers_in_pre_dispatch` from the `strategies` and achieve the same logic in `setup` and `pre_dispatch` methods ([#10906](https://github.com/Lightning-AI/lightning/pull/10906))
-- Removed methods `pre_dispatch`, `dispatch` and `post_dispatch` from the `Accelerator` ([#10885](https://github.com/Lightning-AI/lightning/pull/10885))
-- Removed method `training_step`, `test_step`, `validation_step` and `predict_step` from the `Accelerator` ([#10890](https://github.com/Lightning-AI/lightning/pull/10890))
-- Removed `TrainingTypePlugin.start_{training,evaluating,predicting}` hooks and the same in all subclasses ([#10989](https://github.com/Lightning-AI/lightning/pull/10989), [#10896](https://github.com/Lightning-AI/lightning/pull/10896))
-- Removed `Accelerator.on_train_start` ([#10999](https://github.com/Lightning-AI/lightning/pull/10999))
-- Removed support for Python 3.6 ([#11117](https://github.com/Lightning-AI/lightning/pull/11117))
-- Removed `Strategy.init_optimizers` in favor of `Strategy.setup_optimizers` ([#11236](https://github.com/Lightning-AI/lightning/pull/11236))
-- Removed `profile("training_step_and_backward")` in `Closure` class since we already profile calls `training_step` and `backward` ([#11222](https://github.com/Lightning-AI/lightning/pull/11222))
-- Removed `Strategy.optimizer_zero_grad` ([#11246](https://github.com/Lightning-AI/lightning/pull/11246))
-- Removed `Strategy.on_gpu` ([#11537](https://github.com/Lightning-AI/lightning/pull/11537))
-- Removed `Strategy.on_tpu` property ([#11536](https://github.com/Lightning-AI/lightning/pull/11536))
-- Removed the abstract property `LightningLoggerBase.experiment` ([#11603](https://github.com/Lightning-AI/lightning/pull/11603))
-- Removed `FitLoop.current_epoch` getter and setter ([#11562](https://github.com/Lightning-AI/lightning/pull/11562))
-- Removed access to `_short_id` in `NeptuneLogger` ([#11517](https://github.com/Lightning-AI/lightning/pull/11517))
-- Removed `log_text` and `log_image` from the `LightningLoggerBase` API ([#11857](https://github.com/Lightning-AI/lightning/pull/11857))
-- Removed calls to `profile("model_forward")` in favor of profiling `training_step` ([#12032](https://github.com/Lightning-AI/lightning/pull/12032))
-- Removed `get_mp_spawn_kwargs` from `DDPSpawnStrategy` and `TPUSpawnStrategy` in favor of configuration in the `_SpawnLauncher` ([#11966](https://github.com/Lightning-AI/lightning/pull/11966))
-- Removed `_aggregate_metrics`, `_reduce_agg_metrics`, and `_finalize_agg_metrics` from `LightningLoggerBase` ([#12053](https://github.com/Lightning-AI/lightning/pull/12053))
-- Removed the `AcceleratorConnector.device_type` property ([#12081](https://github.com/Lightning-AI/lightning/pull/12081))
-- Removed `AcceleratorConnector.num_nodes` ([#12107](https://github.com/Lightning-AI/lightning/pull/12107))
-- Removed `AcceleratorConnector.has_ipu` property ([#12111](https://github.com/Lightning-AI/lightning/pull/12111))
-- Removed `AcceleratorConnector.use_ipu` property ([#12110](https://github.com/Lightning-AI/lightning/pull/12110))
-- Removed `AcceleratorConnector.has_tpu` property ([#12109](https://github.com/Lightning-AI/lightning/pull/12109))
-- Removed `AcceleratorConnector.use_dp` property ([#12112](https://github.com/Lightning-AI/lightning/pull/12112))
-- Removed `configure_sync_batchnorm` from `ParallelStrategy` and all other strategies that inherit from it ([#11754](https://github.com/Lightning-AI/lightning/pull/11754))
-- Removed public attribute `sync_batchnorm` from strategies ([#11754](https://github.com/Lightning-AI/lightning/pull/11754))
-- Removed `AcceleratorConnector.root_gpu` property ([#12262](https://github.com/Lightning-AI/lightning/pull/12262))
-- Removed `AcceleratorConnector.tpu_id` property ([#12387](https://github.com/Lightning-AI/lightning/pull/12387))
-- Removed `AcceleratorConnector.num_gpus` property ([#12384](https://github.com/Lightning-AI/lightning/pull/12384))
-- Removed `AcceleratorConnector.num_ipus` property ([#12386](https://github.com/Lightning-AI/lightning/pull/12386))
-- Removed `AcceleratorConnector.num_processes` property ([#12388](https://github.com/Lightning-AI/lightning/pull/12388))
-- Removed `AcceleratorConnector.parallel_device_ids` property ([#12072](https://github.com/Lightning-AI/lightning/pull/12072))
-- Removed `AcceleratorConnector.devices` property ([#12435](https://github.com/Lightning-AI/lightning/pull/12435))
-- Removed `AcceleratorConnector.parallel_devices` property ([#12075](https://github.com/Lightning-AI/lightning/pull/12075))
-- Removed `AcceleratorConnector.tpu_cores` property ([#12437](https://github.com/Lightning-AI/lightning/pull/12437))
+ * ([#10403](https://github.com/Lightning-AI/pytorch-lightning/pull/10403))
+ * ([#10448](https://github.com/Lightning-AI/pytorch-lightning/pull/10448))
+- Removed deprecated signature for `transfer_batch_to_device` hook. The new argument `dataloader_idx` is now required ([#10480](https://github.com/Lightning-AI/pytorch-lightning/pull/10480))
+- Removed deprecated `utilities.distributed.rank_zero_{warn/deprecation}` ([#10451](https://github.com/Lightning-AI/pytorch-lightning/pull/10451))
+- Removed deprecated `mode` argument from `ModelSummary` class ([#10449](https://github.com/Lightning-AI/pytorch-lightning/pull/10449))
+- Removed deprecated `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#10482](https://github.com/Lightning-AI/pytorch-lightning/pull/10482))
+- Removed deprecated `disable_validation` property from `Trainer` ([#10450](https://github.com/Lightning-AI/pytorch-lightning/pull/10450))
+- Removed deprecated `CheckpointConnector.hpc_load` property in favor of `CheckpointConnector.restore` ([#10525](https://github.com/Lightning-AI/pytorch-lightning/pull/10525))
+- Removed deprecated `reload_dataloaders_every_epoch` from `Trainer` in favor of `reload_dataloaders_every_n_epochs` ([#10481](https://github.com/Lightning-AI/pytorch-lightning/pull/10481))
+- Removed the `precision_plugin` attribute from `Accelerator` in favor of its equivalent attribute `precision_plugin` in the `TrainingTypePlugin` ([#10570](https://github.com/Lightning-AI/pytorch-lightning/pull/10570))
+- Removed `DeepSpeedPlugin.{precision,amp_type,amp_level}` properties ([#10657](https://github.com/Lightning-AI/pytorch-lightning/pull/10657))
+- Removed patching of `on_before_batch_transfer`, `transfer_batch_to_device` and `on_after_batch_transfer` hooks in `LightningModule` ([#10603](https://github.com/Lightning-AI/pytorch-lightning/pull/10603))
+- Removed argument `return_result` from the `DDPSpawnPlugin.spawn()` method ([#10867](https://github.com/Lightning-AI/pytorch-lightning/pull/10867))
+- Removed the property `TrainingTypePlugin.results` and corresponding properties in subclasses ([#10034](https://github.com/Lightning-AI/pytorch-lightning/pull/10034))
+- Removed the `mp_queue` attribute from `DDPSpawnPlugin` and `TPUSpawnPlugin` ([#10034](https://github.com/Lightning-AI/pytorch-lightning/pull/10034))
+- Removed unnecessary `_move_optimizer_state` method overrides from `TPUSpawnPlugin` and `SingleTPUPlugin` ([#10849](https://github.com/Lightning-AI/pytorch-lightning/pull/10849))
+- Removed `should_rank_save_checkpoint` property from `TrainingTypePlugin` ([#11070](https://github.com/Lightning-AI/pytorch-lightning/pull/11070))
+- Removed `model_sharded_context` method from `Accelerator` ([#10886](https://github.com/Lightning-AI/pytorch-lightning/pull/10886))
+- Removed method `pre_dispatch` from the `PrecisionPlugin` ([#10887](https://github.com/Lightning-AI/pytorch-lightning/pull/10887))
+- Removed method `setup_optimizers_in_pre_dispatch` from the strategies; the same logic is now achieved in the `setup` and `pre_dispatch` methods ([#10906](https://github.com/Lightning-AI/pytorch-lightning/pull/10906))
+- Removed methods `pre_dispatch`, `dispatch` and `post_dispatch` from the `Accelerator` ([#10885](https://github.com/Lightning-AI/pytorch-lightning/pull/10885))
+- Removed methods `training_step`, `test_step`, `validation_step` and `predict_step` from the `Accelerator` ([#10890](https://github.com/Lightning-AI/pytorch-lightning/pull/10890))
+- Removed `TrainingTypePlugin.start_{training,evaluating,predicting}` hooks and the same in all subclasses ([#10989](https://github.com/Lightning-AI/pytorch-lightning/pull/10989), [#10896](https://github.com/Lightning-AI/pytorch-lightning/pull/10896))
+- Removed `Accelerator.on_train_start` ([#10999](https://github.com/Lightning-AI/pytorch-lightning/pull/10999))
+- Removed support for Python 3.6 ([#11117](https://github.com/Lightning-AI/pytorch-lightning/pull/11117))
+- Removed `Strategy.init_optimizers` in favor of `Strategy.setup_optimizers` ([#11236](https://github.com/Lightning-AI/pytorch-lightning/pull/11236))
+- Removed `profile("training_step_and_backward")` in `Closure` class since the `training_step` and `backward` calls are already profiled ([#11222](https://github.com/Lightning-AI/pytorch-lightning/pull/11222))
+- Removed `Strategy.optimizer_zero_grad` ([#11246](https://github.com/Lightning-AI/pytorch-lightning/pull/11246))
+- Removed `Strategy.on_gpu` ([#11537](https://github.com/Lightning-AI/pytorch-lightning/pull/11537))
+- Removed `Strategy.on_tpu` property ([#11536](https://github.com/Lightning-AI/pytorch-lightning/pull/11536))
+- Removed the abstract property `LightningLoggerBase.experiment` ([#11603](https://github.com/Lightning-AI/pytorch-lightning/pull/11603))
+- Removed `FitLoop.current_epoch` getter and setter ([#11562](https://github.com/Lightning-AI/pytorch-lightning/pull/11562))
+- Removed access to `_short_id` in `NeptuneLogger` ([#11517](https://github.com/Lightning-AI/pytorch-lightning/pull/11517))
+- Removed `log_text` and `log_image` from the `LightningLoggerBase` API ([#11857](https://github.com/Lightning-AI/pytorch-lightning/pull/11857))
+- Removed calls to `profile("model_forward")` in favor of profiling `training_step` ([#12032](https://github.com/Lightning-AI/pytorch-lightning/pull/12032))
+- Removed `get_mp_spawn_kwargs` from `DDPSpawnStrategy` and `TPUSpawnStrategy` in favor of configuration in the `_SpawnLauncher` ([#11966](https://github.com/Lightning-AI/pytorch-lightning/pull/11966))
+- Removed `_aggregate_metrics`, `_reduce_agg_metrics`, and `_finalize_agg_metrics` from `LightningLoggerBase` ([#12053](https://github.com/Lightning-AI/pytorch-lightning/pull/12053))
+- Removed the `AcceleratorConnector.device_type` property ([#12081](https://github.com/Lightning-AI/pytorch-lightning/pull/12081))
+- Removed `AcceleratorConnector.num_nodes` ([#12107](https://github.com/Lightning-AI/pytorch-lightning/pull/12107))
+- Removed `AcceleratorConnector.has_ipu` property ([#12111](https://github.com/Lightning-AI/pytorch-lightning/pull/12111))
+- Removed `AcceleratorConnector.use_ipu` property ([#12110](https://github.com/Lightning-AI/pytorch-lightning/pull/12110))
+- Removed `AcceleratorConnector.has_tpu` property ([#12109](https://github.com/Lightning-AI/pytorch-lightning/pull/12109))
+- Removed `AcceleratorConnector.use_dp` property ([#12112](https://github.com/Lightning-AI/pytorch-lightning/pull/12112))
+- Removed `configure_sync_batchnorm` from `ParallelStrategy` and all other strategies that inherit from it ([#11754](https://github.com/Lightning-AI/pytorch-lightning/pull/11754))
+- Removed public attribute `sync_batchnorm` from strategies ([#11754](https://github.com/Lightning-AI/pytorch-lightning/pull/11754))
+- Removed `AcceleratorConnector.root_gpu` property ([#12262](https://github.com/Lightning-AI/pytorch-lightning/pull/12262))
+- Removed `AcceleratorConnector.tpu_id` property ([#12387](https://github.com/Lightning-AI/pytorch-lightning/pull/12387))
+- Removed `AcceleratorConnector.num_gpus` property ([#12384](https://github.com/Lightning-AI/pytorch-lightning/pull/12384))
+- Removed `AcceleratorConnector.num_ipus` property ([#12386](https://github.com/Lightning-AI/pytorch-lightning/pull/12386))
+- Removed `AcceleratorConnector.num_processes` property ([#12388](https://github.com/Lightning-AI/pytorch-lightning/pull/12388))
+- Removed `AcceleratorConnector.parallel_device_ids` property ([#12072](https://github.com/Lightning-AI/pytorch-lightning/pull/12072))
+- Removed `AcceleratorConnector.devices` property ([#12435](https://github.com/Lightning-AI/pytorch-lightning/pull/12435))
+- Removed `AcceleratorConnector.parallel_devices` property ([#12075](https://github.com/Lightning-AI/pytorch-lightning/pull/12075))
+- Removed `AcceleratorConnector.tpu_cores` property ([#12437](https://github.com/Lightning-AI/pytorch-lightning/pull/12437))
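Among the removals above, the "extras" entry (#10424) is the one behavioral change that affects user code directly; a hedged sketch of what `training_step` now requires (the forward pass and loss are placeholders):

```python
def training_step(self, batch, batch_idx):
    out = self.layer(batch)  # placeholder forward pass
    loss = out.sum()         # placeholder loss
    # extras are no longer detached automatically: detach anything you do
    # not want to keep alive in the autograd graph
    return {"loss": loss, "foo": out.detach()}
```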
 ### Fixed
-- Fixed an issue where `ModelCheckpoint` could delete last checkpoint from the old directory when `dirpath` has changed during resumed training ([#12225](https://github.com/Lightning-AI/lightning/pull/12225))
-- Fixed an issue where `ModelCheckpoint` could delete older checkpoints when `dirpath` has changed during resumed training ([#12045](https://github.com/Lightning-AI/lightning/pull/12045))
-- Fixed an issue where `HorovodStrategy.teardown()` did not complete gracefully if an exception was thrown during callback setup [#11752](https://github.com/Lightning-AI/lightning/pull/11752)
-- Fixed security vulnerabilities CVE-2020-1747 and CVE-2020-14343 caused by the `PyYAML` dependency ([#11099](https://github.com/Lightning-AI/lightning/pull/11099))
-- Fixed security vulnerability "CWE-94: Improper Control of Generation of Code (Code Injection)" ([#12212](https://github.com/Lightning-AI/lightning/pull/12212))
-- Fixed logging on `{test,validation}_epoch_end` with multiple dataloaders ([#11132](https://github.com/Lightning-AI/lightning/pull/11132))
-- Reset the validation progress tracking state after sanity checking ([#11218](https://github.com/Lightning-AI/lightning/pull/11218))
-- Fixed double evaluation bug with fault-tolerance enabled where the second call was completely skipped ([#11119](https://github.com/Lightning-AI/lightning/pull/11119))
-- Fixed an issue with the `TPUSpawnPlugin` handling the `XLA_USE_BF16` environment variable incorrectly ([#10990](https://github.com/Lightning-AI/lightning/pull/10990))
-- Fixed wrong typehint for `Trainer.lightning_optimizers` ([#11155](https://github.com/Lightning-AI/lightning/pull/11155))
-- Fixed the lr-scheduler state not being dumped to checkpoint when using the deepspeed strategy ([#11307](https://github.com/Lightning-AI/lightning/pull/11307))
-- Fixed bug that forced overriding `configure_optimizers` with the CLI ([#11672](https://github.com/Lightning-AI/lightning/pull/11672))
-- Fixed type promotion when tensors of higher category than float are logged ([#11401](https://github.com/Lightning-AI/lightning/pull/11401))
-- Fixed `SimpleProfiler` summary ([#11414](https://github.com/Lightning-AI/lightning/pull/11414))
-- No longer set a `DistributedSampler` to the `poptorch.DataLoader` when IPUs are used ([#12114](https://github.com/Lightning-AI/lightning/pull/12114))
-- Fixed bug where progress bar was not being disabled when not in rank zero during predict ([#11377](https://github.com/Lightning-AI/lightning/pull/11377))
-- Fixed the mid-epoch warning call while resuming training ([#11556](https://github.com/Lightning-AI/lightning/pull/11556))
-- Fixed `LightningModule.{un,}toggle_model` when only 1 optimizer is used ([#12088](https://github.com/Lightning-AI/lightning/pull/12088))
-- Fixed an issue in `RichProgressbar` to display the metrics logged only on main progress bar ([#11690](https://github.com/Lightning-AI/lightning/pull/11690))
-- Fixed `RichProgressBar` progress when refresh rate does not evenly divide the total counter ([#11668](https://github.com/Lightning-AI/lightning/pull/11668))
-- Fixed `RichProgressBar` progress validation bar total when using multiple validation runs within a single training epoch ([#11668](https://github.com/Lightning-AI/lightning/pull/11668))
-- Configure native Deepspeed schedulers with interval='step' ([#11788](https://github.com/Lightning-AI/lightning/pull/11788)), ([#12031](https://github.com/Lightning-AI/lightning/pull/12031))
-- Update `RichProgressBarTheme` styles after detecting light theme on colab ([#10993](https://github.com/Lightning-AI/lightning/pull/10993))
-- Fixed passing `_ddp_params_and_buffers_to_ignore` ([#11949](https://github.com/Lightning-AI/lightning/pull/11949))
-- Fixed an `AttributeError` when calling `save_hyperparameters` and no parameters need saving ([#11827](https://github.com/Lightning-AI/lightning/pull/11827))
-- Fixed environment variable priority for global rank determination ([#11406](https://github.com/Lightning-AI/lightning/pull/11406))
-- Fixed an issue that caused the Trainer to produce identical results on subsequent runs without explicit re-seeding ([#11870](https://github.com/Lightning-AI/lightning/pull/11870))
-- Fixed an issue that caused the Tuner to affect the random state ([#11870](https://github.com/Lightning-AI/lightning/pull/11870))
-- Fixed to avoid common hook warning if no hook is overridden ([#12131](https://github.com/Lightning-AI/lightning/pull/12131))
-- Fixed deepspeed keeping old sub-folders in same ckpt path ([#12194](https://github.com/Lightning-AI/lightning/pull/12194))
-- Fixed returning logged metrics instead of callback metrics during evaluation ([#12224](https://github.com/Lightning-AI/lightning/pull/12224))
-- Fixed the case where `logger=None` is passed to the Trainer ([#12249](https://github.com/Lightning-AI/lightning/pull/12249))
-- Fixed bug where the global step tracked by `ModelCheckpoint` was still set even if no checkpoint was saved ([#12418](https://github.com/Lightning-AI/lightning/pull/12418))
-- Fixed bug where `ModelCheckpoint` was overriding the `epoch` and `step` logged values ([#12418](https://github.com/Lightning-AI/lightning/pull/12418))
-- Fixed bug where monitoring the default `epoch` and `step` values with `ModelCheckpoint` would fail ([#12418](https://github.com/Lightning-AI/lightning/pull/12418))
-- Fixed initializing optimizers unnecessarily in `DDPFullyShardedStrategy` ([#12267](https://github.com/Lightning-AI/lightning/pull/12267))
-- Fixed check for horovod module ([#12377](https://github.com/Lightning-AI/lightning/pull/12377))
-- Fixed logging to loggers with multiple eval dataloaders ([#12454](https://github.com/Lightning-AI/lightning/pull/12454))
-- Fixed an issue with resuming from a checkpoint trained with QAT ([#11346](https://github.com/Lightning-AI/lightning/pull/11346))
+- Fixed an issue where `ModelCheckpoint` could delete last checkpoint from the old directory when `dirpath` has changed during resumed training ([#12225](https://github.com/Lightning-AI/pytorch-lightning/pull/12225))
+- Fixed an issue where `ModelCheckpoint` could delete older checkpoints when `dirpath` has changed during resumed training ([#12045](https://github.com/Lightning-AI/pytorch-lightning/pull/12045))
+- Fixed an issue where `HorovodStrategy.teardown()` did not complete gracefully if an exception was thrown during callback setup ([#11752](https://github.com/Lightning-AI/pytorch-lightning/pull/11752))
+- Fixed security vulnerabilities CVE-2020-1747 and CVE-2020-14343 caused by the `PyYAML` dependency ([#11099](https://github.com/Lightning-AI/pytorch-lightning/pull/11099))
+- Fixed security vulnerability "CWE-94: Improper Control of Generation of Code (Code Injection)" ([#12212](https://github.com/Lightning-AI/pytorch-lightning/pull/12212))
+- Fixed logging on `{test,validation}_epoch_end` with multiple dataloaders ([#11132](https://github.com/Lightning-AI/pytorch-lightning/pull/11132))
+- Reset the validation progress tracking state after sanity checking ([#11218](https://github.com/Lightning-AI/pytorch-lightning/pull/11218))
+- Fixed double evaluation bug with fault-tolerance enabled where the second call was completely skipped ([#11119](https://github.com/Lightning-AI/pytorch-lightning/pull/11119))
+- Fixed an issue with the `TPUSpawnPlugin` handling the `XLA_USE_BF16` environment variable incorrectly ([#10990](https://github.com/Lightning-AI/pytorch-lightning/pull/10990))
+- Fixed wrong typehint for `Trainer.lightning_optimizers` ([#11155](https://github.com/Lightning-AI/pytorch-lightning/pull/11155))
+- Fixed the lr-scheduler state not being dumped to checkpoint when using the deepspeed strategy ([#11307](https://github.com/Lightning-AI/pytorch-lightning/pull/11307))
+- Fixed bug that forced overriding `configure_optimizers` with the CLI ([#11672](https://github.com/Lightning-AI/pytorch-lightning/pull/11672))
+- Fixed type promotion when tensors of higher category than float are logged ([#11401](https://github.com/Lightning-AI/pytorch-lightning/pull/11401))
+- Fixed `SimpleProfiler` summary ([#11414](https://github.com/Lightning-AI/pytorch-lightning/pull/11414))
+- No longer set a `DistributedSampler` to the `poptorch.DataLoader` when IPUs are used ([#12114](https://github.com/Lightning-AI/pytorch-lightning/pull/12114))
+- Fixed bug where progress bar was not being disabled when not in rank zero during predict ([#11377](https://github.com/Lightning-AI/pytorch-lightning/pull/11377))
+- Fixed the mid-epoch warning call while resuming training ([#11556](https://github.com/Lightning-AI/pytorch-lightning/pull/11556))
+- Fixed `LightningModule.{un,}toggle_model` when only 1 optimizer is used ([#12088](https://github.com/Lightning-AI/pytorch-lightning/pull/12088))
+- Fixed an issue in `RichProgressBar` to display the metrics logged only on the main progress bar ([#11690](https://github.com/Lightning-AI/pytorch-lightning/pull/11690))
+- Fixed `RichProgressBar` progress when refresh rate does not evenly divide the total counter ([#11668](https://github.com/Lightning-AI/pytorch-lightning/pull/11668))
+- Fixed `RichProgressBar` progress validation bar total when using multiple validation runs within a single training epoch ([#11668](https://github.com/Lightning-AI/pytorch-lightning/pull/11668))
+- Configure native DeepSpeed schedulers with interval='step' ([#11788](https://github.com/Lightning-AI/pytorch-lightning/pull/11788)), ([#12031](https://github.com/Lightning-AI/pytorch-lightning/pull/12031))
+- Update `RichProgressBarTheme` styles after detecting light theme on Colab ([#10993](https://github.com/Lightning-AI/pytorch-lightning/pull/10993))
+- Fixed passing `_ddp_params_and_buffers_to_ignore` ([#11949](https://github.com/Lightning-AI/pytorch-lightning/pull/11949))
+- Fixed an `AttributeError` when calling `save_hyperparameters` and no parameters need saving ([#11827](https://github.com/Lightning-AI/pytorch-lightning/pull/11827))
+- Fixed environment variable priority for global rank determination ([#11406](https://github.com/Lightning-AI/pytorch-lightning/pull/11406))
+- Fixed an issue that caused the Trainer to produce identical results on subsequent runs without explicit re-seeding ([#11870](https://github.com/Lightning-AI/pytorch-lightning/pull/11870))
+- Fixed an issue that caused the Tuner to affect the random state ([#11870](https://github.com/Lightning-AI/pytorch-lightning/pull/11870))
+- Fixed the common hook warning being raised even when no hook is overridden ([#12131](https://github.com/Lightning-AI/pytorch-lightning/pull/12131))
+- Fixed DeepSpeed keeping old sub-folders in the same checkpoint path ([#12194](https://github.com/Lightning-AI/pytorch-lightning/pull/12194))
+- Fixed returning logged metrics instead of callback metrics during evaluation ([#12224](https://github.com/Lightning-AI/pytorch-lightning/pull/12224))
+- Fixed the case where `logger=None` is passed to the Trainer ([#12249](https://github.com/Lightning-AI/pytorch-lightning/pull/12249))
+- Fixed bug where the global step tracked by `ModelCheckpoint` was still set even if no checkpoint was saved ([#12418](https://github.com/Lightning-AI/pytorch-lightning/pull/12418))
+- Fixed bug where `ModelCheckpoint` was overriding the `epoch` and `step` logged values ([#12418](https://github.com/Lightning-AI/pytorch-lightning/pull/12418))
+- Fixed bug where monitoring the default `epoch` and `step` values with `ModelCheckpoint` would fail ([#12418](https://github.com/Lightning-AI/pytorch-lightning/pull/12418))
+- Fixed initializing optimizers unnecessarily in `DDPFullyShardedStrategy` ([#12267](https://github.com/Lightning-AI/pytorch-lightning/pull/12267))
+- Fixed check for the Horovod module ([#12377](https://github.com/Lightning-AI/pytorch-lightning/pull/12377))
+- Fixed logging to loggers with multiple eval dataloaders ([#12454](https://github.com/Lightning-AI/pytorch-lightning/pull/12454))
+- Fixed an issue with resuming from a checkpoint trained with QAT ([#11346](https://github.com/Lightning-AI/pytorch-lightning/pull/11346))
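As an aside on the `ModelCheckpoint` fixes above (#12418), monitoring the automatically logged `epoch`/`step` values is expected to work after this release; a minimal sketch (the paths and `save_top_k` value are placeholders):

```python
from pytorch_lightning.callbacks import ModelCheckpoint

# monitor the automatically logged "step" counter; dirpath/filename are
# placeholders for your own checkpoint layout
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints/",
    filename="{epoch}-{step}",
    monitor="step",
    mode="max",
    save_top_k=3,
)
```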
 ## [1.5.10] - 2022-02-08
 ### Fixed
-- Fixed an issue to avoid validation loop run on restart ([#11552](https://github.com/Lightning-AI/lightning/pull/11552))
-- The `RichProgressBar` now correctly shows the `on_epoch` logged values on train epoch end ([#11689](https://github.com/Lightning-AI/lightning/pull/11689))
-- Fixed an issue to make the `step` argument in `WandbLogger.log_image` work ([#11716](https://github.com/Lightning-AI/lightning/pull/11716))
-- Fixed `restore_optimizers` for mapping states ([#11757](https://github.com/Lightning-AI/lightning/pull/11757))
-- With `DPStrategy`, the batch is not explicitly moved to the device ([#11780](https://github.com/Lightning-AI/lightning/pull/11780))
-- Fixed an issue to avoid val bar disappear after `trainer.validate()` ([#11700](https://github.com/Lightning-AI/lightning/pull/11700))
-- Fixed supporting remote filesystems with `Trainer.weights_save_path` for fault-tolerant training ([#11776](https://github.com/Lightning-AI/lightning/pull/11776))
-- Fixed check for available modules ([#11526](https://github.com/Lightning-AI/lightning/pull/11526))
-- Fixed bug where the path for "last" checkpoints was not getting saved correctly which caused newer runs to not remove the previous "last" checkpoint ([#11481](https://github.com/Lightning-AI/lightning/pull/11481))
-- Fixed bug where the path for best checkpoints was not getting saved correctly when no metric was monitored which caused newer runs to not use the best checkpoint ([#11481](https://github.com/Lightning-AI/lightning/pull/11481))
+- Fixed an issue to avoid the validation loop running on restart ([#11552](https://github.com/Lightning-AI/pytorch-lightning/pull/11552))
+- The `RichProgressBar` now correctly shows the `on_epoch` logged values on train epoch end ([#11689](https://github.com/Lightning-AI/pytorch-lightning/pull/11689))
+- Fixed the `step` argument in `WandbLogger.log_image` not working ([#11716](https://github.com/Lightning-AI/pytorch-lightning/pull/11716))
+- Fixed `restore_optimizers` for mapping states ([#11757](https://github.com/Lightning-AI/pytorch-lightning/pull/11757))
+- With `DPStrategy`, the batch is not explicitly moved to the device ([#11780](https://github.com/Lightning-AI/pytorch-lightning/pull/11780))
+- Fixed an issue where the validation progress bar would disappear after `trainer.validate()` ([#11700](https://github.com/Lightning-AI/pytorch-lightning/pull/11700))
+- Fixed supporting remote filesystems with `Trainer.weights_save_path` for fault-tolerant training ([#11776](https://github.com/Lightning-AI/pytorch-lightning/pull/11776))
+- Fixed check for available modules ([#11526](https://github.com/Lightning-AI/pytorch-lightning/pull/11526))
+- Fixed bug where the path for "last" checkpoints was not getting saved correctly which caused newer runs to not remove the previous "last" checkpoint ([#11481](https://github.com/Lightning-AI/pytorch-lightning/pull/11481))
+- Fixed bug where the path for best checkpoints was not getting saved correctly when no metric was monitored which caused newer runs to not use the best checkpoint ([#11481](https://github.com/Lightning-AI/pytorch-lightning/pull/11481))
 ## [1.5.9] - 2022-01-20
 ### Fixed
-- Pinned sphinx-autodoc-typehints with 0` ([#10870](https://github.com/Lightning-AI/lightning/pull/10870))
-- Fixed an issue with item assignment on the logger on rank > 0 for those who support it ([#10917](https://github.com/Lightning-AI/lightning/pull/10917))
-- Fixed importing `torch_xla.debug` for `torch-xla<1.8` ([#10836](https://github.com/Lightning-AI/lightning/pull/10836))
-- Fixed an issue with `DDPSpawnPlugin` and related plugins leaving a temporary checkpoint behind ([#10934](https://github.com/Lightning-AI/lightning/pull/10934))
-- Fixed a `TypeError` occurring in the `SingalConnector.teardown()` method ([#10961](https://github.com/Lightning-AI/lightning/pull/10961))
+- Disabled batch_size extraction for torchmetric instances because they accumulate the metrics internally ([#10815](https://github.com/Lightning-AI/pytorch-lightning/pull/10815))
+- Fixed an issue with `SignalConnector` not restoring the default signal handlers on teardown when running on SLURM or with fault-tolerant training enabled ([#10611](https://github.com/Lightning-AI/pytorch-lightning/pull/10611))
+- Fixed `SignalConnector._has_already_handler` check for callable type ([#10483](https://github.com/Lightning-AI/pytorch-lightning/pull/10483))
+- Fixed an issue to return the results for each dataloader separately instead of duplicating them for each ([#10810](https://github.com/Lightning-AI/pytorch-lightning/pull/10810))
+- Improved exception message if `rich` version is less than `10.2.2` ([#10839](https://github.com/Lightning-AI/pytorch-lightning/pull/10839))
+- Fixed uploading best model checkpoint in NeptuneLogger ([#10369](https://github.com/Lightning-AI/pytorch-lightning/pull/10369))
+- Fixed early schedule reset logic in PyTorch profiler that was causing a data leak ([#10837](https://github.com/Lightning-AI/pytorch-lightning/pull/10837))
+- Fixed a bug that caused incorrect batch indices to be passed to the `BasePredictionWriter` hooks when using a dataloader with `num_workers > 0` ([#10870](https://github.com/Lightning-AI/pytorch-lightning/pull/10870))
+- Fixed an issue with item assignment on the logger on rank > 0 for those who support it ([#10917](https://github.com/Lightning-AI/pytorch-lightning/pull/10917))
+- Fixed importing `torch_xla.debug` for `torch-xla<1.8` ([#10836](https://github.com/Lightning-AI/pytorch-lightning/pull/10836))
+- Fixed an issue with `DDPSpawnPlugin` and related plugins leaving a temporary checkpoint behind ([#10934](https://github.com/Lightning-AI/pytorch-lightning/pull/10934))
+- Fixed a `TypeError` occurring in the `SignalConnector.teardown()` method ([#10961](https://github.com/Lightning-AI/pytorch-lightning/pull/10961))
 ## [1.5.4] - 2021-11-30
 ### Fixed
-- Fixed support for `--key.help=class` with the `LightningCLI` ([#10767](https://github.com/Lightning-AI/lightning/pull/10767))
-- Fixed `_compare_version` for python packages ([#10762](https://github.com/Lightning-AI/lightning/pull/10762))
-- Fixed TensorBoardLogger `SummaryWriter` not close before spawning the processes ([#10777](https://github.com/Lightning-AI/lightning/pull/10777))
-- Fixed a consolidation error in Lite when attempting to save the state dict of a sharded optimizer ([#10746](https://github.com/Lightning-AI/lightning/pull/10746))
-- Fixed the default logging level for batch hooks associated with training from `on_step=False, on_epoch=True` to `on_step=True, on_epoch=False` ([#10756](https://github.com/Lightning-AI/lightning/pull/10756))
+- Fixed support for `--key.help=class` with the `LightningCLI` ([#10767](https://github.com/Lightning-AI/pytorch-lightning/pull/10767))
+- Fixed `_compare_version` for Python packages ([#10762](https://github.com/Lightning-AI/pytorch-lightning/pull/10762))
+- Fixed TensorBoardLogger `SummaryWriter` not being closed before spawning the processes ([#10777](https://github.com/Lightning-AI/pytorch-lightning/pull/10777))
+- Fixed a consolidation error in Lite when attempting to save the state dict of a sharded optimizer ([#10746](https://github.com/Lightning-AI/pytorch-lightning/pull/10746))
+- Fixed the default logging level for batch hooks associated with training from `on_step=False, on_epoch=True` to `on_step=True, on_epoch=False` ([#10756](https://github.com/Lightning-AI/pytorch-lightning/pull/10756))
 ### Removed
-- Removed PyTorch 1.6 support ([#10367](https://github.com/Lightning-AI/lightning/pull/10367), [#10738](https://github.com/Lightning-AI/lightning/pull/10738))
+- Removed PyTorch 1.6 support ([#10367](https://github.com/Lightning-AI/pytorch-lightning/pull/10367), [#10738](https://github.com/Lightning-AI/pytorch-lightning/pull/10738))
 ## [1.5.3] - 2021-11-24
 ### Fixed
-- Fixed `ShardedTensor` state dict hook registration to check if torch distributed is available ([#10621](https://github.com/Lightning-AI/lightning/pull/10621))
-- Fixed an issue with `self.log` not respecting a tensor's `dtype` when applying computations ([#10076](https://github.com/Lightning-AI/lightning/pull/10076))
-- Fixed LigtningLite `_wrap_init` popping unexisting keys from DataLoader signature parameters ([#10613](https://github.com/Lightning-AI/lightning/pull/10613))
-- Fixed signals being registered within threads ([#10610](https://github.com/Lightning-AI/lightning/pull/10610))
-- Fixed an issue that caused Lightning to extract the batch size even though it was set by the user in `LightningModule.log` ([#10408](https://github.com/Lightning-AI/lightning/pull/10408))
-- Fixed `Trainer(move_metrics_to_cpu=True)` not moving the evaluation logged results to CPU ([#10631](https://github.com/Lightning-AI/lightning/pull/10631))
-- Fixed the `{validation,test}_step` outputs getting moved to CPU with `Trainer(move_metrics_to_cpu=True)` ([#10631](https://github.com/Lightning-AI/lightning/pull/10631))
-- Fixed an issue with collecting logged test results with multiple dataloaders ([#10522](https://github.com/Lightning-AI/lightning/pull/10522))
+- Fixed `ShardedTensor` state dict hook registration to check if torch distributed is available ([#10621](https://github.com/Lightning-AI/pytorch-lightning/pull/10621))
+- Fixed an issue with `self.log` not respecting a tensor's `dtype` when applying computations ([#10076](https://github.com/Lightning-AI/pytorch-lightning/pull/10076))
+- Fixed LightningLite `_wrap_init` popping nonexistent keys from DataLoader signature parameters ([#10613](https://github.com/Lightning-AI/pytorch-lightning/pull/10613))
+- Fixed signals being registered within threads ([#10610](https://github.com/Lightning-AI/pytorch-lightning/pull/10610))
+- Fixed an issue that caused Lightning to extract the batch size even though it was set by the user in `LightningModule.log` ([#10408](https://github.com/Lightning-AI/pytorch-lightning/pull/10408))
+- Fixed `Trainer(move_metrics_to_cpu=True)` not moving the evaluation logged results to CPU ([#10631](https://github.com/Lightning-AI/pytorch-lightning/pull/10631))
+- Fixed the `{validation,test}_step` outputs getting moved to CPU with `Trainer(move_metrics_to_cpu=True)` ([#10631](https://github.com/Lightning-AI/pytorch-lightning/pull/10631))
+- Fixed an issue with collecting logged test results with multiple dataloaders ([#10522](https://github.com/Lightning-AI/pytorch-lightning/pull/10522))
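For the batch-size extraction fix above (#10408), passing `batch_size` explicitly to `self.log` sidesteps the automatic inference; a hedged sketch (the tuple batch layout and `loss_fn` are assumptions):

```python
def training_step(self, batch, batch_idx):
    x, y = batch                     # assumes an (inputs, targets) batch
    loss = self.loss_fn(self(x), y)  # loss_fn is a placeholder
    # an explicit batch_size skips Lightning's extraction from the batch
    self.log("train_loss", loss, batch_size=x.size(0))
    return loss
```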
 ## [1.5.2] - 2021-11-16
 ### Fixed
-- Fixed `CombinedLoader` and `max_size_cycle` didn't receive a `DistributedSampler` ([#10374](https://github.com/Lightning-AI/lightning/pull/10374))
-- Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/Lightning-AI/lightning/pull/9702))
-- Fixed `isinstance` not working with `init_meta_context`, materialized model not being moved to the device ([#10493](https://github.com/Lightning-AI/lightning/pull/10493))
-- Fixed an issue that prevented the Trainer to shutdown workers when execution is interrupted due to failure([#10463](https://github.com/Lightning-AI/lightning/pull/10463))
-- Squeeze the early stopping monitor to remove empty tensor dimensions ([#10461](https://github.com/Lightning-AI/lightning/pull/10461))
-- Fixed sampler replacement logic with `overfit_batches` to only replace the sample when `SequentialSampler` is not used ([#10486](https://github.com/Lightning-AI/lightning/pull/10486))
-- Fixed scripting causing false positive deprecation warnings ([#10470](https://github.com/Lightning-AI/lightning/pull/10470), [#10555](https://github.com/Lightning-AI/lightning/pull/10555))
-- Do not fail if batch size could not be inferred for logging when using DeepSpeed ([#10438](https://github.com/Lightning-AI/lightning/pull/10438))
-- Fixed propagation of device and dtype information to submodules of LightningLite when they inherit from `DeviceDtypeModuleMixin` ([#10559](https://github.com/Lightning-AI/lightning/pull/10559))
+- Fixed `CombinedLoader` with `max_size_cycle` not receiving a `DistributedSampler` ([#10374](https://github.com/Lightning-AI/pytorch-lightning/pull/10374))
+- Fixed an issue where class or init-only variables of dataclasses were passed to the dataclass constructor in `utilities.apply_to_collection` ([#9702](https://github.com/Lightning-AI/pytorch-lightning/pull/9702))
+- Fixed `isinstance` not working with `init_meta_context`, materialized model not being moved to the device ([#10493](https://github.com/Lightning-AI/pytorch-lightning/pull/10493))
+- Fixed an issue that prevented the Trainer from shutting down workers when execution is interrupted due to failure ([#10463](https://github.com/Lightning-AI/pytorch-lightning/pull/10463))
+- Squeeze the early stopping monitor to remove empty tensor dimensions ([#10461](https://github.com/Lightning-AI/pytorch-lightning/pull/10461))
+- Fixed sampler replacement logic with `overfit_batches` to only replace the sampler when `SequentialSampler` is not used ([#10486](https://github.com/Lightning-AI/pytorch-lightning/pull/10486))
+- Fixed scripting causing false positive deprecation warnings ([#10470](https://github.com/Lightning-AI/pytorch-lightning/pull/10470), [#10555](https://github.com/Lightning-AI/pytorch-lightning/pull/10555))
+- Do not fail if batch size could not be inferred for logging when using DeepSpeed ([#10438](https://github.com/Lightning-AI/pytorch-lightning/pull/10438))
+- Fixed propagation of device and dtype information to submodules of LightningLite when they inherit from `DeviceDtypeModuleMixin` ([#10559](https://github.com/Lightning-AI/pytorch-lightning/pull/10559))
 ## [1.5.1] - 2021-11-09
 ### Fixed
-- Fixed `apply_to_collection(defaultdict)` ([#10316](https://github.com/Lightning-AI/lightning/pull/10316))
-- Fixed failure when `DataLoader(batch_size=None)` is passed ([#10345](https://github.com/Lightning-AI/lightning/pull/10345))
-- Fixed interception of `__init__` arguments for sub-classed DataLoader re-instantiation in Lite ([#10334](https://github.com/Lightning-AI/lightning/pull/10334))
-- Fixed issue with pickling `CSVLogger` after a call to `CSVLogger.save` ([#10388](https://github.com/Lightning-AI/lightning/pull/10388))
-- Fixed an import error being caused by `PostLocalSGD` when `torch.distributed` not available ([#10359](https://github.com/Lightning-AI/lightning/pull/10359))
-- Fixed the logging with `on_step=True` in epoch-level hooks causing unintended side-effects. Logging with `on_step=True` in epoch-level hooks will now correctly raise an error ([#10409](https://github.com/Lightning-AI/lightning/pull/10409))
-- Fixed deadlocks for distributed training with `RichProgressBar` ([#10428](https://github.com/Lightning-AI/lightning/pull/10428))
-- Fixed an issue where the model wrapper in Lite converted non-floating point tensors to float ([#10429](https://github.com/Lightning-AI/lightning/pull/10429))
-- Fixed an issue with inferring the dataset type in fault-tolerant training ([#10432](https://github.com/Lightning-AI/lightning/pull/10432))
-- Fixed dataloader workers with `persistent_workers` being deleted on every iteration ([#10434](https://github.com/Lightning-AI/lightning/pull/10434))
+- Fixed `apply_to_collection(defaultdict)` ([#10316](https://github.com/Lightning-AI/pytorch-lightning/pull/10316))
+- Fixed failure when `DataLoader(batch_size=None)` is passed ([#10345](https://github.com/Lightning-AI/pytorch-lightning/pull/10345))
+- Fixed interception of `__init__` arguments for sub-classed DataLoader re-instantiation in Lite ([#10334](https://github.com/Lightning-AI/pytorch-lightning/pull/10334))
+- Fixed an issue with pickling `CSVLogger` after a call to `CSVLogger.save` ([#10388](https://github.com/Lightning-AI/pytorch-lightning/pull/10388))
+- Fixed an import error being caused by `PostLocalSGD` when `torch.distributed` is not available ([#10359](https://github.com/Lightning-AI/pytorch-lightning/pull/10359))
+- Fixed the logging with `on_step=True` in epoch-level hooks causing unintended side-effects. Logging with `on_step=True` in epoch-level hooks will now correctly raise an error ([#10409](https://github.com/Lightning-AI/pytorch-lightning/pull/10409))
+- Fixed deadlocks for distributed training with `RichProgressBar` ([#10428](https://github.com/Lightning-AI/pytorch-lightning/pull/10428))
+- Fixed an issue where the model wrapper in Lite converted non-floating point tensors to float ([#10429](https://github.com/Lightning-AI/pytorch-lightning/pull/10429))
+- Fixed an issue with inferring the dataset type in fault-tolerant training ([#10432](https://github.com/Lightning-AI/pytorch-lightning/pull/10432))
+- Fixed dataloader workers with `persistent_workers` being deleted on every iteration ([#10434](https://github.com/Lightning-AI/pytorch-lightning/pull/10434))
 ## [1.5.0] - 2021-11-02
 ### Added
-- Added support for monitoring the learning rate without schedulers in `LearningRateMonitor` ([#9786](https://github.com/Lightning-AI/lightning/pull/9786))
-- Added registration of `ShardedTensor` state dict hooks in `LightningModule.__init__` if the PyTorch version supports `ShardedTensor` ([#8944](https://github.com/Lightning-AI/lightning/pull/8944))
-- Added error handling including calling of `on_keyboard_interrupt()` and `on_exception()` for all entrypoints (fit, validate, test, predict) ([#8819](https://github.com/Lightning-AI/lightning/pull/8819))
-- Added a flavor of `training_step` that takes `dataloader_iter` as an argument ([#8807](https://github.com/Lightning-AI/lightning/pull/8807))
-- Added a `state_key` property to the `Callback` base class ([#6886](https://github.com/Lightning-AI/lightning/pull/6886))
+- Added support for monitoring the learning rate without schedulers in `LearningRateMonitor` ([#9786](https://github.com/Lightning-AI/pytorch-lightning/pull/9786))
+- Added registration of `ShardedTensor` state dict hooks in `LightningModule.__init__` if the PyTorch version supports `ShardedTensor` ([#8944](https://github.com/Lightning-AI/pytorch-lightning/pull/8944))
+- Added error handling including calling of `on_keyboard_interrupt()` and `on_exception()` for all entrypoints (fit, validate, test, predict) ([#8819](https://github.com/Lightning-AI/pytorch-lightning/pull/8819))
+- Added a flavor of `training_step` that takes `dataloader_iter` as an argument ([#8807](https://github.com/Lightning-AI/pytorch-lightning/pull/8807))
+- Added a `state_key` property to the `Callback` base class ([#6886](https://github.com/Lightning-AI/pytorch-lightning/pull/6886))
 - Added progress tracking to loops:
- * Integrated `TrainingEpochLoop.total_batch_idx` ([#8598](https://github.com/Lightning-AI/lightning/pull/8598))
- * Added `BatchProgress` and integrated `TrainingEpochLoop.is_last_batch` ([#9657](https://github.com/Lightning-AI/lightning/pull/9657))
- * Avoid optional `Tracker` attributes ([#9320](https://github.com/Lightning-AI/lightning/pull/9320))
- * Reset `current` progress counters when restarting an epoch loop that had already finished ([#9371](https://github.com/Lightning-AI/lightning/pull/9371))
- * Call `reset_on_restart` in the loop's `reset` hook instead of when loading a checkpoint ([#9561](https://github.com/Lightning-AI/lightning/pull/9561))
- * Use `completed` over `processed` in `reset_on_restart` ([#9656](https://github.com/Lightning-AI/lightning/pull/9656))
- * Renamed `reset_on_epoch` to `reset_on_run` ([#9658](https://github.com/Lightning-AI/lightning/pull/9658))
-- Added `batch_size` and `rank_zero_only` arguments for `log_dict` to match `log` ([#8628](https://github.com/Lightning-AI/lightning/pull/8628))
-- Added a check for unique GPU ids ([#8666](https://github.com/Lightning-AI/lightning/pull/8666))
-- Added `ResultCollection` state_dict to the Loop `state_dict` and added support for distributed reload ([#8641](https://github.com/Lightning-AI/lightning/pull/8641))
-- Added DeepSpeed collate checkpoint utility function ([#8701](https://github.com/Lightning-AI/lightning/pull/8701))
-- Added a `handles_accumulate_grad_batches` property to the training type plugins ([#8856](https://github.com/Lightning-AI/lightning/pull/8856))
-- Added a warning to `WandbLogger` when reusing a wandb run ([#8714](https://github.com/Lightning-AI/lightning/pull/8714))
-- Added `log_graph` argument for `watch` method of `WandbLogger` ([#8662](https://github.com/Lightning-AI/lightning/pull/8662))
+ * Integrated `TrainingEpochLoop.total_batch_idx` ([#8598](https://github.com/Lightning-AI/pytorch-lightning/pull/8598))
+ * Added `BatchProgress` and integrated `TrainingEpochLoop.is_last_batch` ([#9657](https://github.com/Lightning-AI/pytorch-lightning/pull/9657))
+ * Avoid optional `Tracker` attributes ([#9320](https://github.com/Lightning-AI/pytorch-lightning/pull/9320))
+ * Reset `current` progress counters when restarting an epoch loop that had already finished ([#9371](https://github.com/Lightning-AI/pytorch-lightning/pull/9371))
+ * Call `reset_on_restart` in the loop's `reset` hook instead of when loading a checkpoint ([#9561](https://github.com/Lightning-AI/pytorch-lightning/pull/9561))
+ * Use `completed` over `processed` in `reset_on_restart` ([#9656](https://github.com/Lightning-AI/pytorch-lightning/pull/9656))
+ * Renamed `reset_on_epoch` to `reset_on_run` ([#9658](https://github.com/Lightning-AI/pytorch-lightning/pull/9658))
+- Added `batch_size` and `rank_zero_only` arguments for `log_dict` to match `log` ([#8628](https://github.com/Lightning-AI/pytorch-lightning/pull/8628))
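A hedged sketch of the `dataloader_iter` flavor of `training_step` added above (#8807); the exact semantics belong to this release's inter-batch parallelism support, and `compute_loss` is a placeholder:

```python
def training_step(self, dataloader_iter):
    # the loop hands over the iterator itself, so batches can be fetched
    # manually, e.g. to prefetch or skip
    batch = next(dataloader_iter)
    return self.compute_loss(batch)
```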
+- Added a check for unique GPU ids ([#8666](https://github.com/Lightning-AI/pytorch-lightning/pull/8666))
+- Added `ResultCollection` state_dict to the Loop `state_dict` and added support for distributed reload ([#8641](https://github.com/Lightning-AI/pytorch-lightning/pull/8641))
+- Added DeepSpeed collate checkpoint utility function ([#8701](https://github.com/Lightning-AI/pytorch-lightning/pull/8701))
+- Added a `handles_accumulate_grad_batches` property to the training type plugins ([#8856](https://github.com/Lightning-AI/pytorch-lightning/pull/8856))
+- Added a warning to `WandbLogger` when reusing a wandb run ([#8714](https://github.com/Lightning-AI/pytorch-lightning/pull/8714))
+- Added `log_graph` argument for `watch` method of `WandbLogger` ([#8662](https://github.com/Lightning-AI/pytorch-lightning/pull/8662))
 - `LightningCLI` additions:
- * Added `LightningCLI(run=False|True)` to choose whether to run a `Trainer` subcommand ([#8751](https://github.com/Lightning-AI/lightning/pull/8751))
- * Added support to call any trainer function from the `LightningCLI` via subcommands ([#7508](https://github.com/Lightning-AI/lightning/pull/7508))
- * Allow easy trainer re-instantiation ([#7508](https://github.com/Lightning-AI/lightning/pull/9241))
- * Automatically register all optimizers and learning rate schedulers ([#9565](https://github.com/Lightning-AI/lightning/pull/9565))
- * Allow registering custom optimizers and learning rate schedulers without subclassing the CLI ([#9565](https://github.com/Lightning-AI/lightning/pull/9565))
- * Support shorthand notation to instantiate optimizers and learning rate schedulers ([#9565](https://github.com/Lightning-AI/lightning/pull/9565))
- * Support passing lists of callbacks via command line ([#8815](https://github.com/Lightning-AI/lightning/pull/8815))
- * Support shorthand notation to instantiate models ([#9588](https://github.com/Lightning-AI/lightning/pull/9588))
- * Support shorthand notation to instantiate datamodules ([#10011](https://github.com/Lightning-AI/lightning/pull/10011))
- * Added `multifile` option to `LightningCLI` to enable/disable config saving to preserve multiple files structure ([#9073](https://github.com/Lightning-AI/lightning/pull/9073))
+ * Added `LightningCLI(run=False|True)` to choose whether to run a `Trainer` subcommand ([#8751](https://github.com/Lightning-AI/pytorch-lightning/pull/8751))
+ * Added support to call any trainer function from the `LightningCLI` via subcommands ([#7508](https://github.com/Lightning-AI/pytorch-lightning/pull/7508))
+ * Allow easy trainer re-instantiation ([#9241](https://github.com/Lightning-AI/pytorch-lightning/pull/9241))
+ * Automatically register all optimizers and learning rate schedulers ([#9565](https://github.com/Lightning-AI/pytorch-lightning/pull/9565))
+ * Allow registering custom optimizers and learning rate schedulers without subclassing the CLI ([#9565](https://github.com/Lightning-AI/pytorch-lightning/pull/9565))
+ * Support shorthand notation to instantiate optimizers and learning rate schedulers ([#9565](https://github.com/Lightning-AI/pytorch-lightning/pull/9565))
+ * Support passing lists of callbacks via command line ([#8815](https://github.com/Lightning-AI/pytorch-lightning/pull/8815))
+ * Support shorthand notation to instantiate models ([#9588](https://github.com/Lightning-AI/pytorch-lightning/pull/9588))
+ * Support shorthand notation to instantiate datamodules ([#10011](https://github.com/Lightning-AI/pytorch-lightning/pull/10011))
+ * Added `multifile` option to `LightningCLI` to enable/disable config saving to preserve multiple files structure ([#9073](https://github.com/Lightning-AI/pytorch-lightning/pull/9073))
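A minimal sketch of `LightningCLI(run=False)` from the additions above; in the 1.5 release line the class lived under `pytorch_lightning.utilities.cli`, and `MyModel` is a placeholder for your own `LightningModule`:

```python
from pytorch_lightning.utilities.cli import LightningCLI

from my_project import MyModel  # placeholder import

# run=False only parses the config and instantiates objects; with run=True
# (the default), a subcommand such as `fit` is executed directly, e.g.:
#   python train.py fit --trainer.max_epochs=3
cli = LightningCLI(MyModel, run=False)
cli.trainer.fit(cli.model)
```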
- Fault-tolerant training:
- * Added `FastForwardSampler` and `CaptureIterableDataset` injection to data loading utilities ([#8366](https://github.com/Lightning-AI/lightning/pull/8366))
- * Added `DataFetcher` to control fetching flow ([#8890](https://github.com/Lightning-AI/lightning/pull/8890))
- * Added `SharedCycleIteratorState` to prevent infinite loop ([#8889](https://github.com/Lightning-AI/lightning/pull/8889))
- * Added `CaptureMapDataset` for state management in map-style datasets ([#8891](https://github.com/Lightning-AI/lightning/pull/8891))
- * Added Fault Tolerant Training to `DataFetcher` ([#8891](https://github.com/Lightning-AI/lightning/pull/8891))
- * Replaced old prefetch iterator with new `DataFetcher` in training loop ([#8953](https://github.com/Lightning-AI/lightning/pull/8953))
- * Added partial support for global random state fault-tolerance in map-style datasets ([#8950](https://github.com/Lightning-AI/lightning/pull/8950))
- * Converted state to tuple explicitly when setting Python random state ([#9401](https://github.com/Lightning-AI/lightning/pull/9401))
- * Added support for restarting an optimizer loop (multiple optimizers) ([#9537](https://github.com/Lightning-AI/lightning/pull/9537))
- * Added support for restarting within Evaluation Loop ([#9563](https://github.com/Lightning-AI/lightning/pull/9563))
- * Added mechanism to detect that a signal has been sent so the Trainer can gracefully exit ([#9566](https://github.com/Lightning-AI/lightning/pull/9566))
- * Added support for skipping ahead to validation during the auto-restart of fitting ([#9681](https://github.com/Lightning-AI/lightning/pull/9681))
- * Added support for auto-restart if a fault-tolerant checkpoint is available ([#9722](https://github.com/Lightning-AI/lightning/pull/9722))
+ * Added `FastForwardSampler` and `CaptureIterableDataset` injection to data loading utilities ([#8366](https://github.com/Lightning-AI/pytorch-lightning/pull/8366))
+ * Added `DataFetcher` to control fetching flow ([#8890](https://github.com/Lightning-AI/pytorch-lightning/pull/8890))
+ * Added `SharedCycleIteratorState` to prevent infinite loop ([#8889](https://github.com/Lightning-AI/pytorch-lightning/pull/8889))
+ * Added `CaptureMapDataset` for state management in map-style datasets ([#8891](https://github.com/Lightning-AI/pytorch-lightning/pull/8891))
+ * Added Fault Tolerant Training to `DataFetcher` ([#8891](https://github.com/Lightning-AI/pytorch-lightning/pull/8891))
+ * Replaced old prefetch iterator with new `DataFetcher` in training loop ([#8953](https://github.com/Lightning-AI/pytorch-lightning/pull/8953))
+ * Added partial support for global random state fault-tolerance in map-style datasets ([#8950](https://github.com/Lightning-AI/pytorch-lightning/pull/8950))
+ * Converted state to tuple explicitly when setting Python random state ([#9401](https://github.com/Lightning-AI/pytorch-lightning/pull/9401))
+ * Added support for restarting an optimizer loop (multiple optimizers) ([#9537](https://github.com/Lightning-AI/pytorch-lightning/pull/9537))
+ * Added support for restarting within Evaluation Loop ([#9563](https://github.com/Lightning-AI/pytorch-lightning/pull/9563))
+ * Added mechanism to detect that a signal has been sent so the Trainer can gracefully exit ([#9566](https://github.com/Lightning-AI/pytorch-lightning/pull/9566))
+ * Added support for 
skipping ahead to validation during the auto-restart of fitting ([#9681](https://github.com/Lightning-AI/pytorch-lightning/pull/9681)) + * Added support for auto-restart if a fault-tolerant checkpoint is available ([#9722](https://github.com/Lightning-AI/pytorch-lightning/pull/9722)) - Checkpoint saving and loading extensibility: - * Added `CheckpointIO` plugin to expose checkpoint IO from training type plugin ([#8743](https://github.com/Lightning-AI/lightning/pull/8743)) - * Refactored `CheckpointConnector` to offload validation logic to the `CheckpointIO` plugin ([#9045](https://github.com/Lightning-AI/lightning/pull/9045)) - * Added `remove_checkpoint` to `CheckpointIO` plugin by moving the responsibility out of the `ModelCheckpoint` callback ([#9373](https://github.com/Lightning-AI/lightning/pull/9373)) - * Added `XLACheckpointIO` plugin ([#9972](https://github.com/Lightning-AI/lightning/pull/9972)) + * Added `CheckpointIO` plugin to expose checkpoint IO from training type plugin ([#8743](https://github.com/Lightning-AI/pytorch-lightning/pull/8743)) + * Refactored `CheckpointConnector` to offload validation logic to the `CheckpointIO` plugin ([#9045](https://github.com/Lightning-AI/pytorch-lightning/pull/9045)) + * Added `remove_checkpoint` to `CheckpointIO` plugin by moving the responsibility out of the `ModelCheckpoint` callback ([#9373](https://github.com/Lightning-AI/pytorch-lightning/pull/9373)) + * Added `XLACheckpointIO` plugin ([#9972](https://github.com/Lightning-AI/pytorch-lightning/pull/9972)) - Loop customization: - * Added `Closure` and `AbstractClosure` classes ([#8642](https://github.com/Lightning-AI/lightning/pull/8642)) - * Refactored `TrainingBatchLoop` and extracted `OptimizerLoop`, splitting off automatic optimization into its own loop ([#9191](https://github.com/Lightning-AI/lightning/pull/9191)) - * Removed `TrainingBatchLoop.backward()`; manual optimization now calls directly into `Accelerator.backward()` and automatic optimization handles backward in new `OptimizerLoop` ([#9265](https://github.com/Lightning-AI/lightning/pull/9265)) - * Extracted `ManualOptimization` logic from `TrainingBatchLoop` into its own separate loop class ([#9266](https://github.com/Lightning-AI/lightning/pull/9266)) - * Added `OutputResult` and `ManualResult` classes ([#9437](https://github.com/Lightning-AI/lightning/pull/9437), [#9424](https://github.com/Lightning-AI/lightning/pull/9424)) - * Marked `OptimizerLoop.backward` as protected ([#9514](https://github.com/Lightning-AI/lightning/pull/9514)) - * Marked `FitLoop.should_accumulate` as protected ([#9515](https://github.com/Lightning-AI/lightning/pull/9515)) - * Marked several methods in `PredictionLoop` as protected: `on_predict_start`, `on_predict_epoch_end`, `on_predict_end`, `on_predict_model_eval` ([#9516](https://github.com/Lightning-AI/lightning/pull/9516)) - * Marked several methods in `EvaluationLoop` as protected: `get_max_batches`, `on_evaluation_model_eval`, `on_evaluation_model_train`, `on_evaluation_start`, `on_evaluation_epoch_start`, `on_evaluation_epoch_end`, `on_evaluation_end`, `reload_evaluation_dataloaders` ([#9516](https://github.com/Lightning-AI/lightning/pull/9516)) - * Marked several methods in `EvaluationEpochLoop` as protected: `on_evaluation_batch_start`, `evaluation_step`, `evaluation_step_end` ([#9516](https://github.com/Lightning-AI/lightning/pull/9516)) - * Added `yielding_training_step` example ([#9983](https://github.com/Lightning-AI/lightning/pull/9983)) -- Added support for saving and loading state 
of multiple callbacks of the same type ([#7187](https://github.com/Lightning-AI/lightning/pull/7187)) -- Added DeepSpeed Stage 1 support ([#8974](https://github.com/Lightning-AI/lightning/pull/8974)) -- Added `Python dataclass` support for `LightningDataModule` ([#8272](https://github.com/Lightning-AI/lightning/pull/8272)) -- Added sanitization of tensors when they get logged as hyperparameters in `TensorBoardLogger` ([#9031](https://github.com/Lightning-AI/lightning/pull/9031)) -- Added `InterBatchParallelDataFetcher` ([#9020](https://github.com/Lightning-AI/lightning/pull/9020)) -- Added `DataLoaderIterDataFetcher` ([#9020](https://github.com/Lightning-AI/lightning/pull/9020)) -- Added `DataFetcher` within `Fit / Evaluation` Loop ([#9047](https://github.com/Lightning-AI/lightning/pull/9047)) -- Added a friendly error message when DDP attempts to spawn new distributed processes with rank > 0 ([#9005](https://github.com/Lightning-AI/lightning/pull/9005)) + * Added `Closure` and `AbstractClosure` classes ([#8642](https://github.com/Lightning-AI/pytorch-lightning/pull/8642)) + * Refactored `TrainingBatchLoop` and extracted `OptimizerLoop`, splitting off automatic optimization into its own loop ([#9191](https://github.com/Lightning-AI/pytorch-lightning/pull/9191)) + * Removed `TrainingBatchLoop.backward()`; manual optimization now calls directly into `Accelerator.backward()` and automatic optimization handles backward in new `OptimizerLoop` ([#9265](https://github.com/Lightning-AI/pytorch-lightning/pull/9265)) + * Extracted `ManualOptimization` logic from `TrainingBatchLoop` into its own separate loop class ([#9266](https://github.com/Lightning-AI/pytorch-lightning/pull/9266)) + * Added `OutputResult` and `ManualResult` classes ([#9437](https://github.com/Lightning-AI/pytorch-lightning/pull/9437), [#9424](https://github.com/Lightning-AI/pytorch-lightning/pull/9424)) + * Marked `OptimizerLoop.backward` as protected ([#9514](https://github.com/Lightning-AI/pytorch-lightning/pull/9514)) + * Marked `FitLoop.should_accumulate` as protected ([#9515](https://github.com/Lightning-AI/pytorch-lightning/pull/9515)) + * Marked several methods in `PredictionLoop` as protected: `on_predict_start`, `on_predict_epoch_end`, `on_predict_end`, `on_predict_model_eval` ([#9516](https://github.com/Lightning-AI/pytorch-lightning/pull/9516)) + * Marked several methods in `EvaluationLoop` as protected: `get_max_batches`, `on_evaluation_model_eval`, `on_evaluation_model_train`, `on_evaluation_start`, `on_evaluation_epoch_start`, `on_evaluation_epoch_end`, `on_evaluation_end`, `reload_evaluation_dataloaders` ([#9516](https://github.com/Lightning-AI/pytorch-lightning/pull/9516)) + * Marked several methods in `EvaluationEpochLoop` as protected: `on_evaluation_batch_start`, `evaluation_step`, `evaluation_step_end` ([#9516](https://github.com/Lightning-AI/pytorch-lightning/pull/9516)) + * Added `yielding_training_step` example ([#9983](https://github.com/Lightning-AI/pytorch-lightning/pull/9983)) +- Added support for saving and loading state of multiple callbacks of the same type ([#7187](https://github.com/Lightning-AI/pytorch-lightning/pull/7187)) +- Added DeepSpeed Stage 1 support ([#8974](https://github.com/Lightning-AI/pytorch-lightning/pull/8974)) +- Added `Python dataclass` support for `LightningDataModule` ([#8272](https://github.com/Lightning-AI/pytorch-lightning/pull/8272)) +- Added sanitization of tensors when they get logged as hyperparameters in `TensorBoardLogger` 
([#9031](https://github.com/Lightning-AI/pytorch-lightning/pull/9031)) +- Added `InterBatchParallelDataFetcher` ([#9020](https://github.com/Lightning-AI/pytorch-lightning/pull/9020)) +- Added `DataLoaderIterDataFetcher` ([#9020](https://github.com/Lightning-AI/pytorch-lightning/pull/9020)) +- Added `DataFetcher` within `Fit / Evaluation` Loop ([#9047](https://github.com/Lightning-AI/pytorch-lightning/pull/9047)) +- Added a friendly error message when DDP attempts to spawn new distributed processes with rank > 0 ([#9005](https://github.com/Lightning-AI/pytorch-lightning/pull/9005)) - Added Rich integration: - * Added Rich progress bar ([#8929](https://github.com/Lightning-AI/lightning/pull/8929), [#9559](https://github.com/Lightning-AI/lightning/pull/9559)) - * Added Support for iterable datasets ([#9734](https://github.com/Lightning-AI/lightning/pull/9734)) - * Added `RichModelSummary` callback ([#9546](https://github.com/Lightning-AI/lightning/pull/9546)) - * Added `configure_columns` method to `RichProgressBar` ([#10288](https://github.com/Lightning-AI/lightning/pull/10288)) - * Added `leave` argument to `RichProgressBar` ([#10301](https://github.com/Lightning-AI/lightning/pull/10301)) -- Added input validation logic for precision ([#9080](https://github.com/Lightning-AI/lightning/pull/9080)) -- Added support for CPU AMP autocast ([#9084](https://github.com/Lightning-AI/lightning/pull/9084)) -- Added `on_exception` callback hook ([#9183](https://github.com/Lightning-AI/lightning/pull/9183)) -- Added a warning to DeepSpeed when inferring batch size ([#9221](https://github.com/Lightning-AI/lightning/pull/9221)) -- Added `ModelSummary` callback ([#9344](https://github.com/Lightning-AI/lightning/pull/9344)) -- Added `log_images`, `log_text` and `log_table` to `WandbLogger` ([#9545](https://github.com/Lightning-AI/lightning/pull/9545)) -- Added `PL_RECONCILE_PROCESS` environment variable to enable process reconciliation regardless of cluster environment settings ([#9389](https://github.com/Lightning-AI/lightning/pull/9389)) -- Added `get_device_stats` to the Accelerator interface and added its implementation for GPU and TPU ([#9586](https://github.com/Lightning-AI/lightning/pull/9586)) -- Added a warning when an unknown key is encountered in the optimizer configuration, and when `OneCycleLR` is used with `"interval": "epoch"` ([#9666](https://github.com/Lightning-AI/lightning/pull/9666)) -- Added `DeviceStatsMonitor` callback ([#9712](https://github.com/Lightning-AI/lightning/pull/9712)) -- Added `enable_progress_bar` to the Trainer constructor ([#9664](https://github.com/Lightning-AI/lightning/pull/9664)) -- Added `pl_legacy_patch` load utility for loading old checkpoints that have pickled legacy Lightning attributes ([#9166](https://github.com/Lightning-AI/lightning/pull/9166)) -- Added support for `torch.use_deterministic_algorithms` ([#9121](https://github.com/Lightning-AI/lightning/pull/9121)) -- Added automatic parameters tying for TPUs ([#9525](https://github.com/Lightning-AI/lightning/pull/9525)) -- Added support for `torch.autograd.set_detect_anomaly` through `Trainer` constructor argument `detect_anomaly` ([#9848](https://github.com/Lightning-AI/lightning/pull/9848)) -- Added `enable_model_summary` flag to Trainer ([#9699](https://github.com/Lightning-AI/lightning/pull/9699)) -- Added `strategy` argument to Trainer ([#8597](https://github.com/Lightning-AI/lightning/pull/8597)) -- Added `init_meta_context`, `materialize_module` utilities 
([#9920](https://github.com/Lightning-AI/lightning/pull/9920))
-- Added `TPUPrecisionPlugin` ([#10020](https://github.com/Lightning-AI/lightning/pull/#10020))
+ * Added Rich progress bar ([#8929](https://github.com/Lightning-AI/pytorch-lightning/pull/8929), [#9559](https://github.com/Lightning-AI/pytorch-lightning/pull/9559))
+ * Added support for iterable datasets ([#9734](https://github.com/Lightning-AI/pytorch-lightning/pull/9734))
+ * Added `RichModelSummary` callback ([#9546](https://github.com/Lightning-AI/pytorch-lightning/pull/9546))
+ * Added `configure_columns` method to `RichProgressBar` ([#10288](https://github.com/Lightning-AI/pytorch-lightning/pull/10288))
+ * Added `leave` argument to `RichProgressBar` ([#10301](https://github.com/Lightning-AI/pytorch-lightning/pull/10301))
+- Added input validation logic for precision ([#9080](https://github.com/Lightning-AI/pytorch-lightning/pull/9080))
+- Added support for CPU AMP autocast ([#9084](https://github.com/Lightning-AI/pytorch-lightning/pull/9084))
+- Added `on_exception` callback hook ([#9183](https://github.com/Lightning-AI/pytorch-lightning/pull/9183))
+- Added a warning to DeepSpeed when inferring batch size ([#9221](https://github.com/Lightning-AI/pytorch-lightning/pull/9221))
+- Added `ModelSummary` callback ([#9344](https://github.com/Lightning-AI/pytorch-lightning/pull/9344))
+- Added `log_images`, `log_text` and `log_table` to `WandbLogger` ([#9545](https://github.com/Lightning-AI/pytorch-lightning/pull/9545))
+- Added `PL_RECONCILE_PROCESS` environment variable to enable process reconciliation regardless of cluster environment settings ([#9389](https://github.com/Lightning-AI/pytorch-lightning/pull/9389))
+- Added `get_device_stats` to the Accelerator interface and added its implementation for GPU and TPU ([#9586](https://github.com/Lightning-AI/pytorch-lightning/pull/9586))
+- Added a warning when an unknown key is encountered in the optimizer configuration, and when `OneCycleLR` is used with `"interval": "epoch"` ([#9666](https://github.com/Lightning-AI/pytorch-lightning/pull/9666))
+- Added `DeviceStatsMonitor` callback ([#9712](https://github.com/Lightning-AI/pytorch-lightning/pull/9712))
+- Added `enable_progress_bar` to the Trainer constructor ([#9664](https://github.com/Lightning-AI/pytorch-lightning/pull/9664))
+- Added `pl_legacy_patch` load utility for loading old checkpoints that have pickled legacy Lightning attributes ([#9166](https://github.com/Lightning-AI/pytorch-lightning/pull/9166))
+- Added support for `torch.use_deterministic_algorithms` ([#9121](https://github.com/Lightning-AI/pytorch-lightning/pull/9121))
+- Added automatic parameters tying for TPUs ([#9525](https://github.com/Lightning-AI/pytorch-lightning/pull/9525))
+- Added support for `torch.autograd.set_detect_anomaly` through `Trainer` constructor argument `detect_anomaly` ([#9848](https://github.com/Lightning-AI/pytorch-lightning/pull/9848))
+- Added `enable_model_summary` flag to Trainer ([#9699](https://github.com/Lightning-AI/pytorch-lightning/pull/9699))
+- Added `strategy` argument to Trainer ([#8597](https://github.com/Lightning-AI/pytorch-lightning/pull/8597))
+- Added `init_meta_context`, `materialize_module` utilities ([#9920](https://github.com/Lightning-AI/pytorch-lightning/pull/9920))
+- Added `TPUPrecisionPlugin` ([#10020](https://github.com/Lightning-AI/pytorch-lightning/pull/10020))
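+
+  A minimal sketch combining several of the new `Trainer` arguments listed above (the values are illustrative, not defaults):
+
+  ```python
+  import pytorch_lightning as pl
+
+  trainer = pl.Trainer(
+      strategy="ddp",             # new `strategy` argument
+      detect_anomaly=True,        # enables torch.autograd.set_detect_anomaly
+      enable_progress_bar=True,   # new on/off switch for the progress bar
+      enable_model_summary=True,  # new on/off switch for the model summary
+  )
+  ```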
- Added `torch.bfloat16` support:
- * Added bfloat16 support for Lightning Trainer ([#9049](https://github.com/Lightning-AI/lightning/pull/9049))
- * Renamed `TPUHalfPrecisionPlugin` to `TPUBf16PrecisionPlugin` ([#10026](https://github.com/Lightning-AI/lightning/pull/10026))
- * Default to `precision=bf16` on CPU when `precision=16` is passed ([#10033](https://github.com/Lightning-AI/lightning/pull/10033))
- * Added support for `torch.autocast` ([#10053](https://github.com/Lightning-AI/lightning/pull/10053))
-- Added `kfold` example for loop customization ([#9965](https://github.com/Lightning-AI/lightning/pull/9965))
+ * Added bfloat16 support for Lightning Trainer ([#9049](https://github.com/Lightning-AI/pytorch-lightning/pull/9049))
+ * Renamed `TPUHalfPrecisionPlugin` to `TPUBf16PrecisionPlugin` ([#10026](https://github.com/Lightning-AI/pytorch-lightning/pull/10026))
+ * Default to `precision=bf16` on CPU when `precision=16` is passed ([#10033](https://github.com/Lightning-AI/pytorch-lightning/pull/10033))
+ * Added support for `torch.autocast` ([#10053](https://github.com/Lightning-AI/pytorch-lightning/pull/10053))
+- Added `kfold` example for loop customization ([#9965](https://github.com/Lightning-AI/pytorch-lightning/pull/9965))
- LightningLite:
- * Added `PrecisionPlugin.forward_context`, making it the default implementation for all `{train,val,test,predict}_step_context()` methods ([#9988](https://github.com/Lightning-AI/lightning/pull/9988))
- * Added `DDPSpawnPlugin.spawn()` for spawning new processes of a given function ([#10018](https://github.com/Lightning-AI/lightning/pull/10018), [#10022](https://github.com/Lightning-AI/lightning/pull/10022))
- * Added `TrainingTypePlugin.{_setup_model, _setup_optimizer}` methods ([#9994](https://github.com/Lightning-AI/lightning/pull/9994), [#10064](https://github.com/Lightning-AI/lightning/pull/10064))
- * Implemented `DataParallelPlugin._setup_model` ([#10010](https://github.com/Lightning-AI/lightning/pull/10010))
- * Implemented `DeepSpeedPlugin._setup_model_and_optimizers` ([#10009](https://github.com/Lightning-AI/lightning/pull/10009), [#10064](https://github.com/Lightning-AI/lightning/pull/10064))
- * Implemented `{DDPShardedPlugin,DDPShardedSpawnPlugin}._setup_model_and_optimizers` ([#10028](https://github.com/Lightning-AI/lightning/pull/10028), [#10064](https://github.com/Lightning-AI/lightning/pull/10064))
- * Added optional `model` argument to the `optimizer_step` methods in accelerators and plugins ([#10023](https://github.com/Lightning-AI/lightning/pull/10023))
- * Updated precision attributes in `DeepSpeedPlugin` ([#10164](https://github.com/Lightning-AI/lightning/pull/10164))
- * Added the ability to return a result from rank 0 in `DDPSpawnPlugin.spawn` ([#10162](https://github.com/Lightning-AI/lightning/pull/10162))
- * Added `pl.lite` package ([#10175](https://github.com/Lightning-AI/lightning/pull/10175))
- * Added `LightningLite` documentation ([#10043](https://github.com/Lightning-AI/lightning/pull/10043))
- * Added `LightningLite` examples ([#9987](https://github.com/Lightning-AI/lightning/pull/9987))
- * Make the `_LiteDataLoader` an iterator and add supports for custom dataloader ([#10279](https://github.com/Lightning-AI/lightning/pull/10279))
-- Added `use_omegaconf` argument to `save_hparams_to_yaml` plugin ([#9170](https://github.com/Lightning-AI/lightning/pull/9170))
-- Added `ckpt_path` argument for `Trainer.fit()` ([#10061](https://github.com/Lightning-AI/lightning/pull/10061))
-- Added `auto_device_count` method to `Accelerators` 
([#10222](https://github.com/Lightning-AI/lightning/pull/10222)) -- Added support for `devices="auto"` ([#10264](https://github.com/Lightning-AI/lightning/pull/10264)) -- Added a `filename` argument in `ModelCheckpoint.format_checkpoint_name` ([#9818](https://github.com/Lightning-AI/lightning/pull/9818)) -- Added support for empty `gpus` list to run on CPU ([#10246](https://github.com/Lightning-AI/lightning/pull/10246)) -- Added a warning if multiple batch sizes are found from ambiguous batch ([#10247](https://github.com/Lightning-AI/lightning/pull/10247)) + * Added `PrecisionPlugin.forward_context`, making it the default implementation for all `{train,val,test,predict}_step_context()` methods ([#9988](https://github.com/Lightning-AI/pytorch-lightning/pull/9988)) + * Added `DDPSpawnPlugin.spawn()` for spawning new processes of a given function ([#10018](https://github.com/Lightning-AI/pytorch-lightning/pull/10018), [#10022](https://github.com/Lightning-AI/pytorch-lightning/pull/10022)) + * Added `TrainingTypePlugin.{_setup_model, _setup_optimizer}` methods ([#9994](https://github.com/Lightning-AI/pytorch-lightning/pull/9994), [#10064](https://github.com/Lightning-AI/pytorch-lightning/pull/10064)) + * Implemented `DataParallelPlugin._setup_model` ([#10010](https://github.com/Lightning-AI/pytorch-lightning/pull/10010)) + * Implemented `DeepSpeedPlugin._setup_model_and_optimizers` ([#10009](https://github.com/Lightning-AI/pytorch-lightning/pull/10009), [#10064](https://github.com/Lightning-AI/pytorch-lightning/pull/10064)) + * Implemented `{DDPShardedPlugin,DDPShardedSpawnPlugin}._setup_model_and_optimizers` ([#10028](https://github.com/Lightning-AI/pytorch-lightning/pull/10028), [#10064](https://github.com/Lightning-AI/pytorch-lightning/pull/10064)) + * Added optional `model` argument to the `optimizer_step` methods in accelerators and plugins ([#10023](https://github.com/Lightning-AI/pytorch-lightning/pull/10023)) + * Updated precision attributes in `DeepSpeedPlugin` ([#10164](https://github.com/Lightning-AI/pytorch-lightning/pull/10164)) + * Added the ability to return a result from rank 0 in `DDPSpawnPlugin.spawn` ([#10162](https://github.com/Lightning-AI/pytorch-lightning/pull/10162)) + * Added `pl.lite` package ([#10175](https://github.com/Lightning-AI/pytorch-lightning/pull/10175)) + * Added `LightningLite` documentation ([#10043](https://github.com/Lightning-AI/pytorch-lightning/pull/10043)) + * Added `LightningLite` examples ([#9987](https://github.com/Lightning-AI/pytorch-lightning/pull/9987)) + * Make the `_LiteDataLoader` an iterator and add supports for custom dataloader ([#10279](https://github.com/Lightning-AI/pytorch-lightning/pull/10279)) +- Added `use_omegaconf` argument to `save_hparams_to_yaml` plugin ([#9170](https://github.com/Lightning-AI/pytorch-lightning/pull/9170)) +- Added `ckpt_path` argument for `Trainer.fit()` ([#10061](https://github.com/Lightning-AI/pytorch-lightning/pull/10061)) +- Added `auto_device_count` method to `Accelerators` ([#10222](https://github.com/Lightning-AI/pytorch-lightning/pull/10222)) +- Added support for `devices="auto"` ([#10264](https://github.com/Lightning-AI/pytorch-lightning/pull/10264)) +- Added a `filename` argument in `ModelCheckpoint.format_checkpoint_name` ([#9818](https://github.com/Lightning-AI/pytorch-lightning/pull/9818)) +- Added support for empty `gpus` list to run on CPU ([#10246](https://github.com/Lightning-AI/pytorch-lightning/pull/10246)) +- Added a warning if multiple batch sizes are found from ambiguous batch 
([#10247](https://github.com/Lightning-AI/pytorch-lightning/pull/10247)) ### Changed -- Trainer now raises a `MisconfigurationException` when its methods are called with `ckpt_path="best"` but a checkpoint callback isn't configured ([#9841](https://github.com/Lightning-AI/lightning/pull/9841)) -- Setting `Trainer(accelerator="ddp_cpu")` now does not spawn a subprocess if `num_processes` is kept `1` along with `num_nodes > 1` ([#9603](https://github.com/Lightning-AI/lightning/pull/9603)) -- Module imports are now catching `ModuleNotFoundError` instead of `ImportError` ([#9867](https://github.com/Lightning-AI/lightning/pull/9867)) -- `pl.loggers.neptune.NeptuneLogger` is now consistent with the new [neptune-client](https://github.com/neptune-ai/neptune-client) API; the old [neptune-client](https://github.com/neptune-ai/neptune-client) API is supported by `NeptuneClient` from the [neptune-contrib](https://github.com/neptune-ai/neptune-contrib) repo ([#6867](https://github.com/Lightning-AI/lightning/pull/6867)) -- Parsing of `enums` type hyperparameters to be saved in the `haprams.yaml` file by TensorBoard and CSV loggers has been fixed and made in line with how OmegaConf parses it ([#9170](https://github.com/Lightning-AI/lightning/pull/9170)) -- Parsing of the `gpus` Trainer argument has changed: `gpus="n"` (str) no longer selects the GPU index n and instead selects the first n devices ([#8770](https://github.com/Lightning-AI/lightning/pull/8770)) -- `iteration_count` and other index attributes in the loops has been replaced with progress dataclasses ([#8477](https://github.com/Lightning-AI/lightning/pull/8477)) -- The `trainer.lightning_module` reference is now properly set at the very beginning of a run ([#8536](https://github.com/Lightning-AI/lightning/pull/8536)) -- The model weights now get loaded in all cases when the checkpoint path gets provided in validate/test/predict, regardless of whether the model instance is provided or not ([#8352](https://github.com/Lightning-AI/lightning/pull/8352)) -- The `Trainer` functions `reset_{train,val,test,predict}_dataloader`, `reset_train_val_dataloaders`, and `request_dataloader` `model` argument is now optional ([#8536](https://github.com/Lightning-AI/lightning/pull/8536)) -- Saved checkpoints will no longer use the type of a `Callback` as the key to avoid issues with unpickling ([#6886](https://github.com/Lightning-AI/lightning/pull/6886)) -- Improved string conversion for `ResultCollection` ([#8622](https://github.com/Lightning-AI/lightning/pull/8622)) +- Trainer now raises a `MisconfigurationException` when its methods are called with `ckpt_path="best"` but a checkpoint callback isn't configured ([#9841](https://github.com/Lightning-AI/pytorch-lightning/pull/9841)) +- Setting `Trainer(accelerator="ddp_cpu")` now does not spawn a subprocess if `num_processes` is kept `1` along with `num_nodes > 1` ([#9603](https://github.com/Lightning-AI/pytorch-lightning/pull/9603)) +- Module imports are now catching `ModuleNotFoundError` instead of `ImportError` ([#9867](https://github.com/Lightning-AI/pytorch-lightning/pull/9867)) +- `pl.loggers.neptune.NeptuneLogger` is now consistent with the new [neptune-client](https://github.com/neptune-ai/neptune-client) API; the old [neptune-client](https://github.com/neptune-ai/neptune-client) API is supported by `NeptuneClient` from the [neptune-contrib](https://github.com/neptune-ai/neptune-contrib) repo ([#6867](https://github.com/Lightning-AI/pytorch-lightning/pull/6867)) +- Parsing of `enums` type 
hyperparameters to be saved in the `hparams.yaml` file by TensorBoard and CSV loggers has been fixed and made in line with how OmegaConf parses it ([#9170](https://github.com/Lightning-AI/pytorch-lightning/pull/9170))
+- Parsing of the `gpus` Trainer argument has changed: `gpus="n"` (str) no longer selects the GPU index n and instead selects the first n devices ([#8770](https://github.com/Lightning-AI/pytorch-lightning/pull/8770))
+- `iteration_count` and other index attributes in the loops have been replaced with progress dataclasses ([#8477](https://github.com/Lightning-AI/pytorch-lightning/pull/8477))
+- The `trainer.lightning_module` reference is now properly set at the very beginning of a run ([#8536](https://github.com/Lightning-AI/pytorch-lightning/pull/8536))
+- The model weights now get loaded in all cases when the checkpoint path gets provided in validate/test/predict, regardless of whether the model instance is provided or not ([#8352](https://github.com/Lightning-AI/pytorch-lightning/pull/8352))
+- The `Trainer` functions `reset_{train,val,test,predict}_dataloader`, `reset_train_val_dataloaders`, and `request_dataloader` `model` argument is now optional ([#8536](https://github.com/Lightning-AI/pytorch-lightning/pull/8536))
+- Saved checkpoints will no longer use the type of a `Callback` as the key to avoid issues with unpickling ([#6886](https://github.com/Lightning-AI/pytorch-lightning/pull/6886))
+- Improved string conversion for `ResultCollection` ([#8622](https://github.com/Lightning-AI/pytorch-lightning/pull/8622))
- `LightningCLI` changes:
- * `LightningCLI.init_parser` now returns the parser instance ([#8721](https://github.com/Lightning-AI/lightning/pull/8721))
- * `LightningCLI.add_core_arguments_to_parser`, `LightningCLI.parse_arguments` now take a `parser` argument ([#8721](https://github.com/Lightning-AI/lightning/pull/8721))
- * `LightningCLI.instantiate_trainer` now takes a config and a list of callbacks ([#8721](https://github.com/Lightning-AI/lightning/pull/8721))
- * Split `LightningCLI.add_core_arguments_to_parser` into `LightningCLI.add_default_arguments_to_parser` + `LightningCLI.add_core_arguments_to_parser` ([#8721](https://github.com/Lightning-AI/lightning/pull/8721))
-- The accelerator and training type plugin `setup` hooks no longer have a `model` argument ([#8536](https://github.com/Lightning-AI/lightning/pull/8536))
-- The accelerator and training type plugin `update_global_step` hook has been removed ([#8856](https://github.com/Lightning-AI/lightning/pull/8856))
-- The coverage of `self.log`-ing in any `LightningModule` or `Callback` hook has been improved ([#8498](https://github.com/Lightning-AI/lightning/pull/8498))
-- `self.log`-ing without a `Trainer` reference now raises a warning instead of an exception ([#9733](https://github.com/Lightning-AI/lightning/pull/9733))
-- Removed restrictions in the Trainer that loggers can only log from rank 0; the existing logger behavior has not changed ([#8608](https://github.com/Lightning-AI/lightning/pull/8608))
-- `Trainer.request_dataloader` now takes a `RunningStage` enum instance ([#8858](https://github.com/Lightning-AI/lightning/pull/8858))
-- Changed `rank_zero_warn` to `NotImplementedError` in the `{train, val, test, predict}_dataloader` hooks that `Lightning(Data)Module` uses ([#9161](https://github.com/Lightning-AI/lightning/pull/9161))
-- Moved `block_ddp_sync_behaviour` out of `TrainingBatchLoop` to loop utilities ([#9192](https://github.com/Lightning-AI/lightning/pull/9192))
-- Executing the `optimizer_closure` is now required when overriding the `optimizer_step` hook ([#9360](https://github.com/Lightning-AI/lightning/pull/9360))
-- Changed logging of `LightningModule` and `LightningDataModule` hyperparameters to raise an exception only if there are colliding keys with different values ([#9496](https://github.com/Lightning-AI/lightning/pull/9496))
-- `seed_everything` now fails when an invalid seed value is passed instead of selecting a random seed ([#8787](https://github.com/Lightning-AI/lightning/pull/8787))
-- The Trainer now calls `TrainingTypePlugin` collective APIs directly instead of going through the Accelerator reference ([#9677](https://github.com/Lightning-AI/lightning/pull/9677), [#9901](https://github.com/Lightning-AI/lightning/pull/9901))
-- The tuner now uses a unique filename to save a temporary checkpoint ([#9682](https://github.com/Lightning-AI/lightning/pull/9682))
-- Changed `HorovodPlugin.all_gather` to return a `torch.Tensor` instead of a list ([#9696](https://github.com/Lightning-AI/lightning/pull/9696))
+ * `LightningCLI.init_parser` now returns the parser instance ([#8721](https://github.com/Lightning-AI/pytorch-lightning/pull/8721))
+ * `LightningCLI.add_core_arguments_to_parser`, `LightningCLI.parse_arguments` now take a `parser` argument ([#8721](https://github.com/Lightning-AI/pytorch-lightning/pull/8721))
+ * `LightningCLI.instantiate_trainer` now takes a config and a list of callbacks ([#8721](https://github.com/Lightning-AI/pytorch-lightning/pull/8721))
+ * Split `LightningCLI.add_core_arguments_to_parser` into `LightningCLI.add_default_arguments_to_parser` + `LightningCLI.add_core_arguments_to_parser` ([#8721](https://github.com/Lightning-AI/pytorch-lightning/pull/8721))
+- The accelerator and training type plugin `setup` hooks no longer have a `model` argument ([#8536](https://github.com/Lightning-AI/pytorch-lightning/pull/8536))
+- The accelerator and training type plugin `update_global_step` hook has been removed ([#8856](https://github.com/Lightning-AI/pytorch-lightning/pull/8856))
+- The coverage of `self.log`-ing in any `LightningModule` or `Callback` hook has been improved ([#8498](https://github.com/Lightning-AI/pytorch-lightning/pull/8498))
+- `self.log`-ing without a `Trainer` reference now raises a warning instead of an exception ([#9733](https://github.com/Lightning-AI/pytorch-lightning/pull/9733))
+- Removed restrictions in the Trainer that loggers can only log from rank 0; the existing logger behavior has not changed ([#8608](https://github.com/Lightning-AI/pytorch-lightning/pull/8608))
+- `Trainer.request_dataloader` now takes a `RunningStage` enum instance ([#8858](https://github.com/Lightning-AI/pytorch-lightning/pull/8858))
+- Changed `rank_zero_warn` to `NotImplementedError` in the `{train, val, test, predict}_dataloader` hooks that `Lightning(Data)Module` uses ([#9161](https://github.com/Lightning-AI/pytorch-lightning/pull/9161))
+- Moved `block_ddp_sync_behaviour` out of `TrainingBatchLoop` to loop utilities ([#9192](https://github.com/Lightning-AI/pytorch-lightning/pull/9192))
+- Executing the `optimizer_closure` is now required when overriding the `optimizer_step` hook ([#9360](https://github.com/Lightning-AI/pytorch-lightning/pull/9360))
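+
+  A minimal sketch of the new contract (signature abbreviated with `**kwargs`): an override must execute the closure, typically by forwarding it to `optimizer.step`:
+
+  ```python
+  import pytorch_lightning as pl
+
+  class LitModel(pl.LightningModule):
+      def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx,
+                         optimizer_closure, **kwargs):
+          # The closure runs training_step and backward; skipping it is now an error.
+          optimizer.step(closure=optimizer_closure)
+  ```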
+- Changed logging of `LightningModule` and `LightningDataModule` hyperparameters to raise an exception only if there are colliding keys with different values ([#9496](https://github.com/Lightning-AI/pytorch-lightning/pull/9496))
+- `seed_everything` now fails when an invalid seed value is passed instead of selecting a random seed ([#8787](https://github.com/Lightning-AI/pytorch-lightning/pull/8787))
+- The Trainer now calls `TrainingTypePlugin` collective APIs directly instead of going through the Accelerator reference ([#9677](https://github.com/Lightning-AI/pytorch-lightning/pull/9677), [#9901](https://github.com/Lightning-AI/pytorch-lightning/pull/9901))
+- The tuner now uses a unique filename to save a temporary checkpoint ([#9682](https://github.com/Lightning-AI/pytorch-lightning/pull/9682))
+- Changed `HorovodPlugin.all_gather` to return a `torch.Tensor` instead of a list ([#9696](https://github.com/Lightning-AI/pytorch-lightning/pull/9696))
- Changed Trainer connectors to be protected attributes:
- * Configuration Validator ([#9779](https://github.com/Lightning-AI/lightning/pull/9779))
-- The `current_epoch` and `global_step` attributes now get restored irrespective of the Trainer task ([#9413](https://github.com/Lightning-AI/lightning/pull/9413))
-- Trainer now raises an exception when requesting `amp_level` with native `amp_backend` ([#9755](https://github.com/Lightning-AI/lightning/pull/9755))
-- Update the logic to check for accumulation steps with deepspeed ([#9826](https://github.com/Lightning-AI/lightning/pull/9826))
-- `pl.utilities.grads.grad_norm` now raises an exception if parameter `norm_type <= 0` ([#9765](https://github.com/Lightning-AI/lightning/pull/9765))
-- Updated error message for interactive incompatible plugins ([#9896](https://github.com/Lightning-AI/lightning/pull/9896))
-- Moved the `optimizer_step` and `clip_gradients` hook from the `Accelerator` and `TrainingTypePlugin` into the `PrecisionPlugin` ([#10143](https://github.com/Lightning-AI/lightning/pull/10143), [#10029](https://github.com/Lightning-AI/lightning/pull/10029))
-- `NativeMixedPrecisionPlugin` and its subclasses now take an optional `GradScaler` instance ([#10055](https://github.com/Lightning-AI/lightning/pull/10055))
-- Trainer is now raising a `MisconfigurationException` instead of a warning if `Trainer.{validate/test}` is missing required methods ([#10016](https://github.com/Lightning-AI/lightning/pull/10016))
-- Changed default value of the `max_steps` Trainer argument from `None` to -1 ([#9460](https://github.com/Lightning-AI/lightning/pull/9460))
-- LightningModule now raises an error when calling `log(on_step=False, on_epoch=False)` ([#10227](https://github.com/Lightning-AI/lightning/pull/10227))
-- Quantization aware training observers are now disabled by default during validating/testing/predicting stages ([#8540](https://github.com/Lightning-AI/lightning/pull/8540))
-- Raised `MisconfigurationException` when total length of `dataloader` across ranks is zero, and give warning when total length is non-zero, but only local rank length is zero. 
([#9827](https://github.com/Lightning-AI/lightning/pull/9827)) -- Changed the model size calculation using `ByteCounter` ([#10123](https://github.com/Lightning-AI/lightning/pull/10123)) -- Enabled `on_load_checkpoint` for `LightningDataModule` for all `trainer_fn` ([#10238](https://github.com/Lightning-AI/lightning/pull/10238)) -- Allowed separate config files for parameters with class type when LightningCLI is in `subclass_mode=False` ([#10286](https://github.com/Lightning-AI/lightning/pull/10286)) + * Configuration Validator ([#9779](https://github.com/Lightning-AI/pytorch-lightning/pull/9779)) +- The `current_epoch` and `global_step` attributes now get restored irrespective of the Trainer task ([#9413](https://github.com/Lightning-AI/pytorch-lightning/pull/9413)) +- Trainer now raises an exception when requesting `amp_level` with native `amp_backend` ([#9755](https://github.com/Lightning-AI/pytorch-lightning/pull/9755)) +- Update the logic to check for accumulation steps with deepspeed ([#9826](https://github.com/Lightning-AI/pytorch-lightning/pull/9826)) +- `pl.utilities.grads.grad_norm` now raises an exception if parameter `norm_type <= 0` ([#9765](https://github.com/Lightning-AI/pytorch-lightning/pull/9765)) +- Updated error message for interactive incompatible plugins ([#9896](https://github.com/Lightning-AI/pytorch-lightning/pull/9896)) +- Moved the `optimizer_step` and `clip_gradients` hook from the `Accelerator` and `TrainingTypePlugin` into the `PrecisionPlugin` ([#10143](https://github.com/Lightning-AI/pytorch-lightning/pull/10143), [#10029](https://github.com/Lightning-AI/pytorch-lightning/pull/10029)) +- `NativeMixedPrecisionPlugin` and its subclasses now take an optional `GradScaler` instance ([#10055](https://github.com/Lightning-AI/pytorch-lightning/pull/10055)) +- Trainer is now raising a `MisconfigurationException` instead of a warning if `Trainer.{validate/test}` is missing required methods ([#10016](https://github.com/Lightning-AI/pytorch-lightning/pull/10016)) +- Changed default value of the `max_steps` Trainer argument from `None` to -1 ([#9460](https://github.com/Lightning-AI/pytorch-lightning/pull/9460)) +- LightningModule now raises an error when calling `log(on_step=False, on_epoch=False)` ([#10227](https://github.com/Lightning-AI/pytorch-lightning/pull/10227)) +- Quantization aware training observers are now disabled by default during validating/testing/predicting stages ([#8540](https://github.com/Lightning-AI/pytorch-lightning/pull/8540)) +- Raised `MisconfigurationException` when total length of `dataloader` across ranks is zero, and give warning when total length is non-zero, but only local rank length is zero. 
([#9827](https://github.com/Lightning-AI/pytorch-lightning/pull/9827)) +- Changed the model size calculation using `ByteCounter` ([#10123](https://github.com/Lightning-AI/pytorch-lightning/pull/10123)) +- Enabled `on_load_checkpoint` for `LightningDataModule` for all `trainer_fn` ([#10238](https://github.com/Lightning-AI/pytorch-lightning/pull/10238)) +- Allowed separate config files for parameters with class type when LightningCLI is in `subclass_mode=False` ([#10286](https://github.com/Lightning-AI/pytorch-lightning/pull/10286)) ### Deprecated -- Deprecated Trainer argument `terminate_on_nan` in favor of `detect_anomaly`([#9175](https://github.com/Lightning-AI/lightning/pull/9175)) -- Deprecated `Trainer.terminate_on_nan` public attribute access ([#9849](https://github.com/Lightning-AI/lightning/pull/9849)) -- Deprecated `LightningModule.summarize()` in favor of `pl.utilities.model_summary.summarize()` ([#8513](https://github.com/Lightning-AI/lightning/pull/8513)) -- Deprecated `LightningModule.model_size` ([#8343](https://github.com/Lightning-AI/lightning/pull/8343)) -- Deprecated `DataModule` properties: `train_transforms`, `val_transforms`, `test_transforms`, `size`, `dims` ([#8851](https://github.com/Lightning-AI/lightning/pull/8851)) -- Deprecated `add_to_queue`, `get_from_queue` from `LightningModule` in favor of corresponding methods in the `DDPSpawnPlugin` ([#9118](https://github.com/Lightning-AI/lightning/pull/9118)) -- Deprecated `LightningModule.get_progress_bar_dict` and `Trainer.progress_bar_dict` in favor of `pl.callbacks.progress.base.get_standard_metrics` and `ProgressBarBase.get_metrics` ([#8985](https://github.com/Lightning-AI/lightning/pull/8985)) -- Deprecated `prepare_data_per_node` flag on Trainer and set it as a property of `DataHooks`, accessible in the `LightningModule` and `LightningDataModule` ([#8958](https://github.com/Lightning-AI/lightning/pull/8958)) -- Deprecated the `TestTubeLogger` ([#9065](https://github.com/Lightning-AI/lightning/pull/9065)) -- Deprecated `on_{train/val/test/predict}_dataloader()` from `LightningModule` and `LightningDataModule` ([#9098](https://github.com/Lightning-AI/lightning/pull/9098)) -- Deprecated `on_keyboard_interrupt` callback hook in favor of new `on_exception` hook ([#9260](https://github.com/Lightning-AI/lightning/pull/9260)) -- Deprecated passing `process_position` to the `Trainer` constructor in favor of adding the `ProgressBar` callback with `process_position` directly to the list of callbacks ([#9222](https://github.com/Lightning-AI/lightning/pull/9222)) -- Deprecated passing `flush_logs_every_n_steps` as a Trainer argument, instead pass it to the logger init if supported ([#9366](https://github.com/Lightning-AI/lightning/pull/9366)) -- Deprecated `LightningLoggerBase.close`, `LoggerCollection.close` in favor of `LightningLoggerBase.finalize`, `LoggerCollection.finalize` ([#9422](https://github.com/Lightning-AI/lightning/pull/9422)) -- Deprecated passing `progress_bar_refresh_rate` to the `Trainer` constructor in favor of adding the `ProgressBar` callback with `refresh_rate` directly to the list of callbacks, or passing `enable_progress_bar=False` to disable the progress bar ([#9616](https://github.com/Lightning-AI/lightning/pull/9616)) -- Deprecated `LightningDistributed` and moved the broadcast logic to `DDPPlugin` and `DDPSpawnPlugin` directly ([#9691](https://github.com/Lightning-AI/lightning/pull/9691)) -- Deprecated passing `stochastic_weight_avg` to the `Trainer` constructor in favor of adding the 
`StochasticWeightAveraging` callback directly to the list of callbacks ([#8989](https://github.com/Lightning-AI/lightning/pull/8989)) -- Deprecated Accelerator collective API `barrier`, `broadcast`, and `all_gather` in favor of calling the `TrainingTypePlugin` collective API directly ([#9677](https://github.com/Lightning-AI/lightning/pull/9677)) -- Deprecated `checkpoint_callback` from the `Trainer` constructor in favor of `enable_checkpointing` ([#9754](https://github.com/Lightning-AI/lightning/pull/9754)) -- Deprecated the `LightningModule.on_post_move_to_device` method ([#9525](https://github.com/Lightning-AI/lightning/pull/9525)) -- Deprecated `pl.core.decorators.parameter_validation` in favor of `pl.utilities.parameter_tying.set_shared_parameters` ([#9525](https://github.com/Lightning-AI/lightning/pull/9525)) -- Deprecated passing `weights_summary` to the `Trainer` constructor in favor of adding the `ModelSummary` callback with `max_depth` directly to the list of callbacks ([#9699](https://github.com/Lightning-AI/lightning/pull/9699)) -- Deprecated `log_gpu_memory`, `gpu_metrics`, and util funcs in favor of `DeviceStatsMonitor` callback ([#9921](https://github.com/Lightning-AI/lightning/pull/9921)) -- Deprecated `GPUStatsMonitor` and `XLAStatsMonitor` in favor of `DeviceStatsMonitor` callback ([#9924](https://github.com/Lightning-AI/lightning/pull/9924)) -- Deprecated setting `Trainer(max_steps=None)`; To turn off the limit, set `Trainer(max_steps=-1)` (default) ([#9460](https://github.com/Lightning-AI/lightning/pull/9460)) -- Deprecated access to the `AcceleratorConnector.is_slurm_managing_tasks` attribute and marked it as protected ([#10101](https://github.com/Lightning-AI/lightning/pull/10101)) -- Deprecated access to the `AcceleratorConnector.configure_slurm_ddp` method and marked it as protected ([#10101](https://github.com/Lightning-AI/lightning/pull/10101)) -- Deprecated passing `resume_from_checkpoint` to the `Trainer` constructor in favor of `trainer.fit(ckpt_path=)` ([#10061](https://github.com/Lightning-AI/lightning/pull/10061)) -- Deprecated `ClusterEnvironment.creates_children()` in favor of `ClusterEnvironment.creates_processes_externally` (property) ([#10106](https://github.com/Lightning-AI/lightning/pull/10106)) -- Deprecated `PrecisionPlugin.master_params()` in favor of `PrecisionPlugin.main_params()` ([#10105](https://github.com/Lightning-AI/lightning/pull/10105)) -- Deprecated `lr_sch_names` from `LearningRateMonitor` ([#10066](https://github.com/Lightning-AI/lightning/pull/10066)) -- Deprecated `ProgressBar` callback in favor of `TQDMProgressBar` ([#10134](https://github.com/Lightning-AI/lightning/pull/10134)) +- Deprecated Trainer argument `terminate_on_nan` in favor of `detect_anomaly`([#9175](https://github.com/Lightning-AI/pytorch-lightning/pull/9175)) +- Deprecated `Trainer.terminate_on_nan` public attribute access ([#9849](https://github.com/Lightning-AI/pytorch-lightning/pull/9849)) +- Deprecated `LightningModule.summarize()` in favor of `pl.utilities.model_summary.summarize()` ([#8513](https://github.com/Lightning-AI/pytorch-lightning/pull/8513)) +- Deprecated `LightningModule.model_size` ([#8343](https://github.com/Lightning-AI/pytorch-lightning/pull/8343)) +- Deprecated `DataModule` properties: `train_transforms`, `val_transforms`, `test_transforms`, `size`, `dims` ([#8851](https://github.com/Lightning-AI/pytorch-lightning/pull/8851)) +- Deprecated `add_to_queue`, `get_from_queue` from `LightningModule` in favor of corresponding methods in the 
`DDPSpawnPlugin` ([#9118](https://github.com/Lightning-AI/pytorch-lightning/pull/9118))
+- Deprecated `LightningModule.get_progress_bar_dict` and `Trainer.progress_bar_dict` in favor of `pl.callbacks.progress.base.get_standard_metrics` and `ProgressBarBase.get_metrics` ([#8985](https://github.com/Lightning-AI/pytorch-lightning/pull/8985))
+- Deprecated `prepare_data_per_node` flag on Trainer and set it as a property of `DataHooks`, accessible in the `LightningModule` and `LightningDataModule` ([#8958](https://github.com/Lightning-AI/pytorch-lightning/pull/8958))
+- Deprecated the `TestTubeLogger` ([#9065](https://github.com/Lightning-AI/pytorch-lightning/pull/9065))
+- Deprecated `on_{train/val/test/predict}_dataloader()` from `LightningModule` and `LightningDataModule` ([#9098](https://github.com/Lightning-AI/pytorch-lightning/pull/9098))
+- Deprecated `on_keyboard_interrupt` callback hook in favor of new `on_exception` hook ([#9260](https://github.com/Lightning-AI/pytorch-lightning/pull/9260))
+- Deprecated passing `process_position` to the `Trainer` constructor in favor of adding the `ProgressBar` callback with `process_position` directly to the list of callbacks ([#9222](https://github.com/Lightning-AI/pytorch-lightning/pull/9222))
+- Deprecated passing `flush_logs_every_n_steps` as a Trainer argument, instead pass it to the logger init if supported ([#9366](https://github.com/Lightning-AI/pytorch-lightning/pull/9366))
+- Deprecated `LightningLoggerBase.close`, `LoggerCollection.close` in favor of `LightningLoggerBase.finalize`, `LoggerCollection.finalize` ([#9422](https://github.com/Lightning-AI/pytorch-lightning/pull/9422))
+- Deprecated passing `progress_bar_refresh_rate` to the `Trainer` constructor in favor of adding the `ProgressBar` callback with `refresh_rate` directly to the list of callbacks, or passing `enable_progress_bar=False` to disable the progress bar ([#9616](https://github.com/Lightning-AI/pytorch-lightning/pull/9616))
+- Deprecated `LightningDistributed` and moved the broadcast logic to `DDPPlugin` and `DDPSpawnPlugin` directly ([#9691](https://github.com/Lightning-AI/pytorch-lightning/pull/9691))
+- Deprecated passing `stochastic_weight_avg` to the `Trainer` constructor in favor of adding the `StochasticWeightAveraging` callback directly to the list of callbacks ([#8989](https://github.com/Lightning-AI/pytorch-lightning/pull/8989))
+- Deprecated Accelerator collective API `barrier`, `broadcast`, and `all_gather` in favor of calling the `TrainingTypePlugin` collective API directly ([#9677](https://github.com/Lightning-AI/pytorch-lightning/pull/9677))
+- Deprecated `checkpoint_callback` from the `Trainer` constructor in favor of `enable_checkpointing` ([#9754](https://github.com/Lightning-AI/pytorch-lightning/pull/9754))
+- Deprecated the `LightningModule.on_post_move_to_device` method ([#9525](https://github.com/Lightning-AI/pytorch-lightning/pull/9525))
+- Deprecated `pl.core.decorators.parameter_validation` in favor of `pl.utilities.parameter_tying.set_shared_parameters` ([#9525](https://github.com/Lightning-AI/pytorch-lightning/pull/9525))
+- Deprecated passing `weights_summary` to the `Trainer` constructor in favor of adding the `ModelSummary` callback with `max_depth` directly to the list of callbacks ([#9699](https://github.com/Lightning-AI/pytorch-lightning/pull/9699))
+- Deprecated `log_gpu_memory`, `gpu_metrics`, and util funcs in favor of `DeviceStatsMonitor` callback ([#9921](https://github.com/Lightning-AI/pytorch-lightning/pull/9921))
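+
+  A minimal sketch of the replacement path (default Trainer settings assumed otherwise):
+
+  ```python
+  import pytorch_lightning as pl
+  from pytorch_lightning.callbacks import DeviceStatsMonitor
+
+  # DeviceStatsMonitor supersedes GPUStatsMonitor/XLAStatsMonitor
+  # and the deprecated log_gpu_memory Trainer flag.
+  trainer = pl.Trainer(callbacks=[DeviceStatsMonitor()])
+  ```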
+- Deprecated `GPUStatsMonitor` and `XLAStatsMonitor` in favor of `DeviceStatsMonitor` callback ([#9924](https://github.com/Lightning-AI/pytorch-lightning/pull/9924))
+- Deprecated setting `Trainer(max_steps=None)`; to turn off the limit, set `Trainer(max_steps=-1)` (default) ([#9460](https://github.com/Lightning-AI/pytorch-lightning/pull/9460))
+- Deprecated access to the `AcceleratorConnector.is_slurm_managing_tasks` attribute and marked it as protected ([#10101](https://github.com/Lightning-AI/pytorch-lightning/pull/10101))
+- Deprecated access to the `AcceleratorConnector.configure_slurm_ddp` method and marked it as protected ([#10101](https://github.com/Lightning-AI/pytorch-lightning/pull/10101))
+- Deprecated passing `resume_from_checkpoint` to the `Trainer` constructor in favor of `trainer.fit(ckpt_path=)` ([#10061](https://github.com/Lightning-AI/pytorch-lightning/pull/10061))
+- Deprecated `ClusterEnvironment.creates_children()` in favor of `ClusterEnvironment.creates_processes_externally` (property) ([#10106](https://github.com/Lightning-AI/pytorch-lightning/pull/10106))
+- Deprecated `PrecisionPlugin.master_params()` in favor of `PrecisionPlugin.main_params()` ([#10105](https://github.com/Lightning-AI/pytorch-lightning/pull/10105))
+- Deprecated `lr_sch_names` from `LearningRateMonitor` ([#10066](https://github.com/Lightning-AI/pytorch-lightning/pull/10066))
+- Deprecated `ProgressBar` callback in favor of `TQDMProgressBar` ([#10134](https://github.com/Lightning-AI/pytorch-lightning/pull/10134))

### Removed

-- Removed deprecated `metrics` ([#8586](https://github.com/Lightning-AI/lightning/pull/8586/))
-- Removed the deprecated `outputs` argument in both the `LightningModule.on_train_epoch_end` and `Callback.on_train_epoch_end` hooks ([#8587](https://github.com/Lightning-AI/lightning/pull/8587))
-- Removed the deprecated `TrainerLoggingMixin` class ([#8609](https://github.com/Lightning-AI/lightning/pull/8609))
-- Removed the deprecated `TrainerTrainingTricksMixin` class ([#8679](https://github.com/Lightning-AI/lightning/pull/8679))
-- Removed the deprecated `optimizer_idx` from `training_step` as an accepted argument in manual optimization ([#8576](https://github.com/Lightning-AI/lightning/pull/8576))
-- Removed support for the deprecated `on_save_checkpoint` signature. The hook now takes a `checkpoint` positional parameter ([#8697](https://github.com/Lightning-AI/lightning/pull/8697))
-- Removed support for the deprecated `on_load_checkpoint` signature. 
The hook now takes a `pl_module` positional parameter ([#8697](https://github.com/Lightning-AI/lightning/pull/8697)) -- Removed the deprecated `save_function` property in `ModelCheckpoint` ([#8680](https://github.com/Lightning-AI/lightning/pull/8680)) -- Removed the deprecated `model` argument from `ModelCheckpoint.save_checkpoint` ([#8688](https://github.com/Lightning-AI/lightning/pull/8688)) -- Removed the deprecated `sync_step` argument from `WandbLogger` ([#8763](https://github.com/Lightning-AI/lightning/pull/8763)) -- Removed the deprecated `Trainer.truncated_bptt_steps` in favor of `LightningModule.truncated_bptt_steps` ([#8826](https://github.com/Lightning-AI/lightning/pull/8826)) -- Removed `LightningModule.write_predictions` and `LightningModule.write_predictions_dict` ([#8850](https://github.com/Lightning-AI/lightning/pull/8850)) -- Removed `on_reset_*_dataloader` hooks in TrainingType Plugins and Accelerators ([#8858](https://github.com/Lightning-AI/lightning/pull/8858)) -- Removed deprecated `GradInformation` module in favor of `pl.utilities.grads` ([#8831](https://github.com/Lightning-AI/lightning/pull/8831/)) -- Removed `TrainingTypePlugin.on_save` and `Accelerator.on_save` ([#9023](https://github.com/Lightning-AI/lightning/pull/9023)) -- Removed `{Accelerator,TrainingTypePlugin,PrecisionPlugin}.post_optimizer_step` ([#9746](https://github.com/Lightning-AI/lightning/pull/9746)) -- Removed deprecated `connect_precision_plugin` and `connect_training_type_plugin` from `Accelerator` ([#9019](https://github.com/Lightning-AI/lightning/pull/9019)) -- Removed `on_train_epoch_end` from `Accelerator` ([#9035](https://github.com/Lightning-AI/lightning/pull/9035)) -- Removed `InterBatchProcessor` in favor of `DataLoaderIterDataFetcher` ([#9052](https://github.com/Lightning-AI/lightning/pull/9052)) -- Removed `Plugin` in `base_plugin.py` in favor of accessing `TrainingTypePlugin` and `PrecisionPlugin` directly instead ([#9066](https://github.com/Lightning-AI/lightning/pull/9066)) -- Removed `teardown` from `ParallelPlugin` ([#8943](https://github.com/Lightning-AI/lightning/pull/8943)) -- Removed deprecated `profiled_functions` argument from `PyTorchProfiler` ([#9178](https://github.com/Lightning-AI/lightning/pull/9178)) -- Removed deprecated `pytorch_lightning.utilities.argparse_utils` module ([#9166](https://github.com/Lightning-AI/lightning/pull/9166)) -- Removed deprecated property `Trainer.running_sanity_check` in favor of `Trainer.sanity_checking` ([#9209](https://github.com/Lightning-AI/lightning/pull/9209)) -- Removed deprecated `BaseProfiler.output_filename` arg from it and its descendants in favor of `dirpath` and `filename` ([#9214](https://github.com/Lightning-AI/lightning/pull/9214)) -- Removed deprecated property `ModelCheckpoint.period` in favor of `ModelCheckpoint.every_n_epochs` ([#9213](https://github.com/Lightning-AI/lightning/pull/9213)) -- Removed deprecated `auto_move_data` decorator ([#9231](https://github.com/Lightning-AI/lightning/pull/9231)) -- Removed deprecated property `LightningModule.datamodule` in favor of `Trainer.datamodule` ([#9233](https://github.com/Lightning-AI/lightning/pull/9233)) -- Removed deprecated properties `DeepSpeedPlugin.cpu_offload*` in favor of `offload_optimizer`, `offload_parameters` and `pin_memory` ([#9244](https://github.com/Lightning-AI/lightning/pull/9244)) -- Removed deprecated property `AcceleratorConnector.is_using_torchelastic` in favor of `TorchElasticEnvironment.is_using_torchelastic()` 
([#9729](https://github.com/Lightning-AI/lightning/pull/9729)) -- Removed `pl.utilities.debugging.InternalDebugger` ([#9680](https://github.com/Lightning-AI/lightning/pull/9680)) -- Removed `call_configure_sharded_model_hook` property from `Accelerator` and `TrainingTypePlugin` ([#9612](https://github.com/Lightning-AI/lightning/pull/9612)) -- Removed `TrainerProperties` mixin and moved property definitions directly into `Trainer` ([#9495](https://github.com/Lightning-AI/lightning/pull/9495)) -- Removed a redundant warning with `ModelCheckpoint(monitor=None)` callback ([#9875](https://github.com/Lightning-AI/lightning/pull/9875)) -- Remove `epoch` from `trainer.logged_metrics` ([#9904](https://github.com/Lightning-AI/lightning/pull/9904)) -- Remove deprecated `distributed_backend` from `Trainer` ([#10017](https://github.com/Lightning-AI/lightning/pull/10017)) -- Removed `process_idx` from the `{DDPSpawnPlugin,TPUSpawnPlugin}.new_process` methods ([#10022](https://github.com/Lightning-AI/lightning/pull/10022)) -- Removed automatic patching of `{train,val,test,predict}_dataloader()` on the `LightningModule` ([#9764](https://github.com/Lightning-AI/lightning/pull/9764)) -- Removed `pl.trainer.connectors.OptimizerConnector` ([#10120](https://github.com/Lightning-AI/lightning/pull/10120)) +- Removed deprecated `metrics` ([#8586](https://github.com/Lightning-AI/pytorch-lightning/pull/8586/)) +- Removed the deprecated `outputs` argument in both the `LightningModule.on_train_epoch_end` and `Callback.on_train_epoch_end` hooks ([#8587](https://github.com/Lightning-AI/pytorch-lightning/pull/8587)) +- Removed the deprecated `TrainerLoggingMixin` class ([#8609](https://github.com/Lightning-AI/pytorch-lightning/pull/8609)) +- Removed the deprecated `TrainerTrainingTricksMixin` class ([#8679](https://github.com/Lightning-AI/pytorch-lightning/pull/8679)) +- Removed the deprecated `optimizer_idx` from `training_step` as an accepted argument in manual optimization ([#8576](https://github.com/Lightning-AI/pytorch-lightning/pull/8576)) +- Removed support for the deprecated `on_save_checkpoint` signature. The hook now takes a `checkpoint` positional parameter ([#8697](https://github.com/Lightning-AI/pytorch-lightning/pull/8697)) +- Removed support for the deprecated `on_load_checkpoint` signature. 
The hook now takes a `pl_module` positional parameter ([#8697](https://github.com/Lightning-AI/pytorch-lightning/pull/8697)) +- Removed the deprecated `save_function` property in `ModelCheckpoint` ([#8680](https://github.com/Lightning-AI/pytorch-lightning/pull/8680)) +- Removed the deprecated `model` argument from `ModelCheckpoint.save_checkpoint` ([#8688](https://github.com/Lightning-AI/pytorch-lightning/pull/8688)) +- Removed the deprecated `sync_step` argument from `WandbLogger` ([#8763](https://github.com/Lightning-AI/pytorch-lightning/pull/8763)) +- Removed the deprecated `Trainer.truncated_bptt_steps` in favor of `LightningModule.truncated_bptt_steps` ([#8826](https://github.com/Lightning-AI/pytorch-lightning/pull/8826)) +- Removed `LightningModule.write_predictions` and `LightningModule.write_predictions_dict` ([#8850](https://github.com/Lightning-AI/pytorch-lightning/pull/8850)) +- Removed `on_reset_*_dataloader` hooks in TrainingType Plugins and Accelerators ([#8858](https://github.com/Lightning-AI/pytorch-lightning/pull/8858)) +- Removed deprecated `GradInformation` module in favor of `pl.utilities.grads` ([#8831](https://github.com/Lightning-AI/pytorch-lightning/pull/8831/)) +- Removed `TrainingTypePlugin.on_save` and `Accelerator.on_save` ([#9023](https://github.com/Lightning-AI/pytorch-lightning/pull/9023)) +- Removed `{Accelerator,TrainingTypePlugin,PrecisionPlugin}.post_optimizer_step` ([#9746](https://github.com/Lightning-AI/pytorch-lightning/pull/9746)) +- Removed deprecated `connect_precision_plugin` and `connect_training_type_plugin` from `Accelerator` ([#9019](https://github.com/Lightning-AI/pytorch-lightning/pull/9019)) +- Removed `on_train_epoch_end` from `Accelerator` ([#9035](https://github.com/Lightning-AI/pytorch-lightning/pull/9035)) +- Removed `InterBatchProcessor` in favor of `DataLoaderIterDataFetcher` ([#9052](https://github.com/Lightning-AI/pytorch-lightning/pull/9052)) +- Removed `Plugin` in `base_plugin.py` in favor of accessing `TrainingTypePlugin` and `PrecisionPlugin` directly instead ([#9066](https://github.com/Lightning-AI/pytorch-lightning/pull/9066)) +- Removed `teardown` from `ParallelPlugin` ([#8943](https://github.com/Lightning-AI/pytorch-lightning/pull/8943)) +- Removed deprecated `profiled_functions` argument from `PyTorchProfiler` ([#9178](https://github.com/Lightning-AI/pytorch-lightning/pull/9178)) +- Removed deprecated `pytorch_lightning.utilities.argparse_utils` module ([#9166](https://github.com/Lightning-AI/pytorch-lightning/pull/9166)) +- Removed deprecated property `Trainer.running_sanity_check` in favor of `Trainer.sanity_checking` ([#9209](https://github.com/Lightning-AI/pytorch-lightning/pull/9209)) +- Removed deprecated `BaseProfiler.output_filename` arg from it and its descendants in favor of `dirpath` and `filename` ([#9214](https://github.com/Lightning-AI/pytorch-lightning/pull/9214)) +- Removed deprecated property `ModelCheckpoint.period` in favor of `ModelCheckpoint.every_n_epochs` ([#9213](https://github.com/Lightning-AI/pytorch-lightning/pull/9213)) +- Removed deprecated `auto_move_data` decorator ([#9231](https://github.com/Lightning-AI/pytorch-lightning/pull/9231)) +- Removed deprecated property `LightningModule.datamodule` in favor of `Trainer.datamodule` ([#9233](https://github.com/Lightning-AI/pytorch-lightning/pull/9233)) +- Removed deprecated properties `DeepSpeedPlugin.cpu_offload*` in favor of `offload_optimizer`, `offload_parameters` and `pin_memory` 
([#9244](https://github.com/Lightning-AI/pytorch-lightning/pull/9244)) +- Removed deprecated property `AcceleratorConnector.is_using_torchelastic` in favor of `TorchElasticEnvironment.is_using_torchelastic()` ([#9729](https://github.com/Lightning-AI/pytorch-lightning/pull/9729)) +- Removed `pl.utilities.debugging.InternalDebugger` ([#9680](https://github.com/Lightning-AI/pytorch-lightning/pull/9680)) +- Removed `call_configure_sharded_model_hook` property from `Accelerator` and `TrainingTypePlugin` ([#9612](https://github.com/Lightning-AI/pytorch-lightning/pull/9612)) +- Removed `TrainerProperties` mixin and moved property definitions directly into `Trainer` ([#9495](https://github.com/Lightning-AI/pytorch-lightning/pull/9495)) +- Removed a redundant warning with `ModelCheckpoint(monitor=None)` callback ([#9875](https://github.com/Lightning-AI/pytorch-lightning/pull/9875)) +- Remove `epoch` from `trainer.logged_metrics` ([#9904](https://github.com/Lightning-AI/pytorch-lightning/pull/9904)) +- Remove deprecated `distributed_backend` from `Trainer` ([#10017](https://github.com/Lightning-AI/pytorch-lightning/pull/10017)) +- Removed `process_idx` from the `{DDPSpawnPlugin,TPUSpawnPlugin}.new_process` methods ([#10022](https://github.com/Lightning-AI/pytorch-lightning/pull/10022)) +- Removed automatic patching of `{train,val,test,predict}_dataloader()` on the `LightningModule` ([#9764](https://github.com/Lightning-AI/pytorch-lightning/pull/9764)) +- Removed `pl.trainer.connectors.OptimizerConnector` ([#10120](https://github.com/Lightning-AI/pytorch-lightning/pull/10120)) ### Fixed -- Fixed ImageNet evaluation in example ([#10179](https://github.com/Lightning-AI/lightning/pull/10179)) -- Fixed an issue with logger outputs not being finalized correctly after prediction runs ([#8685](https://github.com/Lightning-AI/lightning/pull/8685)) -- Fixed `move_metrics_to_cpu` moving the loss to CPU while training on device ([#9308](https://github.com/Lightning-AI/lightning/pull/9308)) -- Fixed incorrect main progress bar indicator when resuming training mid-epoch ([#9310](https://github.com/Lightning-AI/lightning/pull/9310)) -- Fixed an issue with freeing memory of datafetchers during teardown ([#9387](https://github.com/Lightning-AI/lightning/pull/9387)) -- Fixed a bug where the training step output needed to be `deepcopy`-ed ([#9349](https://github.com/Lightning-AI/lightning/pull/9349)) -- Fixed an issue with freeing memory allocated by the data iterators in `Loop.on_run_end` ([#9386](https://github.com/Lightning-AI/lightning/pull/9386), [#9915](https://github.com/Lightning-AI/lightning/pull/9915)) -- Fixed `BasePredictionWriter` not returning the batch indices in a non-distributed setting ([#9432](https://github.com/Lightning-AI/lightning/pull/9432)) -- Fixed an error when running in XLA environments with no TPU attached ([#9572](https://github.com/Lightning-AI/lightning/pull/9572)) -- Fixed check on torchmetrics logged whose `compute()` output is a multielement tensor ([#9582](https://github.com/Lightning-AI/lightning/pull/9582)) -- Fixed gradient accumulation for `DDPShardedPlugin` ([#9122](https://github.com/Lightning-AI/lightning/pull/9122)) -- Fixed missing DeepSpeed distributed call ([#9540](https://github.com/Lightning-AI/lightning/pull/9540)) -- Fixed an issue with wrapped LightningModule during evaluation; The LightningModule no longer gets wrapped with data-parallel modules when not fitting in `DDPPlugin`, `DDPSpawnPlugin`, `DDPShardedPlugin`, `DDPSpawnShardedPlugin` 
([#9096](https://github.com/Lightning-AI/lightning/pull/9096)) -- Fixed `trainer.accumulate_grad_batches` to be an int on init. The default value for it is now `None` inside Trainer ([#9652](https://github.com/Lightning-AI/lightning/pull/9652)) -- Fixed `broadcast` in `DDPPlugin` and `DDPSpawnPlugin` to respect the `src` input ([#9691](https://github.com/Lightning-AI/lightning/pull/9691)) -- Fixed `self.log(on_epoch=True, reduce_fx=sum))` for the `on_batch_start` and `on_train_batch_start` hooks ([#9791](https://github.com/Lightning-AI/lightning/pull/9791)) -- Fixed `self.log(on_epoch=True)` for the `on_batch_start` and `on_train_batch_start` hooks ([#9780](https://github.com/Lightning-AI/lightning/pull/9780)) -- Fixed restoring training state during `Trainer.fit` only ([#9413](https://github.com/Lightning-AI/lightning/pull/9413)) -- Fixed DeepSpeed and Lightning both calling the scheduler ([#9788](https://github.com/Lightning-AI/lightning/pull/9788)) -- Fixed missing arguments when saving hyperparameters from the parent class but not from the child class ([#9800](https://github.com/Lightning-AI/lightning/pull/9800)) -- Fixed DeepSpeed GPU device IDs ([#9847](https://github.com/Lightning-AI/lightning/pull/9847)) -- Reset `val_dataloader` in `tuner/batch_size_scaling` ([#9857](https://github.com/Lightning-AI/lightning/pull/9857)) -- Fixed use of `LightningCLI` in computer_vision_fine_tuning.py example ([#9934](https://github.com/Lightning-AI/lightning/pull/9934)) -- Fixed issue with non-init dataclass fields in `apply_to_collection` ([#9963](https://github.com/Lightning-AI/lightning/pull/9963)) -- Reset `val_dataloader` in `tuner/batch_size_scaling` for binsearch ([#9975](https://github.com/Lightning-AI/lightning/pull/9975)) -- Fixed logic to check for spawn in dataloader `TrainerDataLoadingMixin._worker_check` ([#9902](https://github.com/Lightning-AI/lightning/pull/9902)) -- Fixed `train_dataloader` getting loaded twice when resuming from a checkpoint during `Trainer.fit()` ([#9671](https://github.com/Lightning-AI/lightning/pull/9671)) -- Fixed `LearningRateMonitor` logging with multiple param groups optimizer with no scheduler ([#10044](https://github.com/Lightning-AI/lightning/pull/10044)) -- Fixed undesired side effects being caused by `Trainer` patching dataloader methods on the `LightningModule` ([#9764](https://github.com/Lightning-AI/lightning/pull/9764)) -- Fixed gradients not being unscaled when clipping or logging the gradient norm ([#9287](https://github.com/Lightning-AI/lightning/pull/9287)) -- Fixed `on_before_optimizer_step` getting called before the optimizer closure (including backward) has run ([#10167](https://github.com/Lightning-AI/lightning/pull/10167)) -- Fixed monitor value in `ModelCheckpoint` getting moved to the wrong device in a special case where it becomes NaN ([#10118](https://github.com/Lightning-AI/lightning/pull/10118)) -- Fixed creation of `dirpath` in `BaseProfiler` if it doesn't exist ([#10073](https://github.com/Lightning-AI/lightning/pull/10073)) -- Fixed incorrect handling of sigterm ([#10189](https://github.com/Lightning-AI/lightning/pull/10189)) -- Fixed bug where `log(on_step=True, on_epoch=True, sync_dist=True)` wouldn't reduce the value on step ([#10227](https://github.com/Lightning-AI/lightning/pull/10227)) -- Fixed an issue with `pl.utilities.seed.reset_seed` converting the `PL_SEED_WORKERS` environment variable to `bool` ([#10099](https://github.com/Lightning-AI/lightning/pull/10099)) -- Fixed iterating over a logger collection when 
`fast_dev_run > 0` ([#10232](https://github.com/Lightning-AI/lightning/pull/10232))
-- Fixed `batch_size` in `ResultCollection` not being reset to 1 on epoch end ([#10242](https://github.com/Lightning-AI/lightning/pull/10242))
-- Fixed `distrib_type` not being set when training plugin instances are being passed to the Trainer ([#10251](https://github.com/Lightning-AI/lightning/pull/10251))
+- Fixed ImageNet evaluation in the example ([#10179](https://github.com/Lightning-AI/pytorch-lightning/pull/10179))
+- Fixed an issue with logger outputs not being finalized correctly after prediction runs ([#8685](https://github.com/Lightning-AI/pytorch-lightning/pull/8685))
+- Fixed `move_metrics_to_cpu` moving the loss to CPU while training on device ([#9308](https://github.com/Lightning-AI/pytorch-lightning/pull/9308))
+- Fixed incorrect main progress bar indicator when resuming training mid-epoch ([#9310](https://github.com/Lightning-AI/pytorch-lightning/pull/9310))
+- Fixed an issue with freeing memory of datafetchers during teardown ([#9387](https://github.com/Lightning-AI/pytorch-lightning/pull/9387))
+- Fixed a bug where the training step output needed to be `deepcopy`-ed ([#9349](https://github.com/Lightning-AI/pytorch-lightning/pull/9349))
+- Fixed an issue with freeing memory allocated by the data iterators in `Loop.on_run_end` ([#9386](https://github.com/Lightning-AI/pytorch-lightning/pull/9386), [#9915](https://github.com/Lightning-AI/pytorch-lightning/pull/9915))
+- Fixed `BasePredictionWriter` not returning the batch indices in a non-distributed setting ([#9432](https://github.com/Lightning-AI/pytorch-lightning/pull/9432))
+- Fixed an error when running in XLA environments with no TPU attached ([#9572](https://github.com/Lightning-AI/pytorch-lightning/pull/9572))
+- Fixed the check on logged torchmetrics whose `compute()` output is a multi-element tensor ([#9582](https://github.com/Lightning-AI/pytorch-lightning/pull/9582))
+- Fixed gradient accumulation for `DDPShardedPlugin` ([#9122](https://github.com/Lightning-AI/pytorch-lightning/pull/9122))
+- Fixed missing DeepSpeed distributed call ([#9540](https://github.com/Lightning-AI/pytorch-lightning/pull/9540))
+- Fixed an issue with wrapped LightningModule during evaluation; the LightningModule no longer gets wrapped with data-parallel modules when not fitting in `DDPPlugin`, `DDPSpawnPlugin`, `DDPShardedPlugin`, `DDPSpawnShardedPlugin` ([#9096](https://github.com/Lightning-AI/pytorch-lightning/pull/9096))
+- Fixed `trainer.accumulate_grad_batches` to be an int on init. The default value for it is now `None` inside Trainer ([#9652](https://github.com/Lightning-AI/pytorch-lightning/pull/9652))
+- Fixed `broadcast` in `DDPPlugin` and `DDPSpawnPlugin` to respect the `src` input ([#9691](https://github.com/Lightning-AI/pytorch-lightning/pull/9691))
+- Fixed `self.log(on_epoch=True, reduce_fx=sum)` for the `on_batch_start` and `on_train_batch_start` hooks ([#9791](https://github.com/Lightning-AI/pytorch-lightning/pull/9791))
+- Fixed `self.log(on_epoch=True)` for the `on_batch_start` and `on_train_batch_start` hooks ([#9780](https://github.com/Lightning-AI/pytorch-lightning/pull/9780))
+- Fixed restoring training state during `Trainer.fit` only ([#9413](https://github.com/Lightning-AI/pytorch-lightning/pull/9413))
+- Fixed DeepSpeed and Lightning both calling the scheduler ([#9788](https://github.com/Lightning-AI/pytorch-lightning/pull/9788))
+- Fixed missing arguments when saving hyperparameters from the parent class but not from the child class ([#9800](https://github.com/Lightning-AI/pytorch-lightning/pull/9800))
+- Fixed DeepSpeed GPU device IDs ([#9847](https://github.com/Lightning-AI/pytorch-lightning/pull/9847))
+- Reset `val_dataloader` in `tuner/batch_size_scaling` ([#9857](https://github.com/Lightning-AI/pytorch-lightning/pull/9857))
+- Fixed use of `LightningCLI` in the `computer_vision_fine_tuning.py` example ([#9934](https://github.com/Lightning-AI/pytorch-lightning/pull/9934))
+- Fixed an issue with non-init dataclass fields in `apply_to_collection` ([#9963](https://github.com/Lightning-AI/pytorch-lightning/pull/9963))
+- Reset `val_dataloader` in `tuner/batch_size_scaling` for binsearch ([#9975](https://github.com/Lightning-AI/pytorch-lightning/pull/9975))
+- Fixed logic to check for spawn in dataloader `TrainerDataLoadingMixin._worker_check` ([#9902](https://github.com/Lightning-AI/pytorch-lightning/pull/9902))
+- Fixed `train_dataloader` getting loaded twice when resuming from a checkpoint during `Trainer.fit()` ([#9671](https://github.com/Lightning-AI/pytorch-lightning/pull/9671))
+- Fixed `LearningRateMonitor` logging for an optimizer with multiple param groups and no scheduler ([#10044](https://github.com/Lightning-AI/pytorch-lightning/pull/10044))
+- Fixed undesired side effects being caused by `Trainer` patching dataloader methods on the `LightningModule` ([#9764](https://github.com/Lightning-AI/pytorch-lightning/pull/9764))
+- Fixed gradients not being unscaled when clipping or logging the gradient norm ([#9287](https://github.com/Lightning-AI/pytorch-lightning/pull/9287))
+- Fixed `on_before_optimizer_step` getting called before the optimizer closure (including backward) has run ([#10167](https://github.com/Lightning-AI/pytorch-lightning/pull/10167))
+- Fixed monitor value in `ModelCheckpoint` getting moved to the wrong device in a special case where it becomes NaN ([#10118](https://github.com/Lightning-AI/pytorch-lightning/pull/10118))
+- Fixed creation of `dirpath` in `BaseProfiler` if it doesn't exist ([#10073](https://github.com/Lightning-AI/pytorch-lightning/pull/10073))
+- Fixed incorrect handling of SIGTERM ([#10189](https://github.com/Lightning-AI/pytorch-lightning/pull/10189))
+- Fixed bug where `log(on_step=True, on_epoch=True, sync_dist=True)` wouldn't reduce the value on step ([#10227](https://github.com/Lightning-AI/pytorch-lightning/pull/10227))
+- Fixed an issue with `pl.utilities.seed.reset_seed` converting the `PL_SEED_WORKERS` environment variable to `bool` ([#10099](https://github.com/Lightning-AI/pytorch-lightning/pull/10099))
+- Fixed iterating over a logger collection when `fast_dev_run > 0` ([#10232](https://github.com/Lightning-AI/pytorch-lightning/pull/10232))
+- Fixed `batch_size` in `ResultCollection` not being reset to 1 on epoch end ([#10242](https://github.com/Lightning-AI/pytorch-lightning/pull/10242))
+- Fixed `distrib_type` not being set when training plugin instances are being passed to the Trainer ([#10251](https://github.com/Lightning-AI/pytorch-lightning/pull/10251))
## [1.4.9] - 2021-09-30
-- Fixed `lr_find` to generate same results on multiple calls ([#9704](https://github.com/Lightning-AI/lightning/pull/9704))
-- Fixed `reset` metrics on validation epoch end ([#9717](https://github.com/Lightning-AI/lightning/pull/9717))
-- Fixed input validation for `gradient_clip_val`, `gradient_clip_algorithm`, `track_grad_norm` and `terminate_on_nan` Trainer arguments ([#9595](https://github.com/Lightning-AI/lightning/pull/9595))
-- Reset metrics before each task starts ([#9410](https://github.com/Lightning-AI/lightning/pull/9410))
+- Fixed `lr_find` to generate the same results on multiple calls ([#9704](https://github.com/Lightning-AI/pytorch-lightning/pull/9704))
+- Fixed `reset` metrics on validation epoch end ([#9717](https://github.com/Lightning-AI/pytorch-lightning/pull/9717))
+- Fixed input validation for `gradient_clip_val`, `gradient_clip_algorithm`, `track_grad_norm` and `terminate_on_nan` Trainer arguments ([#9595](https://github.com/Lightning-AI/pytorch-lightning/pull/9595))
+- Reset metrics before each task starts ([#9410](https://github.com/Lightning-AI/pytorch-lightning/pull/9410))
## [1.4.8] - 2021-09-22
-- Fixed error reporting in DDP process reconciliation when processes are launched by an external agent ([#9389](https://github.com/Lightning-AI/lightning/pull/9389))
-- Added PL_RECONCILE_PROCESS environment variable to enable process reconciliation regardless of cluster environment settings ([#9389](https://github.com/Lightning-AI/lightning/pull/9389))
-- Fixed `add_argparse_args` raising `TypeError` when args are typed as `typing.Generic` in Python 3.6 ([#9554](https://github.com/Lightning-AI/lightning/pull/9554))
-- Fixed back-compatibility for saving hyperparameters from a single container and inferring its argument name by reverting [#9125](https://github.com/Lightning-AI/lightning/pull/9125) ([#9642](https://github.com/Lightning-AI/lightning/pull/9642))
+- Fixed error reporting in DDP process reconciliation when processes are launched by an external agent ([#9389](https://github.com/Lightning-AI/pytorch-lightning/pull/9389))
+- Added the `PL_RECONCILE_PROCESS` environment variable to enable process reconciliation regardless of cluster environment settings ([#9389](https://github.com/Lightning-AI/pytorch-lightning/pull/9389))
+- Fixed `add_argparse_args` raising `TypeError` when args are typed as `typing.Generic` in Python 3.6 ([#9554](https://github.com/Lightning-AI/pytorch-lightning/pull/9554))
+- Fixed back-compatibility for saving hyperparameters from a single container and inferring its argument name by reverting [#9125](https://github.com/Lightning-AI/pytorch-lightning/pull/9125) ([#9642](https://github.com/Lightning-AI/pytorch-lightning/pull/9642))
## [1.4.7] - 2021-09-14
-- Fixed logging of nan parameters ([#9364](https://github.com/Lightning-AI/lightning/pull/9364))
-- Fixed `replace_sampler` missing the batch size under specific conditions ([#9367](https://github.com/Lightning-AI/lightning/pull/9367))
-- Pass init args to ShardedDataParallel ([#9483](https://github.com/Lightning-AI/lightning/pull/9483))
-- Fixed collision of user argument when using ShardedDDP ([#9512](https://github.com/Lightning-AI/lightning/pull/9512))
-- Fixed DeepSpeed crash for RNNs ([#9489](https://github.com/Lightning-AI/lightning/pull/9489))
+- Fixed logging of NaN parameters ([#9364](https://github.com/Lightning-AI/pytorch-lightning/pull/9364))
+- Fixed `replace_sampler` missing the batch size under specific conditions ([#9367](https://github.com/Lightning-AI/pytorch-lightning/pull/9367))
+- Pass init args to ShardedDataParallel ([#9483](https://github.com/Lightning-AI/pytorch-lightning/pull/9483))
+- Fixed collision of user argument when using ShardedDDP ([#9512](https://github.com/Lightning-AI/pytorch-lightning/pull/9512))
+- Fixed DeepSpeed crash for RNNs ([#9489](https://github.com/Lightning-AI/pytorch-lightning/pull/9489))
## [1.4.6] - 2021-09-07
-- Fixed an issues with export to ONNX format when a model has multiple inputs ([#8800](https://github.com/Lightning-AI/lightning/pull/8800))
-- Removed deprecation warnings being called for `on_{task}_dataloader` ([#9279](https://github.com/Lightning-AI/lightning/pull/9279))
+- Fixed an issue with export to ONNX format when a model has multiple inputs ([#8800](https://github.com/Lightning-AI/pytorch-lightning/pull/8800))
+- Removed deprecation warnings being called for `on_{task}_dataloader` ([#9279](https://github.com/Lightning-AI/pytorch-lightning/pull/9279))
- Fixed save/load/resume from checkpoint for DeepSpeed Plugin (
- [#8397](https://github.com/Lightning-AI/lightning/pull/8397),
- [#8644](https://github.com/Lightning-AI/lightning/pull/8644),
- [#8627](https://github.com/Lightning-AI/lightning/pull/8627))
-- Fixed `EarlyStopping` running on train epoch end when `check_val_every_n_epoch>1` is set ([#9156](https://github.com/Lightning-AI/lightning/pull/9156))
-- Fixed an issue with logger outputs not being finalized correctly after prediction runs ([#8333](https://github.com/Lightning-AI/lightning/pull/8333))
-- Fixed the Apex and DeepSpeed plugin closure running after the `on_before_optimizer_step` hook ([#9288](https://github.com/Lightning-AI/lightning/pull/9288))
-- Fixed the Native AMP plugin closure not running with manual optimization ([#9288](https://github.com/Lightning-AI/lightning/pull/9288))
-- Fixed bug where data-loading functions where not getting the correct running stage passed ([#8858](https://github.com/Lightning-AI/lightning/pull/8858))
-- Fixed intra-epoch evaluation outputs staying in memory when the respective `*_epoch_end` hook wasn't overridden ([#9261](https://github.com/Lightning-AI/lightning/pull/9261))
-- Fixed error handling in DDP process reconciliation when `_sync_dir` was not initialized ([#9267](https://github.com/Lightning-AI/lightning/pull/9267))
-- Fixed PyTorch Profiler not enabled for manual optimization ([#9316](https://github.com/Lightning-AI/lightning/pull/9316))
-- Fixed inspection of other args when a container is specified in `save_hyperparameters` ([#9125](https://github.com/Lightning-AI/lightning/pull/9125))
-- Fixed signature of `Timer.on_train_epoch_end` and `StochasticWeightAveraging.on_train_epoch_end` to prevent unwanted deprecation warnings ([#9347](https://github.com/Lightning-AI/lightning/pull/9347))
+ [#8397](https://github.com/Lightning-AI/pytorch-lightning/pull/8397),
+ [#8644](https://github.com/Lightning-AI/pytorch-lightning/pull/8644),
+ [#8627](https://github.com/Lightning-AI/pytorch-lightning/pull/8627))
+- Fixed `EarlyStopping` running on train epoch end when `check_val_every_n_epoch>1` is set ([#9156](https://github.com/Lightning-AI/pytorch-lightning/pull/9156))
+- Fixed an issue with logger outputs not being finalized correctly after prediction runs ([#8333](https://github.com/Lightning-AI/pytorch-lightning/pull/8333))
+- Fixed the Apex and DeepSpeed plugin closure running after the `on_before_optimizer_step` hook ([#9288](https://github.com/Lightning-AI/pytorch-lightning/pull/9288))
+- Fixed the Native AMP plugin closure not running with manual optimization ([#9288](https://github.com/Lightning-AI/pytorch-lightning/pull/9288))
+- Fixed bug where data-loading functions were not getting the correct running stage passed ([#8858](https://github.com/Lightning-AI/pytorch-lightning/pull/8858))
+- Fixed intra-epoch evaluation outputs staying in memory when the respective `*_epoch_end` hook wasn't overridden ([#9261](https://github.com/Lightning-AI/pytorch-lightning/pull/9261))
+- Fixed error handling in DDP process reconciliation when `_sync_dir` was not initialized ([#9267](https://github.com/Lightning-AI/pytorch-lightning/pull/9267))
+- Fixed PyTorch Profiler not enabled for manual optimization ([#9316](https://github.com/Lightning-AI/pytorch-lightning/pull/9316))
+- Fixed inspection of other args when a container is specified in `save_hyperparameters` ([#9125](https://github.com/Lightning-AI/pytorch-lightning/pull/9125))
+- Fixed signature of `Timer.on_train_epoch_end` and `StochasticWeightAveraging.on_train_epoch_end` to prevent unwanted deprecation warnings ([#9347](https://github.com/Lightning-AI/pytorch-lightning/pull/9347))
## [1.4.5] - 2021-08-31
-- Fixed reduction using `self.log(sync_dict=True, reduce_fx={mean,max})` ([#9142](https://github.com/Lightning-AI/lightning/pull/9142))
-- Fixed not setting a default value for `max_epochs` if `max_time` was specified on the `Trainer` constructor ([#9072](https://github.com/Lightning-AI/lightning/pull/9072))
-- Fixed the CometLogger, no longer modifies the metrics in place. Instead creates a copy of metrics before performing any operations ([#9150](https://github.com/Lightning-AI/lightning/pull/9150))
-- Fixed `DDP` "CUDA error: initialization error" due to a `copy` instead of `deepcopy` on `ResultCollection` ([#9239](https://github.com/Lightning-AI/lightning/pull/9239))
+- Fixed reduction using `self.log(sync_dist=True, reduce_fx={mean,max})` ([#9142](https://github.com/Lightning-AI/pytorch-lightning/pull/9142))
+- Fixed not setting a default value for `max_epochs` if `max_time` was specified on the `Trainer` constructor ([#9072](https://github.com/Lightning-AI/pytorch-lightning/pull/9072))
+- Fixed the CometLogger so it no longer modifies the metrics in place; it now creates a copy of the metrics before performing any operations ([#9150](https://github.com/Lightning-AI/pytorch-lightning/pull/9150))
+- Fixed `DDP` "CUDA error: initialization error" due to a `copy` instead of `deepcopy` on `ResultCollection` ([#9239](https://github.com/Lightning-AI/pytorch-lightning/pull/9239))
## [1.4.4] - 2021-08-24
-- Fixed a bug in the binary search mode of auto batch size scaling where exception was raised if the first trainer run resulted in OOM ([#8954](https://github.com/Lightning-AI/lightning/pull/8954))
-- Fixed a bug causing logging with `log_gpu_memory='min_max'` not working ([#9013](https://github.com/Lightning-AI/lightning/pull/9013))
+- Fixed a bug in the binary search mode of auto batch size scaling where an exception was raised if the first trainer run resulted in OOM ([#8954](https://github.com/Lightning-AI/pytorch-lightning/pull/8954))
+- Fixed a bug that caused logging with `log_gpu_memory='min_max'` not to work ([#9013](https://github.com/Lightning-AI/pytorch-lightning/pull/9013))
## [1.4.3] - 2021-08-17
-- Fixed plateau scheduler stepping on incomplete epoch ([#8861](https://github.com/Lightning-AI/lightning/pull/8861))
-- Fixed infinite loop with `CycleIterator` and multiple loaders ([#8889](https://github.com/Lightning-AI/lightning/pull/8889))
-- Fixed `StochasticWeightAveraging` with a list of learning rates not applying them to each param group ([#8747](https://github.com/Lightning-AI/lightning/pull/8747))
-- Restore original loaders if replaced by entrypoint ([#8885](https://github.com/Lightning-AI/lightning/pull/8885))
-- Fixed lost reference to `_Metadata` object in `ResultMetricCollection` ([#8932](https://github.com/Lightning-AI/lightning/pull/8932))
-- Ensure the existence of `DDPPlugin._sync_dir` in `reconciliate_processes` ([#8939](https://github.com/Lightning-AI/lightning/pull/8939))
+- Fixed plateau scheduler stepping on incomplete epoch ([#8861](https://github.com/Lightning-AI/pytorch-lightning/pull/8861))
+- Fixed infinite loop with `CycleIterator` and multiple loaders ([#8889](https://github.com/Lightning-AI/pytorch-lightning/pull/8889))
+- Fixed `StochasticWeightAveraging` with a list of learning rates not applying them to each param group ([#8747](https://github.com/Lightning-AI/pytorch-lightning/pull/8747))
+- Restore original loaders if replaced by entrypoint ([#8885](https://github.com/Lightning-AI/pytorch-lightning/pull/8885))
+- Fixed lost reference to `_Metadata` object in `ResultMetricCollection` ([#8932](https://github.com/Lightning-AI/pytorch-lightning/pull/8932))
+- Ensure the existence of `DDPPlugin._sync_dir` in `reconciliate_processes` ([#8939](https://github.com/Lightning-AI/pytorch-lightning/pull/8939))
## [1.4.2] - 2021-08-10
-- Fixed recursive call for `apply_to_collection(include_none=False)` ([#8719](https://github.com/Lightning-AI/lightning/pull/8719))
-- Fixed truncated backprop through time enablement when set as a property on the LightningModule and not the Trainer ([#8804](https://github.com/Lightning-AI/lightning/pull/8804/))
-- Fixed comments and exception message for metrics_to_scalars ([#8782](https://github.com/Lightning-AI/lightning/pull/8782/))
-- Fixed typo error in LightningLoggerBase.after_save_checkpoint docstring ([#8737](https://github.com/Lightning-AI/lightning/pull/8737/))
+- Fixed recursive call for `apply_to_collection(include_none=False)` ([#8719](https://github.com/Lightning-AI/pytorch-lightning/pull/8719))
+- Fixed truncated backprop through time enablement when set as a property on the LightningModule and not the Trainer ([#8804](https://github.com/Lightning-AI/pytorch-lightning/pull/8804/))
+- Fixed comments and exception message for `metrics_to_scalars` ([#8782](https://github.com/Lightning-AI/pytorch-lightning/pull/8782/))
+- Fixed a typo in the `LightningLoggerBase.after_save_checkpoint` docstring ([#8737](https://github.com/Lightning-AI/pytorch-lightning/pull/8737/))
## [1.4.1] - 2021-08-03
-- Fixed `trainer.fit_loop.split_idx` always returning `None` ([#8601](https://github.com/Lightning-AI/lightning/pull/8601))
-- Fixed references for `ResultCollection.extra` ([#8622](https://github.com/Lightning-AI/lightning/pull/8622))
-- Fixed reference issues during epoch end result collection ([#8621](https://github.com/Lightning-AI/lightning/pull/8621))
-- Fixed horovod auto-detection when horovod is not installed and the launcher is `mpirun` ([#8610](https://github.com/Lightning-AI/lightning/pull/8610))
-- Fixed an issue with `training_step` outputs not getting collected correctly for `training_epoch_end` ([#8613](https://github.com/Lightning-AI/lightning/pull/8613))
-- Fixed distributed types support for CPUs ([#8667](https://github.com/Lightning-AI/lightning/pull/8667))
-- Fixed a deadlock issue with DDP and torchelastic ([#8655](https://github.com/Lightning-AI/lightning/pull/8655))
-- Fixed `accelerator=ddp` choice for CPU ([#8645](https://github.com/Lightning-AI/lightning/pull/8645))
+- Fixed `trainer.fit_loop.split_idx` always returning `None` ([#8601](https://github.com/Lightning-AI/pytorch-lightning/pull/8601))
+- Fixed references for `ResultCollection.extra` ([#8622](https://github.com/Lightning-AI/pytorch-lightning/pull/8622))
+- Fixed reference issues during epoch end result collection ([#8621](https://github.com/Lightning-AI/pytorch-lightning/pull/8621))
+- Fixed horovod auto-detection when horovod is not installed and the launcher is `mpirun` ([#8610](https://github.com/Lightning-AI/pytorch-lightning/pull/8610))
+- Fixed an issue with `training_step` outputs not getting collected correctly for `training_epoch_end` ([#8613](https://github.com/Lightning-AI/pytorch-lightning/pull/8613))
+- Fixed distributed types support for CPUs ([#8667](https://github.com/Lightning-AI/pytorch-lightning/pull/8667))
+- Fixed a deadlock issue with DDP and torchelastic ([#8655](https://github.com/Lightning-AI/pytorch-lightning/pull/8655))
+- Fixed `accelerator=ddp` choice for CPU ([#8645](https://github.com/Lightning-AI/pytorch-lightning/pull/8645))
## [1.4.0] - 2021-07-27
### Added
-- Added `extract_batch_size` utility and corresponding tests to extract batch dimension from multiple batch types ([#8357](https://github.com/Lightning-AI/lightning/pull/8357/))
-- Added support for named parameter groups in `LearningRateMonitor` ([#7987](https://github.com/Lightning-AI/lightning/pull/7987))
-- Added `dataclass` support for `pl.utilities.apply_to_collection` ([#7935](https://github.com/Lightning-AI/lightning/pull/7935))
-- Added support to `LightningModule.to_torchscript` for saving to custom filesystems with `fsspec` ([#7617](https://github.com/Lightning-AI/lightning/pull/7617))
+- Added `extract_batch_size` utility and corresponding tests to extract batch dimension from multiple batch types ([#8357](https://github.com/Lightning-AI/pytorch-lightning/pull/8357/))
+- Added support for named parameter groups in `LearningRateMonitor` ([#7987](https://github.com/Lightning-AI/pytorch-lightning/pull/7987))
+- Added `dataclass` support for `pl.utilities.apply_to_collection`
([#7935](https://github.com/Lightning-AI/pytorch-lightning/pull/7935)) +- Added support to `LightningModule.to_torchscript` for saving to custom filesystems with `fsspec` ([#7617](https://github.com/Lightning-AI/pytorch-lightning/pull/7617)) - Added `KubeflowEnvironment` for use with the `PyTorchJob` operator in Kubeflow -- Added LightningCLI support for config files on object stores ([#7521](https://github.com/Lightning-AI/lightning/pull/7521)) -- Added `ModelPruning(prune_on_train_epoch_end=True|False)` to choose when to apply pruning ([#7704](https://github.com/Lightning-AI/lightning/pull/7704)) -- Added support for checkpointing based on a provided time interval during training ([#7515](https://github.com/Lightning-AI/lightning/pull/7515)) +- Added LightningCLI support for config files on object stores ([#7521](https://github.com/Lightning-AI/pytorch-lightning/pull/7521)) +- Added `ModelPruning(prune_on_train_epoch_end=True|False)` to choose when to apply pruning ([#7704](https://github.com/Lightning-AI/pytorch-lightning/pull/7704)) +- Added support for checkpointing based on a provided time interval during training ([#7515](https://github.com/Lightning-AI/pytorch-lightning/pull/7515)) - Progress tracking - * Added dataclasses for progress tracking ([#6603](https://github.com/Lightning-AI/lightning/pull/6603), - [#7574](https://github.com/Lightning-AI/lightning/pull/7574), - [#8140](https://github.com/Lightning-AI/lightning/pull/8140), - [#8362](https://github.com/Lightning-AI/lightning/pull/8362)) - * Add `{,load_}state_dict` to the progress tracking dataclasses ([#8140](https://github.com/Lightning-AI/lightning/pull/8140)) - * Connect the progress tracking dataclasses to the loops ([#8244](https://github.com/Lightning-AI/lightning/pull/8244), - [#8362](https://github.com/Lightning-AI/lightning/pull/8362)) - * Do not reset the progress tracking dataclasses total counters ([#8475](https://github.com/Lightning-AI/lightning/pull/8475)) -- Added support for passing a `LightningDataModule` positionally as the second argument to `trainer.{validate,test,predict}` ([#7431](https://github.com/Lightning-AI/lightning/pull/7431)) -- Added argument `trainer.predict(ckpt_path)` ([#7430](https://github.com/Lightning-AI/lightning/pull/7430)) -- Added `clip_grad_by_value` support for TPUs ([#7025](https://github.com/Lightning-AI/lightning/pull/7025)) -- Added support for passing any class to `is_overridden` ([#7918](https://github.com/Lightning-AI/lightning/pull/7918)) -- Added `sub_dir` parameter to `TensorBoardLogger` ([#6195](https://github.com/Lightning-AI/lightning/pull/6195)) -- Added correct `dataloader_idx` to batch transfer hooks ([#6241](https://github.com/Lightning-AI/lightning/pull/6241)) -- Added `include_none=bool` argument to `apply_to_collection` ([#7769](https://github.com/Lightning-AI/lightning/pull/7769)) -- Added `apply_to_collections` to apply a function to two zipped collections ([#7769](https://github.com/Lightning-AI/lightning/pull/7769)) -- Added `ddp_fully_sharded` support ([#7487](https://github.com/Lightning-AI/lightning/pull/7487)) -- Added `should_rank_save_checkpoint` property to Training Plugins ([#7684](https://github.com/Lightning-AI/lightning/pull/7684)) -- Added `log_grad_norm` hook to `LightningModule` to customize the logging of gradient norms ([#7873](https://github.com/Lightning-AI/lightning/pull/7873)) -- Added `save_config_filename` init argument to `LightningCLI` to ease resolving name conflicts ([#7741](https://github.com/Lightning-AI/lightning/pull/7741)) 
-- Added `save_config_overwrite` init argument to `LightningCLI` to ease overwriting existing config files ([#8059](https://github.com/Lightning-AI/lightning/pull/8059)) -- Added reset dataloader hooks to Training Plugins and Accelerators ([#7861](https://github.com/Lightning-AI/lightning/pull/7861)) -- Added trainer stage hooks for Training Plugins and Accelerators ([#7864](https://github.com/Lightning-AI/lightning/pull/7864)) -- Added the `on_before_optimizer_step` hook ([#8048](https://github.com/Lightning-AI/lightning/pull/8048)) -- Added IPU Accelerator ([#7867](https://github.com/Lightning-AI/lightning/pull/7867)) + * Added dataclasses for progress tracking ([#6603](https://github.com/Lightning-AI/pytorch-lightning/pull/6603), + [#7574](https://github.com/Lightning-AI/pytorch-lightning/pull/7574), + [#8140](https://github.com/Lightning-AI/pytorch-lightning/pull/8140), + [#8362](https://github.com/Lightning-AI/pytorch-lightning/pull/8362)) + * Add `{,load_}state_dict` to the progress tracking dataclasses ([#8140](https://github.com/Lightning-AI/pytorch-lightning/pull/8140)) + * Connect the progress tracking dataclasses to the loops ([#8244](https://github.com/Lightning-AI/pytorch-lightning/pull/8244), + [#8362](https://github.com/Lightning-AI/pytorch-lightning/pull/8362)) + * Do not reset the progress tracking dataclasses total counters ([#8475](https://github.com/Lightning-AI/pytorch-lightning/pull/8475)) +- Added support for passing a `LightningDataModule` positionally as the second argument to `trainer.{validate,test,predict}` ([#7431](https://github.com/Lightning-AI/pytorch-lightning/pull/7431)) +- Added argument `trainer.predict(ckpt_path)` ([#7430](https://github.com/Lightning-AI/pytorch-lightning/pull/7430)) +- Added `clip_grad_by_value` support for TPUs ([#7025](https://github.com/Lightning-AI/pytorch-lightning/pull/7025)) +- Added support for passing any class to `is_overridden` ([#7918](https://github.com/Lightning-AI/pytorch-lightning/pull/7918)) +- Added `sub_dir` parameter to `TensorBoardLogger` ([#6195](https://github.com/Lightning-AI/pytorch-lightning/pull/6195)) +- Added correct `dataloader_idx` to batch transfer hooks ([#6241](https://github.com/Lightning-AI/pytorch-lightning/pull/6241)) +- Added `include_none=bool` argument to `apply_to_collection` ([#7769](https://github.com/Lightning-AI/pytorch-lightning/pull/7769)) +- Added `apply_to_collections` to apply a function to two zipped collections ([#7769](https://github.com/Lightning-AI/pytorch-lightning/pull/7769)) +- Added `ddp_fully_sharded` support ([#7487](https://github.com/Lightning-AI/pytorch-lightning/pull/7487)) +- Added `should_rank_save_checkpoint` property to Training Plugins ([#7684](https://github.com/Lightning-AI/pytorch-lightning/pull/7684)) +- Added `log_grad_norm` hook to `LightningModule` to customize the logging of gradient norms ([#7873](https://github.com/Lightning-AI/pytorch-lightning/pull/7873)) +- Added `save_config_filename` init argument to `LightningCLI` to ease resolving name conflicts ([#7741](https://github.com/Lightning-AI/pytorch-lightning/pull/7741)) +- Added `save_config_overwrite` init argument to `LightningCLI` to ease overwriting existing config files ([#8059](https://github.com/Lightning-AI/pytorch-lightning/pull/8059)) +- Added reset dataloader hooks to Training Plugins and Accelerators ([#7861](https://github.com/Lightning-AI/pytorch-lightning/pull/7861)) +- Added trainer stage hooks for Training Plugins and Accelerators 
([#7864](https://github.com/Lightning-AI/pytorch-lightning/pull/7864)) +- Added the `on_before_optimizer_step` hook ([#8048](https://github.com/Lightning-AI/pytorch-lightning/pull/8048)) +- Added IPU Accelerator ([#7867](https://github.com/Lightning-AI/pytorch-lightning/pull/7867)) - Fault-tolerant training - * Added `{,load_}state_dict` to `ResultCollection` ([#7948](https://github.com/Lightning-AI/lightning/pull/7948)) - * Added `{,load_}state_dict` to `Loops` ([#8197](https://github.com/Lightning-AI/lightning/pull/8197)) - * Added `FastForwardSampler` and `CaptureIterableDataset` ([#8307](https://github.com/Lightning-AI/lightning/pull/8307)) - * Set `Loop.restarting=False` at the end of the first iteration ([#8362](https://github.com/Lightning-AI/lightning/pull/8362)) - * Save the loops state with the checkpoint (opt-in) ([#8362](https://github.com/Lightning-AI/lightning/pull/8362)) - * Save a checkpoint to restore the state on exception (opt-in) ([#8362](https://github.com/Lightning-AI/lightning/pull/8362)) - * Added `state_dict` and `load_state_dict` utilities for `CombinedLoader` + utilities for dataloader ([#8364](https://github.com/Lightning-AI/lightning/pull/8364)) -- Added `rank_zero_only` to `LightningModule.log` function ([#7966](https://github.com/Lightning-AI/lightning/pull/7966)) -- Added `metric_attribute` to `LightningModule.log` function ([#7966](https://github.com/Lightning-AI/lightning/pull/7966)) -- Added a warning if `Trainer(log_every_n_steps)` is a value too high for the training dataloader ([#7734](https://github.com/Lightning-AI/lightning/pull/7734)) -- Added LightningCLI support for argument links applied on instantiation ([#7895](https://github.com/Lightning-AI/lightning/pull/7895)) -- Added LightningCLI support for configurable callbacks that should always be present ([#7964](https://github.com/Lightning-AI/lightning/pull/7964)) -- Added DeepSpeed Infinity Support, and updated to DeepSpeed 0.4.0 ([#7234](https://github.com/Lightning-AI/lightning/pull/7234)) -- Added support for `torch.nn.UninitializedParameter` in `ModelSummary` ([#7642](https://github.com/Lightning-AI/lightning/pull/7642)) -- Added support `LightningModule.save_hyperparameters` when `LightningModule` is a dataclass ([#7992](https://github.com/Lightning-AI/lightning/pull/7992)) -- Added support for overriding `optimizer_zero_grad` and `optimizer_step` when using accumulate_grad_batches ([#7980](https://github.com/Lightning-AI/lightning/pull/7980)) -- Added `logger` boolean flag to `save_hyperparameters` ([#7960](https://github.com/Lightning-AI/lightning/pull/7960)) -- Added support for calling scripts using the module syntax (`python -m package.script`) ([#8073](https://github.com/Lightning-AI/lightning/pull/8073)) -- Added support for optimizers and learning rate schedulers to `LightningCLI` ([#8093](https://github.com/Lightning-AI/lightning/pull/8093)) -- Added XLA Profiler ([#8014](https://github.com/Lightning-AI/lightning/pull/8014)) -- Added `PrecisionPlugin.{pre,post}_backward` ([#8328](https://github.com/Lightning-AI/lightning/pull/8328)) -- Added `on_load_checkpoint` and `on_save_checkpoint` hooks to the `PrecisionPlugin` base class ([#7831](https://github.com/Lightning-AI/lightning/pull/7831)) -- Added `max_depth` parameter in `ModelSummary` ([#8062](https://github.com/Lightning-AI/lightning/pull/8062)) -- Added `XLAStatsMonitor` callback ([#8235](https://github.com/Lightning-AI/lightning/pull/8235)) -- Added `restore` function and `restarting` attribute to base `Loop` 
([#8247](https://github.com/Lightning-AI/lightning/pull/8247)) -- Added support for `save_hyperparameters` in `LightningDataModule` ([#3792](https://github.com/Lightning-AI/lightning/pull/3792)) -- Added the `ModelCheckpoint(save_on_train_epoch_end)` to choose when to run the saving logic ([#8389](https://github.com/Lightning-AI/lightning/pull/8389)) -- Added `LSFEnvironment` for distributed training with the LSF resource manager `jsrun` ([#5102](https://github.com/Lightning-AI/lightning/pull/5102)) -- Added support for `accelerator='cpu'|'gpu'|'tpu'|'ipu'|'auto'` ([#7808](https://github.com/Lightning-AI/lightning/pull/7808)) -- Added `tpu_spawn_debug` to plugin registry ([#7933](https://github.com/Lightning-AI/lightning/pull/7933)) -- Enabled traditional/manual launching of DDP processes through `LOCAL_RANK` and `NODE_RANK` environment variable assignments ([#7480](https://github.com/Lightning-AI/lightning/pull/7480)) -- Added `quantize_on_fit_end` argument to `QuantizationAwareTraining` ([#8464](https://github.com/Lightning-AI/lightning/pull/8464)) -- Added experimental support for loop specialization ([#8226](https://github.com/Lightning-AI/lightning/pull/8226)) -- Added support for `devices` flag to Trainer ([#8440](https://github.com/Lightning-AI/lightning/pull/8440)) -- Added private `prevent_trainer_and_dataloaders_deepcopy` context manager on the `LightningModule` ([#8472](https://github.com/Lightning-AI/lightning/pull/8472)) -- Added support for providing callables to the Lightning CLI instead of types ([#8400](https://github.com/Lightning-AI/lightning/pull/8400)) + * Added `{,load_}state_dict` to `ResultCollection` ([#7948](https://github.com/Lightning-AI/pytorch-lightning/pull/7948)) + * Added `{,load_}state_dict` to `Loops` ([#8197](https://github.com/Lightning-AI/pytorch-lightning/pull/8197)) + * Added `FastForwardSampler` and `CaptureIterableDataset` ([#8307](https://github.com/Lightning-AI/pytorch-lightning/pull/8307)) + * Set `Loop.restarting=False` at the end of the first iteration ([#8362](https://github.com/Lightning-AI/pytorch-lightning/pull/8362)) + * Save the loops state with the checkpoint (opt-in) ([#8362](https://github.com/Lightning-AI/pytorch-lightning/pull/8362)) + * Save a checkpoint to restore the state on exception (opt-in) ([#8362](https://github.com/Lightning-AI/pytorch-lightning/pull/8362)) + * Added `state_dict` and `load_state_dict` utilities for `CombinedLoader` + utilities for dataloader ([#8364](https://github.com/Lightning-AI/pytorch-lightning/pull/8364)) +- Added `rank_zero_only` to `LightningModule.log` function ([#7966](https://github.com/Lightning-AI/pytorch-lightning/pull/7966)) +- Added `metric_attribute` to `LightningModule.log` function ([#7966](https://github.com/Lightning-AI/pytorch-lightning/pull/7966)) +- Added a warning if `Trainer(log_every_n_steps)` is a value too high for the training dataloader ([#7734](https://github.com/Lightning-AI/pytorch-lightning/pull/7734)) +- Added LightningCLI support for argument links applied on instantiation ([#7895](https://github.com/Lightning-AI/pytorch-lightning/pull/7895)) +- Added LightningCLI support for configurable callbacks that should always be present ([#7964](https://github.com/Lightning-AI/pytorch-lightning/pull/7964)) +- Added DeepSpeed Infinity Support, and updated to DeepSpeed 0.4.0 ([#7234](https://github.com/Lightning-AI/pytorch-lightning/pull/7234)) +- Added support for `torch.nn.UninitializedParameter` in `ModelSummary` 
([#7642](https://github.com/Lightning-AI/pytorch-lightning/pull/7642))
+- Added support for `LightningModule.save_hyperparameters` when `LightningModule` is a dataclass ([#7992](https://github.com/Lightning-AI/pytorch-lightning/pull/7992))
+- Added support for overriding `optimizer_zero_grad` and `optimizer_step` when using `accumulate_grad_batches` ([#7980](https://github.com/Lightning-AI/pytorch-lightning/pull/7980))
+- Added `logger` boolean flag to `save_hyperparameters` ([#7960](https://github.com/Lightning-AI/pytorch-lightning/pull/7960))
+- Added support for calling scripts using the module syntax (`python -m package.script`) ([#8073](https://github.com/Lightning-AI/pytorch-lightning/pull/8073))
+- Added support for optimizers and learning rate schedulers to `LightningCLI` ([#8093](https://github.com/Lightning-AI/pytorch-lightning/pull/8093))
+- Added XLA Profiler ([#8014](https://github.com/Lightning-AI/pytorch-lightning/pull/8014))
+- Added `PrecisionPlugin.{pre,post}_backward` ([#8328](https://github.com/Lightning-AI/pytorch-lightning/pull/8328))
+- Added `on_load_checkpoint` and `on_save_checkpoint` hooks to the `PrecisionPlugin` base class ([#7831](https://github.com/Lightning-AI/pytorch-lightning/pull/7831))
+- Added `max_depth` parameter in `ModelSummary` ([#8062](https://github.com/Lightning-AI/pytorch-lightning/pull/8062))
+- Added `XLAStatsMonitor` callback ([#8235](https://github.com/Lightning-AI/pytorch-lightning/pull/8235))
+- Added `restore` function and `restarting` attribute to base `Loop` ([#8247](https://github.com/Lightning-AI/pytorch-lightning/pull/8247))
+- Added support for `save_hyperparameters` in `LightningDataModule` ([#3792](https://github.com/Lightning-AI/pytorch-lightning/pull/3792))
+- Added the `ModelCheckpoint(save_on_train_epoch_end)` argument to choose when to run the saving logic ([#8389](https://github.com/Lightning-AI/pytorch-lightning/pull/8389))
+- Added `LSFEnvironment` for distributed training with the LSF resource manager `jsrun` ([#5102](https://github.com/Lightning-AI/pytorch-lightning/pull/5102))
+- Added support for `accelerator='cpu'|'gpu'|'tpu'|'ipu'|'auto'` ([#7808](https://github.com/Lightning-AI/pytorch-lightning/pull/7808))
+- Added `tpu_spawn_debug` to plugin registry ([#7933](https://github.com/Lightning-AI/pytorch-lightning/pull/7933))
+- Enabled traditional/manual launching of DDP processes through `LOCAL_RANK` and `NODE_RANK` environment variable assignments ([#7480](https://github.com/Lightning-AI/pytorch-lightning/pull/7480))
+- Added `quantize_on_fit_end` argument to `QuantizationAwareTraining` ([#8464](https://github.com/Lightning-AI/pytorch-lightning/pull/8464))
+- Added experimental support for loop specialization ([#8226](https://github.com/Lightning-AI/pytorch-lightning/pull/8226))
+- Added support for `devices` flag to Trainer ([#8440](https://github.com/Lightning-AI/pytorch-lightning/pull/8440))
+- Added private `prevent_trainer_and_dataloaders_deepcopy` context manager on the `LightningModule` ([#8472](https://github.com/Lightning-AI/pytorch-lightning/pull/8472))
+- Added support for providing callables to the Lightning CLI instead of types ([#8400](https://github.com/Lightning-AI/pytorch-lightning/pull/8400))
### Changed
-- Decoupled device parsing logic from Accelerator connector to Trainer ([#8180](https://github.com/Lightning-AI/lightning/pull/8180))
-- Changed the `Trainer`'s `checkpoint_callback` argument to allow only boolean values ([#7539](https://github.com/Lightning-AI/lightning/pull/7539))
-- Log epoch metrics before
the `on_evaluation_end` hook ([#7272](https://github.com/Lightning-AI/lightning/pull/7272)) -- Explicitly disallow calling `self.log(on_epoch=False)` during epoch-only or single-call hooks ([#7874](https://github.com/Lightning-AI/lightning/pull/7874)) +- Decoupled device parsing logic from Accelerator connector to Trainer ([#8180](https://github.com/Lightning-AI/pytorch-lightning/pull/8180)) +- Changed the `Trainer`'s `checkpoint_callback` argument to allow only boolean values ([#7539](https://github.com/Lightning-AI/pytorch-lightning/pull/7539)) +- Log epoch metrics before the `on_evaluation_end` hook ([#7272](https://github.com/Lightning-AI/pytorch-lightning/pull/7272)) +- Explicitly disallow calling `self.log(on_epoch=False)` during epoch-only or single-call hooks ([#7874](https://github.com/Lightning-AI/pytorch-lightning/pull/7874)) - Changed these `Trainer` methods to be protected: `call_setup_hook`, `call_configure_sharded_model`, `pre_dispatch`, `dispatch`, `post_dispatch`, `call_teardown_hook`, `run_train`, `run_sanity_check`, `run_evaluate`, `run_evaluation`, `run_predict`, `track_output_for_epoch_end` -- Changed `metrics_to_scalars` to work with any collection or value ([#7888](https://github.com/Lightning-AI/lightning/pull/7888)) -- Changed `clip_grad_norm` to use `torch.nn.utils.clip_grad_norm_` ([#7025](https://github.com/Lightning-AI/lightning/pull/7025)) -- Validation is now always run inside the training epoch scope ([#7357](https://github.com/Lightning-AI/lightning/pull/7357)) -- `ModelCheckpoint` now runs at the end of the training epoch by default ([#8389](https://github.com/Lightning-AI/lightning/pull/8389)) -- `EarlyStopping` now runs at the end of the training epoch by default ([#8286](https://github.com/Lightning-AI/lightning/pull/8286)) +- Changed `metrics_to_scalars` to work with any collection or value ([#7888](https://github.com/Lightning-AI/pytorch-lightning/pull/7888)) +- Changed `clip_grad_norm` to use `torch.nn.utils.clip_grad_norm_` ([#7025](https://github.com/Lightning-AI/pytorch-lightning/pull/7025)) +- Validation is now always run inside the training epoch scope ([#7357](https://github.com/Lightning-AI/pytorch-lightning/pull/7357)) +- `ModelCheckpoint` now runs at the end of the training epoch by default ([#8389](https://github.com/Lightning-AI/pytorch-lightning/pull/8389)) +- `EarlyStopping` now runs at the end of the training epoch by default ([#8286](https://github.com/Lightning-AI/pytorch-lightning/pull/8286)) - Refactored Loops - * Moved attributes `global_step`, `current_epoch`, `max/min_steps`, `max/min_epochs`, `batch_idx`, and `total_batch_idx` to TrainLoop ([#7437](https://github.com/Lightning-AI/lightning/pull/7437)) - * Refactored result handling in training loop ([#7506](https://github.com/Lightning-AI/lightning/pull/7506)) - * Moved attributes `hiddens` and `split_idx` to TrainLoop ([#7507](https://github.com/Lightning-AI/lightning/pull/7507)) - * Refactored the logic around manual and automatic optimization inside the optimizer loop ([#7526](https://github.com/Lightning-AI/lightning/pull/7526)) - * Simplified "should run validation" logic ([#7682](https://github.com/Lightning-AI/lightning/pull/7682)) - * Simplified logic for updating the learning rate for schedulers ([#7682](https://github.com/Lightning-AI/lightning/pull/7682)) - * Removed the `on_epoch` guard from the "should stop" validation check ([#7701](https://github.com/Lightning-AI/lightning/pull/7701)) - * Refactored internal loop interface; added new classes `FitLoop`, 
- * Refactored internal loop interface; added new classes `FitLoop`, `TrainingEpochLoop`, `TrainingBatchLoop` ([#7871](https://github.com/Lightning-AI/lightning/pull/7871), [#8077](https://github.com/Lightning-AI/lightning/pull/8077))
- * Removed `pl.trainer.training_loop` ([#7985](https://github.com/Lightning-AI/lightning/pull/7985))
- * Refactored evaluation loop interface; added new classes `DataLoaderLoop`, `EvaluationLoop`, `EvaluationEpochLoop` ([#7990](https://github.com/Lightning-AI/lightning/pull/7990), [#8077](https://github.com/Lightning-AI/lightning/pull/8077))
- * Removed `pl.trainer.evaluation_loop` ([#8056](https://github.com/Lightning-AI/lightning/pull/8056))
- * Restricted public access to several internal functions ([#8024](https://github.com/Lightning-AI/lightning/pull/8024))
- * Refactored trainer `_run_*` functions and separate evaluation loops ([#8065](https://github.com/Lightning-AI/lightning/pull/8065))
- * Refactored prediction loop interface; added new classes `PredictionLoop`, `PredictionEpochLoop` ([#7700](https://github.com/Lightning-AI/lightning/pull/7700), [#8077](https://github.com/Lightning-AI/lightning/pull/8077))
- * Removed `pl.trainer.predict_loop` ([#8094](https://github.com/Lightning-AI/lightning/pull/8094))
- * Moved result teardown to the loops ([#8245](https://github.com/Lightning-AI/lightning/pull/8245))
- * Improve `Loop` API to better handle children `state_dict` and `progress` ([#8334](https://github.com/Lightning-AI/lightning/pull/8334))
+ * Moved attributes `global_step`, `current_epoch`, `max/min_steps`, `max/min_epochs`, `batch_idx`, and `total_batch_idx` to TrainLoop ([#7437](https://github.com/Lightning-AI/pytorch-lightning/pull/7437))
+ * Refactored result handling in training loop ([#7506](https://github.com/Lightning-AI/pytorch-lightning/pull/7506))
+ * Moved attributes `hiddens` and `split_idx` to TrainLoop ([#7507](https://github.com/Lightning-AI/pytorch-lightning/pull/7507))
+ * Refactored the logic around manual and automatic optimization inside the optimizer loop ([#7526](https://github.com/Lightning-AI/pytorch-lightning/pull/7526))
+ * Simplified "should run validation" logic ([#7682](https://github.com/Lightning-AI/pytorch-lightning/pull/7682))
+ * Simplified logic for updating the learning rate for schedulers ([#7682](https://github.com/Lightning-AI/pytorch-lightning/pull/7682))
+ * Removed the `on_epoch` guard from the "should stop" validation check ([#7701](https://github.com/Lightning-AI/pytorch-lightning/pull/7701))
+ * Refactored internal loop interface; added new classes `FitLoop`, `TrainingEpochLoop`, `TrainingBatchLoop` ([#7871](https://github.com/Lightning-AI/pytorch-lightning/pull/7871), [#8077](https://github.com/Lightning-AI/pytorch-lightning/pull/8077))
+ * Removed `pl.trainer.training_loop` ([#7985](https://github.com/Lightning-AI/pytorch-lightning/pull/7985))
+ * Refactored evaluation loop interface; added new classes `DataLoaderLoop`, `EvaluationLoop`, `EvaluationEpochLoop` ([#7990](https://github.com/Lightning-AI/pytorch-lightning/pull/7990), [#8077](https://github.com/Lightning-AI/pytorch-lightning/pull/8077))
+ * Removed `pl.trainer.evaluation_loop` ([#8056](https://github.com/Lightning-AI/pytorch-lightning/pull/8056))
+ * Restricted public access to several internal functions ([#8024](https://github.com/Lightning-AI/pytorch-lightning/pull/8024))
+ * Refactored trainer `_run_*` functions and separate evaluation loops ([#8065](https://github.com/Lightning-AI/pytorch-lightning/pull/8065))
+ * Refactored prediction loop interface; added new classes `PredictionLoop`, `PredictionEpochLoop` ([#7700](https://github.com/Lightning-AI/pytorch-lightning/pull/7700), [#8077](https://github.com/Lightning-AI/pytorch-lightning/pull/8077))
+ * Removed `pl.trainer.predict_loop` ([#8094](https://github.com/Lightning-AI/pytorch-lightning/pull/8094))
+ * Moved result teardown to the loops ([#8245](https://github.com/Lightning-AI/pytorch-lightning/pull/8245))
+ * Improve `Loop` API to better handle children `state_dict` and `progress` ([#8334](https://github.com/Lightning-AI/pytorch-lightning/pull/8334))
- Refactored logging
- * Renamed and moved `core/step_result.py` to `trainer/connectors/logger_connector/result.py` ([#7736](https://github.com/Lightning-AI/lightning/pull/7736))
- * Dramatically simplify the `LoggerConnector` ([#7882](https://github.com/Lightning-AI/lightning/pull/7882))
- * `trainer.{logged,progress_bar,callback}_metrics` are now updated on-demand ([#7882](https://github.com/Lightning-AI/lightning/pull/7882))
- * Completely overhaul the `Result` object in favor of `ResultMetric` ([#7882](https://github.com/Lightning-AI/lightning/pull/7882))
- * Improve epoch-level reduction time and overall memory usage ([#7882](https://github.com/Lightning-AI/lightning/pull/7882))
- * Allow passing `self.log(batch_size=...)` ([#7891](https://github.com/Lightning-AI/lightning/pull/7891))
- * Each of the training loops now keeps its own results collection ([#7891](https://github.com/Lightning-AI/lightning/pull/7891))
- * Remove `EpochResultStore` and `HookResultStore` in favor of `ResultCollection` ([#7909](https://github.com/Lightning-AI/lightning/pull/7909))
- * Remove `MetricsHolder` ([#7909](https://github.com/Lightning-AI/lightning/pull/7909))
-- Moved `ignore_scalar_return_in_dp` warning suppression to the DataParallelPlugin class ([#7421](https://github.com/Lightning-AI/lightning/pull/7421/))
-- Changed the behaviour when logging evaluation step metrics to no longer append `/epoch_*` to the metric name ([#7351](https://github.com/Lightning-AI/lightning/pull/7351))
-- Raised `ValueError` when a `None` value is `self.log`-ed ([#7771](https://github.com/Lightning-AI/lightning/pull/7771))
-- Changed `resolve_training_type_plugins` to allow setting `num_nodes` and `sync_batchnorm` from `Trainer` setting ([#7026](https://github.com/Lightning-AI/lightning/pull/7026))
-- Default `seed_everything(workers=True)` in the `LightningCLI` ([#7504](https://github.com/Lightning-AI/lightning/pull/7504))
-- Changed `model.state_dict()` in `CheckpointConnector` to allow `training_type_plugin` to customize the model's `state_dict()` ([#7474](https://github.com/Lightning-AI/lightning/pull/7474))
-- `MLflowLogger` now uses the env variable `MLFLOW_TRACKING_URI` as default tracking URI ([#7457](https://github.com/Lightning-AI/lightning/pull/7457))
-- Changed `Trainer` arg and functionality from `reload_dataloaders_every_epoch` to `reload_dataloaders_every_n_epochs` ([#5043](https://github.com/Lightning-AI/lightning/pull/5043))
-- Changed `WandbLogger(log_model={True/'all'})` to log models as artifacts ([#6231](https://github.com/Lightning-AI/lightning/pull/6231))
-- MLFlowLogger now accepts `run_name` as an constructor argument ([#7622](https://github.com/Lightning-AI/lightning/pull/7622))
-- Changed `teardown()` in `Accelerator` to allow `training_type_plugin` to customize `teardown` logic ([#7579](https://github.com/Lightning-AI/lightning/pull/7579))
-- `Trainer.fit` now raises an error when using manual optimization with unsupported features such as `gradient_clip_val` or `accumulate_grad_batches` ([#7788](https://github.com/Lightning-AI/lightning/pull/7788))
-- Accelerator hooks are called regardless if `LightningModule` overrides the same hooks ([#7826](https://github.com/Lightning-AI/lightning/pull/7826))
-- Moved profilers to their own file ([#7822](https://github.com/Lightning-AI/lightning/pull/7822))
-- The `on_after_backward` hook is now called on accumulating iterations. Use the `on_before_optimizer_step` hook to mimic the old behaviour ([#8328](https://github.com/Lightning-AI/lightning/pull/8328))
-- The mixed precision loss is no longer unscaled before the `on_after_backward` hook. Use the `on_before_optimizer_step` hook to mimic the old behaviour ([#8328](https://github.com/Lightning-AI/lightning/pull/8328))
-- The `TrainingTypePlugin.{pre,post}_backward` hooks no longer take the `optimizer, opt_idx, should_accumulate` arguments ([#8328](https://github.com/Lightning-AI/lightning/pull/8328))
-- The `PrecisionPlugin.backward` hooks no longer returns a value ([#8328](https://github.com/Lightning-AI/lightning/pull/8328))
-- The `PrecisionPlugin.backward` hooks no longer takes a `should_accumulate` argument ([#8328](https://github.com/Lightning-AI/lightning/pull/8328))
-- Added the `on_before_backward` hook ([#7865](https://github.com/Lightning-AI/lightning/pull/7865))
-- `LightningCLI` now aborts with a clearer message if config already exists and disables save config during `fast_dev_run`([#7963](https://github.com/Lightning-AI/lightning/pull/7963))
-- Saved the `LightningCLI` config on `setup` and only on the main process ([#8017](https://github.com/Lightning-AI/lightning/pull/8017))
-- Dropped the `LightningCLI` `ArgumentParser` when pickling ([#8017](https://github.com/Lightning-AI/lightning/pull/8017))
-- Skip `broadcast` if distributed not initialized for the spawn plugins ([#8017](https://github.com/Lightning-AI/lightning/pull/8017))
-- `Trainer(resume_from_checkpoint=...)` now restores the model directly after `LightningModule.setup()`, which is before `LightningModule.configure_sharded_model()` ([#7652](https://github.com/Lightning-AI/lightning/pull/7652))
-- Moved `torch.cuda.set_device()` to enable collective calls earlier in setup ([#8312](https://github.com/Lightning-AI/lightning/pull/8312))
-- Used XLA utility API to move data to CPU (Single TPU core) ([#8078](https://github.com/Lightning-AI/lightning/pull/8078))
-- Improved error messages in `replace_sampler` when the `DataLoader` attributes are not included in the signature or the signature is missing optional arguments ([#8519](https://github.com/Lightning-AI/lightning/pull/8519))
-- Moved `DeviceDtypeModuleMixin` and `HyperparametersMixin` mixin to `core` ([#8396](https://github.com/Lightning-AI/lightning/pull/8396))
-- Return the `default_root_dir` as the `log_dir` when the logger is a `LoggerCollection` ([#8187](https://github.com/Lightning-AI/lightning/pull/8187))
+ * Renamed and moved `core/step_result.py` to `trainer/connectors/logger_connector/result.py` ([#7736](https://github.com/Lightning-AI/pytorch-lightning/pull/7736))
+ * Dramatically simplify the `LoggerConnector` ([#7882](https://github.com/Lightning-AI/pytorch-lightning/pull/7882))
+ * `trainer.{logged,progress_bar,callback}_metrics` are now updated on-demand ([#7882](https://github.com/Lightning-AI/pytorch-lightning/pull/7882))
+ * Completely overhaul the `Result` object in favor of `ResultMetric` ([#7882](https://github.com/Lightning-AI/pytorch-lightning/pull/7882))
+ * Improve epoch-level reduction time and overall memory usage ([#7882](https://github.com/Lightning-AI/pytorch-lightning/pull/7882))
+ * Allow passing `self.log(batch_size=...)` ([#7891](https://github.com/Lightning-AI/pytorch-lightning/pull/7891))
+ * Each of the training loops now keeps its own results collection ([#7891](https://github.com/Lightning-AI/pytorch-lightning/pull/7891))
+ * Remove `EpochResultStore` and `HookResultStore` in favor of `ResultCollection` ([#7909](https://github.com/Lightning-AI/pytorch-lightning/pull/7909))
+ * Remove `MetricsHolder` ([#7909](https://github.com/Lightning-AI/pytorch-lightning/pull/7909))
+- Moved `ignore_scalar_return_in_dp` warning suppression to the DataParallelPlugin class ([#7421](https://github.com/Lightning-AI/pytorch-lightning/pull/7421/))
+- Changed the behaviour when logging evaluation step metrics to no longer append `/epoch_*` to the metric name ([#7351](https://github.com/Lightning-AI/pytorch-lightning/pull/7351))
+- Raised `ValueError` when a `None` value is `self.log`-ed ([#7771](https://github.com/Lightning-AI/pytorch-lightning/pull/7771))
+- Changed `resolve_training_type_plugins` to allow setting `num_nodes` and `sync_batchnorm` from `Trainer` setting ([#7026](https://github.com/Lightning-AI/pytorch-lightning/pull/7026))
+- Default `seed_everything(workers=True)` in the `LightningCLI` ([#7504](https://github.com/Lightning-AI/pytorch-lightning/pull/7504))
+- Changed `model.state_dict()` in `CheckpointConnector` to allow `training_type_plugin` to customize the model's `state_dict()` ([#7474](https://github.com/Lightning-AI/pytorch-lightning/pull/7474))
+- `MLflowLogger` now uses the env variable `MLFLOW_TRACKING_URI` as default tracking URI ([#7457](https://github.com/Lightning-AI/pytorch-lightning/pull/7457))
+- Changed `Trainer` arg and functionality from `reload_dataloaders_every_epoch` to `reload_dataloaders_every_n_epochs` ([#5043](https://github.com/Lightning-AI/pytorch-lightning/pull/5043))
+- Changed `WandbLogger(log_model={True/'all'})` to log models as artifacts ([#6231](https://github.com/Lightning-AI/pytorch-lightning/pull/6231))
+- MLFlowLogger now accepts `run_name` as a constructor argument ([#7622](https://github.com/Lightning-AI/pytorch-lightning/pull/7622))
+- Changed `teardown()` in `Accelerator` to allow `training_type_plugin` to customize `teardown` logic ([#7579](https://github.com/Lightning-AI/pytorch-lightning/pull/7579))
+- `Trainer.fit` now raises an error when using manual optimization with unsupported features such as `gradient_clip_val` or `accumulate_grad_batches` ([#7788](https://github.com/Lightning-AI/pytorch-lightning/pull/7788))
+- Accelerator hooks are called regardless of whether `LightningModule` overrides the same hooks ([#7826](https://github.com/Lightning-AI/pytorch-lightning/pull/7826))
+- Moved profilers to their own file ([#7822](https://github.com/Lightning-AI/pytorch-lightning/pull/7822))
+- The `on_after_backward` hook is now called on accumulating iterations. Use the `on_before_optimizer_step` hook to mimic the old behaviour ([#8328](https://github.com/Lightning-AI/pytorch-lightning/pull/8328))
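Per-step logic that used to live in `on_after_backward` (which now also fires on gradient-accumulation iterations) can move to `on_before_optimizer_step`, as the entry above suggests. A minimal sketch, assuming the 1.4-era hook signature:

```python
import pytorch_lightning as pl

class LitModel(pl.LightningModule):
    def on_before_optimizer_step(self, optimizer, optimizer_idx):
        # runs once per real optimizer step, after accumulation and
        # (with AMP) after unscaling, i.e. the old `on_after_backward` timing
        grad_norm = sum(
            p.grad.norm() for p in self.parameters() if p.grad is not None
        )
        self.log("grad_norm", grad_norm)
```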
+- The mixed precision loss is no longer unscaled before the `on_after_backward` hook. Use the `on_before_optimizer_step` hook to mimic the old behaviour ([#8328](https://github.com/Lightning-AI/pytorch-lightning/pull/8328))
+- The `TrainingTypePlugin.{pre,post}_backward` hooks no longer take the `optimizer, opt_idx, should_accumulate` arguments ([#8328](https://github.com/Lightning-AI/pytorch-lightning/pull/8328))
+- The `PrecisionPlugin.backward` hook no longer returns a value ([#8328](https://github.com/Lightning-AI/pytorch-lightning/pull/8328))
+- The `PrecisionPlugin.backward` hook no longer takes a `should_accumulate` argument ([#8328](https://github.com/Lightning-AI/pytorch-lightning/pull/8328))
+- Added the `on_before_backward` hook ([#7865](https://github.com/Lightning-AI/pytorch-lightning/pull/7865))
+- `LightningCLI` now aborts with a clearer message if the config already exists and disables saving the config during `fast_dev_run` ([#7963](https://github.com/Lightning-AI/pytorch-lightning/pull/7963))
+- Saved the `LightningCLI` config on `setup` and only on the main process ([#8017](https://github.com/Lightning-AI/pytorch-lightning/pull/8017))
+- Dropped the `LightningCLI` `ArgumentParser` when pickling ([#8017](https://github.com/Lightning-AI/pytorch-lightning/pull/8017))
+- Skip `broadcast` if distributed is not initialized for the spawn plugins ([#8017](https://github.com/Lightning-AI/pytorch-lightning/pull/8017))
+- `Trainer(resume_from_checkpoint=...)` now restores the model directly after `LightningModule.setup()`, which is before `LightningModule.configure_sharded_model()` ([#7652](https://github.com/Lightning-AI/pytorch-lightning/pull/7652))
+- Moved `torch.cuda.set_device()` to enable collective calls earlier in setup ([#8312](https://github.com/Lightning-AI/pytorch-lightning/pull/8312))
+- Used XLA utility API to move data to CPU (Single TPU core) ([#8078](https://github.com/Lightning-AI/pytorch-lightning/pull/8078))
+- Improved error messages in `replace_sampler` when the `DataLoader` attributes are not included in the signature or the signature is missing optional arguments ([#8519](https://github.com/Lightning-AI/pytorch-lightning/pull/8519))
+- Moved the `DeviceDtypeModuleMixin` and `HyperparametersMixin` mixins to `core` ([#8396](https://github.com/Lightning-AI/pytorch-lightning/pull/8396))
+- Return the `default_root_dir` as the `log_dir` when the logger is a `LoggerCollection` ([#8187](https://github.com/Lightning-AI/pytorch-lightning/pull/8187))
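The logging changes above, together with the `sync_dist_op` deprecation in the next section, converge on one idiom: choose the reduction and cross-process syncing explicitly in `self.log`. A minimal sketch (the loss computation is a placeholder):

```python
import torch
import pytorch_lightning as pl

class LitModel(pl.LightningModule):
    def validation_step(self, batch, batch_idx):
        loss = self(batch).mean()  # placeholder loss
        # `reduce_fx` replaces the deprecated `sync_dist_op`;
        # `sync_dist` requests synchronization across processes
        self.log("val_loss", loss, reduce_fx=torch.mean, sync_dist=True)
```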
### Deprecated

-- Deprecated `LightningModule.loaded_optimizer_states_dict` ([#8229](https://github.com/Lightning-AI/lightning/pull/8229))
-- Standardized the dataloaders arguments of `trainer.{fit,valdiate,test,tune}` ([#7431](https://github.com/Lightning-AI/lightning/pull/7431))
-- Deprecated `DataModule` properties: `has_prepared_data`, `has_setup_fit`, `has_setup_validate`, `has_setup_test`, `has_setup_predict`, `has_teardown_fit`, `has_teardown_validate`, `has_teardown_test`, `has_teardown_predict` ([#7657](https://github.com/Lightning-AI/lightning/pull/7657/))
-- Deprecated `TrainerModelHooksMixin` in favor of `pl.utilities.signature_utils` ([#7422](https://github.com/Lightning-AI/lightning/pull/7422))
-- Deprecated `num_nodes` and `sync_batchnorm` arguments in `DDPPlugin` and `DDPSpawnPlugin` ([#7026](https://github.com/Lightning-AI/lightning/pull/7026))
-- Deprecated `self.log(sync_dist_op)` in favor of `self.log(reduce_fx)`. ([#7891](https://github.com/Lightning-AI/lightning/pull/7891))
-- Deprecated `is_overridden(model=...)` in favor of `is_overridden(instance=...)` ([#7918](https://github.com/Lightning-AI/lightning/pull/7918))
-- Deprecated automatically detaching returned extras with grads ([#7994](https://github.com/Lightning-AI/lightning/pull/7994))
-- Deprecated default value of `monitor` argument in EarlyStopping callback to enforce `monitor` as a required argument ([#7907](https://github.com/Lightning-AI/lightning/pull/7907))
-- Deprecated importing `rank_zero_{warn,deprecation}` directly from `pl.utilities.distributed` ([#8085](https://github.com/Lightning-AI/lightning/pull/8085))
-- Deprecated the use of `CheckpointConnector.hpc_load()` in favor of `CheckpointConnector.restore()` ([#7652](https://github.com/Lightning-AI/lightning/pull/7652))
-- Deprecated `ModelCheckpoint(every_n_val_epochs)` in favor of `ModelCheckpoint(every_n_epochs)` ([#8383](https://github.com/Lightning-AI/lightning/pull/8383))
-- Deprecated `DDPPlugin.task_idx` in favor of `DDPPlugin.local_rank` ([#8203](https://github.com/Lightning-AI/lightning/pull/8203))
-- Deprecated the `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#8025](https://github.com/Lightning-AI/lightning/pull/8025))
-- Deprecated the `Trainer.disable_validation` property in favor of `not Trainer.enable_validation` ([#8291](https://github.com/Lightning-AI/lightning/pull/8291))
-- Deprecated `mode` parameter in `ModelSummary` in favor of `max_depth` ([#8062](https://github.com/Lightning-AI/lightning/pull/8062))
-- Deprecated `reload_dataloaders_every_epoch` argument of `Trainer` in favor of `reload_dataloaders_every_n_epochs` ([#5043](https://github.com/Lightning-AI/lightning/pull/5043))
-- Deprecated `distributed_backend` argument for `Trainer` ([#8575](https://github.com/Lightning-AI/lightning/pull/8575))
+- Deprecated `LightningModule.loaded_optimizer_states_dict` ([#8229](https://github.com/Lightning-AI/pytorch-lightning/pull/8229))
+- Standardized the dataloaders arguments of `trainer.{fit,validate,test,tune}` ([#7431](https://github.com/Lightning-AI/pytorch-lightning/pull/7431))
+- Deprecated `DataModule` properties: `has_prepared_data`, `has_setup_fit`, `has_setup_validate`, `has_setup_test`, `has_setup_predict`, `has_teardown_fit`, `has_teardown_validate`, `has_teardown_test`, `has_teardown_predict` ([#7657](https://github.com/Lightning-AI/pytorch-lightning/pull/7657/))
+- Deprecated `TrainerModelHooksMixin` in favor of `pl.utilities.signature_utils` ([#7422](https://github.com/Lightning-AI/pytorch-lightning/pull/7422))
+- Deprecated `num_nodes` and `sync_batchnorm` arguments in `DDPPlugin` and `DDPSpawnPlugin` ([#7026](https://github.com/Lightning-AI/pytorch-lightning/pull/7026))
+- Deprecated `self.log(sync_dist_op)` in favor of `self.log(reduce_fx)`. ([#7891](https://github.com/Lightning-AI/pytorch-lightning/pull/7891))
+- Deprecated `is_overridden(model=...)` in favor of `is_overridden(instance=...)` ([#7918](https://github.com/Lightning-AI/pytorch-lightning/pull/7918))
+- Deprecated automatically detaching returned extras with grads ([#7994](https://github.com/Lightning-AI/pytorch-lightning/pull/7994))
+- Deprecated default value of `monitor` argument in EarlyStopping callback to enforce `monitor` as a required argument ([#7907](https://github.com/Lightning-AI/pytorch-lightning/pull/7907))
+- Deprecated importing `rank_zero_{warn,deprecation}` directly from `pl.utilities.distributed` ([#8085](https://github.com/Lightning-AI/pytorch-lightning/pull/8085))
+- Deprecated the use of `CheckpointConnector.hpc_load()` in favor of `CheckpointConnector.restore()` ([#7652](https://github.com/Lightning-AI/pytorch-lightning/pull/7652))
+- Deprecated `ModelCheckpoint(every_n_val_epochs)` in favor of `ModelCheckpoint(every_n_epochs)` ([#8383](https://github.com/Lightning-AI/pytorch-lightning/pull/8383))
+- Deprecated `DDPPlugin.task_idx` in favor of `DDPPlugin.local_rank` ([#8203](https://github.com/Lightning-AI/pytorch-lightning/pull/8203))
+- Deprecated the `Trainer.train_loop` property in favor of `Trainer.fit_loop` ([#8025](https://github.com/Lightning-AI/pytorch-lightning/pull/8025))
+- Deprecated the `Trainer.disable_validation` property in favor of `not Trainer.enable_validation` ([#8291](https://github.com/Lightning-AI/pytorch-lightning/pull/8291))
+- Deprecated `mode` parameter in `ModelSummary` in favor of `max_depth` ([#8062](https://github.com/Lightning-AI/pytorch-lightning/pull/8062))
+- Deprecated `reload_dataloaders_every_epoch` argument of `Trainer` in favor of `reload_dataloaders_every_n_epochs` ([#5043](https://github.com/Lightning-AI/pytorch-lightning/pull/5043))
+- Deprecated `distributed_backend` argument for `Trainer` ([#8575](https://github.com/Lightning-AI/pytorch-lightning/pull/8575))
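Two of the deprecations above share one migration path: pass `monitor` (and, for checkpointing frequency, `every_n_epochs`) explicitly. A minimal sketch, assuming a `val_loss` metric is logged by the model:

```python
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

# the monitored key must match a metric logged via `self.log`
callbacks = [
    EarlyStopping(monitor="val_loss", mode="min"),
    ModelCheckpoint(monitor="val_loss", mode="min", every_n_epochs=1),
]
```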
### Removed

-- Dropped official support/testing for PyTorch <1.6 ([#8288](https://github.com/Lightning-AI/lightning/pull/8288))
-- Removed `ProfilerConnector` ([#7654](https://github.com/Lightning-AI/lightning/pull/7654))
-- Pruned deprecated classif. metrics from `pl.metrics.functional.classification` ([#7499](https://github.com/Lightning-AI/lightning/pull/7499))
-- Removed deprecated data parallel classes `LightningDataParallel` and `LightningDistributedDataParallel` from `pl.overrides.data_parallel` ([#7510](https://github.com/Lightning-AI/lightning/pull/7510))
-- Removed deprecated trainer attributes - `get_model` and `accelerator_backend` ([#7502](https://github.com/Lightning-AI/lightning/pull/7502))
-- Removed support for automatically monitoring the `val_loss` key with `ModelCheckpoint`. Pass your `monitor` of choice to the `ModelCheckpoint` instance instead ([#8293](https://github.com/Lightning-AI/lightning/pull/8293))
-- Removed support for `self.log(tbptt_reduce_fx)` and `self.log(tbptt_pad_token)`. Please, open a discussion explaining your use-case if you relied on these. ([#7644](https://github.com/Lightning-AI/lightning/pull/7644))
-- Removed deprecated utils modules `model_utils`, `warning_utils`, `xla_device_utils` and partially `argparse_utils` ([#7503](https://github.com/Lightning-AI/lightning/pull/7503))
-- Removed `RPCPlugin` and `RPCSequentialPlugin`. If you were successfully using these plugins, please open a GitHub discussion about your use case ([#8101](https://github.com/Lightning-AI/lightning/pull/8101))
-- Removed deprecated trainer attributes - `on_cpu`, `on_tpu`, `use_tpu`, `on_gpu`, `use_dp`, `use_ddp`, `use_ddp2`, `use_horovod`, `use_single_gpu` ([#7501](https://github.com/Lightning-AI/lightning/pull/7501))
-- Removed deprecated `optimizer` argument in `LightningModule.manual_backward()`; Toggling optimizers in manual optimization should be done using `LightningModule.{un}toggle_optimizer()` ([#8287](https://github.com/Lightning-AI/lightning/pull/8287))
-- Removed DeepSpeed FP16 Exception as FP32 is now supported ([#8462](https://github.com/Lightning-AI/lightning/pull/8462))
-- Removed environment variable `PL_EXP_VERSION` from DDP subprocesses ([7403](https://github.com/Lightning-AI/lightning/pull/7403))
+- Dropped official support/testing for PyTorch <1.6 ([#8288](https://github.com/Lightning-AI/pytorch-lightning/pull/8288))
+- Removed `ProfilerConnector` ([#7654](https://github.com/Lightning-AI/pytorch-lightning/pull/7654))
+- Pruned deprecated classif. metrics from `pl.metrics.functional.classification` ([#7499](https://github.com/Lightning-AI/pytorch-lightning/pull/7499))
+- Removed deprecated data parallel classes `LightningDataParallel` and `LightningDistributedDataParallel` from `pl.overrides.data_parallel` ([#7510](https://github.com/Lightning-AI/pytorch-lightning/pull/7510))
+- Removed deprecated trainer attributes - `get_model` and `accelerator_backend` ([#7502](https://github.com/Lightning-AI/pytorch-lightning/pull/7502))
+- Removed support for automatically monitoring the `val_loss` key with `ModelCheckpoint`. Pass your `monitor` of choice to the `ModelCheckpoint` instance instead ([#8293](https://github.com/Lightning-AI/pytorch-lightning/pull/8293))
+- Removed support for `self.log(tbptt_reduce_fx)` and `self.log(tbptt_pad_token)`. Please open a discussion explaining your use case if you relied on these ([#7644](https://github.com/Lightning-AI/pytorch-lightning/pull/7644))
+- Removed deprecated utils modules `model_utils`, `warning_utils`, `xla_device_utils` and partially `argparse_utils` ([#7503](https://github.com/Lightning-AI/pytorch-lightning/pull/7503))
+- Removed `RPCPlugin` and `RPCSequentialPlugin`. If you were successfully using these plugins, please open a GitHub discussion about your use case ([#8101](https://github.com/Lightning-AI/pytorch-lightning/pull/8101))
+- Removed deprecated trainer attributes - `on_cpu`, `on_tpu`, `use_tpu`, `on_gpu`, `use_dp`, `use_ddp`, `use_ddp2`, `use_horovod`, `use_single_gpu` ([#7501](https://github.com/Lightning-AI/pytorch-lightning/pull/7501))
+- Removed deprecated `optimizer` argument in `LightningModule.manual_backward()`; Toggling optimizers in manual optimization should be done using `LightningModule.{un}toggle_optimizer()` ([#8287](https://github.com/Lightning-AI/pytorch-lightning/pull/8287))
+- Removed DeepSpeed FP16 Exception as FP32 is now supported ([#8462](https://github.com/Lightning-AI/pytorch-lightning/pull/8462))
+- Removed environment variable `PL_EXP_VERSION` from DDP subprocesses ([#7403](https://github.com/Lightning-AI/pytorch-lightning/pull/7403))
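With the `optimizer` argument gone from `manual_backward()` (see the removal above), manual optimization reduces to the following pattern; a minimal sketch with a placeholder model:

```python
import torch
import pytorch_lightning as pl

class ManualLitModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.automatic_optimization = False  # opt into manual optimization
        self.layer = torch.nn.Linear(4, 1)

    def training_step(self, batch, batch_idx):
        opt = self.optimizers()
        loss = self.layer(batch).sum()  # placeholder loss
        opt.zero_grad()
        self.manual_backward(loss)  # no `optimizer` argument anymore
        opt.step()

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)
```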
### Fixed

-- Fixed the `GPUStatsMonitor` callbacks to use the correct GPU IDs if `CUDA_VISIBLE_DEVICES` set ([#8260](https://github.com/Lightning-AI/lightning/pull/8260))
-- Fixed `lr_scheduler` checkpointed state by calling `update_lr_schedulers` before saving checkpoints ([#7877](https://github.com/Lightning-AI/lightning/pull/7877))
-- Fixed ambiguous warning when both overfit and train dataloader shuffling are enabled ([#7685](https://github.com/Lightning-AI/lightning/pull/7685))
-- Fixed dev debugger memory growing due to tracking events even when disabled ([#7875](https://github.com/Lightning-AI/lightning/pull/7875))
-- Fixed `None` loss keys getting added in `training_epoch_end` when using manual optimization and not returning a loss ([#7772](https://github.com/Lightning-AI/lightning/pull/7772))
-- Fixed a bug where `precision=64` with `accelerator='ddp_spawn'` would throw a pickle error ([#6924](https://github.com/Lightning-AI/lightning/pull/6924))
-- Do not override the existing `epoch` value in `logged_metrics` when already logged by the user ([#7982](https://github.com/Lightning-AI/lightning/pull/7982))
-- Support for manual optimization with DeepSpeed ([#7970](https://github.com/Lightning-AI/lightning/pull/7970))
-- Fixed `dataloader_idx` argument value when predicting with only one `DataLoader` ([#7941](https://github.com/Lightning-AI/lightning/pull/7941))
-- Fixed passing the `stage` argument of `Callback.{setup,teardown}` as a keyword ([#7973](https://github.com/Lightning-AI/lightning/pull/7973))
-- Fixed metrics generated during `validation sanity checking` are cleaned on end ([#8171](https://github.com/Lightning-AI/lightning/pull/8171))
-- Fixed `log_gpu_memory` metrics not being added to `logging` when nothing else is logged ([#8174](https://github.com/Lightning-AI/lightning/pull/8174))
-- Fixed a bug where calling `log` with a `Metric` instance would raise an error if it was a nested attribute of the model ([#8181](https://github.com/Lightning-AI/lightning/pull/8181))
-- Fixed a bug where using `precision=64` would cause buffers with complex dtype to be cast to real ([#8208](https://github.com/Lightning-AI/lightning/pull/8208))
-- Fixed `is_overridden` returning true for wrapped functions with no changes ([#8296](https://github.com/Lightning-AI/lightning/pull/8296))
-- Fixed a bug where `truncated_bptt_steps` would throw an AttributeError when the target RNN has multiple hidden states ([#8145](https://github.com/Lightning-AI/lightning/pull/8145))
-- Fixed `self.optimizers()` not returning a single optimizer if it had been wrapped ([#8326](https://github.com/Lightning-AI/lightning/pull/8326))
-- Fixed the `on_after_backward` hook not getting called when using manual optimization and no plugins ([#8328](https://github.com/Lightning-AI/lightning/pull/8328))
-- Fixed the `LightningModule.backward` hook only getting called with the `apex` plugin when using manual optimization ([#8328](https://github.com/Lightning-AI/lightning/pull/8328))
-- Fixed moving batch to device before sending it to the `on_*_batch_start`/`on_*_batch_end` callbacks and model hooks ([#7378](https://github.com/Lightning-AI/lightning/pull/7378))
-- Fixed passing a custom `DDPPlugin` when choosing `accelerator="ddp_cpu"` for the accelerator ([#6208](https://github.com/Lightning-AI/lightning/pull/6208))
-- Fixed missing call to `LightningModule.untoggle_optimizer` in training loop when running gradient accumulation with multiple optimizers ([#8284](https://github.com/Lightning-AI/lightning/pull/8284))
-- Fixed hash of LightningEnum to work with value instead of name ([#8421](https://github.com/Lightning-AI/lightning/pull/8421)).
-- Fixed a bug where an extra checkpoint was saved at the end of training if the `val_check_interval` did not align with the number of training batches ([#7724](https://github.com/Lightning-AI/lightning/pull/7724))
-- Fixed hash of LightningEnum to work with value instead of name([#8421](https://github.com/Lightning-AI/lightning/pull/8421)).
-- Fixed `move_data_to_device` to return the batch if the object `to` function didn't return `self` ([#8433](https://github.com/Lightning-AI/lightning/pull/8433))
-- Fixed progress bar updates for Pod Training ([#8258](https://github.com/Lightning-AI/lightning/pull/8258))
-- Fixed clearing dataloader references before attaching new dataloaders in consecutive `Trainer.{fit,validate,test,predict}´ runs ([#8442](https://github.com/Lightning-AI/lightning/pull/8442))
-- Fixed memory leaks on GPU by moving `optimizer_states`, `ResultCollection.extra`, `ResultMetric` attributes, and `LoggerConnector` metrics to `cpu`. Also, delete the DDP wrapper on `teardown` ([#8490](https://github.com/Lightning-AI/lightning/pull/8490))
-- Fixed `SWA` callback using LightningModule `prevent_trainer_and_dataloaders_deepcopy` to avoid OOM ([#8472](https://github.com/Lightning-AI/lightning/pull/8472))
-- Fixed `ModelPruning` callback `on_save_checkpoint` to avoid making a `deepcopy` potentially leading to OOM ([#8472](https://github.com/Lightning-AI/lightning/pull/8472))
-- Fixed the sampler replacement logic for `DataLoader`s which do not define all `DataLoader` attributes as `__init__` parameters ([#8519](https://github.com/Lightning-AI/lightning/pull/8519))
-- Fixed DeepSpeed Windows support ([#8488](https://github.com/Lightning-AI/lightning/pull/8488))
-- Fixed DeepSpeed not properly setting the trainer `lr_schedulers` attribute ([#8527](https://github.com/Lightning-AI/lightning/pull/8527))
-- Fixed experiment version and log-dir divergence in DDP when using multiple `Trainer` instances in sequence ([7403](https://github.com/Lightning-AI/lightning/pull/7403))
-- Enabled manual optimization for TPUs ([#8458](https://github.com/Lightning-AI/lightning/pull/8458))
-- Fixed `accumulate_grad_batches` not been recomputed during model reload ([#5334](https://github.com/Lightning-AI/lightning/pull/5334))
-- Fixed a `TypeError` when wrapping optimizers in the `HorovodPlugin` and running `Trainer.test` ([#7840](https://github.com/Lightning-AI/lightning/pull/7840))
-- Fixed `BackboneFinetuning` restoration ([#8501](https://github.com/Lightning-AI/lightning/pull/8501))
-- Fixed `lr_scheduler` with metric (e.g. `torch.optim.lr_scheduler.ReduceLROnPlateau`) when using `automatic_optimization = False` ([#7643](https://github.com/Lightning-AI/lightning/pull/7643))
-- Fixed `DeepSpeed` breaking with no schedulers ([#8580](https://github.com/Lightning-AI/lightning/pull/8580))
+- Fixed the `GPUStatsMonitor` callbacks to use the correct GPU IDs if `CUDA_VISIBLE_DEVICES` is set ([#8260](https://github.com/Lightning-AI/pytorch-lightning/pull/8260))
+- Fixed `lr_scheduler` checkpointed state by calling `update_lr_schedulers` before saving checkpoints ([#7877](https://github.com/Lightning-AI/pytorch-lightning/pull/7877))
+- Fixed ambiguous warning when both overfit and train dataloader shuffling are enabled ([#7685](https://github.com/Lightning-AI/pytorch-lightning/pull/7685))
+- Fixed dev debugger memory growing due to tracking events even when disabled ([#7875](https://github.com/Lightning-AI/pytorch-lightning/pull/7875))
+- Fixed `None` loss keys getting added in `training_epoch_end` when using manual optimization and not returning a loss ([#7772](https://github.com/Lightning-AI/pytorch-lightning/pull/7772))
+- Fixed a bug where `precision=64` with `accelerator='ddp_spawn'` would throw a pickle error ([#6924](https://github.com/Lightning-AI/pytorch-lightning/pull/6924))
+- Do not override the existing `epoch` value in `logged_metrics` when already logged by the user ([#7982](https://github.com/Lightning-AI/pytorch-lightning/pull/7982))
+- Added support for manual optimization with DeepSpeed ([#7970](https://github.com/Lightning-AI/pytorch-lightning/pull/7970))
+- Fixed `dataloader_idx` argument value when predicting with only one `DataLoader` ([#7941](https://github.com/Lightning-AI/pytorch-lightning/pull/7941))
+- Fixed passing the `stage` argument of `Callback.{setup,teardown}` as a keyword ([#7973](https://github.com/Lightning-AI/pytorch-lightning/pull/7973))
+- Fixed metrics generated during validation sanity checking not being cleaned up on end ([#8171](https://github.com/Lightning-AI/pytorch-lightning/pull/8171))
+- Fixed `log_gpu_memory` metrics not being added to `logging` when nothing else is logged ([#8174](https://github.com/Lightning-AI/pytorch-lightning/pull/8174))
+- Fixed a bug where calling `log` with a `Metric` instance would raise an error if it was a nested attribute of the model ([#8181](https://github.com/Lightning-AI/pytorch-lightning/pull/8181))
+- Fixed a bug where using `precision=64` would cause buffers with complex dtype to be cast to real ([#8208](https://github.com/Lightning-AI/pytorch-lightning/pull/8208))
+- Fixed `is_overridden` returning true for wrapped functions with no changes ([#8296](https://github.com/Lightning-AI/pytorch-lightning/pull/8296))
+- Fixed a bug where `truncated_bptt_steps` would throw an AttributeError when the target RNN has multiple hidden states ([#8145](https://github.com/Lightning-AI/pytorch-lightning/pull/8145))
+- Fixed `self.optimizers()` not returning a single optimizer if it had been wrapped ([#8326](https://github.com/Lightning-AI/pytorch-lightning/pull/8326))
+- Fixed the `on_after_backward` hook not getting called when using manual optimization and no plugins ([#8328](https://github.com/Lightning-AI/pytorch-lightning/pull/8328))
+- Fixed the `LightningModule.backward` hook only getting called with the `apex` plugin when using manual optimization ([#8328](https://github.com/Lightning-AI/pytorch-lightning/pull/8328))
+- Fixed moving batch to device before sending it to the `on_*_batch_start`/`on_*_batch_end` callbacks and model hooks ([#7378](https://github.com/Lightning-AI/pytorch-lightning/pull/7378))
+- Fixed passing a custom `DDPPlugin` when choosing `accelerator="ddp_cpu"` for the accelerator ([#6208](https://github.com/Lightning-AI/pytorch-lightning/pull/6208))
+- Fixed missing call to `LightningModule.untoggle_optimizer` in training loop when running gradient accumulation with multiple optimizers ([#8284](https://github.com/Lightning-AI/pytorch-lightning/pull/8284))
+- Fixed hash of LightningEnum to work with value instead of name ([#8421](https://github.com/Lightning-AI/pytorch-lightning/pull/8421))
+- Fixed a bug where an extra checkpoint was saved at the end of training if the `val_check_interval` did not align with the number of training batches ([#7724](https://github.com/Lightning-AI/pytorch-lightning/pull/7724))
+- Fixed `move_data_to_device` to return the batch if the object `to` function didn't return `self` ([#8433](https://github.com/Lightning-AI/pytorch-lightning/pull/8433))
+- Fixed progress bar updates for Pod Training ([#8258](https://github.com/Lightning-AI/pytorch-lightning/pull/8258))
+- Fixed clearing dataloader references before attaching new dataloaders in consecutive `Trainer.{fit,validate,test,predict}` runs ([#8442](https://github.com/Lightning-AI/pytorch-lightning/pull/8442))
+- Fixed memory leaks on GPU by moving `optimizer_states`, `ResultCollection.extra`, `ResultMetric` attributes, and `LoggerConnector` metrics to `cpu`. Also, delete the DDP wrapper on `teardown` ([#8490](https://github.com/Lightning-AI/pytorch-lightning/pull/8490))
+- Fixed `SWA` callback using LightningModule `prevent_trainer_and_dataloaders_deepcopy` to avoid OOM ([#8472](https://github.com/Lightning-AI/pytorch-lightning/pull/8472))
+- Fixed `ModelPruning` callback `on_save_checkpoint` to avoid making a `deepcopy` potentially leading to OOM ([#8472](https://github.com/Lightning-AI/pytorch-lightning/pull/8472))
+- Fixed the sampler replacement logic for `DataLoader`s which do not define all `DataLoader` attributes as `__init__` parameters ([#8519](https://github.com/Lightning-AI/pytorch-lightning/pull/8519))
+- Fixed DeepSpeed Windows support ([#8488](https://github.com/Lightning-AI/pytorch-lightning/pull/8488))
+- Fixed DeepSpeed not properly setting the trainer `lr_schedulers` attribute ([#8527](https://github.com/Lightning-AI/pytorch-lightning/pull/8527))
+- Fixed experiment version and log-dir divergence in DDP when using multiple `Trainer` instances in sequence ([#7403](https://github.com/Lightning-AI/pytorch-lightning/pull/7403))
+- Enabled manual optimization for TPUs ([#8458](https://github.com/Lightning-AI/pytorch-lightning/pull/8458))
+- Fixed `accumulate_grad_batches` not being recomputed during model reload ([#5334](https://github.com/Lightning-AI/pytorch-lightning/pull/5334))
+- Fixed a `TypeError` when wrapping optimizers in the `HorovodPlugin` and running `Trainer.test` ([#7840](https://github.com/Lightning-AI/pytorch-lightning/pull/7840))
+- Fixed `BackboneFinetuning` restoration ([#8501](https://github.com/Lightning-AI/pytorch-lightning/pull/8501))
+- Fixed `lr_scheduler` with metric (e.g. `torch.optim.lr_scheduler.ReduceLROnPlateau`) when using `automatic_optimization = False` ([#7643](https://github.com/Lightning-AI/pytorch-lightning/pull/7643))
+- Fixed `DeepSpeed` breaking with no schedulers ([#8580](https://github.com/Lightning-AI/pytorch-lightning/pull/8580))

## [1.3.8] - 2021-07-01

### Fixed

-- Fixed a sync deadlock when checkpointing a `LightningModule` that uses a torchmetrics 0.4 `Metric` ([#8218](https://github.com/Lightning-AI/lightning/pull/8218))
-- Fixed compatibility TorchMetrics v0.4 ([#8206](https://github.com/Lightning-AI/lightning/pull/8206))
-- Added torchelastic check when sanitizing GPUs ([#8095](https://github.com/Lightning-AI/lightning/pull/8095))
-- Fixed a DDP info message that was never shown ([#8111](https://github.com/Lightning-AI/lightning/pull/8111))
-- Fixed metrics deprecation message at module import level ([#8163](https://github.com/Lightning-AI/lightning/pull/8163))
-- Fixed a bug where an infinite recursion would be triggered when using the `BaseFinetuning` callback on a model that contains a `ModuleDict` ([#8170](https://github.com/Lightning-AI/lightning/pull/8170))
-- Added a mechanism to detect `deadlock` for `DDP` when only 1 process trigger an `Exception`. The mechanism will `kill the processes` when it happens ([#8167](https://github.com/Lightning-AI/lightning/pull/8167))
-- Fixed NCCL error when selecting non-consecutive device ids ([#8165](https://github.com/Lightning-AI/lightning/pull/8165))
-- Fixed SWA to also work with `IterableDataset` ([#8172](https://github.com/Lightning-AI/lightning/pull/8172))
+- Fixed a sync deadlock when checkpointing a `LightningModule` that uses a torchmetrics 0.4 `Metric` ([#8218](https://github.com/Lightning-AI/pytorch-lightning/pull/8218))
+- Fixed compatibility with TorchMetrics v0.4 ([#8206](https://github.com/Lightning-AI/pytorch-lightning/pull/8206))
+- Added torchelastic check when sanitizing GPUs ([#8095](https://github.com/Lightning-AI/pytorch-lightning/pull/8095))
+- Fixed a DDP info message that was never shown ([#8111](https://github.com/Lightning-AI/pytorch-lightning/pull/8111))
+- Fixed metrics deprecation message at module import level ([#8163](https://github.com/Lightning-AI/pytorch-lightning/pull/8163))
+- Fixed a bug where an infinite recursion would be triggered when using the `BaseFinetuning` callback on a model that contains a `ModuleDict` ([#8170](https://github.com/Lightning-AI/pytorch-lightning/pull/8170))
+- Added a mechanism to detect a deadlock for `DDP` when only one process triggers an `Exception`. The mechanism will kill the processes when it happens ([#8167](https://github.com/Lightning-AI/pytorch-lightning/pull/8167))
+- Fixed NCCL error when selecting non-consecutive device ids ([#8165](https://github.com/Lightning-AI/pytorch-lightning/pull/8165))
+- Fixed SWA to also work with `IterableDataset` ([#8172](https://github.com/Lightning-AI/pytorch-lightning/pull/8172))

## [1.3.7] - 2021-06-22

### Fixed

-- Fixed a bug where skipping an optimizer while using amp causes amp to trigger an assertion error ([#7975](https://github.com/Lightning-AI/lightning/pull/7975))
-- Fixed deprecation messages not showing due to incorrect stacklevel ([#8002](https://github.com/Lightning-AI/lightning/pull/8002), [#8005](https://github.com/Lightning-AI/lightning/pull/8005))
-- Fixed setting a `DistributedSampler` when using a distributed plugin in a custom accelerator ([#7814](https://github.com/Lightning-AI/lightning/pull/7814))
-- Improved `PyTorchProfiler` chrome traces names ([#8009](https://github.com/Lightning-AI/lightning/pull/8009))
-- Fixed moving the best score to device in `EarlyStopping` callback for TPU devices ([#7959](https://github.com/Lightning-AI/lightning/pull/7959))
-- Fixes access to `callback_metrics` in ddp_spawn ([#7916](https://github.com/Lightning-AI/lightning/pull/7916))
+- Fixed a bug where skipping an optimizer while using amp would cause amp to trigger an assertion error ([#7975](https://github.com/Lightning-AI/pytorch-lightning/pull/7975))
+- Fixed deprecation messages not showing due to incorrect stacklevel ([#8002](https://github.com/Lightning-AI/pytorch-lightning/pull/8002), [#8005](https://github.com/Lightning-AI/pytorch-lightning/pull/8005))
+- Fixed setting a `DistributedSampler` when using a distributed plugin in a custom accelerator ([#7814](https://github.com/Lightning-AI/pytorch-lightning/pull/7814))
+- Improved `PyTorchProfiler` chrome trace names ([#8009](https://github.com/Lightning-AI/pytorch-lightning/pull/8009))
+- Fixed moving the best score to device in `EarlyStopping` callback for TPU devices ([#7959](https://github.com/Lightning-AI/pytorch-lightning/pull/7959))
+- Fixed access to `callback_metrics` in ddp_spawn ([#7916](https://github.com/Lightning-AI/pytorch-lightning/pull/7916))

## [1.3.6] - 2021-06-15

### Fixed

-- Fixed logs overwriting issue for remote filesystems ([#7889](https://github.com/Lightning-AI/lightning/pull/7889))
-- Fixed `DataModule.prepare_data` could only be called on the global rank 0 process ([#7945](https://github.com/Lightning-AI/lightning/pull/7945))
-- Fixed setting `worker_init_fn` to seed dataloaders correctly when using DDP ([#7942](https://github.com/Lightning-AI/lightning/pull/7942))
-- Fixed `BaseFinetuning` callback to properly handle parent modules w/ parameters ([#7931](https://github.com/Lightning-AI/lightning/pull/7931))
+- Fixed logs overwriting issue for remote filesystems ([#7889](https://github.com/Lightning-AI/pytorch-lightning/pull/7889))
+- Fixed `DataModule.prepare_data` to only be called on the global rank 0 process ([#7945](https://github.com/Lightning-AI/pytorch-lightning/pull/7945))
+- Fixed setting `worker_init_fn` to seed dataloaders correctly when using DDP ([#7942](https://github.com/Lightning-AI/pytorch-lightning/pull/7942))
+- Fixed `BaseFinetuning` callback to properly handle parent modules w/ parameters ([#7931](https://github.com/Lightning-AI/pytorch-lightning/pull/7931))

## [1.3.5] - 2021-06-08

### Added

-- Added warning to Training Step output ([#7779](https://github.com/Lightning-AI/lightning/pull/7779))
+- Added warning to Training Step output ([#7779](https://github.com/Lightning-AI/pytorch-lightning/pull/7779))

### Fixed

-- Fixed `LearningRateMonitor` and `BackboneFinetuning` ([#7835](https://github.com/Lightning-AI/lightning/pull/7835))
-- Minor improvements to `apply_to_collection` and type signature of `log_dict` ([#7851](https://github.com/Lightning-AI/lightning/pull/7851))
-- Fixed docker versions ([#7834](https://github.com/Lightning-AI/lightning/pull/7834))
-- Fixed sharded training check for fp16 precision ([#7825](https://github.com/Lightning-AI/lightning/pull/7825))
-- Fixed support for torch Module type hints in LightningCLI ([#7807](https://github.com/Lightning-AI/lightning/pull/7807))
+- Fixed `LearningRateMonitor` and `BackboneFinetuning` ([#7835](https://github.com/Lightning-AI/pytorch-lightning/pull/7835))
+- Minor improvements to `apply_to_collection` and type signature of `log_dict` ([#7851](https://github.com/Lightning-AI/pytorch-lightning/pull/7851))
+- Fixed docker versions ([#7834](https://github.com/Lightning-AI/pytorch-lightning/pull/7834))
+- Fixed sharded training check for fp16 precision ([#7825](https://github.com/Lightning-AI/pytorch-lightning/pull/7825))
+- Fixed support for torch Module type hints in LightningCLI ([#7807](https://github.com/Lightning-AI/pytorch-lightning/pull/7807))

### Changed

-- Move `training_output` validation to after `train_step_end` ([#7868](https://github.com/Lightning-AI/lightning/pull/7868))
+- Move `training_output` validation to after `train_step_end` ([#7868](https://github.com/Lightning-AI/pytorch-lightning/pull/7868))

## [1.3.4] - 2021-06-01

### Fixed

-- Fixed info message when max training time reached ([#7780](https://github.com/Lightning-AI/lightning/pull/7780))
-- Fixed missing `__len__` method to `IndexBatchSamplerWrapper` ([#7681](https://github.com/Lightning-AI/lightning/pull/7681))
+- Fixed info message when max training time is reached ([#7780](https://github.com/Lightning-AI/pytorch-lightning/pull/7780))
+- Fixed missing `__len__` method in `IndexBatchSamplerWrapper` ([#7681](https://github.com/Lightning-AI/pytorch-lightning/pull/7681))

## [1.3.3] - 2021-05-27
### Changed

-- Changed calling of `untoggle_optimizer(opt_idx)` out of the closure function ([#7563](https://github.com/Lightning-AI/lightning/pull/7563))
+- Moved the call to `untoggle_optimizer(opt_idx)` out of the closure function ([#7563](https://github.com/Lightning-AI/pytorch-lightning/pull/7563))

### Fixed

-- Fixed `ProgressBar` pickling after calling `trainer.predict` ([#7608](https://github.com/Lightning-AI/lightning/pull/7608))
-- Fixed broadcasting in multi-node, multi-gpu DDP using torch 1.7 ([#7592](https://github.com/Lightning-AI/lightning/pull/7592))
-- Fixed dataloaders are not reset when tuning the model ([#7566](https://github.com/Lightning-AI/lightning/pull/7566))
-- Fixed print errors in `ProgressBar` when `trainer.fit` is not called ([#7674](https://github.com/Lightning-AI/lightning/pull/7674))
-- Fixed global step update when the epoch is skipped ([#7677](https://github.com/Lightning-AI/lightning/pull/7677))
-- Fixed training loop total batch counter when accumulate grad batches was enabled ([#7692](https://github.com/Lightning-AI/lightning/pull/7692))
+- Fixed `ProgressBar` pickling after calling `trainer.predict` ([#7608](https://github.com/Lightning-AI/pytorch-lightning/pull/7608))
+- Fixed broadcasting in multi-node, multi-gpu DDP using torch 1.7 ([#7592](https://github.com/Lightning-AI/pytorch-lightning/pull/7592))
+- Fixed dataloaders not being reset when tuning the model ([#7566](https://github.com/Lightning-AI/pytorch-lightning/pull/7566))
+- Fixed print errors in `ProgressBar` when `trainer.fit` is not called ([#7674](https://github.com/Lightning-AI/pytorch-lightning/pull/7674))
+- Fixed global step update when the epoch is skipped ([#7677](https://github.com/Lightning-AI/pytorch-lightning/pull/7677))
+- Fixed training loop total batch counter when accumulate grad batches was enabled ([#7692](https://github.com/Lightning-AI/pytorch-lightning/pull/7692))

## [1.3.2] - 2021-05-18

### Changed

-- `DataModule`s now avoid duplicate `{setup,teardown,prepare_data}` calls for the same stage ([#7238](https://github.com/Lightning-AI/lightning/pull/7238))
+- `DataModule`s now avoid duplicate `{setup,teardown,prepare_data}` calls for the same stage ([#7238](https://github.com/Lightning-AI/pytorch-lightning/pull/7238))

### Fixed

-- Fixed parsing of multiple training dataloaders ([#7433](https://github.com/Lightning-AI/lightning/pull/7433))
-- Fixed recursive passing of `wrong_type` keyword argument in `pl.utilities.apply_to_collection` ([#7433](https://github.com/Lightning-AI/lightning/pull/7433))
-- Fixed setting correct `DistribType` for `ddp_cpu` (spawn) backend ([#7492](https://github.com/Lightning-AI/lightning/pull/7492))
-- Fixed incorrect number of calls to LR scheduler when `check_val_every_n_epoch > 1` ([#7032](https://github.com/Lightning-AI/lightning/pull/7032))
+- Fixed parsing of multiple training dataloaders ([#7433](https://github.com/Lightning-AI/pytorch-lightning/pull/7433))
+- Fixed recursive passing of `wrong_type` keyword argument in `pl.utilities.apply_to_collection` ([#7433](https://github.com/Lightning-AI/pytorch-lightning/pull/7433))
+- Fixed setting correct `DistribType` for `ddp_cpu` (spawn) backend ([#7492](https://github.com/Lightning-AI/pytorch-lightning/pull/7492))
+- Fixed incorrect number of calls to LR scheduler when `check_val_every_n_epoch > 1` ([#7032](https://github.com/Lightning-AI/pytorch-lightning/pull/7032))

## [1.3.1] - 2021-05-11

### Fixed
-- Fixed DeepSpeed with IterableDatasets ([#7362](https://github.com/Lightning-AI/lightning/pull/7362))
-- Fixed `Trainer.current_epoch` not getting restored after tuning ([#7434](https://github.com/Lightning-AI/lightning/pull/7434))
-- Fixed local rank displayed in console log ([#7395](https://github.com/Lightning-AI/lightning/pull/7395))
+- Fixed DeepSpeed with IterableDatasets ([#7362](https://github.com/Lightning-AI/pytorch-lightning/pull/7362))
+- Fixed `Trainer.current_epoch` not getting restored after tuning ([#7434](https://github.com/Lightning-AI/pytorch-lightning/pull/7434))
+- Fixed local rank displayed in console log ([#7395](https://github.com/Lightning-AI/pytorch-lightning/pull/7395))

## [1.3.0] - 2021-05-06

### Added

-- Added support for the `EarlyStopping` callback to run at the end of the training epoch ([#6944](https://github.com/Lightning-AI/lightning/pull/6944))
-- Added synchronization points before and after `setup` hooks are run ([#7202](https://github.com/Lightning-AI/lightning/pull/7202))
-- Added a `teardown` hook to `ClusterEnvironment` ([#6942](https://github.com/Lightning-AI/lightning/pull/6942))
-- Added utils for metrics to scalar conversions ([#7180](https://github.com/Lightning-AI/lightning/pull/7180))
-- Added utils for NaN/Inf detection for gradients and parameters ([#6834](https://github.com/Lightning-AI/lightning/pull/6834))
-- Added more explicit exception message when trying to execute `trainer.test()` or `trainer.validate()` with `fast_dev_run=True` ([#6667](https://github.com/Lightning-AI/lightning/pull/6667))
- Added `LightningCLI` class to provide simple reproducibility with minimum boilerplate training CLI (
- [#4492](https://github.com/Lightning-AI/lightning/pull/4492),
- [#6862](https://github.com/Lightning-AI/lightning/pull/6862),
- [#7156](https://github.com/Lightning-AI/lightning/pull/7156),
- [#7299](https://github.com/Lightning-AI/lightning/pull/7299))
-- Added `gradient_clip_algorithm` argument to Trainer for gradient clipping by value ([#6123](https://github.com/Lightning-AI/lightning/pull/6123)).
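The `gradient_clip_algorithm` entry above enables element-wise clipping as an alternative to norm-based clipping; a minimal sketch:

```python
import pytorch_lightning as pl

# clip each gradient element to [-0.5, 0.5] instead of rescaling by norm
trainer = pl.Trainer(gradient_clip_val=0.5, gradient_clip_algorithm="value")
```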
-- Added a way to print to terminal without breaking up the progress bar ([#5470](https://github.com/Lightning-AI/lightning/pull/5470))
-- Added support to checkpoint after training steps in `ModelCheckpoint` callback ([#6146](https://github.com/Lightning-AI/lightning/pull/6146))
-- Added `TrainerStatus.{INITIALIZING,RUNNING,FINISHED,INTERRUPTED}` ([#7173](https://github.com/Lightning-AI/lightning/pull/7173))
-- Added `Trainer.validate()` method to perform one evaluation epoch over the validation set ([#4948](https://github.com/Lightning-AI/lightning/pull/4948))
-- Added `LightningEnvironment` for Lightning-specific DDP ([#5915](https://github.com/Lightning-AI/lightning/pull/5915))
-- Added `teardown()` hook to LightningDataModule ([#4673](https://github.com/Lightning-AI/lightning/pull/4673))
-- Added `auto_insert_metric_name` parameter to `ModelCheckpoint` ([#6277](https://github.com/Lightning-AI/lightning/pull/6277))
-- Added arg to `self.log` that enables users to give custom names when dealing with multiple dataloaders ([#6274](https://github.com/Lightning-AI/lightning/pull/6274))
-- Added `teardown` method to `BaseProfiler` to enable subclasses defining post-profiling steps outside of `__del__` ([#6370](https://github.com/Lightning-AI/lightning/pull/6370))
-- Added `setup` method to `BaseProfiler` to enable subclasses defining pre-profiling steps for every process ([#6633](https://github.com/Lightning-AI/lightning/pull/6633))
-- Added no return warning to predict ([#6139](https://github.com/Lightning-AI/lightning/pull/6139))
-- Added `Trainer.predict` config validation ([#6543](https://github.com/Lightning-AI/lightning/pull/6543))
-- Added `AbstractProfiler` interface ([#6621](https://github.com/Lightning-AI/lightning/pull/6621))
-- Added support for including module names for forward in the autograd trace of `PyTorchProfiler` ([#6349](https://github.com/Lightning-AI/lightning/pull/6349))
-- Added support for the PyTorch 1.8.1 autograd profiler ([#6618](https://github.com/Lightning-AI/lightning/pull/6618))
-- Added `outputs` parameter to callback's `on_validation_epoch_end` & `on_test_epoch_end` hooks ([#6120](https://github.com/Lightning-AI/lightning/pull/6120))
-- Added `configure_sharded_model` hook ([#6679](https://github.com/Lightning-AI/lightning/pull/6679))
-- Added support for `precision=64`, enabling training with double precision ([#6595](https://github.com/Lightning-AI/lightning/pull/6595))
-- Added support for DDP communication hooks ([#6736](https://github.com/Lightning-AI/lightning/pull/6736))
-- Added `artifact_location` argument to `MLFlowLogger` which will be passed to the `MlflowClient.create_experiment` call ([#6677](https://github.com/Lightning-AI/lightning/pull/6677))
+ [#4492](https://github.com/Lightning-AI/pytorch-lightning/pull/4492),
+ [#6862](https://github.com/Lightning-AI/pytorch-lightning/pull/6862),
+ [#7156](https://github.com/Lightning-AI/pytorch-lightning/pull/7156),
+ [#7299](https://github.com/Lightning-AI/pytorch-lightning/pull/7299))
+- Added `gradient_clip_algorithm` argument to Trainer for gradient clipping by value ([#6123](https://github.com/Lightning-AI/pytorch-lightning/pull/6123)).
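As a usage note for the `LightningCLI` entries above, the class derives a command-line interface from the constructors' signatures and runs training in one call; a minimal sketch (the model and datamodule names are placeholders, and the import path assumes the 1.3-era location):

```python
from pytorch_lightning.utilities.cli import LightningCLI

from my_project import MyDataModule, MyModel  # hypothetical user code

# builds a parser from the constructors, instantiates, then calls fit
cli = LightningCLI(MyModel, MyDataModule)
```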
+- Added a way to print to terminal without breaking up the progress bar ([#5470](https://github.com/Lightning-AI/pytorch-lightning/pull/5470))
+- Added support to checkpoint after training steps in `ModelCheckpoint` callback ([#6146](https://github.com/Lightning-AI/pytorch-lightning/pull/6146))
+- Added `TrainerStatus.{INITIALIZING,RUNNING,FINISHED,INTERRUPTED}` ([#7173](https://github.com/Lightning-AI/pytorch-lightning/pull/7173))
+- Added `Trainer.validate()` method to perform one evaluation epoch over the validation set ([#4948](https://github.com/Lightning-AI/pytorch-lightning/pull/4948))
+- Added `LightningEnvironment` for Lightning-specific DDP ([#5915](https://github.com/Lightning-AI/pytorch-lightning/pull/5915))
+- Added `teardown()` hook to LightningDataModule ([#4673](https://github.com/Lightning-AI/pytorch-lightning/pull/4673))
+- Added `auto_insert_metric_name` parameter to `ModelCheckpoint` ([#6277](https://github.com/Lightning-AI/pytorch-lightning/pull/6277))
+- Added arg to `self.log` that enables users to give custom names when dealing with multiple dataloaders ([#6274](https://github.com/Lightning-AI/pytorch-lightning/pull/6274))
+- Added `teardown` method to `BaseProfiler` to enable subclasses defining post-profiling steps outside of `__del__` ([#6370](https://github.com/Lightning-AI/pytorch-lightning/pull/6370))
+- Added `setup` method to `BaseProfiler` to enable subclasses defining pre-profiling steps for every process ([#6633](https://github.com/Lightning-AI/pytorch-lightning/pull/6633))
+- Added no return warning to predict ([#6139](https://github.com/Lightning-AI/pytorch-lightning/pull/6139))
+- Added `Trainer.predict` config validation ([#6543](https://github.com/Lightning-AI/pytorch-lightning/pull/6543))
+- Added `AbstractProfiler` interface ([#6621](https://github.com/Lightning-AI/pytorch-lightning/pull/6621))
+- Added support for including module names for forward in the autograd trace of `PyTorchProfiler` ([#6349](https://github.com/Lightning-AI/pytorch-lightning/pull/6349))
+- Added support for the PyTorch 1.8.1 autograd profiler ([#6618](https://github.com/Lightning-AI/pytorch-lightning/pull/6618))
+- Added `outputs` parameter to callback's `on_validation_epoch_end` & `on_test_epoch_end` hooks ([#6120](https://github.com/Lightning-AI/pytorch-lightning/pull/6120))
+- Added `configure_sharded_model` hook ([#6679](https://github.com/Lightning-AI/pytorch-lightning/pull/6679))
+- Added support for `precision=64`, enabling training with double precision ([#6595](https://github.com/Lightning-AI/pytorch-lightning/pull/6595))
+- Added support for DDP communication hooks ([#6736](https://github.com/Lightning-AI/pytorch-lightning/pull/6736))
+- Added `artifact_location` argument to `MLFlowLogger` which will be passed to the `MlflowClient.create_experiment` call ([#6677](https://github.com/Lightning-AI/pytorch-lightning/pull/6677))
 - Added `model` parameter to precision plugins' `clip_gradients` signature (
-  [#6764](https://github.com/Lightning-AI/lightning/pull/6764),
-  [#7231](https://github.com/Lightning-AI/lightning/pull/7231))
-- Added `is_last_batch` attribute to `Trainer` ([#6825](https://github.com/Lightning-AI/lightning/pull/6825))
-- Added `LightningModule.lr_schedulers()` for manual optimization ([#6567](https://github.com/Lightning-AI/lightning/pull/6567))
-- Added `MpModelWrapper` in TPU Spawn ([#7045](https://github.com/Lightning-AI/lightning/pull/7045))
-- Added `max_time` Trainer argument to limit training time ([#6823](https://github.com/Lightning-AI/lightning/pull/6823))
-- Added `on_predict_{batch,epoch}_{start,end}` hooks ([#7141](https://github.com/Lightning-AI/lightning/pull/7141))
-- Added new `EarlyStopping` parameters `stopping_threshold` and `divergence_threshold` ([#6868](https://github.com/Lightning-AI/lightning/pull/6868))
-- Added `debug` flag to TPU Training Plugins (PT_XLA_DEBUG) ([#7219](https://github.com/Lightning-AI/lightning/pull/7219))
-- Added new `UnrepeatedDistributedSampler` and `IndexBatchSamplerWrapper` for tracking distributed predictions ([#7215](https://github.com/Lightning-AI/lightning/pull/7215))
-- Added `trainer.predict(return_predictions=None|False|True)` ([#7215](https://github.com/Lightning-AI/lightning/pull/7215))
-- Added `BasePredictionWriter` callback to implement prediction saving ([#7127](https://github.com/Lightning-AI/lightning/pull/7127))
-- Added `trainer.tune(scale_batch_size_kwargs, lr_find_kwargs)` arguments to configure the tuning algorithms ([#7258](https://github.com/Lightning-AI/lightning/pull/7258))
-- Added `tpu_distributed` check for TPU Spawn barrier ([#7241](https://github.com/Lightning-AI/lightning/pull/7241))
-- Added device updates to TPU Spawn for Pod training ([#7243](https://github.com/Lightning-AI/lightning/pull/7243))
-- Added warning when missing `Callback` and using `resume_from_checkpoint` ([#7254](https://github.com/Lightning-AI/lightning/pull/7254))
-- DeepSpeed single file saving ([#6900](https://github.com/Lightning-AI/lightning/pull/6900))
+  [#6764](https://github.com/Lightning-AI/pytorch-lightning/pull/6764),
+  [#7231](https://github.com/Lightning-AI/pytorch-lightning/pull/7231))
+- Added `is_last_batch` attribute to `Trainer` ([#6825](https://github.com/Lightning-AI/pytorch-lightning/pull/6825))
+- Added `LightningModule.lr_schedulers()` for manual optimization ([#6567](https://github.com/Lightning-AI/pytorch-lightning/pull/6567))
+- Added `MpModelWrapper` in TPU Spawn ([#7045](https://github.com/Lightning-AI/pytorch-lightning/pull/7045))
+- Added `max_time` Trainer argument to limit training time ([#6823](https://github.com/Lightning-AI/pytorch-lightning/pull/6823))
+- Added `on_predict_{batch,epoch}_{start,end}` hooks ([#7141](https://github.com/Lightning-AI/pytorch-lightning/pull/7141))
+- Added new `EarlyStopping` parameters `stopping_threshold` and `divergence_threshold` ([#6868](https://github.com/Lightning-AI/pytorch-lightning/pull/6868))
+- Added `debug` flag to TPU Training Plugins (PT_XLA_DEBUG) ([#7219](https://github.com/Lightning-AI/pytorch-lightning/pull/7219))
+- Added new `UnrepeatedDistributedSampler` and `IndexBatchSamplerWrapper` for tracking distributed predictions ([#7215](https://github.com/Lightning-AI/pytorch-lightning/pull/7215))
+- Added `trainer.predict(return_predictions=None|False|True)` ([#7215](https://github.com/Lightning-AI/pytorch-lightning/pull/7215))
+- Added `BasePredictionWriter` callback to implement prediction saving ([#7127](https://github.com/Lightning-AI/pytorch-lightning/pull/7127))
+- Added `trainer.tune(scale_batch_size_kwargs, lr_find_kwargs)` arguments to configure the tuning algorithms ([#7258](https://github.com/Lightning-AI/pytorch-lightning/pull/7258))
+- Added `tpu_distributed` check for TPU Spawn barrier ([#7241](https://github.com/Lightning-AI/pytorch-lightning/pull/7241))
+- Added device updates to TPU Spawn for Pod training ([#7243](https://github.com/Lightning-AI/pytorch-lightning/pull/7243))
+- Added warning when missing `Callback` and using `resume_from_checkpoint` ([#7254](https://github.com/Lightning-AI/pytorch-lightning/pull/7254))
+- DeepSpeed single file saving ([#6900](https://github.com/Lightning-AI/pytorch-lightning/pull/6900))
 - Added Training type Plugins Registry (
-  [#6982](https://github.com/Lightning-AI/lightning/pull/6982),
-  [#7063](https://github.com/Lightning-AI/lightning/pull/7063),
-  [#7214](https://github.com/Lightning-AI/lightning/pull/7214),
-  [#7224](https://github.com/Lightning-AI/lightning/pull/7224)
+  [#6982](https://github.com/Lightning-AI/pytorch-lightning/pull/6982),
+  [#7063](https://github.com/Lightning-AI/pytorch-lightning/pull/7063),
+  [#7214](https://github.com/Lightning-AI/pytorch-lightning/pull/7214),
+  [#7224](https://github.com/Lightning-AI/pytorch-lightning/pull/7224)
 )
-- Add `ignore` param to `save_hyperparameters` ([#6056](https://github.com/Lightning-AI/lightning/pull/6056))
+- Add `ignore` param to `save_hyperparameters` ([#6056](https://github.com/Lightning-AI/pytorch-lightning/pull/6056))

 ### Changed

-- Changed `LightningModule.truncated_bptt_steps` to be property ([#7323](https://github.com/Lightning-AI/lightning/pull/7323))
-- Changed `EarlyStopping` callback from by default running `EarlyStopping.on_validation_end` if only training is run. Set `check_on_train_epoch_end` to run the callback at the end of the train epoch instead of at the end of the validation epoch ([#7069](https://github.com/Lightning-AI/lightning/pull/7069))
-- Renamed `pl.callbacks.swa` to `pl.callbacks.stochastic_weight_avg` ([#6259](https://github.com/Lightning-AI/lightning/pull/6259))
+- Changed `LightningModule.truncated_bptt_steps` to be property ([#7323](https://github.com/Lightning-AI/pytorch-lightning/pull/7323))
+- Changed `EarlyStopping` callback from by default running `EarlyStopping.on_validation_end` if only training is run. Set `check_on_train_epoch_end` to run the callback at the end of the train epoch instead of at the end of the validation epoch ([#7069](https://github.com/Lightning-AI/pytorch-lightning/pull/7069))
+- Renamed `pl.callbacks.swa` to `pl.callbacks.stochastic_weight_avg` ([#6259](https://github.com/Lightning-AI/pytorch-lightning/pull/6259))
 - Refactor `RunningStage` and `TrainerState` usage (
-  [#4945](https://github.com/Lightning-AI/lightning/pull/4945),
-  [#7173](https://github.com/Lightning-AI/lightning/pull/7173))
+  [#4945](https://github.com/Lightning-AI/pytorch-lightning/pull/4945),
+  [#7173](https://github.com/Lightning-AI/pytorch-lightning/pull/7173))
   * Added `RunningStage.SANITY_CHECKING`
   * Added `TrainerFn.{FITTING,VALIDATING,TESTING,PREDICTING,TUNING}`
   * Changed `trainer.evaluating` to return `True` if validating or testing
-- Changed `setup()` and `teardown()` stage argument to take any of `{fit,validate,test,predict}` ([#6386](https://github.com/Lightning-AI/lightning/pull/6386))
-- Changed profilers to save separate report files per state and rank ([#6621](https://github.com/Lightning-AI/lightning/pull/6621))
-- The trainer no longer tries to save a checkpoint on exception or run callback's `on_train_end` functions ([#6864](https://github.com/Lightning-AI/lightning/pull/6864))
-- Changed `PyTorchProfiler` to use `torch.autograd.profiler.record_function` to record functions ([#6349](https://github.com/Lightning-AI/lightning/pull/6349))
-- Disabled `lr_scheduler.step()` in manual optimization ([#6825](https://github.com/Lightning-AI/lightning/pull/6825))
-- Changed warnings and recommendations for dataloaders in `ddp_spawn` ([#6762](https://github.com/Lightning-AI/lightning/pull/6762))
-- `pl.seed_everything` will now also set the seed on the `DistributedSampler` ([#7024](https://github.com/Lightning-AI/lightning/pull/7024))
-- Changed default setting for communication of multi-node training using `DDPShardedPlugin` ([#6937](https://github.com/Lightning-AI/lightning/pull/6937))
-- `trainer.tune()` now returns the tuning result ([#7258](https://github.com/Lightning-AI/lightning/pull/7258))
-- `LightningModule.from_datasets()` now accepts `IterableDataset` instances as training datasets. ([#7503](https://github.com/Lightning-AI/lightning/pull/7503))
-- Changed `resume_from_checkpoint` warning to an error when the checkpoint file does not exist ([#7075](https://github.com/Lightning-AI/lightning/pull/7075))
-- Automatically set `sync_batchnorm` for `training_type_plugin` ([#6536](https://github.com/Lightning-AI/lightning/pull/6536))
-- Allowed training type plugin to delay optimizer creation ([#6331](https://github.com/Lightning-AI/lightning/pull/6331))
-- Removed ModelSummary validation from train loop on_trainer_init ([#6610](https://github.com/Lightning-AI/lightning/pull/6610))
-- Moved `save_function` to accelerator ([#6689](https://github.com/Lightning-AI/lightning/pull/6689))
-- Updated DeepSpeed ZeRO ([#6546](https://github.com/Lightning-AI/lightning/pull/6546),
-  [#6752](https://github.com/Lightning-AI/lightning/pull/6752),
-  [#6142](https://github.com/Lightning-AI/lightning/pull/6142),
-  [#6321](https://github.com/Lightning-AI/lightning/pull/6321))
-- Improved verbose logging for `EarlyStopping` callback ([#6811](https://github.com/Lightning-AI/lightning/pull/6811))
-- Run ddp_spawn dataloader checks on Windows ([#6930](https://github.com/Lightning-AI/lightning/pull/6930))
-- Updated mlflow with using `resolve_tags` ([#6746](https://github.com/Lightning-AI/lightning/pull/6746))
-- Moved `save_hyperparameters` to its own function ([#7119](https://github.com/Lightning-AI/lightning/pull/7119))
-- Replaced `_DataModuleWrapper` with `__new__` ([#7289](https://github.com/Lightning-AI/lightning/pull/7289))
-- Reset `current_fx` properties on lightning module in teardown ([#7247](https://github.com/Lightning-AI/lightning/pull/7247))
-- Auto-set `DataLoader.worker_init_fn` with `seed_everything` ([#6960](https://github.com/Lightning-AI/lightning/pull/6960))
-- Remove `model.trainer` call inside of dataloading mixin ([#7317](https://github.com/Lightning-AI/lightning/pull/7317))
-- Split profilers module ([#6261](https://github.com/Lightning-AI/lightning/pull/6261))
-- Ensure accelerator is valid if running interactively ([#5970](https://github.com/Lightning-AI/lightning/pull/5970))
-- Disabled batch transfer in DP mode ([#6098](https://github.com/Lightning-AI/lightning/pull/6098))
+- Changed `setup()` and `teardown()` stage argument to take any of `{fit,validate,test,predict}` ([#6386](https://github.com/Lightning-AI/pytorch-lightning/pull/6386))
+- Changed profilers to save separate report files per state and rank ([#6621](https://github.com/Lightning-AI/pytorch-lightning/pull/6621))
+- The trainer no longer tries to save a checkpoint on exception or run callback's `on_train_end` functions ([#6864](https://github.com/Lightning-AI/pytorch-lightning/pull/6864))
+- Changed `PyTorchProfiler` to use `torch.autograd.profiler.record_function` to record functions ([#6349](https://github.com/Lightning-AI/pytorch-lightning/pull/6349))
+- Disabled `lr_scheduler.step()` in manual optimization ([#6825](https://github.com/Lightning-AI/pytorch-lightning/pull/6825))
+- Changed warnings and recommendations for dataloaders in `ddp_spawn` ([#6762](https://github.com/Lightning-AI/pytorch-lightning/pull/6762))
+- `pl.seed_everything` will now also set the seed on the `DistributedSampler` ([#7024](https://github.com/Lightning-AI/pytorch-lightning/pull/7024))
+- Changed default setting for communication of multi-node training using `DDPShardedPlugin` ([#6937](https://github.com/Lightning-AI/pytorch-lightning/pull/6937))
+- `trainer.tune()` now returns the tuning result ([#7258](https://github.com/Lightning-AI/pytorch-lightning/pull/7258))
+- `LightningModule.from_datasets()` now accepts `IterableDataset` instances as training datasets. ([#7503](https://github.com/Lightning-AI/pytorch-lightning/pull/7503))
+- Changed `resume_from_checkpoint` warning to an error when the checkpoint file does not exist ([#7075](https://github.com/Lightning-AI/pytorch-lightning/pull/7075))
+- Automatically set `sync_batchnorm` for `training_type_plugin` ([#6536](https://github.com/Lightning-AI/pytorch-lightning/pull/6536))
+- Allowed training type plugin to delay optimizer creation ([#6331](https://github.com/Lightning-AI/pytorch-lightning/pull/6331))
+- Removed ModelSummary validation from train loop on_trainer_init ([#6610](https://github.com/Lightning-AI/pytorch-lightning/pull/6610))
+- Moved `save_function` to accelerator ([#6689](https://github.com/Lightning-AI/pytorch-lightning/pull/6689))
+- Updated DeepSpeed ZeRO ([#6546](https://github.com/Lightning-AI/pytorch-lightning/pull/6546),
+  [#6752](https://github.com/Lightning-AI/pytorch-lightning/pull/6752),
+  [#6142](https://github.com/Lightning-AI/pytorch-lightning/pull/6142),
+  [#6321](https://github.com/Lightning-AI/pytorch-lightning/pull/6321))
+- Improved verbose logging for `EarlyStopping` callback ([#6811](https://github.com/Lightning-AI/pytorch-lightning/pull/6811))
+- Run ddp_spawn dataloader checks on Windows ([#6930](https://github.com/Lightning-AI/pytorch-lightning/pull/6930))
+- Updated mlflow with using `resolve_tags` ([#6746](https://github.com/Lightning-AI/pytorch-lightning/pull/6746))
+- Moved `save_hyperparameters` to its own function ([#7119](https://github.com/Lightning-AI/pytorch-lightning/pull/7119))
+- Replaced `_DataModuleWrapper` with `__new__` ([#7289](https://github.com/Lightning-AI/pytorch-lightning/pull/7289))
+- Reset `current_fx` properties on lightning module in teardown ([#7247](https://github.com/Lightning-AI/pytorch-lightning/pull/7247))
+- Auto-set `DataLoader.worker_init_fn` with `seed_everything` ([#6960](https://github.com/Lightning-AI/pytorch-lightning/pull/6960))
+- Remove `model.trainer` call inside of dataloading mixin ([#7317](https://github.com/Lightning-AI/pytorch-lightning/pull/7317))
+- Split profilers module ([#6261](https://github.com/Lightning-AI/pytorch-lightning/pull/6261))
+- Ensure accelerator is valid if running interactively ([#5970](https://github.com/Lightning-AI/pytorch-lightning/pull/5970))
+- Disabled batch transfer in DP mode ([#6098](https://github.com/Lightning-AI/pytorch-lightning/pull/6098))

 ### Deprecated

-- Deprecated `outputs` in both `LightningModule.on_train_epoch_end` and `Callback.on_train_epoch_end` hooks ([#7339](https://github.com/Lightning-AI/lightning/pull/7339))
-- Deprecated `Trainer.truncated_bptt_steps` in favor of `LightningModule.truncated_bptt_steps` ([#7323](https://github.com/Lightning-AI/lightning/pull/7323))
-- Deprecated `outputs` in both `LightningModule.on_train_epoch_end` and `Callback.on_train_epoch_end` hooks ([#7339](https://github.com/Lightning-AI/lightning/pull/7339))
-- Deprecated `LightningModule.grad_norm` in favor of `pl.utilities.grads.grad_norm` ([#7292](https://github.com/Lightning-AI/lightning/pull/7292))
-- Deprecated the `save_function` property from the `ModelCheckpoint` callback ([#7201](https://github.com/Lightning-AI/lightning/pull/7201))
-- Deprecated `LightningModule.write_predictions` and `LightningModule.write_predictions_dict` ([#7066](https://github.com/Lightning-AI/lightning/pull/7066))
-- Deprecated `TrainerLoggingMixin` in favor of a separate utilities module for metric handling ([#7180](https://github.com/Lightning-AI/lightning/pull/7180))
-- Deprecated `TrainerTrainingTricksMixin` in favor of a separate utilities module for NaN/Inf detection for gradients and parameters ([#6834](https://github.com/Lightning-AI/lightning/pull/6834))
-- `period` has been deprecated in favor of `every_n_val_epochs` in the `ModelCheckpoint` callback ([#6146](https://github.com/Lightning-AI/lightning/pull/6146))
-- Deprecated `trainer.running_sanity_check` in favor of `trainer.sanity_checking` ([#4945](https://github.com/Lightning-AI/lightning/pull/4945))
-- Deprecated `Profiler(output_filename)` in favor of `dirpath` and `filename` ([#6621](https://github.com/Lightning-AI/lightning/pull/6621))
-- Deprecated `PyTorchProfiler(profiled_functions)` in favor of `record_functions` ([#6349](https://github.com/Lightning-AI/lightning/pull/6349))
-- Deprecated `@auto_move_data` in favor of `trainer.predict` ([#6993](https://github.com/Lightning-AI/lightning/pull/6993))
-- Deprecated `Callback.on_load_checkpoint(checkpoint)` in favor of `Callback.on_load_checkpoint(trainer, pl_module, checkpoint)` ([#7253](https://github.com/Lightning-AI/lightning/pull/7253))
+- Deprecated `outputs` in both `LightningModule.on_train_epoch_end` and `Callback.on_train_epoch_end` hooks ([#7339](https://github.com/Lightning-AI/pytorch-lightning/pull/7339))
+- Deprecated `Trainer.truncated_bptt_steps` in favor of `LightningModule.truncated_bptt_steps` ([#7323](https://github.com/Lightning-AI/pytorch-lightning/pull/7323))
+- Deprecated `outputs` in both `LightningModule.on_train_epoch_end` and `Callback.on_train_epoch_end` hooks ([#7339](https://github.com/Lightning-AI/pytorch-lightning/pull/7339))
+- Deprecated `LightningModule.grad_norm` in favor of `pl.utilities.grads.grad_norm` ([#7292](https://github.com/Lightning-AI/pytorch-lightning/pull/7292))
+- Deprecated the `save_function` property from the `ModelCheckpoint` callback ([#7201](https://github.com/Lightning-AI/pytorch-lightning/pull/7201))
+- Deprecated `LightningModule.write_predictions` and `LightningModule.write_predictions_dict` ([#7066](https://github.com/Lightning-AI/pytorch-lightning/pull/7066))
+- Deprecated `TrainerLoggingMixin` in favor of a separate utilities module for metric handling ([#7180](https://github.com/Lightning-AI/pytorch-lightning/pull/7180))
+- Deprecated `TrainerTrainingTricksMixin` in favor of a separate utilities module for NaN/Inf detection for gradients and parameters ([#6834](https://github.com/Lightning-AI/pytorch-lightning/pull/6834))
+- `period` has been deprecated in favor of `every_n_val_epochs` in the `ModelCheckpoint` callback ([#6146](https://github.com/Lightning-AI/pytorch-lightning/pull/6146))
+- Deprecated `trainer.running_sanity_check` in favor of `trainer.sanity_checking` ([#4945](https://github.com/Lightning-AI/pytorch-lightning/pull/4945))
+- Deprecated `Profiler(output_filename)` in favor of `dirpath` and `filename` ([#6621](https://github.com/Lightning-AI/pytorch-lightning/pull/6621))
+- Deprecated `PyTorchProfiler(profiled_functions)` in favor of `record_functions` ([#6349](https://github.com/Lightning-AI/pytorch-lightning/pull/6349))
+- Deprecated `@auto_move_data` in favor of `trainer.predict` ([#6993](https://github.com/Lightning-AI/pytorch-lightning/pull/6993))
+- Deprecated `Callback.on_load_checkpoint(checkpoint)` in favor of `Callback.on_load_checkpoint(trainer, pl_module, checkpoint)` ([#7253](https://github.com/Lightning-AI/pytorch-lightning/pull/7253))
 - Deprecated metrics in favor of `torchmetrics` (
-  [#6505](https://github.com/Lightning-AI/lightning/pull/6505),
-  [#6530](https://github.com/Lightning-AI/lightning/pull/6530),
-  [#6540](https://github.com/Lightning-AI/lightning/pull/6540),
-  [#6547](https://github.com/Lightning-AI/lightning/pull/6547),
-  [#6515](https://github.com/Lightning-AI/lightning/pull/6515),
-  [#6572](https://github.com/Lightning-AI/lightning/pull/6572),
-  [#6573](https://github.com/Lightning-AI/lightning/pull/6573),
-  [#6584](https://github.com/Lightning-AI/lightning/pull/6584),
-  [#6636](https://github.com/Lightning-AI/lightning/pull/6636),
-  [#6637](https://github.com/Lightning-AI/lightning/pull/6637),
-  [#6649](https://github.com/Lightning-AI/lightning/pull/6649),
-  [#6659](https://github.com/Lightning-AI/lightning/pull/6659),
-  [#7131](https://github.com/Lightning-AI/lightning/pull/7131),
+  [#6505](https://github.com/Lightning-AI/pytorch-lightning/pull/6505),
+  [#6530](https://github.com/Lightning-AI/pytorch-lightning/pull/6530),
+  [#6540](https://github.com/Lightning-AI/pytorch-lightning/pull/6540),
+  [#6547](https://github.com/Lightning-AI/pytorch-lightning/pull/6547),
+  [#6515](https://github.com/Lightning-AI/pytorch-lightning/pull/6515),
+  [#6572](https://github.com/Lightning-AI/pytorch-lightning/pull/6572),
+  [#6573](https://github.com/Lightning-AI/pytorch-lightning/pull/6573),
+  [#6584](https://github.com/Lightning-AI/pytorch-lightning/pull/6584),
+  [#6636](https://github.com/Lightning-AI/pytorch-lightning/pull/6636),
+  [#6637](https://github.com/Lightning-AI/pytorch-lightning/pull/6637),
+  [#6649](https://github.com/Lightning-AI/pytorch-lightning/pull/6649),
+  [#6659](https://github.com/Lightning-AI/pytorch-lightning/pull/6659),
+  [#7131](https://github.com/Lightning-AI/pytorch-lightning/pull/7131),
 )
-- Deprecated the `LightningModule.datamodule` getter and setter methods; access them through `Trainer.datamodule` instead ([#7168](https://github.com/Lightning-AI/lightning/pull/7168))
-- Deprecated the use of `Trainer(gpus="i")` (string) for selecting the i-th GPU; from v1.5 this will set the number of GPUs instead of the index ([#6388](https://github.com/Lightning-AI/lightning/pull/6388))
+- Deprecated the `LightningModule.datamodule` getter and setter methods; access them through `Trainer.datamodule` instead ([#7168](https://github.com/Lightning-AI/pytorch-lightning/pull/7168))
+- Deprecated the use of `Trainer(gpus="i")` (string) for selecting the i-th GPU; from v1.5 this will set the number of GPUs instead of the index ([#6388](https://github.com/Lightning-AI/pytorch-lightning/pull/6388))

 ### Removed

-- Removed the `exp_save_path` property from the `LightningModule` ([#7266](https://github.com/Lightning-AI/lightning/pull/7266))
-- Removed training loop explicitly calling `EarlyStopping.on_validation_end` if no validation is run ([#7069](https://github.com/Lightning-AI/lightning/pull/7069))
-- Removed `automatic_optimization` as a property from the training loop in favor of `LightningModule.automatic_optimization` ([#7130](https://github.com/Lightning-AI/lightning/pull/7130))
-- Removed evaluation loop legacy returns for `*_epoch_end` hooks ([#6973](https://github.com/Lightning-AI/lightning/pull/6973))
-- Removed support for passing a bool value to `profiler` argument of Trainer ([#6164](https://github.com/Lightning-AI/lightning/pull/6164))
-- Removed no return warning from val/test step ([#6139](https://github.com/Lightning-AI/lightning/pull/6139))
-- Removed passing a `ModelCheckpoint` instance to `Trainer(checkpoint_callback)` ([#6166](https://github.com/Lightning-AI/lightning/pull/6166))
-- Removed deprecated Trainer argument `enable_pl_optimizer` and `automatic_optimization` ([#6163](https://github.com/Lightning-AI/lightning/pull/6163))
-- Removed deprecated metrics ([#6161](https://github.com/Lightning-AI/lightning/pull/6161))
+- Removed the `exp_save_path` property from the `LightningModule` ([#7266](https://github.com/Lightning-AI/pytorch-lightning/pull/7266))
+- Removed training loop explicitly calling `EarlyStopping.on_validation_end` if no validation is run ([#7069](https://github.com/Lightning-AI/pytorch-lightning/pull/7069))
+- Removed `automatic_optimization` as a property from the training loop in favor of `LightningModule.automatic_optimization` ([#7130](https://github.com/Lightning-AI/pytorch-lightning/pull/7130))
+- Removed evaluation loop legacy returns for `*_epoch_end` hooks ([#6973](https://github.com/Lightning-AI/pytorch-lightning/pull/6973))
+- Removed support for passing a bool value to `profiler` argument of Trainer ([#6164](https://github.com/Lightning-AI/pytorch-lightning/pull/6164))
+- Removed no return warning from val/test step ([#6139](https://github.com/Lightning-AI/pytorch-lightning/pull/6139))
+- Removed passing a `ModelCheckpoint` instance to `Trainer(checkpoint_callback)` ([#6166](https://github.com/Lightning-AI/pytorch-lightning/pull/6166))
+- Removed deprecated Trainer argument `enable_pl_optimizer` and `automatic_optimization` ([#6163](https://github.com/Lightning-AI/pytorch-lightning/pull/6163))
+- Removed deprecated metrics ([#6161](https://github.com/Lightning-AI/pytorch-lightning/pull/6161))
   * from `pl.metrics.functional.classification` removed `to_onehot`, `to_categorical`, `get_num_classes`, `roc`, `multiclass_roc`, `average_precision`, `precision_recall_curve`, `multiclass_precision_recall_curve`
   * from `pl.metrics.functional.reduction` removed `reduce`, `class_reduce`
-- Removed deprecated `ModelCheckpoint` arguments `prefix`, `mode="auto"` ([#6162](https://github.com/Lightning-AI/lightning/pull/6162))
-- Removed `mode='auto'` from `EarlyStopping` ([#6167](https://github.com/Lightning-AI/lightning/pull/6167))
-- Removed `epoch` and `step` arguments from `ModelCheckpoint.format_checkpoint_name()`, these are now included in the `metrics` argument ([#7344](https://github.com/Lightning-AI/lightning/pull/7344))
-- Removed legacy references for magic keys in the `Result` object ([#6016](https://github.com/Lightning-AI/lightning/pull/6016))
-- Removed deprecated `LightningModule` `hparams` setter ([#6207](https://github.com/Lightning-AI/lightning/pull/6207))
-- Removed legacy code to log or include metrics in the progress bar by returning them in a dict with the `"log"/"progress_bar"` magic keys. Use `self.log` instead ([#6734](https://github.com/Lightning-AI/lightning/pull/6734))
-- Removed `trainer.fit()` return value of `1`. It has no return now ([#7237](https://github.com/Lightning-AI/lightning/pull/7237))
-- Removed `logger_connector` legacy code ([#6733](https://github.com/Lightning-AI/lightning/pull/6733))
-- Removed unused mixin attributes ([#6487](https://github.com/Lightning-AI/lightning/pull/6487))
+- Removed deprecated `ModelCheckpoint` arguments `prefix`, `mode="auto"` ([#6162](https://github.com/Lightning-AI/pytorch-lightning/pull/6162))
+- Removed `mode='auto'` from `EarlyStopping` ([#6167](https://github.com/Lightning-AI/pytorch-lightning/pull/6167))
+- Removed `epoch` and `step` arguments from `ModelCheckpoint.format_checkpoint_name()`, these are now included in the `metrics` argument ([#7344](https://github.com/Lightning-AI/pytorch-lightning/pull/7344))
+- Removed legacy references for magic keys in the `Result` object ([#6016](https://github.com/Lightning-AI/pytorch-lightning/pull/6016))
+- Removed deprecated `LightningModule` `hparams` setter ([#6207](https://github.com/Lightning-AI/pytorch-lightning/pull/6207))
+- Removed legacy code to log or include metrics in the progress bar by returning them in a dict with the `"log"/"progress_bar"` magic keys. Use `self.log` instead ([#6734](https://github.com/Lightning-AI/pytorch-lightning/pull/6734))
+- Removed `trainer.fit()` return value of `1`. It has no return now ([#7237](https://github.com/Lightning-AI/pytorch-lightning/pull/7237))
+- Removed `logger_connector` legacy code ([#6733](https://github.com/Lightning-AI/pytorch-lightning/pull/6733))
+- Removed unused mixin attributes ([#6487](https://github.com/Lightning-AI/pytorch-lightning/pull/6487))

 ### Fixed

-- Fixed NaN errors in progress bars when training with iterable datasets with no length defined ([#7306](https://github.com/Lightning-AI/lightning/pull/7306))
-- Fixed attaching train and validation dataloaders when `reload_dataloaders_every_epoch=True` and `num_sanity_val_steps=0` ([#7207](https://github.com/Lightning-AI/lightning/pull/7207))
-- Added a barrier in the accelerator `teardown` to synchronize processes before execution finishes ([#6814](https://github.com/Lightning-AI/lightning/pull/6814))
-- Fixed multi-node DDP sub-process launch by using `local_rank` instead of `global_rank` for main process assertion ([#7061](https://github.com/Lightning-AI/lightning/pull/7061))
-- Fixed incorrect removal of `WORLD_SIZE` environment variable in DDP training when launching with torch distributed/torchelastic ([#6942](https://github.com/Lightning-AI/lightning/pull/6942))
-- Made the `Plugin.reduce` method more consistent across all Plugins to reflect a mean-reduction by default ([#6011](https://github.com/Lightning-AI/lightning/pull/6011))
-- Move lightning module to correct device type when using LightningDistributedWrapper ([#6070](https://github.com/Lightning-AI/lightning/pull/6070))
-- Do not print top-k verbose log with `ModelCheckpoint(monitor=None)` ([#6109](https://github.com/Lightning-AI/lightning/pull/6109))
-- Fixed `ModelCheckpoint(save_top_k=0, save_last=True)` not saving the `last` checkpoint ([#6136](https://github.com/Lightning-AI/lightning/pull/6136))
-- Fixed `.teardown(stage='fit')` and `.on_fit_{start,end}()` getting called during `trainer.test` ([#6386](https://github.com/Lightning-AI/lightning/pull/6386))
-- Fixed LightningModule `all_gather` on cpu tensors ([#6416](https://github.com/Lightning-AI/lightning/pull/6416))
-- Fixed torch distributed not available in setup hook for DDP ([#6506](https://github.com/Lightning-AI/lightning/pull/6506))
-- Fixed `trainer.tuner.{lr_find,scale_batch_size}` not setting the `Trainer` state properly ([#7258](https://github.com/Lightning-AI/lightning/pull/7258))
-- Fixed bug where the learning rate schedulers did not follow the optimizer frequencies ([#4868](https://github.com/Lightning-AI/lightning/pull/4868))
-- Fixed pickle error checker to now check for `pickle.PickleError` to catch all pickle errors ([#6917](https://github.com/Lightning-AI/lightning/pull/6917))
-- Fixed a bug where the outputs object passed to `LightningModule.training_epoch_end` was different from the object passed to the `on_train_end_epoch` hook ([#6969](https://github.com/Lightning-AI/lightning/pull/6969))
-- Fixed a bug where the outputs passed to `train_batch_end` would be lists even when using a single optimizer and no truncated backprop through time steps ([#6969](https://github.com/Lightning-AI/lightning/pull/6969))
-- Fixed bug for trainer error handling which would cause hang for distributed training ([#6864](https://github.com/Lightning-AI/lightning/pull/6864))
-- Fixed `self.device` not returning the correct device in replicas of data-parallel ([#6414](https://github.com/Lightning-AI/lightning/pull/6414))
-- Fixed `lr_find` trying beyond `num_training` steps and suggesting a too high learning rate ([#7076](https://github.com/Lightning-AI/lightning/pull/7076))
-- Fixed logger creating incorrect version folder in DDP with repeated `Trainer.fit` calls ([#7077](https://github.com/Lightning-AI/lightning/pull/7077))
-- Fixed metric objects passed directly to `self.log` not being reset correctly ([#7055](https://github.com/Lightning-AI/lightning/pull/7055))
-- Fixed `CombinedLoader` in distributed settings for validation / testing ([#7102](https://github.com/Lightning-AI/lightning/pull/7102))
-- Fixed the save_dir in `WandbLogger` when the run was initiated externally ([#7106](https://github.com/Lightning-AI/lightning/pull/7106))
-- Fixed `num_sanity_val_steps` affecting reproducibility of training data shuffling ([#7014](https://github.com/Lightning-AI/lightning/pull/7014))
-- Fixed resetting device after `fitting/evaluating/predicting` ([#7188](https://github.com/Lightning-AI/lightning/pull/7188))
-- Fixed bug where `trainer.tuner.scale_batch_size(max_trials=0)` would not return the correct batch size result ([#7262](https://github.com/Lightning-AI/lightning/pull/7262))
-- Fixed metrics not being properly logged with `precision=16` and `manual_optimization` ([#7228](https://github.com/Lightning-AI/lightning/pull/7228))
-- Fixed `BaseFinetuning` properly reloading `optimizer_states` when using `resume_from_checkpoint` ([#6891](https://github.com/Lightning-AI/lightning/pull/6891))
-- Fixed `parameters_to_ignore` not properly set to DDPWrapper ([#7239](https://github.com/Lightning-AI/lightning/pull/7239))
-- Fixed parsing of `fast_dev_run=True` with the built-in `ArgumentParser` ([#7240](https://github.com/Lightning-AI/lightning/pull/7240))
-- Fixed handling an `IterableDataset` that fails to produce a batch at the beginning of an epoch ([#7294](https://github.com/Lightning-AI/lightning/pull/7294))
-- Fixed `LightningModule.save_hyperparameters()` when attempting to save an empty container ([#7268](https://github.com/Lightning-AI/lightning/pull/7268))
-- Fixed `apex` not properly instantiated when running with `ddp` ([#7274](https://github.com/Lightning-AI/lightning/pull/7274))
-- Fixed optimizer `state` not moved to `GPU` ([#7277](https://github.com/Lightning-AI/lightning/pull/7277))
-- Fixed custom init args for `WandbLogger` ([#6989](https://github.com/Lightning-AI/lightning/pull/6989))
-- Fixed a bug where an error would be raised if the train dataloader sometimes produced None for a batch ([#7342](https://github.com/Lightning-AI/lightning/pull/7342))
+- Fixed NaN errors in progress bars when training with iterable datasets with no length defined ([#7306](https://github.com/Lightning-AI/pytorch-lightning/pull/7306))
+- Fixed attaching train and validation dataloaders when `reload_dataloaders_every_epoch=True` and `num_sanity_val_steps=0` ([#7207](https://github.com/Lightning-AI/pytorch-lightning/pull/7207))
+- Added a barrier in the accelerator `teardown` to synchronize processes before execution finishes ([#6814](https://github.com/Lightning-AI/pytorch-lightning/pull/6814))
+- Fixed multi-node DDP sub-process launch by using `local_rank` instead of `global_rank` for main process assertion ([#7061](https://github.com/Lightning-AI/pytorch-lightning/pull/7061))
+- Fixed incorrect removal of `WORLD_SIZE` environment variable in DDP training when launching with torch distributed/torchelastic ([#6942](https://github.com/Lightning-AI/pytorch-lightning/pull/6942))
+- Made the `Plugin.reduce` method more consistent across all Plugins to reflect a mean-reduction by default ([#6011](https://github.com/Lightning-AI/pytorch-lightning/pull/6011))
+- Move lightning module to correct device type when using LightningDistributedWrapper ([#6070](https://github.com/Lightning-AI/pytorch-lightning/pull/6070))
+- Do not print top-k verbose log with `ModelCheckpoint(monitor=None)` ([#6109](https://github.com/Lightning-AI/pytorch-lightning/pull/6109))
+- Fixed `ModelCheckpoint(save_top_k=0, save_last=True)` not saving the `last` checkpoint ([#6136](https://github.com/Lightning-AI/pytorch-lightning/pull/6136))
+- Fixed `.teardown(stage='fit')` and `.on_fit_{start,end}()` getting called during `trainer.test` ([#6386](https://github.com/Lightning-AI/pytorch-lightning/pull/6386))
+- Fixed LightningModule `all_gather` on cpu tensors ([#6416](https://github.com/Lightning-AI/pytorch-lightning/pull/6416))
+- Fixed torch distributed not available in setup hook for DDP ([#6506](https://github.com/Lightning-AI/pytorch-lightning/pull/6506))
+- Fixed `trainer.tuner.{lr_find,scale_batch_size}` not setting the `Trainer` state properly ([#7258](https://github.com/Lightning-AI/pytorch-lightning/pull/7258))
+- Fixed bug where the learning rate schedulers did not follow the optimizer frequencies ([#4868](https://github.com/Lightning-AI/pytorch-lightning/pull/4868))
+- Fixed pickle error checker to now check for `pickle.PickleError` to catch all pickle errors ([#6917](https://github.com/Lightning-AI/pytorch-lightning/pull/6917))
+- Fixed a bug where the outputs object passed to `LightningModule.training_epoch_end` was different from the object passed to the `on_train_end_epoch` hook ([#6969](https://github.com/Lightning-AI/pytorch-lightning/pull/6969))
+- Fixed a bug where the outputs passed to `train_batch_end` would be lists even when using a single optimizer and no truncated backprop through time steps ([#6969](https://github.com/Lightning-AI/pytorch-lightning/pull/6969))
+- Fixed bug for trainer error handling which would cause hang for distributed training ([#6864](https://github.com/Lightning-AI/pytorch-lightning/pull/6864))
+- Fixed `self.device` not returning the correct device in replicas of data-parallel ([#6414](https://github.com/Lightning-AI/pytorch-lightning/pull/6414))
+- Fixed `lr_find` trying beyond `num_training` steps and suggesting a too high learning rate ([#7076](https://github.com/Lightning-AI/pytorch-lightning/pull/7076))
+- Fixed logger creating incorrect version folder in DDP with repeated `Trainer.fit` calls ([#7077](https://github.com/Lightning-AI/pytorch-lightning/pull/7077))
+- Fixed metric objects passed directly to `self.log` not being reset correctly ([#7055](https://github.com/Lightning-AI/pytorch-lightning/pull/7055))
+- Fixed `CombinedLoader` in distributed settings for validation / testing ([#7102](https://github.com/Lightning-AI/pytorch-lightning/pull/7102))
+- Fixed the save_dir in `WandbLogger` when the run was initiated externally ([#7106](https://github.com/Lightning-AI/pytorch-lightning/pull/7106))
+- Fixed `num_sanity_val_steps` affecting reproducibility of training data shuffling ([#7014](https://github.com/Lightning-AI/pytorch-lightning/pull/7014))
+- Fixed resetting device after `fitting/evaluating/predicting` ([#7188](https://github.com/Lightning-AI/pytorch-lightning/pull/7188))
+- Fixed bug where `trainer.tuner.scale_batch_size(max_trials=0)` would not return the correct batch size result ([#7262](https://github.com/Lightning-AI/pytorch-lightning/pull/7262))
+- Fixed metrics not being properly logged with `precision=16` and `manual_optimization` ([#7228](https://github.com/Lightning-AI/pytorch-lightning/pull/7228))
+- Fixed `BaseFinetuning` properly reloading `optimizer_states` when using `resume_from_checkpoint` ([#6891](https://github.com/Lightning-AI/pytorch-lightning/pull/6891))
+- Fixed `parameters_to_ignore` not properly set to DDPWrapper ([#7239](https://github.com/Lightning-AI/pytorch-lightning/pull/7239))
+- Fixed parsing of `fast_dev_run=True` with the built-in `ArgumentParser` ([#7240](https://github.com/Lightning-AI/pytorch-lightning/pull/7240))
+- Fixed handling an `IterableDataset` that fails to produce a batch at the beginning of an epoch ([#7294](https://github.com/Lightning-AI/pytorch-lightning/pull/7294))
+- Fixed `LightningModule.save_hyperparameters()` when attempting to save an empty container ([#7268](https://github.com/Lightning-AI/pytorch-lightning/pull/7268))
+- Fixed `apex` not properly instantiated when running with `ddp` ([#7274](https://github.com/Lightning-AI/pytorch-lightning/pull/7274))
+- Fixed optimizer `state` not moved to `GPU` ([#7277](https://github.com/Lightning-AI/pytorch-lightning/pull/7277))
+- Fixed custom init args for `WandbLogger` ([#6989](https://github.com/Lightning-AI/pytorch-lightning/pull/6989))
+- Fixed a bug where an error would be raised if the train dataloader sometimes produced None for a batch ([#7342](https://github.com/Lightning-AI/pytorch-lightning/pull/7342))
 - Fixed examples (
-  [#6600](https://github.com/Lightning-AI/lightning/pull/6600),
-  [#6638](https://github.com/Lightning-AI/lightning/pull/6638),
-  [#7096](https://github.com/Lightning-AI/lightning/pull/7096),
-  [#7246](https://github.com/Lightning-AI/lightning/pull/7246),
-  [#6357](https://github.com/Lightning-AI/lightning/pull/6357),
-  [#6476](https://github.com/Lightning-AI/lightning/pull/6476),
-  [#6294](https://github.com/Lightning-AI/lightning/pull/6294),
-  [#6373](https://github.com/Lightning-AI/lightning/pull/6373),
-  [#6088](https://github.com/Lightning-AI/lightning/pull/6088),
-  [#7398](https://github.com/Lightning-AI/lightning/pull/7398)
+  [#6600](https://github.com/Lightning-AI/pytorch-lightning/pull/6600),
+  [#6638](https://github.com/Lightning-AI/pytorch-lightning/pull/6638),
+  [#7096](https://github.com/Lightning-AI/pytorch-lightning/pull/7096),
+  [#7246](https://github.com/Lightning-AI/pytorch-lightning/pull/7246),
+  [#6357](https://github.com/Lightning-AI/pytorch-lightning/pull/6357),
+  [#6476](https://github.com/Lightning-AI/pytorch-lightning/pull/6476),
+  [#6294](https://github.com/Lightning-AI/pytorch-lightning/pull/6294),
+  [#6373](https://github.com/Lightning-AI/pytorch-lightning/pull/6373),
+  [#6088](https://github.com/Lightning-AI/pytorch-lightning/pull/6088),
+  [#7398](https://github.com/Lightning-AI/pytorch-lightning/pull/7398)
 )
-- Resolved schedule step bug for PyTorch Profiler ([#6674](https://github.com/Lightning-AI/lightning/pull/6674),
-  [#6681](https://github.com/Lightning-AI/lightning/pull/6681))
-- Updated logic for checking TPUs availability ([#6767](https://github.com/Lightning-AI/lightning/pull/6767))
-- Resolve TPU miss rendezvous ([#6781](https://github.com/Lightning-AI/lightning/pull/6781))
-- Fixed auto-scaling mode when calling tune method on trainer ([#7321](https://github.com/Lightning-AI/lightning/pull/7321))
-- Fixed finetuning complex models correctly unfreezes ([#6880](https://github.com/Lightning-AI/lightning/pull/6880))
-- Ensure we set the eval/train flag correctly on accelerator model ([#6877](https://github.com/Lightning-AI/lightning/pull/6877))
-- Set better defaults for `rank_zero_only.rank` when training is launched with SLURM and torchelastic ([#6802](https://github.com/Lightning-AI/lightning/pull/6802))
-- Fixed matching the number of outputs of backward with forward for AllGatherGrad ([#6625](https://github.com/Lightning-AI/lightning/pull/6625))
-- Fixed the `gradient_clip_algorithm` has no effect ([#6928](https://github.com/Lightning-AI/lightning/pull/6928))
-- Fixed CUDA OOM detection and handling ([#6934](https://github.com/Lightning-AI/lightning/pull/6934))
-- Fixed `unfreeze_and_add_param_group` expects `modules` rather than `module` ([#6822](https://github.com/Lightning-AI/lightning/pull/6822))
-- Fixed DPP + SyncBN when move on device ([#6838](https://github.com/Lightning-AI/lightning/pull/6838))
-- Fixed missing arguments in `lr_find` call ([#6784](https://github.com/Lightning-AI/lightning/pull/6784))
-- Fixed `set_default_tensor_type` to `torch.DoubleTensor` with precision=64 ([#7108](https://github.com/Lightning-AI/lightning/pull/7108))
-- Fixed `NeptuneLogger.log_text(step=None)` ([#7194](https://github.com/Lightning-AI/lightning/pull/7194))
-- Fixed importing torchtext batch ([#6365](https://github.com/Lightning-AI/lightning/pull/6365),
-  [#6323](https://github.com/Lightning-AI/lightning/pull/6323),
-  [#6211](https://github.com/Lightning-AI/lightning/pull/6211))
+- Resolved schedule step bug for PyTorch Profiler ([#6674](https://github.com/Lightning-AI/pytorch-lightning/pull/6674),
+  [#6681](https://github.com/Lightning-AI/pytorch-lightning/pull/6681))
+- Updated logic for checking TPUs availability ([#6767](https://github.com/Lightning-AI/pytorch-lightning/pull/6767))
+- Resolve TPU miss rendezvous ([#6781](https://github.com/Lightning-AI/pytorch-lightning/pull/6781))
+- Fixed auto-scaling mode when calling tune method on trainer ([#7321](https://github.com/Lightning-AI/pytorch-lightning/pull/7321))
+- Fixed finetuning complex models correctly unfreezes ([#6880](https://github.com/Lightning-AI/pytorch-lightning/pull/6880))
+- Ensure we set the eval/train flag correctly on accelerator model ([#6877](https://github.com/Lightning-AI/pytorch-lightning/pull/6877))
+- Set better defaults for `rank_zero_only.rank` when training is launched with SLURM and torchelastic ([#6802](https://github.com/Lightning-AI/pytorch-lightning/pull/6802))
`rank_zero_only.rank` when training is launched with SLURM and torchelastic ([#6802](https://github.com/Lightning-AI/pytorch-lightning/pull/6802)) +- Fixed matching the number of outputs of backward with forward for AllGatherGrad ([#6625](https://github.com/Lightning-AI/pytorch-lightning/pull/6625)) +- Fixed the `gradient_clip_algorithm` has no effect ([#6928](https://github.com/Lightning-AI/pytorch-lightning/pull/6928)) +- Fixed CUDA OOM detection and handling ([#6934](https://github.com/Lightning-AI/pytorch-lightning/pull/6934)) +- Fixed `unfreeze_and_add_param_group` expects `modules` rather than `module` ([#6822](https://github.com/Lightning-AI/pytorch-lightning/pull/6822)) +- Fixed DPP + SyncBN when move on device ([#6838](https://github.com/Lightning-AI/pytorch-lightning/pull/6838)) +- Fixed missing arguments in `lr_find` call ([#6784](https://github.com/Lightning-AI/pytorch-lightning/pull/6784)) +- Fixed `set_default_tensor_type` to `torch.DoubleTensor` with precision=64 ([#7108](https://github.com/Lightning-AI/pytorch-lightning/pull/7108)) +- Fixed `NeptuneLogger.log_text(step=None)` ([#7194](https://github.com/Lightning-AI/pytorch-lightning/pull/7194)) +- Fixed importing torchtext batch ([#6365](https://github.com/Lightning-AI/pytorch-lightning/pull/6365), + [#6323](https://github.com/Lightning-AI/pytorch-lightning/pull/6323), + [#6211](https://github.com/Lightning-AI/pytorch-lightning/pull/6211)) ## [1.2.9] - 2021-04-20 ### Fixed -- Fixed the order to call for world ranks & the `root_device` property in `TPUSpawnPlugin` ([#7074](https://github.com/Lightning-AI/lightning/pull/7074)) -- Fixed multi-gpu join for Horovod ([#6954](https://github.com/Lightning-AI/lightning/pull/6954)) -- Fixed parsing for pre-release package versions ([#6999](https://github.com/Lightning-AI/lightning/pull/6999)) +- Fixed the order to call for world ranks & the `root_device` property in `TPUSpawnPlugin` ([#7074](https://github.com/Lightning-AI/pytorch-lightning/pull/7074)) +- Fixed multi-gpu join for Horovod ([#6954](https://github.com/Lightning-AI/pytorch-lightning/pull/6954)) +- Fixed parsing for pre-release package versions ([#6999](https://github.com/Lightning-AI/pytorch-lightning/pull/6999)) ## [1.2.8] - 2021-04-14 ### Added -- Added TPUSpawn + IterableDataset error message ([#6875](https://github.com/Lightning-AI/lightning/pull/6875)) +- Added TPUSpawn + IterableDataset error message ([#6875](https://github.com/Lightning-AI/pytorch-lightning/pull/6875)) ### Fixed -- Fixed process rank not being available right away after `Trainer` instantiation ([#6941](https://github.com/Lightning-AI/lightning/pull/6941)) -- Fixed `sync_dist` for tpus ([#6950](https://github.com/Lightning-AI/lightning/pull/6950)) -- Fixed `AttributeError` for `require_backward_grad_sync` when running manual optimization with sharded plugin ([#6915](https://github.com/Lightning-AI/lightning/pull/6915)) -- Fixed `--gpus` default for parser returned by `Trainer.add_argparse_args` ([#6898](https://github.com/Lightning-AI/lightning/pull/6898)) -- Fixed TPU Spawn all gather ([#6896](https://github.com/Lightning-AI/lightning/pull/6896)) -- Fixed `EarlyStopping` logic when `min_epochs` or `min_steps` requirement is not met ([#6705](https://github.com/Lightning-AI/lightning/pull/6705)) -- Fixed csv extension check ([#6436](https://github.com/Lightning-AI/lightning/pull/6436)) -- Fixed checkpoint issue when using Horovod distributed backend ([#6958](https://github.com/Lightning-AI/lightning/pull/6958)) -- Fixed tensorboard exception 
raising ([#6901](https://github.com/Lightning-AI/lightning/pull/6901)) -- Fixed setting the eval/train flag correctly on accelerator model ([#6983](https://github.com/Lightning-AI/lightning/pull/6983)) -- Fixed DDP_SPAWN compatibility with bug_report_model.py ([#6892](https://github.com/Lightning-AI/lightning/pull/6892)) -- Fixed bug where `BaseFinetuning.flatten_modules()` was duplicating leaf node parameters ([#6879](https://github.com/Lightning-AI/lightning/pull/6879)) +- Fixed process rank not being available right away after `Trainer` instantiation ([#6941](https://github.com/Lightning-AI/pytorch-lightning/pull/6941)) +- Fixed `sync_dist` for tpus ([#6950](https://github.com/Lightning-AI/pytorch-lightning/pull/6950)) +- Fixed `AttributeError` for `require_backward_grad_sync` when running manual optimization with sharded plugin ([#6915](https://github.com/Lightning-AI/pytorch-lightning/pull/6915)) +- Fixed `--gpus` default for parser returned by `Trainer.add_argparse_args` ([#6898](https://github.com/Lightning-AI/pytorch-lightning/pull/6898)) +- Fixed TPU Spawn all gather ([#6896](https://github.com/Lightning-AI/pytorch-lightning/pull/6896)) +- Fixed `EarlyStopping` logic when `min_epochs` or `min_steps` requirement is not met ([#6705](https://github.com/Lightning-AI/pytorch-lightning/pull/6705)) +- Fixed csv extension check ([#6436](https://github.com/Lightning-AI/pytorch-lightning/pull/6436)) +- Fixed checkpoint issue when using Horovod distributed backend ([#6958](https://github.com/Lightning-AI/pytorch-lightning/pull/6958)) +- Fixed tensorboard exception raising ([#6901](https://github.com/Lightning-AI/pytorch-lightning/pull/6901)) +- Fixed setting the eval/train flag correctly on accelerator model ([#6983](https://github.com/Lightning-AI/pytorch-lightning/pull/6983)) +- Fixed DDP_SPAWN compatibility with bug_report_model.py ([#6892](https://github.com/Lightning-AI/pytorch-lightning/pull/6892)) +- Fixed bug where `BaseFinetuning.flatten_modules()` was duplicating leaf node parameters ([#6879](https://github.com/Lightning-AI/pytorch-lightning/pull/6879)) - Set better defaults for `rank_zero_only.rank` when training is launched with SLURM and torchelastic: - * Support SLURM and torchelastic global rank environment variables ([#5715](https://github.com/Lightning-AI/lightning/pull/5715)) - * Remove hardcoding of local rank in accelerator connector ([#6878](https://github.com/Lightning-AI/lightning/pull/6878)) + * Support SLURM and torchelastic global rank environment variables ([#5715](https://github.com/Lightning-AI/pytorch-lightning/pull/5715)) + * Remove hardcoding of local rank in accelerator connector ([#6878](https://github.com/Lightning-AI/pytorch-lightning/pull/6878)) ## [1.2.7] - 2021-04-06 ### Fixed -- Fixed resolve a bug with omegaconf and xm.save ([#6741](https://github.com/Lightning-AI/lightning/pull/6741)) -- Fixed an issue with IterableDataset when __len__ is not defined ([#6828](https://github.com/Lightning-AI/lightning/pull/6828)) -- Sanitize None params during pruning ([#6836](https://github.com/Lightning-AI/lightning/pull/6836)) -- Enforce an epoch scheduler interval when using SWA ([#6588](https://github.com/Lightning-AI/lightning/pull/6588)) -- Fixed TPU Colab hang issue, post training ([#6816](https://github.com/Lightning-AI/lightning/pull/6816)) -- Fixed a bug where `TensorBoardLogger` would give a warning and not log correctly to a symbolic link `save_dir` ([#6730](https://github.com/Lightning-AI/lightning/pull/6730)) -- Fixed bug where `predict` could not be 
used when `progress_bar_refresh_rate=0` ([#6884](https://github.com/Lightning-AI/lightning/pull/6884)) +- Fixed resolve a bug with omegaconf and xm.save ([#6741](https://github.com/Lightning-AI/pytorch-lightning/pull/6741)) +- Fixed an issue with IterableDataset when __len__ is not defined ([#6828](https://github.com/Lightning-AI/pytorch-lightning/pull/6828)) +- Sanitize None params during pruning ([#6836](https://github.com/Lightning-AI/pytorch-lightning/pull/6836)) +- Enforce an epoch scheduler interval when using SWA ([#6588](https://github.com/Lightning-AI/pytorch-lightning/pull/6588)) +- Fixed TPU Colab hang issue, post training ([#6816](https://github.com/Lightning-AI/pytorch-lightning/pull/6816)) +- Fixed a bug where `TensorBoardLogger` would give a warning and not log correctly to a symbolic link `save_dir` ([#6730](https://github.com/Lightning-AI/pytorch-lightning/pull/6730)) +- Fixed bug where `predict` could not be used when `progress_bar_refresh_rate=0` ([#6884](https://github.com/Lightning-AI/pytorch-lightning/pull/6884)) ## [1.2.6] - 2021-03-30 ### Changed -- Changed the behavior of `on_epoch_start` to run at the beginning of validation & test epoch ([#6498](https://github.com/Lightning-AI/lightning/pull/6498)) +- Changed the behavior of `on_epoch_start` to run at the beginning of validation & test epoch ([#6498](https://github.com/Lightning-AI/pytorch-lightning/pull/6498)) ### Removed -- Removed legacy code to include `step` dictionary returns in `callback_metrics`. Use `self.log_dict` instead. ([#6682](https://github.com/Lightning-AI/lightning/pull/6682)) +- Removed legacy code to include `step` dictionary returns in `callback_metrics`. Use `self.log_dict` instead. ([#6682](https://github.com/Lightning-AI/pytorch-lightning/pull/6682)) ### Fixed -- Fixed `DummyLogger.log_hyperparams` raising a `TypeError` when running with `fast_dev_run=True` ([#6398](https://github.com/Lightning-AI/lightning/pull/6398)) -- Fixed error on TPUs when there was no `ModelCheckpoint` ([#6654](https://github.com/Lightning-AI/lightning/pull/6654)) -- Fixed `trainer.test` freeze on TPUs ([#6654](https://github.com/Lightning-AI/lightning/pull/6654)) -- Fixed a bug where gradients were disabled after calling `Trainer.predict` ([#6657](https://github.com/Lightning-AI/lightning/pull/6657)) -- Fixed bug where no TPUs were detected in a TPU pod env ([#6719](https://github.com/Lightning-AI/lightning/pull/6719)) +- Fixed `DummyLogger.log_hyperparams` raising a `TypeError` when running with `fast_dev_run=True` ([#6398](https://github.com/Lightning-AI/pytorch-lightning/pull/6398)) +- Fixed error on TPUs when there was no `ModelCheckpoint` ([#6654](https://github.com/Lightning-AI/pytorch-lightning/pull/6654)) +- Fixed `trainer.test` freeze on TPUs ([#6654](https://github.com/Lightning-AI/pytorch-lightning/pull/6654)) +- Fixed a bug where gradients were disabled after calling `Trainer.predict` ([#6657](https://github.com/Lightning-AI/pytorch-lightning/pull/6657)) +- Fixed bug where no TPUs were detected in a TPU pod env ([#6719](https://github.com/Lightning-AI/pytorch-lightning/pull/6719)) ## [1.2.5] - 2021-03-23 ### Changed -- Update Gradient Clipping for the TPU Accelerator ([#6576](https://github.com/Lightning-AI/lightning/pull/6576)) -- Refactored setup for typing friendly ([#6590](https://github.com/Lightning-AI/lightning/pull/6590)) +- Update Gradient Clipping for the TPU Accelerator ([#6576](https://github.com/Lightning-AI/pytorch-lightning/pull/6576)) +- Refactored setup for typing friendly 
([#6590](https://github.com/Lightning-AI/pytorch-lightning/pull/6590)) ### Fixed -- Fixed a bug where `all_gather` would not work correctly with `tpu_cores=8` ([#6587](https://github.com/Lightning-AI/lightning/pull/6587)) -- Fixed comparing required versions ([#6434](https://github.com/Lightning-AI/lightning/pull/6434)) -- Fixed duplicate logs appearing in console when using the python logging module ([#6275](https://github.com/Lightning-AI/lightning/pull/6275)) -- Added Autocast in validation, test and predict modes for Native AMP ([#6565](https://github.com/Lightning-AI/lightning/pull/6565)) +- Fixed a bug where `all_gather` would not work correctly with `tpu_cores=8` ([#6587](https://github.com/Lightning-AI/pytorch-lightning/pull/6587)) +- Fixed comparing required versions ([#6434](https://github.com/Lightning-AI/pytorch-lightning/pull/6434)) +- Fixed duplicate logs appearing in console when using the python logging module ([#6275](https://github.com/Lightning-AI/pytorch-lightning/pull/6275)) +- Added Autocast in validation, test and predict modes for Native AMP ([#6565](https://github.com/Lightning-AI/pytorch-lightning/pull/6565)) ## [1.2.4] - 2021-03-16 ### Changed -- Changed the default of `find_unused_parameters` back to `True` in DDP and DDP Spawn ([#6438](https://github.com/Lightning-AI/lightning/pull/6438)) +- Changed the default of `find_unused_parameters` back to `True` in DDP and DDP Spawn ([#6438](https://github.com/Lightning-AI/pytorch-lightning/pull/6438)) ### Fixed -- Expose DeepSpeed loss parameters to allow users to fix loss instability ([#6115](https://github.com/Lightning-AI/lightning/pull/6115)) -- Fixed DP reduction with collection ([#6324](https://github.com/Lightning-AI/lightning/pull/6324)) -- Fixed an issue where the tuner would not tune the learning rate if also tuning the batch size ([#4688](https://github.com/Lightning-AI/lightning/pull/4688)) -- Fixed broadcast to use PyTorch `broadcast_object_list` and add `reduce_decision` ([#6410](https://github.com/Lightning-AI/lightning/pull/6410)) -- Fixed logger creating directory structure too early in DDP ([#6380](https://github.com/Lightning-AI/lightning/pull/6380)) -- Fixed DeepSpeed additional memory use on rank 0 when default device not set early enough ([#6460](https://github.com/Lightning-AI/lightning/pull/6460)) -- Fixed an issue with `Tuner.scale_batch_size` not finding the batch size attribute in the datamodule ([#5968](https://github.com/Lightning-AI/lightning/pull/5968)) -- Fixed an exception in the layer summary when the model contains torch.jit scripted submodules ([#6511](https://github.com/Lightning-AI/lightning/pull/6511)) -- Fixed when Train loop config was run during `Trainer.predict` ([#6541](https://github.com/Lightning-AI/lightning/pull/6541)) +- Expose DeepSpeed loss parameters to allow users to fix loss instability ([#6115](https://github.com/Lightning-AI/pytorch-lightning/pull/6115)) +- Fixed DP reduction with collection ([#6324](https://github.com/Lightning-AI/pytorch-lightning/pull/6324)) +- Fixed an issue where the tuner would not tune the learning rate if also tuning the batch size ([#4688](https://github.com/Lightning-AI/pytorch-lightning/pull/4688)) +- Fixed broadcast to use PyTorch `broadcast_object_list` and add `reduce_decision` ([#6410](https://github.com/Lightning-AI/pytorch-lightning/pull/6410)) +- Fixed logger creating directory structure too early in DDP ([#6380](https://github.com/Lightning-AI/pytorch-lightning/pull/6380)) +- Fixed DeepSpeed additional memory use on rank 0 when 
default device not set early enough ([#6460](https://github.com/Lightning-AI/pytorch-lightning/pull/6460)) +- Fixed an issue with `Tuner.scale_batch_size` not finding the batch size attribute in the datamodule ([#5968](https://github.com/Lightning-AI/pytorch-lightning/pull/5968)) +- Fixed an exception in the layer summary when the model contains torch.jit scripted submodules ([#6511](https://github.com/Lightning-AI/pytorch-lightning/pull/6511)) +- Fixed Train loop config being run during `Trainer.predict` ([#6541](https://github.com/Lightning-AI/pytorch-lightning/pull/6541)) ## [1.2.3] - 2021-03-09 ### Fixed -- Fixed `ModelPruning(make_pruning_permanent=True)` pruning buffers getting removed when saved during training ([#6073](https://github.com/Lightning-AI/lightning/pull/6073)) -- Fixed when `_stable_1d_sort` to work when `n >= N` ([#6177](https://github.com/Lightning-AI/lightning/pull/6177)) -- Fixed `AttributeError` when `logger=None` on TPU ([#6221](https://github.com/Lightning-AI/lightning/pull/6221)) -- Fixed PyTorch Profiler with `emit_nvtx` ([#6260](https://github.com/Lightning-AI/lightning/pull/6260)) -- Fixed `trainer.test` from `best_path` hangs after calling `trainer.fit` ([#6272](https://github.com/Lightning-AI/lightning/pull/6272)) -- Fixed `SingleTPU` calling `all_gather` ([#6296](https://github.com/Lightning-AI/lightning/pull/6296)) -- Ensure we check DeepSpeed/Sharded in multi-node DDP ([#6297](https://github.com/Lightning-AI/lightning/pull/6297) -- Check `LightningOptimizer` doesn't delete optimizer hooks ([#6305](https://github.com/Lightning-AI/lightning/pull/6305) -- Resolve memory leak for evaluation ([#6326](https://github.com/Lightning-AI/lightning/pull/6326) -- Ensure that clip gradients is only called if the value is greater than 0 ([#6330](https://github.com/Lightning-AI/lightning/pull/6330) -- Fixed `Trainer` not resetting `lightning_optimizers` when calling `Trainer.fit()` multiple times ([#6372](https://github.com/Lightning-AI/lightning/pull/6372)) +- Fixed `ModelPruning(make_pruning_permanent=True)` pruning buffers getting removed when saved during training ([#6073](https://github.com/Lightning-AI/pytorch-lightning/pull/6073)) +- Fixed `_stable_1d_sort` to work when `n >= N` ([#6177](https://github.com/Lightning-AI/pytorch-lightning/pull/6177)) +- Fixed `AttributeError` when `logger=None` on TPU ([#6221](https://github.com/Lightning-AI/pytorch-lightning/pull/6221)) +- Fixed PyTorch Profiler with `emit_nvtx` ([#6260](https://github.com/Lightning-AI/pytorch-lightning/pull/6260)) +- Fixed `trainer.test` from `best_path` hangs after calling `trainer.fit` ([#6272](https://github.com/Lightning-AI/pytorch-lightning/pull/6272)) +- Fixed `SingleTPU` calling `all_gather` ([#6296](https://github.com/Lightning-AI/pytorch-lightning/pull/6296)) +- Ensure we check DeepSpeed/Sharded in multi-node DDP ([#6297](https://github.com/Lightning-AI/pytorch-lightning/pull/6297)) +- Check `LightningOptimizer` doesn't delete optimizer hooks ([#6305](https://github.com/Lightning-AI/pytorch-lightning/pull/6305)) +- Resolve memory leak for evaluation ([#6326](https://github.com/Lightning-AI/pytorch-lightning/pull/6326)) +- Ensure that clip gradients is only called if the value is greater than 0 ([#6330](https://github.com/Lightning-AI/pytorch-lightning/pull/6330)) +- Fixed `Trainer` not resetting `lightning_optimizers` when calling `Trainer.fit()` multiple times ([#6372](https://github.com/Lightning-AI/pytorch-lightning/pull/6372)) ## [1.2.2] - 2021-03-02 ### Added -- Added 
`checkpoint` parameter to callback's `on_save_checkpoint` hook ([#6072](https://github.com/Lightning-AI/lightning/pull/6072)) +- Added `checkpoint` parameter to callback's `on_save_checkpoint` hook ([#6072](https://github.com/Lightning-AI/pytorch-lightning/pull/6072)) ### Changed -- Changed the order of `backward`, `step`, `zero_grad` to `zero_grad`, `backward`, `step` ([#6147](https://github.com/Lightning-AI/lightning/pull/6147)) -- Changed default for DeepSpeed CPU Offload to False, due to prohibitively slow speeds at smaller scale ([#6262](https://github.com/Lightning-AI/lightning/pull/6262)) +- Changed the order of `backward`, `step`, `zero_grad` to `zero_grad`, `backward`, `step` ([#6147](https://github.com/Lightning-AI/pytorch-lightning/pull/6147)) +- Changed default for DeepSpeed CPU Offload to False, due to prohibitively slow speeds at smaller scale ([#6262](https://github.com/Lightning-AI/pytorch-lightning/pull/6262)) ### Fixed -- Fixed epoch level schedulers not being called when `val_check_interval < 1.0` ([#6075](https://github.com/Lightning-AI/lightning/pull/6075)) -- Fixed multiple early stopping callbacks ([#6197](https://github.com/Lightning-AI/lightning/pull/6197)) -- Fixed incorrect usage of `detach()`, `cpu()`, `to()` ([#6216](https://github.com/Lightning-AI/lightning/pull/6216)) -- Fixed LBFGS optimizer support which didn't converge in automatic optimization ([#6147](https://github.com/Lightning-AI/lightning/pull/6147)) -- Prevent `WandbLogger` from dropping values ([#5931](https://github.com/Lightning-AI/lightning/pull/5931)) -- Fixed error thrown when using valid distributed mode in multi node ([#6297](https://github.com/Lightning-AI/lightning/pull/6297) +- Fixed epoch level schedulers not being called when `val_check_interval < 1.0` ([#6075](https://github.com/Lightning-AI/pytorch-lightning/pull/6075)) +- Fixed multiple early stopping callbacks ([#6197](https://github.com/Lightning-AI/pytorch-lightning/pull/6197)) +- Fixed incorrect usage of `detach()`, `cpu()`, `to()` ([#6216](https://github.com/Lightning-AI/pytorch-lightning/pull/6216)) +- Fixed LBFGS optimizer support which didn't converge in automatic optimization ([#6147](https://github.com/Lightning-AI/pytorch-lightning/pull/6147)) +- Prevent `WandbLogger` from dropping values ([#5931](https://github.com/Lightning-AI/pytorch-lightning/pull/5931)) +- Fixed error thrown when using valid distributed mode in multi-node ([#6297](https://github.com/Lightning-AI/pytorch-lightning/pull/6297)) ## [1.2.1] - 2021-02-23 ### Fixed -- Fixed incorrect yield logic for the amp autocast context manager ([#6080](https://github.com/Lightning-AI/lightning/pull/6080)) -- Fixed priority of plugin/accelerator when setting distributed mode ([#6089](https://github.com/Lightning-AI/lightning/pull/6089)) -- Fixed error message for AMP + CPU incompatibility ([#6107](https://github.com/Lightning-AI/lightning/pull/6107)) -- Disabled batch transfer in DP mode ([#6093](https://github.com/Lightning-AI/lightning/pull/6093)) +- Fixed incorrect yield logic for the amp autocast context manager ([#6080](https://github.com/Lightning-AI/pytorch-lightning/pull/6080)) +- Fixed priority of plugin/accelerator when setting distributed mode ([#6089](https://github.com/Lightning-AI/pytorch-lightning/pull/6089)) +- Fixed error message for AMP + CPU incompatibility ([#6107](https://github.com/Lightning-AI/pytorch-lightning/pull/6107)) +- Disabled batch transfer in DP mode ([#6093](https://github.com/Lightning-AI/pytorch-lightning/pull/6093)) ## [1.2.0] - 
2021-02-18 ### Added -- Added `DataType`, `AverageMethod` and `MDMCAverageMethod` enum in metrics ([#5657](https://github.com/Lightning-AI/lightning/pull/5689)) -- Added support for summarized model total params size in megabytes ([#5590](https://github.com/Lightning-AI/lightning/pull/5590)) -- Added support for multiple train loaders ([#1959](https://github.com/Lightning-AI/lightning/pull/1959)) -- Added `Accuracy` metric now generalizes to Top-k accuracy for (multi-dimensional) multi-class inputs using the `top_k` parameter ([#4838](https://github.com/Lightning-AI/lightning/pull/4838)) -- Added `Accuracy` metric now enables the computation of subset accuracy for multi-label or multi-dimensional multi-class inputs with the `subset_accuracy` parameter ([#4838](https://github.com/Lightning-AI/lightning/pull/4838)) -- Added `HammingDistance` metric to compute the hamming distance (loss) ([#4838](https://github.com/Lightning-AI/lightning/pull/4838)) -- Added `max_fpr` parameter to `auroc` metric for computing partial auroc metric ([#3790](https://github.com/Lightning-AI/lightning/pull/3790)) -- Added `StatScores` metric to compute the number of true positives, false positives, true negatives and false negatives ([#4839](https://github.com/Lightning-AI/lightning/pull/4839)) -- Added `R2Score` metric ([#5241](https://github.com/Lightning-AI/lightning/pull/5241)) -- Added `LambdaCallback` ([#5347](https://github.com/Lightning-AI/lightning/pull/5347)) -- Added `BackboneLambdaFinetuningCallback` ([#5377](https://github.com/Lightning-AI/lightning/pull/5377)) -- Accelerator `all_gather` supports collection ([#5221](https://github.com/Lightning-AI/lightning/pull/5221)) -- Added `image_gradients` functional metric to compute the image gradients of a given input image. 
([#5056](https://github.com/Lightning-AI/lightning/pull/5056)) -- Added `MetricCollection` ([#4318](https://github.com/Lightning-AI/lightning/pull/4318)) -- Added `.clone()` method to metrics ([#4318](https://github.com/Lightning-AI/lightning/pull/4318)) -- Added `IoU` class interface ([#4704](https://github.com/Lightning-AI/lightning/pull/4704)) +- Added `DataType`, `AverageMethod` and `MDMCAverageMethod` enum in metrics ([#5657](https://github.com/Lightning-AI/pytorch-lightning/pull/5689)) +- Added support for summarized model total params size in megabytes ([#5590](https://github.com/Lightning-AI/pytorch-lightning/pull/5590)) +- Added support for multiple train loaders ([#1959](https://github.com/Lightning-AI/pytorch-lightning/pull/1959)) +- Added `Accuracy` metric now generalizes to Top-k accuracy for (multi-dimensional) multi-class inputs using the `top_k` parameter ([#4838](https://github.com/Lightning-AI/pytorch-lightning/pull/4838)) +- Added `Accuracy` metric now enables the computation of subset accuracy for multi-label or multi-dimensional multi-class inputs with the `subset_accuracy` parameter ([#4838](https://github.com/Lightning-AI/pytorch-lightning/pull/4838)) +- Added `HammingDistance` metric to compute the hamming distance (loss) ([#4838](https://github.com/Lightning-AI/pytorch-lightning/pull/4838)) +- Added `max_fpr` parameter to `auroc` metric for computing partial auroc metric ([#3790](https://github.com/Lightning-AI/pytorch-lightning/pull/3790)) +- Added `StatScores` metric to compute the number of true positives, false positives, true negatives and false negatives ([#4839](https://github.com/Lightning-AI/pytorch-lightning/pull/4839)) +- Added `R2Score` metric ([#5241](https://github.com/Lightning-AI/pytorch-lightning/pull/5241)) +- Added `LambdaCallback` ([#5347](https://github.com/Lightning-AI/pytorch-lightning/pull/5347)) +- Added `BackboneLambdaFinetuningCallback` ([#5377](https://github.com/Lightning-AI/pytorch-lightning/pull/5377)) +- Accelerator `all_gather` supports collection ([#5221](https://github.com/Lightning-AI/pytorch-lightning/pull/5221)) +- Added `image_gradients` functional metric to compute the image gradients of a given input image. 
([#5056](https://github.com/Lightning-AI/pytorch-lightning/pull/5056)) +- Added `MetricCollection` ([#4318](https://github.com/Lightning-AI/pytorch-lightning/pull/4318)) +- Added `.clone()` method to metrics ([#4318](https://github.com/Lightning-AI/pytorch-lightning/pull/4318)) +- Added `IoU` class interface ([#4704](https://github.com/Lightning-AI/pytorch-lightning/pull/4704)) - Support to tie weights after moving model to TPU via `on_post_move_to_device` hook -- Added missing val/test hooks in `LightningModule` ([#5467](https://github.com/Lightning-AI/lightning/pull/5467)) -- The `Recall` and `Precision` metrics (and their functional counterparts `recall` and `precision`) can now be generalized to Recall@K and Precision@K with the use of `top_k` parameter ([#4842](https://github.com/Lightning-AI/lightning/pull/4842)) -- Added `ModelPruning` Callback ([#5618](https://github.com/Lightning-AI/lightning/pull/5618), - [#5825](https://github.com/Lightning-AI/lightning/pull/5825), - [#6045](https://github.com/Lightning-AI/lightning/pull/6045)) -- Added `PyTorchProfiler` ([#5560](https://github.com/Lightning-AI/lightning/pull/5560)) -- Added compositional metrics ([#5464](https://github.com/Lightning-AI/lightning/pull/5464)) -- Added Trainer method `predict(...)` for high performance predictions ([#5579](https://github.com/Lightning-AI/lightning/pull/5579)) -- Added `on_before_batch_transfer` and `on_after_batch_transfer` data hooks ([#3671](https://github.com/Lightning-AI/lightning/pull/3671)) -- Added AUC/AUROC class interface ([#5479](https://github.com/Lightning-AI/lightning/pull/5479)) -- Added `PredictLoop` object ([#5752](https://github.com/Lightning-AI/lightning/pull/5752)) -- Added `QuantizationAwareTraining` callback ([#5706](https://github.com/Lightning-AI/lightning/pull/5706), - [#6040](https://github.com/Lightning-AI/lightning/pull/6040)) -- Added `LightningModule.configure_callbacks` to enable the definition of model-specific callbacks ([#5621](https://github.com/Lightning-AI/lightning/pull/5621)) -- Added `dim` to `PSNR` metric for mean-squared-error reduction ([#5957](https://github.com/Lightning-AI/lightning/pull/5957)) -- Added promxial policy optimization template to pl_examples ([#5394](https://github.com/Lightning-AI/lightning/pull/5394)) -- Added `log_graph` to `CometLogger` ([#5295](https://github.com/Lightning-AI/lightning/pull/5295)) -- Added possibility for nested loaders ([#5404](https://github.com/Lightning-AI/lightning/pull/5404)) -- Added `sync_step` to Wandb logger ([#5351](https://github.com/Lightning-AI/lightning/pull/5351)) -- Added `StochasticWeightAveraging` callback ([#5640](https://github.com/Lightning-AI/lightning/pull/5640)) -- Added `LightningDataModule.from_datasets(...)` ([#5133](https://github.com/Lightning-AI/lightning/pull/5133)) -- Added `PL_TORCH_DISTRIBUTED_BACKEND` env variable to select backend ([#5981](https://github.com/Lightning-AI/lightning/pull/5981)) -- Added `Trainer` flag to activate Stochastic Weight Averaging (SWA) `Trainer(stochastic_weight_avg=True)` ([#6038](https://github.com/Lightning-AI/lightning/pull/6038)) -- Added DeepSpeed integration ([#5954](https://github.com/Lightning-AI/lightning/pull/5954), - [#6042](https://github.com/Lightning-AI/lightning/pull/6042)) +- Added missing val/test hooks in `LightningModule` ([#5467](https://github.com/Lightning-AI/pytorch-lightning/pull/5467)) +- The `Recall` and `Precision` metrics (and their functional counterparts `recall` and `precision`) can now be generalized to Recall@K and 
Precision@K with the use of `top_k` parameter ([#4842](https://github.com/Lightning-AI/pytorch-lightning/pull/4842)) +- Added `ModelPruning` Callback ([#5618](https://github.com/Lightning-AI/pytorch-lightning/pull/5618), + [#5825](https://github.com/Lightning-AI/pytorch-lightning/pull/5825), + [#6045](https://github.com/Lightning-AI/pytorch-lightning/pull/6045)) +- Added `PyTorchProfiler` ([#5560](https://github.com/Lightning-AI/pytorch-lightning/pull/5560)) +- Added compositional metrics ([#5464](https://github.com/Lightning-AI/pytorch-lightning/pull/5464)) +- Added Trainer method `predict(...)` for high performance predictions ([#5579](https://github.com/Lightning-AI/pytorch-lightning/pull/5579)) +- Added `on_before_batch_transfer` and `on_after_batch_transfer` data hooks ([#3671](https://github.com/Lightning-AI/pytorch-lightning/pull/3671)) +- Added AUC/AUROC class interface ([#5479](https://github.com/Lightning-AI/pytorch-lightning/pull/5479)) +- Added `PredictLoop` object ([#5752](https://github.com/Lightning-AI/pytorch-lightning/pull/5752)) +- Added `QuantizationAwareTraining` callback ([#5706](https://github.com/Lightning-AI/pytorch-lightning/pull/5706), + [#6040](https://github.com/Lightning-AI/pytorch-lightning/pull/6040)) +- Added `LightningModule.configure_callbacks` to enable the definition of model-specific callbacks ([#5621](https://github.com/Lightning-AI/pytorch-lightning/pull/5621)) +- Added `dim` to `PSNR` metric for mean-squared-error reduction ([#5957](https://github.com/Lightning-AI/pytorch-lightning/pull/5957)) +- Added proximal policy optimization template to pl_examples ([#5394](https://github.com/Lightning-AI/pytorch-lightning/pull/5394)) +- Added `log_graph` to `CometLogger` ([#5295](https://github.com/Lightning-AI/pytorch-lightning/pull/5295)) +- Added possibility for nested loaders ([#5404](https://github.com/Lightning-AI/pytorch-lightning/pull/5404)) +- Added `sync_step` to Wandb logger ([#5351](https://github.com/Lightning-AI/pytorch-lightning/pull/5351)) +- Added `StochasticWeightAveraging` callback ([#5640](https://github.com/Lightning-AI/pytorch-lightning/pull/5640)) +- Added `LightningDataModule.from_datasets(...)` ([#5133](https://github.com/Lightning-AI/pytorch-lightning/pull/5133)) +- Added `PL_TORCH_DISTRIBUTED_BACKEND` env variable to select backend ([#5981](https://github.com/Lightning-AI/pytorch-lightning/pull/5981)) +- Added `Trainer` flag to activate Stochastic Weight Averaging (SWA) `Trainer(stochastic_weight_avg=True)` ([#6038](https://github.com/Lightning-AI/pytorch-lightning/pull/6038)) +- Added DeepSpeed integration ([#5954](https://github.com/Lightning-AI/pytorch-lightning/pull/5954), + [#6042](https://github.com/Lightning-AI/pytorch-lightning/pull/6042)) ### Changed -- Changed `stat_scores` metric now calculates stat scores over all classes and gains new parameters, in line with the new `StatScores` metric ([#4839](https://github.com/Lightning-AI/lightning/pull/4839)) -- Changed `computer_vision_fine_tunning` example to use `BackboneLambdaFinetuningCallback` ([#5377](https://github.com/Lightning-AI/lightning/pull/5377)) -- Changed `automatic casting` for LoggerConnector `metrics` ([#5218](https://github.com/Lightning-AI/lightning/pull/5218)) -- Changed `iou` [func] to allow float input ([#4704](https://github.com/Lightning-AI/lightning/pull/4704)) -- Metric `compute()` method will no longer automatically call `reset()` ([#5409](https://github.com/Lightning-AI/lightning/pull/5409)) -- Set PyTorch 1.4 as min requirements, also for testing and 
examples `torchvision>=0.5` and `torchtext>=0.5` ([#5418](https://github.com/Lightning-AI/lightning/pull/5418)) -- Changed `callbacks` argument in `Trainer` to allow `Callback` input ([#5446](https://github.com/Lightning-AI/lightning/pull/5446)) -- Changed the default of `find_unused_parameters` to `False` in DDP ([#5185](https://github.com/Lightning-AI/lightning/pull/5185)) -- Changed `ModelCheckpoint` version suffixes to start at 1 ([#5008](https://github.com/Lightning-AI/lightning/pull/5008)) -- Progress bar metrics tensors are now converted to float ([#5692](https://github.com/Lightning-AI/lightning/pull/5692)) -- Changed the default value for the `progress_bar_refresh_rate` Trainer argument in Google COLAB notebooks to 20 ([#5516](https://github.com/Lightning-AI/lightning/pull/5516)) -- Extended support for purely iteration-based training ([#5726](https://github.com/Lightning-AI/lightning/pull/5726)) -- Made `LightningModule.global_rank`, `LightningModule.local_rank` and `LightningModule.logger` read-only properties ([#5730](https://github.com/Lightning-AI/lightning/pull/5730)) -- Forced `ModelCheckpoint` callbacks to run after all others to guarantee all states are saved to the checkpoint ([#5731](https://github.com/Lightning-AI/lightning/pull/5731)) +- Changed `stat_scores` metric now calculates stat scores over all classes and gains new parameters, in line with the new `StatScores` metric ([#4839](https://github.com/Lightning-AI/pytorch-lightning/pull/4839)) +- Changed `computer_vision_fine_tunning` example to use `BackboneLambdaFinetuningCallback` ([#5377](https://github.com/Lightning-AI/pytorch-lightning/pull/5377)) +- Changed `automatic casting` for LoggerConnector `metrics` ([#5218](https://github.com/Lightning-AI/pytorch-lightning/pull/5218)) +- Changed `iou` [func] to allow float input ([#4704](https://github.com/Lightning-AI/pytorch-lightning/pull/4704)) +- Metric `compute()` method will no longer automatically call `reset()` ([#5409](https://github.com/Lightning-AI/pytorch-lightning/pull/5409)) +- Set PyTorch 1.4 as min requirements, also for testing and examples `torchvision>=0.5` and `torchtext>=0.5` ([#5418](https://github.com/Lightning-AI/pytorch-lightning/pull/5418)) +- Changed `callbacks` argument in `Trainer` to allow `Callback` input ([#5446](https://github.com/Lightning-AI/pytorch-lightning/pull/5446)) +- Changed the default of `find_unused_parameters` to `False` in DDP ([#5185](https://github.com/Lightning-AI/pytorch-lightning/pull/5185)) +- Changed `ModelCheckpoint` version suffixes to start at 1 ([#5008](https://github.com/Lightning-AI/pytorch-lightning/pull/5008)) +- Progress bar metrics tensors are now converted to float ([#5692](https://github.com/Lightning-AI/pytorch-lightning/pull/5692)) +- Changed the default value for the `progress_bar_refresh_rate` Trainer argument in Google COLAB notebooks to 20 ([#5516](https://github.com/Lightning-AI/pytorch-lightning/pull/5516)) +- Extended support for purely iteration-based training ([#5726](https://github.com/Lightning-AI/pytorch-lightning/pull/5726)) +- Made `LightningModule.global_rank`, `LightningModule.local_rank` and `LightningModule.logger` read-only properties ([#5730](https://github.com/Lightning-AI/pytorch-lightning/pull/5730)) +- Forced `ModelCheckpoint` callbacks to run after all others to guarantee all states are saved to the checkpoint ([#5731](https://github.com/Lightning-AI/pytorch-lightning/pull/5731)) - Refactored Accelerators and Plugins: - * Added base classes for plugins 
([#5715](https://github.com/Lightning-AI/lightning/pull/5715)) - * Added parallel plugins for DP, DDP, DDPSpawn, DDP2 and Horovod ([#5714](https://github.com/Lightning-AI/lightning/pull/5714)) - * Precision Plugins ([#5718](https://github.com/Lightning-AI/lightning/pull/5718)) - * Added new Accelerators for CPU, GPU and TPU ([#5719](https://github.com/Lightning-AI/lightning/pull/5719)) - * Added RPC and Sharded plugins ([#5732](https://github.com/Lightning-AI/lightning/pull/5732)) - * Added missing `LightningModule`-wrapper logic to new plugins and accelerator ([#5734](https://github.com/Lightning-AI/lightning/pull/5734)) - * Moved device-specific teardown logic from training loop to accelerator ([#5973](https://github.com/Lightning-AI/lightning/pull/5973)) - * Moved accelerator_connector.py to the connectors subfolder ([#6033](https://github.com/Lightning-AI/lightning/pull/6033)) - * Trainer only references accelerator ([#6039](https://github.com/Lightning-AI/lightning/pull/6039)) - * Made parallel devices optional across all plugins ([#6051](https://github.com/Lightning-AI/lightning/pull/6051)) - * Cleaning ([#5948](https://github.com/Lightning-AI/lightning/pull/5948), - [#5949](https://github.com/Lightning-AI/lightning/pull/5949), - [#5950](https://github.com/Lightning-AI/lightning/pull/5950)) -- Enabled `self.log` in callbacks ([#5094](https://github.com/Lightning-AI/lightning/pull/5094)) -- Renamed xxx_AVAILABLE as protected ([#5082](https://github.com/Lightning-AI/lightning/pull/5082)) -- Unified module names in Utils ([#5199](https://github.com/Lightning-AI/lightning/pull/5199)) -- Separated utils: imports & enums ([#5256](https://github.com/Lightning-AI/lightning/pull/5256) - [#5874](https://github.com/Lightning-AI/lightning/pull/5874)) -- Refactor: clean trainer device & distributed getters ([#5300](https://github.com/Lightning-AI/lightning/pull/5300)) -- Simplified training phase as LightningEnum ([#5419](https://github.com/Lightning-AI/lightning/pull/5419)) -- Updated metrics to use LightningEnum ([#5689](https://github.com/Lightning-AI/lightning/pull/5689)) -- Changed the seq of `on_train_batch_end`, `on_batch_end` & `on_train_epoch_end`, `on_epoch_end hooks` ([#5688](https://github.com/Lightning-AI/lightning/pull/5688)) -- Refactored `setup_training` and remove `test_mode` ([#5388](https://github.com/Lightning-AI/lightning/pull/5388)) -- Disabled training with zero `num_training_batches` when insufficient `limit_train_batches` ([#5703](https://github.com/Lightning-AI/lightning/pull/5703)) -- Refactored `EpochResultStore` ([#5522](https://github.com/Lightning-AI/lightning/pull/5522)) -- Update `lr_finder` to check for attribute if not running `fast_dev_run` ([#5990](https://github.com/Lightning-AI/lightning/pull/5990)) -- LightningOptimizer manual optimizer is more flexible and expose `toggle_model` ([#5771](https://github.com/Lightning-AI/lightning/pull/5771)) -- `MlflowLogger` limit parameter value length to 250 char ([#5893](https://github.com/Lightning-AI/lightning/pull/5893)) -- Re-introduced fix for Hydra directory sync with multiple process ([#5993](https://github.com/Lightning-AI/lightning/pull/5993)) + * Added base classes for plugins ([#5715](https://github.com/Lightning-AI/pytorch-lightning/pull/5715)) + * Added parallel plugins for DP, DDP, DDPSpawn, DDP2 and Horovod ([#5714](https://github.com/Lightning-AI/pytorch-lightning/pull/5714)) + * Precision Plugins ([#5718](https://github.com/Lightning-AI/pytorch-lightning/pull/5718)) + * Added new Accelerators for CPU, 
GPU and TPU ([#5719](https://github.com/Lightning-AI/pytorch-lightning/pull/5719)) + * Added RPC and Sharded plugins ([#5732](https://github.com/Lightning-AI/pytorch-lightning/pull/5732)) + * Added missing `LightningModule`-wrapper logic to new plugins and accelerator ([#5734](https://github.com/Lightning-AI/pytorch-lightning/pull/5734)) + * Moved device-specific teardown logic from training loop to accelerator ([#5973](https://github.com/Lightning-AI/pytorch-lightning/pull/5973)) + * Moved accelerator_connector.py to the connectors subfolder ([#6033](https://github.com/Lightning-AI/pytorch-lightning/pull/6033)) + * Trainer only references accelerator ([#6039](https://github.com/Lightning-AI/pytorch-lightning/pull/6039)) + * Made parallel devices optional across all plugins ([#6051](https://github.com/Lightning-AI/pytorch-lightning/pull/6051)) + * Cleaning ([#5948](https://github.com/Lightning-AI/pytorch-lightning/pull/5948), + [#5949](https://github.com/Lightning-AI/pytorch-lightning/pull/5949), + [#5950](https://github.com/Lightning-AI/pytorch-lightning/pull/5950)) +- Enabled `self.log` in callbacks ([#5094](https://github.com/Lightning-AI/pytorch-lightning/pull/5094)) +- Renamed xxx_AVAILABLE as protected ([#5082](https://github.com/Lightning-AI/pytorch-lightning/pull/5082)) +- Unified module names in Utils ([#5199](https://github.com/Lightning-AI/pytorch-lightning/pull/5199)) +- Separated utils: imports & enums ([#5256](https://github.com/Lightning-AI/pytorch-lightning/pull/5256), + [#5874](https://github.com/Lightning-AI/pytorch-lightning/pull/5874)) +- Refactor: clean trainer device & distributed getters ([#5300](https://github.com/Lightning-AI/pytorch-lightning/pull/5300)) +- Simplified training phase as LightningEnum ([#5419](https://github.com/Lightning-AI/pytorch-lightning/pull/5419)) +- Updated metrics to use LightningEnum ([#5689](https://github.com/Lightning-AI/pytorch-lightning/pull/5689)) +- Changed the seq of `on_train_batch_end`, `on_batch_end` & `on_train_epoch_end`, `on_epoch_end` hooks ([#5688](https://github.com/Lightning-AI/pytorch-lightning/pull/5688)) +- Refactored `setup_training` and removed `test_mode` ([#5388](https://github.com/Lightning-AI/pytorch-lightning/pull/5388)) +- Disabled training with zero `num_training_batches` when insufficient `limit_train_batches` ([#5703](https://github.com/Lightning-AI/pytorch-lightning/pull/5703)) +- Refactored `EpochResultStore` ([#5522](https://github.com/Lightning-AI/pytorch-lightning/pull/5522)) +- Update `lr_finder` to check for attribute if not running `fast_dev_run` ([#5990](https://github.com/Lightning-AI/pytorch-lightning/pull/5990)) +- LightningOptimizer manual optimizer is more flexible and exposes `toggle_model` ([#5771](https://github.com/Lightning-AI/pytorch-lightning/pull/5771)) +- `MlflowLogger` limit parameter value length to 250 chars ([#5893](https://github.com/Lightning-AI/pytorch-lightning/pull/5893)) +- Re-introduced fix for Hydra directory sync with multiple processes ([#5993](https://github.com/Lightning-AI/pytorch-lightning/pull/5993)) ### Deprecated -- Function `stat_scores_multiple_classes` is deprecated in favor of `stat_scores` ([#4839](https://github.com/Lightning-AI/lightning/pull/4839)) -- Moved accelerators and plugins to its `legacy` pkg ([#5645](https://github.com/Lightning-AI/lightning/pull/5645)) -- Deprecated `LightningDistributedDataParallel` in favor of new wrapper module `LightningDistributedModule` ([#5185](https://github.com/Lightning-AI/lightning/pull/5185)) -- Deprecated 
`LightningDataParallel` in favor of new wrapper module `LightningParallelModule` ([#5670](https://github.com/Lightning-AI/lightning/pull/5670)) -- Renamed utils modules ([#5199](https://github.com/Lightning-AI/lightning/pull/5199)) +- Function `stat_scores_multiple_classes` is deprecated in favor of `stat_scores` ([#4839](https://github.com/Lightning-AI/pytorch-lightning/pull/4839)) +- Moved accelerators and plugins to its `legacy` pkg ([#5645](https://github.com/Lightning-AI/pytorch-lightning/pull/5645)) +- Deprecated `LightningDistributedDataParallel` in favor of new wrapper module `LightningDistributedModule` ([#5185](https://github.com/Lightning-AI/pytorch-lightning/pull/5185)) +- Deprecated `LightningDataParallel` in favor of new wrapper module `LightningParallelModule` ([#5670](https://github.com/Lightning-AI/pytorch-lightning/pull/5670)) +- Renamed utils modules ([#5199](https://github.com/Lightning-AI/pytorch-lightning/pull/5199)) * `argparse_utils` >> `argparse` * `model_utils` >> `model_helpers` * `warning_utils` >> `warnings` * `xla_device_utils` >> `xla_device` -- Deprecated using `'val_loss'` to set the `ModelCheckpoint` monitor ([#6012](https://github.com/Lightning-AI/lightning/pull/6012)) -- Deprecated `.get_model()` with explicit `.lightning_module` property ([#6035](https://github.com/Lightning-AI/lightning/pull/6035)) -- Deprecated Trainer attribute `accelerator_backend` in favor of `accelerator` ([#6034](https://github.com/Lightning-AI/lightning/pull/6034)) +- Deprecated using `'val_loss'` to set the `ModelCheckpoint` monitor ([#6012](https://github.com/Lightning-AI/pytorch-lightning/pull/6012)) +- Deprecated `.get_model()` with explicit `.lightning_module` property ([#6035](https://github.com/Lightning-AI/pytorch-lightning/pull/6035)) +- Deprecated Trainer attribute `accelerator_backend` in favor of `accelerator` ([#6034](https://github.com/Lightning-AI/pytorch-lightning/pull/6034)) ### Removed -- Removed deprecated checkpoint argument `filepath` ([#5321](https://github.com/Lightning-AI/lightning/pull/5321)) -- Removed deprecated `Fbeta`, `f1_score` and `fbeta_score` metrics ([#5322](https://github.com/Lightning-AI/lightning/pull/5322)) -- Removed deprecated `TrainResult` ([#5323](https://github.com/Lightning-AI/lightning/pull/5323)) -- Removed deprecated `EvalResult` ([#5633](https://github.com/Lightning-AI/lightning/pull/5633)) -- Removed `LoggerStages` ([#5673](https://github.com/Lightning-AI/lightning/pull/5673)) +- Removed deprecated checkpoint argument `filepath` ([#5321](https://github.com/Lightning-AI/pytorch-lightning/pull/5321)) +- Removed deprecated `Fbeta`, `f1_score` and `fbeta_score` metrics ([#5322](https://github.com/Lightning-AI/pytorch-lightning/pull/5322)) +- Removed deprecated `TrainResult` ([#5323](https://github.com/Lightning-AI/pytorch-lightning/pull/5323)) +- Removed deprecated `EvalResult` ([#5633](https://github.com/Lightning-AI/pytorch-lightning/pull/5633)) +- Removed `LoggerStages` ([#5673](https://github.com/Lightning-AI/pytorch-lightning/pull/5673)) ### Fixed -- Fixed distributed setting and `ddp_cpu` only with `num_processes>1` ([#5297](https://github.com/Lightning-AI/lightning/pull/5297)) -- Fixed `num_workers` for Windows example ([#5375](https://github.com/Lightning-AI/lightning/pull/5375)) -- Fixed loading yaml ([#5619](https://github.com/Lightning-AI/lightning/pull/5619)) -- Fixed support custom DataLoader with DDP if they can be re-instantiated ([#5745](https://github.com/Lightning-AI/lightning/pull/5745)) -- Fixed repeated `.fit()` 
calls ignore max_steps iteration bound ([#5936](https://github.com/Lightning-AI/lightning/pull/5936)) -- Fixed throwing `MisconfigurationError` on unknown mode ([#5255](https://github.com/Lightning-AI/lightning/pull/5255)) -- Resolve bug with Finetuning ([#5744](https://github.com/Lightning-AI/lightning/pull/5744)) -- Fixed `ModelCheckpoint` race condition in file existence check ([#5155](https://github.com/Lightning-AI/lightning/pull/5155)) -- Fixed some compatibility with PyTorch 1.8 ([#5864](https://github.com/Lightning-AI/lightning/pull/5864)) -- Fixed forward cache ([#5895](https://github.com/Lightning-AI/lightning/pull/5895)) -- Fixed recursive detach of tensors to CPU ([#6007](https://github.com/Lightning-AI/lightning/pull/6007)) -- Fixed passing wrong strings for scheduler interval doesn't throw an error ([#5923](https://github.com/Lightning-AI/lightning/pull/5923)) -- Fixed wrong `requires_grad` state after `return None` with multiple optimizers ([#5738](https://github.com/Lightning-AI/lightning/pull/5638)) -- Fixed add `on_epoch_end` hook at the end of `validation`, `test` epoch ([#5986](https://github.com/Lightning-AI/lightning/pull/5986)) -- Fixed missing `process_dataloader` call for `TPUSpawn` when in distributed mode ([#6015](https://github.com/Lightning-AI/lightning/pull/6015)) -- Fixed progress bar flickering by appending 0 to floats/strings ([#6009](https://github.com/Lightning-AI/lightning/pull/6009)) -- Fixed synchronization issues with TPU training ([#6027](https://github.com/Lightning-AI/lightning/pull/6027)) -- Fixed `hparams.yaml` saved twice when using `TensorBoardLogger` ([#5953](https://github.com/Lightning-AI/lightning/pull/5953)) -- Fixed basic examples ([#5912](https://github.com/Lightning-AI/lightning/pull/5912), - [#5985](https://github.com/Lightning-AI/lightning/pull/5985)) -- Fixed `fairscale` compatible with PT 1.8 ([#5996](https://github.com/Lightning-AI/lightning/pull/5996)) -- Ensured `process_dataloader` is called when `tpu_cores > 1` to use Parallel DataLoader ([#6015](https://github.com/Lightning-AI/lightning/pull/6015)) -- Attempted SLURM auto resume call when non-shell call fails ([#6002](https://github.com/Lightning-AI/lightning/pull/6002)) -- Fixed wrapping optimizers upon assignment ([#6006](https://github.com/Lightning-AI/lightning/pull/6006)) -- Fixed allowing hashing of metrics with lists in their state ([#5939](https://github.com/Lightning-AI/lightning/pull/5939)) +- Fixed distributed setting and `ddp_cpu` only with `num_processes>1` ([#5297](https://github.com/Lightning-AI/pytorch-lightning/pull/5297)) +- Fixed `num_workers` for Windows example ([#5375](https://github.com/Lightning-AI/pytorch-lightning/pull/5375)) +- Fixed loading yaml ([#5619](https://github.com/Lightning-AI/pytorch-lightning/pull/5619)) +- Fixed support custom DataLoader with DDP if they can be re-instantiated ([#5745](https://github.com/Lightning-AI/pytorch-lightning/pull/5745)) +- Fixed repeated `.fit()` calls ignore max_steps iteration bound ([#5936](https://github.com/Lightning-AI/pytorch-lightning/pull/5936)) +- Fixed throwing `MisconfigurationError` on unknown mode ([#5255](https://github.com/Lightning-AI/pytorch-lightning/pull/5255)) +- Resolve bug with Finetuning ([#5744](https://github.com/Lightning-AI/pytorch-lightning/pull/5744)) +- Fixed `ModelCheckpoint` race condition in file existence check ([#5155](https://github.com/Lightning-AI/pytorch-lightning/pull/5155)) +- Fixed some compatibility with PyTorch 1.8 
([#5864](https://github.com/Lightning-AI/pytorch-lightning/pull/5864)) +- Fixed forward cache ([#5895](https://github.com/Lightning-AI/pytorch-lightning/pull/5895)) +- Fixed recursive detach of tensors to CPU ([#6007](https://github.com/Lightning-AI/pytorch-lightning/pull/6007)) +- Fixed passing wrong strings for scheduler interval doesn't throw an error ([#5923](https://github.com/Lightning-AI/pytorch-lightning/pull/5923)) +- Fixed wrong `requires_grad` state after `return None` with multiple optimizers ([#5738](https://github.com/Lightning-AI/pytorch-lightning/pull/5638)) +- Fixed add `on_epoch_end` hook at the end of `validation`, `test` epoch ([#5986](https://github.com/Lightning-AI/pytorch-lightning/pull/5986)) +- Fixed missing `process_dataloader` call for `TPUSpawn` when in distributed mode ([#6015](https://github.com/Lightning-AI/pytorch-lightning/pull/6015)) +- Fixed progress bar flickering by appending 0 to floats/strings ([#6009](https://github.com/Lightning-AI/pytorch-lightning/pull/6009)) +- Fixed synchronization issues with TPU training ([#6027](https://github.com/Lightning-AI/pytorch-lightning/pull/6027)) +- Fixed `hparams.yaml` saved twice when using `TensorBoardLogger` ([#5953](https://github.com/Lightning-AI/pytorch-lightning/pull/5953)) +- Fixed basic examples ([#5912](https://github.com/Lightning-AI/pytorch-lightning/pull/5912), + [#5985](https://github.com/Lightning-AI/pytorch-lightning/pull/5985)) +- Fixed `fairscale` compatible with PT 1.8 ([#5996](https://github.com/Lightning-AI/pytorch-lightning/pull/5996)) +- Ensured `process_dataloader` is called when `tpu_cores > 1` to use Parallel DataLoader ([#6015](https://github.com/Lightning-AI/pytorch-lightning/pull/6015)) +- Attempted SLURM auto resume call when non-shell call fails ([#6002](https://github.com/Lightning-AI/pytorch-lightning/pull/6002)) +- Fixed wrapping optimizers upon assignment ([#6006](https://github.com/Lightning-AI/pytorch-lightning/pull/6006)) +- Fixed allowing hashing of metrics with lists in their state ([#5939](https://github.com/Lightning-AI/pytorch-lightning/pull/5939)) ## [1.1.8] - 2021-02-08 ### Fixed -- Separate epoch validation from step validation ([#5208](https://github.com/Lightning-AI/lightning/pull/5208)) -- Fixed `toggle_optimizers` not handling all optimizer parameters ([#5775](https://github.com/Lightning-AI/lightning/pull/5775)) +- Separate epoch validation from step validation ([#5208](https://github.com/Lightning-AI/pytorch-lightning/pull/5208)) +- Fixed `toggle_optimizers` not handling all optimizer parameters ([#5775](https://github.com/Lightning-AI/pytorch-lightning/pull/5775)) ## [1.1.7] - 2021-02-03 ### Fixed -- Fixed `TensorBoardLogger` not closing `SummaryWriter` on `finalize` ([#5696](https://github.com/Lightning-AI/lightning/pull/5696)) -- Fixed filtering of pytorch "unsqueeze" warning when using DP ([#5622](https://github.com/Lightning-AI/lightning/pull/5622)) -- Fixed `num_classes` argument in F1 metric ([#5663](https://github.com/Lightning-AI/lightning/pull/5663)) -- Fixed `log_dir` property ([#5537](https://github.com/Lightning-AI/lightning/pull/5537)) -- Fixed a race condition in `ModelCheckpoint` when checking if a checkpoint file exists ([#5144](https://github.com/Lightning-AI/lightning/pull/5144)) -- Remove unnecessary intermediate layers in Dockerfiles ([#5697](https://github.com/Lightning-AI/lightning/pull/5697)) -- Fixed auto learning rate ordering ([#5638](https://github.com/Lightning-AI/lightning/pull/5638)) +- Fixed `TensorBoardLogger` not closing 
`SummaryWriter` on `finalize` ([#5696](https://github.com/Lightning-AI/pytorch-lightning/pull/5696)) +- Fixed filtering of pytorch "unsqueeze" warning when using DP ([#5622](https://github.com/Lightning-AI/pytorch-lightning/pull/5622)) +- Fixed `num_classes` argument in F1 metric ([#5663](https://github.com/Lightning-AI/pytorch-lightning/pull/5663)) +- Fixed `log_dir` property ([#5537](https://github.com/Lightning-AI/pytorch-lightning/pull/5537)) +- Fixed a race condition in `ModelCheckpoint` when checking if a checkpoint file exists ([#5144](https://github.com/Lightning-AI/pytorch-lightning/pull/5144)) +- Remove unnecessary intermediate layers in Dockerfiles ([#5697](https://github.com/Lightning-AI/pytorch-lightning/pull/5697)) +- Fixed auto learning rate ordering ([#5638](https://github.com/Lightning-AI/pytorch-lightning/pull/5638)) ## [1.1.6] - 2021-01-26 ### Changed -- Increased TPU check timeout from 20s to 100s ([#5598](https://github.com/Lightning-AI/lightning/pull/5598)) -- Ignored `step` param in Neptune logger's log_metric method ([#5510](https://github.com/Lightning-AI/lightning/pull/5510)) -- Pass batch outputs to `on_train_batch_end` instead of `epoch_end` outputs ([#4369](https://github.com/Lightning-AI/lightning/pull/4369)) +- Increased TPU check timeout from 20s to 100s ([#5598](https://github.com/Lightning-AI/pytorch-lightning/pull/5598)) +- Ignored `step` param in Neptune logger's log_metric method ([#5510](https://github.com/Lightning-AI/pytorch-lightning/pull/5510)) +- Pass batch outputs to `on_train_batch_end` instead of `epoch_end` outputs ([#4369](https://github.com/Lightning-AI/pytorch-lightning/pull/4369)) ### Fixed -- Fixed `toggle_optimizer` to reset `requires_grad` state ([#5574](https://github.com/Lightning-AI/lightning/pull/5574)) -- Fixed FileNotFoundError for best checkpoint when using DDP with Hydra ([#5629](https://github.com/Lightning-AI/lightning/pull/5629)) -- Fixed an error when logging a progress bar metric with a reserved name ([#5620](https://github.com/Lightning-AI/lightning/pull/5620)) -- Fixed `Metric`'s `state_dict` not included when child modules ([#5614](https://github.com/Lightning-AI/lightning/pull/5614)) -- Fixed Neptune logger creating multiple experiments when GPUs > 1 ([#3256](https://github.com/Lightning-AI/lightning/pull/3256)) -- Fixed duplicate logs appearing in console when using the python logging module ([#5509](https://github.com/Lightning-AI/lightning/pull/5509)) -- Fixed tensor printing in `trainer.test()` ([#5138](https://github.com/Lightning-AI/lightning/pull/5138)) -- Fixed not using dataloader when `hparams` present ([#4559](https://github.com/Lightning-AI/lightning/pull/4559)) +- Fixed `toggle_optimizer` to reset `requires_grad` state ([#5574](https://github.com/Lightning-AI/pytorch-lightning/pull/5574)) +- Fixed FileNotFoundError for best checkpoint when using DDP with Hydra ([#5629](https://github.com/Lightning-AI/pytorch-lightning/pull/5629)) +- Fixed an error when logging a progress bar metric with a reserved name ([#5620](https://github.com/Lightning-AI/pytorch-lightning/pull/5620)) +- Fixed `Metric`'s `state_dict` not included when child modules ([#5614](https://github.com/Lightning-AI/pytorch-lightning/pull/5614)) +- Fixed Neptune logger creating multiple experiments when GPUs > 1 ([#3256](https://github.com/Lightning-AI/pytorch-lightning/pull/3256)) +- Fixed duplicate logs appearing in console when using the python logging module ([#5509](https://github.com/Lightning-AI/pytorch-lightning/pull/5509)) +- Fixed tensor 
printing in `trainer.test()` ([#5138](https://github.com/Lightning-AI/pytorch-lightning/pull/5138)) +- Fixed not using dataloader when `hparams` present ([#4559](https://github.com/Lightning-AI/pytorch-lightning/pull/4559)) ## [1.1.5] - 2021-01-19 ### Fixed -- Fixed a visual bug in the progress bar display initialization ([#4579](https://github.com/Lightning-AI/lightning/pull/4579)) -- Fixed logging `on_train_batch_end` in a callback with multiple optimizers ([#5521](https://github.com/Lightning-AI/lightning/pull/5521)) -- Fixed `reinit_scheduler_properties` with correct optimizer ([#5519](https://github.com/Lightning-AI/lightning/pull/5519)) -- Fixed `val_check_interval` with `fast_dev_run` ([#5540](https://github.com/Lightning-AI/lightning/pull/5540)) +- Fixed a visual bug in the progress bar display initialization ([#4579](https://github.com/Lightning-AI/pytorch-lightning/pull/4579)) +- Fixed logging `on_train_batch_end` in a callback with multiple optimizers ([#5521](https://github.com/Lightning-AI/pytorch-lightning/pull/5521)) +- Fixed `reinit_scheduler_properties` with correct optimizer ([#5519](https://github.com/Lightning-AI/pytorch-lightning/pull/5519)) +- Fixed `val_check_interval` with `fast_dev_run` ([#5540](https://github.com/Lightning-AI/pytorch-lightning/pull/5540)) ## [1.1.4] - 2021-01-12 ### Added -- Add automatic optimization property setter to lightning module ([#5169](https://github.com/Lightning-AI/lightning/pull/5169)) +- Add automatic optimization property setter to lightning module ([#5169](https://github.com/Lightning-AI/pytorch-lightning/pull/5169)) ### Changed -- Changed deprecated `enable_pl_optimizer=True` ([#5244](https://github.com/Lightning-AI/lightning/pull/5244)) +- Changed deprecated `enable_pl_optimizer=True` ([#5244](https://github.com/Lightning-AI/pytorch-lightning/pull/5244)) ### Fixed -- Fixed `transfer_batch_to_device` for DDP with `len(devices_ids) == 1` ([#5195](https://github.com/Lightning-AI/lightning/pull/5195)) -- Logging only on `not should_accumulate()` during training ([#5417](https://github.com/Lightning-AI/lightning/pull/5417)) -- Resolve interpolation bug with Hydra ([#5406](https://github.com/Lightning-AI/lightning/pull/5406)) -- Check environ before selecting a seed to prevent warning message ([#4743](https://github.com/Lightning-AI/lightning/pull/4743)) -- Fixed signature mismatch in `model_to_device` of `DDPCPUHPCAccelerator` ([#5505](https://github.com/Lightning-AI/lightning/pull/5505)) +- Fixed `transfer_batch_to_device` for DDP with `len(devices_ids) == 1` ([#5195](https://github.com/Lightning-AI/pytorch-lightning/pull/5195)) +- Logging only on `not should_accumulate()` during training ([#5417](https://github.com/Lightning-AI/pytorch-lightning/pull/5417)) +- Resolve interpolation bug with Hydra ([#5406](https://github.com/Lightning-AI/pytorch-lightning/pull/5406)) +- Check environ before selecting a seed to prevent warning message ([#4743](https://github.com/Lightning-AI/pytorch-lightning/pull/4743)) +- Fixed signature mismatch in `model_to_device` of `DDPCPUHPCAccelerator` ([#5505](https://github.com/Lightning-AI/pytorch-lightning/pull/5505)) ## [1.1.3] - 2021-01-05 ### Added -- Added a check for optimizer attached to `lr_scheduler` ([#5338](https://github.com/Lightning-AI/lightning/pull/5338)) -- Added support for passing non-existing filepaths to `resume_from_checkpoint` ([#4402](https://github.com/Lightning-AI/lightning/pull/4402)) +- Added a check for optimizer attached to `lr_scheduler` 
([#5338](https://github.com/Lightning-AI/pytorch-lightning/pull/5338)) +- Added support for passing non-existing filepaths to `resume_from_checkpoint` ([#4402](https://github.com/Lightning-AI/pytorch-lightning/pull/4402)) ### Changed -- Skip restore from `resume_from_checkpoint` while `testing` ([#5161](https://github.com/Lightning-AI/lightning/pull/5161)) -- Allowed `log_momentum` for adaptive optimizers in `LearningRateMonitor` ([#5333](https://github.com/Lightning-AI/lightning/pull/5333)) -- Disabled checkpointing, earlystopping and logging with `fast_dev_run` ([#5277](https://github.com/Lightning-AI/lightning/pull/5277)) -- Distributed group defaults to `WORLD` if `None` ([#5125](https://github.com/Lightning-AI/lightning/pull/5125)) +- Skip restore from `resume_from_checkpoint` while `testing` ([#5161](https://github.com/Lightning-AI/pytorch-lightning/pull/5161)) +- Allowed `log_momentum` for adaptive optimizers in `LearningRateMonitor` ([#5333](https://github.com/Lightning-AI/pytorch-lightning/pull/5333)) +- Disabled checkpointing, earlystopping and logging with `fast_dev_run` ([#5277](https://github.com/Lightning-AI/pytorch-lightning/pull/5277)) +- Distributed group defaults to `WORLD` if `None` ([#5125](https://github.com/Lightning-AI/pytorch-lightning/pull/5125)) ### Fixed -- Fixed `trainer.test` returning non-test metrics ([#5214](https://github.com/Lightning-AI/lightning/pull/5214)) -- Fixed metric state reset ([#5273](https://github.com/Lightning-AI/lightning/pull/5273)) -- Fixed `--num-nodes` on `DDPSequentialPlugin` ([#5327](https://github.com/Lightning-AI/lightning/pull/5327)) -- Fixed invalid value for `weights_summary` ([#5296](https://github.com/Lightning-AI/lightning/pull/5296)) -- Fixed `Trainer.test` not using the latest `best_model_path` ([#5161](https://github.com/Lightning-AI/lightning/pull/5161)) -- Fixed existence check for hparams not using underlying filesystem ([#5250](https://github.com/Lightning-AI/lightning/pull/5250)) -- Fixed `LightningOptimizer` AMP bug ([#5191](https://github.com/Lightning-AI/lightning/pull/5191)) -- Fixed casted key to string in `_flatten_dict` ([#5354](https://github.com/Lightning-AI/lightning/pull/5354)) +- Fixed `trainer.test` returning non-test metrics ([#5214](https://github.com/Lightning-AI/pytorch-lightning/pull/5214)) +- Fixed metric state reset ([#5273](https://github.com/Lightning-AI/pytorch-lightning/pull/5273)) +- Fixed `--num-nodes` on `DDPSequentialPlugin` ([#5327](https://github.com/Lightning-AI/pytorch-lightning/pull/5327)) +- Fixed invalid value for `weights_summary` ([#5296](https://github.com/Lightning-AI/pytorch-lightning/pull/5296)) +- Fixed `Trainer.test` not using the latest `best_model_path` ([#5161](https://github.com/Lightning-AI/pytorch-lightning/pull/5161)) +- Fixed existence check for hparams not using underlying filesystem ([#5250](https://github.com/Lightning-AI/pytorch-lightning/pull/5250)) +- Fixed `LightningOptimizer` AMP bug ([#5191](https://github.com/Lightning-AI/pytorch-lightning/pull/5191)) +- Fixed casted key to string in `_flatten_dict` ([#5354](https://github.com/Lightning-AI/pytorch-lightning/pull/5354)) ## [1.1.2] - 2020-12-23 ### Added -- Support number for logging with `sync_dist=True` ([#5080](https://github.com/Lightning-AI/lightning/pull/5080)) -- Added offset logging step when resuming for Wandb logger ([#5050](https://github.com/Lightning-AI/lightning/pull/5050)) +- Support number for logging with `sync_dist=True` ([#5080](https://github.com/Lightning-AI/pytorch-lightning/pull/5080)) +- 
Added offset logging step when resuming for Wandb logger ([#5050](https://github.com/Lightning-AI/pytorch-lightning/pull/5050)) ### Removed -- `enable_pl_optimizer=False` by default to temporarily fix AMP issues ([#5163](https://github.com/Lightning-AI/lightning/pull/5163)) +- `enable_pl_optimizer=False` by default to temporarily fix AMP issues ([#5163](https://github.com/Lightning-AI/pytorch-lightning/pull/5163)) ### Fixed -- Metric reduction with Logging ([#5150](https://github.com/Lightning-AI/lightning/pull/5150)) -- Remove nan loss in manual optimization ([#5121](https://github.com/Lightning-AI/lightning/pull/5121)) -- Un-balanced logging properly supported ([#5119](https://github.com/Lightning-AI/lightning/pull/5119)) -- Fix hanging in DDP HPC accelerators ([#5157](https://github.com/Lightning-AI/lightning/pull/5157)) -- Fix reset `TensorRunningAccum` ([#5106](https://github.com/Lightning-AI/lightning/pull/5106)) -- Updated `DALIClassificationLoader` to not use deprecated arguments ([#4925](https://github.com/Lightning-AI/lightning/pull/4925)) -- Corrected call to `torch.no_grad` ([#5124](https://github.com/Lightning-AI/lightning/pull/5124)) +- Metric reduction with Logging ([#5150](https://github.com/Lightning-AI/pytorch-lightning/pull/5150)) +- Remove nan loss in manual optimization ([#5121](https://github.com/Lightning-AI/pytorch-lightning/pull/5121)) +- Un-balanced logging properly supported ([#5119](https://github.com/Lightning-AI/pytorch-lightning/pull/5119)) +- Fix hanging in DDP HPC accelerators ([#5157](https://github.com/Lightning-AI/pytorch-lightning/pull/5157)) +- Fix reset `TensorRunningAccum` ([#5106](https://github.com/Lightning-AI/pytorch-lightning/pull/5106)) +- Updated `DALIClassificationLoader` to not use deprecated arguments ([#4925](https://github.com/Lightning-AI/pytorch-lightning/pull/4925)) +- Corrected call to `torch.no_grad` ([#5124](https://github.com/Lightning-AI/pytorch-lightning/pull/5124)) ## [1.1.1] - 2020-12-15 ### Added -- Add a notebook example to reach a quick baseline of ~94% accuracy on CIFAR10 using Resnet in Lightning ([#4818](https://github.com/Lightning-AI/lightning/pull/4818)) +- Add a notebook example to reach a quick baseline of ~94% accuracy on CIFAR10 using Resnet in Lightning ([#4818](https://github.com/Lightning-AI/pytorch-lightning/pull/4818)) ### Changed -- Simplify accelerator steps ([#5015](https://github.com/Lightning-AI/lightning/pull/5015)) -- Refactor load in checkpoint connector ([#4593](https://github.com/Lightning-AI/lightning/pull/4593)) -- Fixed the saved filename in `ModelCheckpoint` when it already exists ([#4861](https://github.com/Lightning-AI/lightning/pull/4861)) +- Simplify accelerator steps ([#5015](https://github.com/Lightning-AI/pytorch-lightning/pull/5015)) +- Refactor load in checkpoint connector ([#4593](https://github.com/Lightning-AI/pytorch-lightning/pull/4593)) +- Fixed the saved filename in `ModelCheckpoint` when it already exists ([#4861](https://github.com/Lightning-AI/pytorch-lightning/pull/4861)) ### Removed -- Drop duplicate metrics ([#5014](https://github.com/Lightning-AI/lightning/pull/5014)) -- Remove beta arg from F1 class and functional ([#5076](https://github.com/Lightning-AI/lightning/pull/5076)) +- Drop duplicate metrics ([#5014](https://github.com/Lightning-AI/pytorch-lightning/pull/5014)) +- Remove beta arg from F1 class and functional ([#5076](https://github.com/Lightning-AI/pytorch-lightning/pull/5076)) ### Fixed -- Fixed trainer by default `None` in `DDPAccelerator` 
([#4915](https://github.com/Lightning-AI/lightning/pull/4915))
-- Fixed `LightningOptimizer` to expose optimizer attributes ([#5095](https://github.com/Lightning-AI/lightning/pull/5095))
-- Do not warn when the `name` key is used in the `lr_scheduler` dict ([#5057](https://github.com/Lightning-AI/lightning/pull/5057))
-- Check if optimizer supports closure ([#4981](https://github.com/Lightning-AI/lightning/pull/4981))
+- Fixed trainer by default `None` in `DDPAccelerator` ([#4915](https://github.com/Lightning-AI/pytorch-lightning/pull/4915))
+- Fixed `LightningOptimizer` to expose optimizer attributes ([#5095](https://github.com/Lightning-AI/pytorch-lightning/pull/5095))
+- Do not warn when the `name` key is used in the `lr_scheduler` dict ([#5057](https://github.com/Lightning-AI/pytorch-lightning/pull/5057))
+- Check if optimizer supports closure ([#4981](https://github.com/Lightning-AI/pytorch-lightning/pull/4981))
 - Add deprecated metric utility functions back to functional (
-  [#5067](https://github.com/Lightning-AI/lightning/pull/5067),
-  [#5068](https://github.com/Lightning-AI/lightning/pull/5068))
-- Allow any input in `to_onnx` and `to_torchscript` ([#4378](https://github.com/Lightning-AI/lightning/pull/4378))
-- Fixed `DDPHPCAccelerator` hangs in DDP construction by calling `init_device` ([#5157](https://github.com/Lightning-AI/lightning/pull/5157))
+  [#5067](https://github.com/Lightning-AI/pytorch-lightning/pull/5067),
+  [#5068](https://github.com/Lightning-AI/pytorch-lightning/pull/5068))
+- Allow any input in `to_onnx` and `to_torchscript` ([#4378](https://github.com/Lightning-AI/pytorch-lightning/pull/4378))
+- Fixed `DDPHPCAccelerator` hangs in DDP construction by calling `init_device` ([#5157](https://github.com/Lightning-AI/pytorch-lightning/pull/5157))
 ## [1.1.0] - 2020-12-09
 ### Added
-- Added "monitor" key to saved `ModelCheckpoints` ([#4383](https://github.com/Lightning-AI/lightning/pull/4383))
-- Added `ConfusionMatrix` class interface ([#4348](https://github.com/Lightning-AI/lightning/pull/4348))
-- Added multiclass AUROC metric ([#4236](https://github.com/Lightning-AI/lightning/pull/4236))
-- Added global step indexing to the checkpoint name for a better sub-epoch checkpointing experience ([#3807](https://github.com/Lightning-AI/lightning/pull/3807))
-- Added optimizer hooks in callbacks ([#4379](https://github.com/Lightning-AI/lightning/pull/4379))
-- Added option to log momentum ([#4384](https://github.com/Lightning-AI/lightning/pull/4384))
-- Added `current_score` to `ModelCheckpoint.on_save_checkpoint` ([#4721](https://github.com/Lightning-AI/lightning/pull/4721))
+- Added "monitor" key to saved `ModelCheckpoints` ([#4383](https://github.com/Lightning-AI/pytorch-lightning/pull/4383))
+- Added `ConfusionMatrix` class interface ([#4348](https://github.com/Lightning-AI/pytorch-lightning/pull/4348))
+- Added multiclass AUROC metric ([#4236](https://github.com/Lightning-AI/pytorch-lightning/pull/4236))
+- Added global step indexing to the checkpoint name for a better sub-epoch checkpointing experience ([#3807](https://github.com/Lightning-AI/pytorch-lightning/pull/3807))
+- Added optimizer hooks in callbacks ([#4379](https://github.com/Lightning-AI/pytorch-lightning/pull/4379))
+- Added option to log momentum ([#4384](https://github.com/Lightning-AI/pytorch-lightning/pull/4384))
+- Added `current_score` to `ModelCheckpoint.on_save_checkpoint` ([#4721](https://github.com/Lightning-AI/pytorch-lightning/pull/4721))
 - Added logging using `self.log` in train and evaluation for epoch end hooks (
-  [#4552](https://github.com/Lightning-AI/lightning/pull/4552),
-  [#4495](https://github.com/Lightning-AI/lightning/pull/4495),
-  [#4439](https://github.com/Lightning-AI/lightning/pull/4439),
-  [#4684](https://github.com/Lightning-AI/lightning/pull/4684),
-  [#4913](https://github.com/Lightning-AI/lightning/pull/4913))
-- Added ability for DDP plugin to modify optimizer state saving ([#4675](https://github.com/Lightning-AI/lightning/pull/4675))
-- Added `prefix` argument in loggers ([#4557](https://github.com/Lightning-AI/lightning/pull/4557))
-- Added printing of total num of params, trainable and non-trainable params in ModelSummary ([#4521](https://github.com/Lightning-AI/lightning/pull/4521))
-- Added `PrecisionRecallCurve, ROC, AveragePrecision` class metric ([#4549](https://github.com/Lightning-AI/lightning/pull/4549))
-- Added custom `Apex` and `NativeAMP` as `Precision plugins` ([#4355](https://github.com/Lightning-AI/lightning/pull/4355))
-- Added `DALI MNIST` example ([#3721](https://github.com/Lightning-AI/lightning/pull/3721))
+  [#4552](https://github.com/Lightning-AI/pytorch-lightning/pull/4552),
+  [#4495](https://github.com/Lightning-AI/pytorch-lightning/pull/4495),
+  [#4439](https://github.com/Lightning-AI/pytorch-lightning/pull/4439),
+  [#4684](https://github.com/Lightning-AI/pytorch-lightning/pull/4684),
+  [#4913](https://github.com/Lightning-AI/pytorch-lightning/pull/4913))
+- Added ability for DDP plugin to modify optimizer state saving ([#4675](https://github.com/Lightning-AI/pytorch-lightning/pull/4675))
+- Added `prefix` argument in loggers ([#4557](https://github.com/Lightning-AI/pytorch-lightning/pull/4557))
+- Added printing of total num of params, trainable and non-trainable params in ModelSummary ([#4521](https://github.com/Lightning-AI/pytorch-lightning/pull/4521))
+- Added `PrecisionRecallCurve, ROC, AveragePrecision` class metric ([#4549](https://github.com/Lightning-AI/pytorch-lightning/pull/4549))
+- Added custom `Apex` and `NativeAMP` as `Precision plugins` ([#4355](https://github.com/Lightning-AI/pytorch-lightning/pull/4355))
+- Added `DALI MNIST` example ([#3721](https://github.com/Lightning-AI/pytorch-lightning/pull/3721))
 - Added `sharded plugin` for DDP for multi-gpu training memory optimizations (
-  [#4639](https://github.com/Lightning-AI/lightning/pull/4639),
-  [#4686](https://github.com/Lightning-AI/lightning/pull/4686),
-  [#4737](https://github.com/Lightning-AI/lightning/pull/4737),
-  [#4773](https://github.com/Lightning-AI/lightning/pull/4773))
-- Added `experiment_id` to the NeptuneLogger ([#3462](https://github.com/Lightning-AI/lightning/pull/3462))
-- Added `PyTorch Geometric` integration example with Lightning ([#4568](https://github.com/Lightning-AI/lightning/pull/4568))
-- Added `all_gather` method to `LightningModule` which allows gradient based tensor synchronizations for use-cases such as negative sampling. ([#5012](https://github.com/Lightning-AI/lightning/pull/5012))
-- Enabled `self.log` in most functions ([#4969](https://github.com/Lightning-AI/lightning/pull/4969))
-- Added changeable extension variable for `ModelCheckpoint` ([#4977](https://github.com/Lightning-AI/lightning/pull/4977))
+  [#4639](https://github.com/Lightning-AI/pytorch-lightning/pull/4639),
+  [#4686](https://github.com/Lightning-AI/pytorch-lightning/pull/4686),
+  [#4737](https://github.com/Lightning-AI/pytorch-lightning/pull/4737),
+  [#4773](https://github.com/Lightning-AI/pytorch-lightning/pull/4773))
+- Added `experiment_id` to the NeptuneLogger ([#3462](https://github.com/Lightning-AI/pytorch-lightning/pull/3462))
+- Added `PyTorch Geometric` integration example with Lightning ([#4568](https://github.com/Lightning-AI/pytorch-lightning/pull/4568))
+- Added `all_gather` method to `LightningModule` which allows gradient based tensor synchronizations for use-cases such as negative sampling. ([#5012](https://github.com/Lightning-AI/pytorch-lightning/pull/5012))
+- Enabled `self.log` in most functions ([#4969](https://github.com/Lightning-AI/pytorch-lightning/pull/4969))
+- Added changeable extension variable for `ModelCheckpoint` ([#4977](https://github.com/Lightning-AI/pytorch-lightning/pull/4977))
 ### Changed
-- Tuner algorithms will be skipped if `fast_dev_run=True` ([#3903](https://github.com/Lightning-AI/lightning/pull/3903))
-- `WandbLogger` does not force wandb `reinit` arg to True anymore and creates a run only when needed ([#4648](https://github.com/Lightning-AI/lightning/pull/4648))
-- Changed `automatic_optimization` to be a model attribute ([#4602](https://github.com/Lightning-AI/lightning/pull/4602))
-- Changed `Simple Profiler` report to order by percentage time spent + num calls ([#4880](https://github.com/Lightning-AI/lightning/pull/4880))
-- Simplify optimization Logic ([#4984](https://github.com/Lightning-AI/lightning/pull/4984))
-- Classification metrics overhaul ([#4837](https://github.com/Lightning-AI/lightning/pull/4837))
-- Updated `fast_dev_run` to accept integer representing num_batches ([#4629](https://github.com/Lightning-AI/lightning/pull/4629))
-- Refactored optimizer ([#4658](https://github.com/Lightning-AI/lightning/pull/4658))
+- Tuner algorithms will be skipped if `fast_dev_run=True` ([#3903](https://github.com/Lightning-AI/pytorch-lightning/pull/3903))
+- `WandbLogger` does not force wandb `reinit` arg to True anymore and creates a run only when needed ([#4648](https://github.com/Lightning-AI/pytorch-lightning/pull/4648))
+- Changed `automatic_optimization` to be a model attribute ([#4602](https://github.com/Lightning-AI/pytorch-lightning/pull/4602))
+- Changed `Simple Profiler` report to order by percentage time spent + num calls ([#4880](https://github.com/Lightning-AI/pytorch-lightning/pull/4880))
+- Simplify optimization Logic ([#4984](https://github.com/Lightning-AI/pytorch-lightning/pull/4984))
+- Classification metrics overhaul ([#4837](https://github.com/Lightning-AI/pytorch-lightning/pull/4837))
+- Updated `fast_dev_run` to accept integer representing num_batches ([#4629](https://github.com/Lightning-AI/pytorch-lightning/pull/4629))
+- Refactored optimizer ([#4658](https://github.com/Lightning-AI/pytorch-lightning/pull/4658))
 ### Deprecated
-- Deprecated `prefix` argument in `ModelCheckpoint` ([#4765](https://github.com/Lightning-AI/lightning/pull/4765))
-- Deprecated the old way of assigning hyper-parameters through `self.hparams = ...` ([#4813](https://github.com/Lightning-AI/lightning/pull/4813))
-- Deprecated `mode='auto'` from `ModelCheckpoint` and `EarlyStopping` ([#4695](https://github.com/Lightning-AI/lightning/pull/4695))
+- Deprecated `prefix` argument in `ModelCheckpoint` ([#4765](https://github.com/Lightning-AI/pytorch-lightning/pull/4765))
+- Deprecated the old way of assigning hyper-parameters through `self.hparams = ...` ([#4813](https://github.com/Lightning-AI/pytorch-lightning/pull/4813))
+- Deprecated `mode='auto'` from `ModelCheckpoint` and `EarlyStopping` ([#4695](https://github.com/Lightning-AI/pytorch-lightning/pull/4695))
 ### Removed
-- Removed `reorder` parameter of the `auc` metric ([#5004](https://github.com/Lightning-AI/lightning/pull/5004))
-- Removed `multiclass_roc` and `multiclass_precision_recall_curve`, use `roc` and `precision_recall_curve` instead ([#4549](https://github.com/Lightning-AI/lightning/pull/4549))
+- Removed `reorder` parameter of the `auc` metric ([#5004](https://github.com/Lightning-AI/pytorch-lightning/pull/5004))
+- Removed `multiclass_roc` and `multiclass_precision_recall_curve`, use `roc` and `precision_recall_curve` instead ([#4549](https://github.com/Lightning-AI/pytorch-lightning/pull/4549))
 ### Fixed
-- Added feature to move tensors to CPU before saving ([#4309](https://github.com/Lightning-AI/lightning/pull/4309))
-- Fixed `LoggerConnector` to have logged metrics on root device in DP ([#4138](https://github.com/Lightning-AI/lightning/pull/4138))
-- Auto convert tensors to contiguous format when `gather_all` ([#4907](https://github.com/Lightning-AI/lightning/pull/4907))
-- Fixed `PYTHONPATH` for ddp test model ([#4528](https://github.com/Lightning-AI/lightning/pull/4528))
-- Fixed allowing logger to support indexing ([#4595](https://github.com/Lightning-AI/lightning/pull/4595))
-- Fixed DDP and manual_optimization ([#4976](https://github.com/Lightning-AI/lightning/pull/4976))
+- Added feature to move tensors to CPU before saving ([#4309](https://github.com/Lightning-AI/pytorch-lightning/pull/4309))
+- Fixed `LoggerConnector` to have logged metrics on root device in DP ([#4138](https://github.com/Lightning-AI/pytorch-lightning/pull/4138))
+- Auto convert tensors to contiguous format when `gather_all` ([#4907](https://github.com/Lightning-AI/pytorch-lightning/pull/4907))
+- Fixed `PYTHONPATH` for ddp test model ([#4528](https://github.com/Lightning-AI/pytorch-lightning/pull/4528))
+- Fixed allowing logger to support indexing ([#4595](https://github.com/Lightning-AI/pytorch-lightning/pull/4595))
+- Fixed DDP and manual_optimization ([#4976](https://github.com/Lightning-AI/pytorch-lightning/pull/4976))
 ## [1.0.8] - 2020-11-24
 ### Added
-- Added casting to python types for numpy scalars when logging `hparams` ([#4647](https://github.com/Lightning-AI/lightning/pull/4647))
-- Added warning when progress bar refresh rate is less than 20 on Google Colab to prevent crashing ([#4654](https://github.com/Lightning-AI/lightning/pull/4654))
-- Added `F1` class metric ([#4656](https://github.com/Lightning-AI/lightning/pull/4656))
+- Added casting to python types for numpy scalars when logging `hparams` ([#4647](https://github.com/Lightning-AI/pytorch-lightning/pull/4647))
+- Added warning when progress bar refresh rate is less than 20 on Google Colab to prevent crashing ([#4654](https://github.com/Lightning-AI/pytorch-lightning/pull/4654))
+- Added `F1` class metric ([#4656](https://github.com/Lightning-AI/pytorch-lightning/pull/4656))
 ### Changed
-- Consistently use `step=trainer.global_step` in `LearningRateMonitor` independently of `logging_interval` ([#4376](https://github.com/Lightning-AI/lightning/pull/4376))
-- Metric states are no longer as default added to `state_dict` ([#4685](https://github.com/Lightning-AI/lightning/pull/4685))
-- Renamed class metric `Fbeta` >> `FBeta` ([#4656](https://github.com/Lightning-AI/lightning/pull/4656))
-- Model summary: add 1 decimal place ([#4745](https://github.com/Lightning-AI/lightning/pull/4745))
-- Do not override `PYTHONWARNINGS` ([#4700](https://github.com/Lightning-AI/lightning/pull/4700))
-- Changed `init_ddp_connection` moved from `DDP` to `DDPPlugin` ([#4407](https://github.com/Lightning-AI/lightning/pull/4407))
+- Consistently use `step=trainer.global_step` in `LearningRateMonitor` independently of `logging_interval` ([#4376](https://github.com/Lightning-AI/pytorch-lightning/pull/4376))
+- Metric states are no longer as default added to `state_dict` ([#4685](https://github.com/Lightning-AI/pytorch-lightning/pull/4685))
+- Renamed class metric `Fbeta` >> `FBeta` ([#4656](https://github.com/Lightning-AI/pytorch-lightning/pull/4656))
+- Model summary: add 1 decimal place ([#4745](https://github.com/Lightning-AI/pytorch-lightning/pull/4745))
+- Do not override `PYTHONWARNINGS` ([#4700](https://github.com/Lightning-AI/pytorch-lightning/pull/4700))
+- Changed `init_ddp_connection` moved from `DDP` to `DDPPlugin` ([#4407](https://github.com/Lightning-AI/pytorch-lightning/pull/4407))
 ### Fixed
-- Fixed checkpoint `hparams` dict casting when `omegaconf` is available ([#4770](https://github.com/Lightning-AI/lightning/pull/4770))
-- Fixed incomplete progress bars when total batches not divisible by refresh rate ([#4577](https://github.com/Lightning-AI/lightning/pull/4577))
-- Updated SSIM metric ([#4566](https://github.com/Lightning-AI/lightning/pull/4566))
-- Fixed batch_arg_name - add `batch_arg_name` to all calls to `_adjust_batch_size`bug ([#4812](https://github.com/Lightning-AI/lightning/pull/4812))
-- Fixed `torchtext` data to GPU ([#4785](https://github.com/Lightning-AI/lightning/pull/4785))
-- Fixed a crash bug in MLFlow logger ([#4716](https://github.com/Lightning-AI/lightning/pull/4716))
+- Fixed checkpoint `hparams` dict casting when `omegaconf` is available ([#4770](https://github.com/Lightning-AI/pytorch-lightning/pull/4770))
+- Fixed incomplete progress bars when total batches not divisible by refresh rate ([#4577](https://github.com/Lightning-AI/pytorch-lightning/pull/4577))
+- Updated SSIM metric ([#4566](https://github.com/Lightning-AI/pytorch-lightning/pull/4566))
+- Fixed `batch_arg_name` bug - add `batch_arg_name` to all calls to `_adjust_batch_size` ([#4812](https://github.com/Lightning-AI/pytorch-lightning/pull/4812))
+- Fixed `torchtext` data to GPU ([#4785](https://github.com/Lightning-AI/pytorch-lightning/pull/4785))
+- Fixed a crash bug in MLFlow logger ([#4716](https://github.com/Lightning-AI/pytorch-lightning/pull/4716))
 ## [1.0.7] - 2020-11-17
 ### Added
-- Added lambda closure to `manual_optimizer_step` ([#4618](https://github.com/Lightning-AI/lightning/pull/4618))
+- Added lambda closure to `manual_optimizer_step` ([#4618](https://github.com/Lightning-AI/pytorch-lightning/pull/4618))
 ### Changed
-- Change Metrics `persistent` default mode to `False` ([#4685](https://github.com/Lightning-AI/lightning/pull/4685))
-- LoggerConnector log_metrics will use `total_batch_idx` instead of `global_step` when logging on `training step` ([#4738](https://github.com/Lightning-AI/lightning/pull/4738))
+- Change Metrics `persistent` default mode to `False` ([#4685](https://github.com/Lightning-AI/pytorch-lightning/pull/4685))
+- LoggerConnector log_metrics will use `total_batch_idx` instead of `global_step` when logging on `training step` ([#4738](https://github.com/Lightning-AI/pytorch-lightning/pull/4738))
 ### Fixed
-- Prevent crash if `sync_dist=True` on CPU ([#4626](https://github.com/Lightning-AI/lightning/pull/4626))
-- Fixed average pbar Metrics ([#4534](https://github.com/Lightning-AI/lightning/pull/4534))
-- Fixed `setup` callback hook to correctly pass the LightningModule through ([#4608](https://github.com/Lightning-AI/lightning/pull/4608))
-- Allowing decorate model init with saving `hparams` inside ([#4662](https://github.com/Lightning-AI/lightning/pull/4662))
-- Fixed `split_idx` set by `LoggerConnector` in `on_trainer_init` to `Trainer` ([#4697](https://github.com/Lightning-AI/lightning/pull/4697))
+- Prevent crash if `sync_dist=True` on CPU ([#4626](https://github.com/Lightning-AI/pytorch-lightning/pull/4626))
+- Fixed average pbar Metrics ([#4534](https://github.com/Lightning-AI/pytorch-lightning/pull/4534))
+- Fixed `setup` callback hook to correctly pass the LightningModule through ([#4608](https://github.com/Lightning-AI/pytorch-lightning/pull/4608))
+- Allow decorating model init with saving `hparams` inside ([#4662](https://github.com/Lightning-AI/pytorch-lightning/pull/4662))
+- Fixed `split_idx` set by `LoggerConnector` in `on_trainer_init` to `Trainer` ([#4697](https://github.com/Lightning-AI/pytorch-lightning/pull/4697))
 ## [1.0.6] - 2020-11-11
 ### Added
-- Added metrics aggregation in Horovod and fixed early stopping ([#3775](https://github.com/Lightning-AI/lightning/pull/3775))
-- Added `manual_optimizer_step` which work with `AMP Native` and `accumulated_grad_batches` ([#4485](https://github.com/Lightning-AI/lightning/pull/4485))
-- Added `persistent(mode)` method to metrics, to enable and disable metric states being added to `state_dict` ([#4482](https://github.com/Lightning-AI/lightning/pull/4482))
-- Added congratulations at the end of our notebooks ([#4555](https://github.com/Lightning-AI/lightning/pull/4555))
-- Added parameters `move_metrics_to_cpu` in Trainer to disable gpu leak ([#4592](https://github.com/Lightning-AI/lightning/pull/4592))
+- Added metrics aggregation in Horovod and fixed early stopping ([#3775](https://github.com/Lightning-AI/pytorch-lightning/pull/3775))
+- Added `manual_optimizer_step` which works with `AMP Native` and `accumulated_grad_batches` ([#4485](https://github.com/Lightning-AI/pytorch-lightning/pull/4485))
+- Added `persistent(mode)` method to metrics, to enable and disable metric states being added to `state_dict` ([#4482](https://github.com/Lightning-AI/pytorch-lightning/pull/4482))
+- Added congratulations at the end of our notebooks ([#4555](https://github.com/Lightning-AI/pytorch-lightning/pull/4555))
+- Added parameter `move_metrics_to_cpu` in Trainer to prevent GPU memory leaks ([#4592](https://github.com/Lightning-AI/pytorch-lightning/pull/4592))
 ### Changed
-- Changed `fsspec` to tuner ([#4458](https://github.com/Lightning-AI/lightning/pull/4458))
-- Unify SLURM/TorchElastic under backend plugin ([#4578](https://github.com/Lightning-AI/lightning/pull/4578),
-  [#4580](https://github.com/Lightning-AI/lightning/pull/4580),
-  [#4581](https://github.com/Lightning-AI/lightning/pull/4581),
-  [#4582](https://github.com/Lightning-AI/lightning/pull/4582),
-  [#4583](https://github.com/Lightning-AI/lightning/pull/4583))
+- Changed `fsspec` to tuner ([#4458](https://github.com/Lightning-AI/pytorch-lightning/pull/4458))
+- Unify SLURM/TorchElastic under backend plugin ([#4578](https://github.com/Lightning-AI/pytorch-lightning/pull/4578),
+  [#4580](https://github.com/Lightning-AI/pytorch-lightning/pull/4580),
+  [#4581](https://github.com/Lightning-AI/pytorch-lightning/pull/4581),
+  [#4582](https://github.com/Lightning-AI/pytorch-lightning/pull/4582),
+  [#4583](https://github.com/Lightning-AI/pytorch-lightning/pull/4583))
 ### Fixed
-- Fixed feature-lack in `hpc_load` ([#4526](https://github.com/Lightning-AI/lightning/pull/4526))
-- Fixed metrics states being overridden in DDP mode ([#4482](https://github.com/Lightning-AI/lightning/pull/4482))
-- Fixed `lightning_getattr`, `lightning_hasattr` not finding the correct attributes in datamodule ([#4347](https://github.com/Lightning-AI/lightning/pull/4347))
-- Fixed automatic optimization AMP by `manual_optimization_step` ([#4485](https://github.com/Lightning-AI/lightning/pull/4485))
-- Replace `MisconfigurationException` with warning in `ModelCheckpoint` Callback ([#4560](https://github.com/Lightning-AI/lightning/pull/4560))
-- Fixed logged keys in mlflow logger ([#4412](https://github.com/Lightning-AI/lightning/pull/4412))
-- Fixed `is_picklable` by catching `AttributeError` ([#4508](https://github.com/Lightning-AI/lightning/pull/4508))
-- Fixed multi test dataloaders dict `AttributeError` error ([#4480](https://github.com/Lightning-AI/lightning/pull/4480))
-- Fixed show progress bar only for `progress_rank 0` on `DDP_SLURM` ([#4437](https://github.com/Lightning-AI/lightning/pull/4437))
+- Fixed feature-lack in `hpc_load` ([#4526](https://github.com/Lightning-AI/pytorch-lightning/pull/4526))
+- Fixed metrics states being overridden in DDP mode ([#4482](https://github.com/Lightning-AI/pytorch-lightning/pull/4482))
+- Fixed `lightning_getattr`, `lightning_hasattr` not finding the correct attributes in datamodule ([#4347](https://github.com/Lightning-AI/pytorch-lightning/pull/4347))
+- Fixed automatic optimization AMP by `manual_optimization_step` ([#4485](https://github.com/Lightning-AI/pytorch-lightning/pull/4485))
+- Replace `MisconfigurationException` with warning in `ModelCheckpoint` Callback ([#4560](https://github.com/Lightning-AI/pytorch-lightning/pull/4560))
+- Fixed logged keys in mlflow logger ([#4412](https://github.com/Lightning-AI/pytorch-lightning/pull/4412))
+- Fixed `is_picklable` by catching `AttributeError` ([#4508](https://github.com/Lightning-AI/pytorch-lightning/pull/4508))
+- Fixed multi test dataloaders dict `AttributeError` error ([#4480](https://github.com/Lightning-AI/pytorch-lightning/pull/4480))
+- Fixed show progress bar only for `progress_rank 0` on `DDP_SLURM` ([#4437](https://github.com/Lightning-AI/pytorch-lightning/pull/4437))
 ## [1.0.5] - 2020-11-03
 ### Added
-- Added PyTorch 1.7 Stable support ([#3821](https://github.com/Lightning-AI/lightning/pull/3821))
-- Added timeout for `tpu_device_exists` to ensure process does not hang indefinitely ([#4340](https://github.com/Lightning-AI/lightning/pull/4340))
+- Added PyTorch 1.7 Stable support ([#3821](https://github.com/Lightning-AI/pytorch-lightning/pull/3821))
+- Added timeout for `tpu_device_exists` to ensure process does not hang indefinitely ([#4340](https://github.com/Lightning-AI/pytorch-lightning/pull/4340))
 ### Changed
-- W&B log in sync with `Trainer` step ([#4405](https://github.com/Lightning-AI/lightning/pull/4405))
-- Hook `on_after_backward` is called only when `optimizer_step` is being called ([#4439](https://github.com/Lightning-AI/lightning/pull/4439))
-- Moved `track_and_norm_grad` into `training loop` and called only when `optimizer_step` is being called ([#4439](https://github.com/Lightning-AI/lightning/pull/4439))
-- Changed type checker with explicit cast of `ref_model` object ([#4457](https://github.com/Lightning-AI/lightning/pull/4457))
-- Changed `distributed_backend` -> `accelerator` ([#4429](https://github.com/Lightning-AI/lightning/pull/4429))
+- W&B log in sync with `Trainer` step ([#4405](https://github.com/Lightning-AI/pytorch-lightning/pull/4405))
+- Hook `on_after_backward` is called only when `optimizer_step` is being called ([#4439](https://github.com/Lightning-AI/pytorch-lightning/pull/4439))
+- Moved `track_and_norm_grad` into `training loop` and called only when `optimizer_step` is being called ([#4439](https://github.com/Lightning-AI/pytorch-lightning/pull/4439))
+- Changed type checker with explicit cast of `ref_model` object ([#4457](https://github.com/Lightning-AI/pytorch-lightning/pull/4457))
+- Changed `distributed_backend` -> `accelerator` ([#4429](https://github.com/Lightning-AI/pytorch-lightning/pull/4429))
 ### Deprecated
-- Deprecated passing `ModelCheckpoint` instance to `checkpoint_callback` Trainer argument ([#4336](https://github.com/Lightning-AI/lightning/pull/4336))
+- Deprecated passing `ModelCheckpoint` instance to `checkpoint_callback` Trainer argument ([#4336](https://github.com/Lightning-AI/pytorch-lightning/pull/4336))
 ### Fixed
-- Disable saving checkpoints if not trained ([#4372](https://github.com/Lightning-AI/lightning/pull/4372))
-- Fixed error using `auto_select_gpus=True` with `gpus=-1` ([#4209](https://github.com/Lightning-AI/lightning/pull/4209))
-- Disabled training when `limit_train_batches=0` ([#4371](https://github.com/Lightning-AI/lightning/pull/4371))
-- Fixed that metrics do not store computational graph for all seen data ([#4313](https://github.com/Lightning-AI/lightning/pull/4313))
-- Fixed AMP unscale for `on_after_backward` ([#4439](https://github.com/Lightning-AI/lightning/pull/4439))
-- Fixed TorchScript export when module includes Metrics ([#4428](https://github.com/Lightning-AI/lightning/pull/4428))
-- Fixed TorchScript trace method's data to device and docstring ([#4360](https://github.com/Lightning-AI/lightning/pull/4360))
-- Fixed CSV logger warning ([#4419](https://github.com/Lightning-AI/lightning/pull/4419))
-- Fixed skip DDP parameter sync ([#4301](https://github.com/Lightning-AI/lightning/pull/4301))
-- Fixed `WandbLogger` _sanitize_callable function ([#4422](https://github.com/Lightning-AI/lightning/pull/4422))
-- Fixed `AMP Native` `_unscale` gradient ([#4441](https://github.com/Lightning-AI/lightning/pull/4441))
+- Disable saving checkpoints if not trained ([#4372](https://github.com/Lightning-AI/pytorch-lightning/pull/4372))
+- Fixed error using `auto_select_gpus=True` with `gpus=-1` ([#4209](https://github.com/Lightning-AI/pytorch-lightning/pull/4209))
+- Disabled training when `limit_train_batches=0` ([#4371](https://github.com/Lightning-AI/pytorch-lightning/pull/4371))
+- Fixed that metrics do not store computational graph for all seen data ([#4313](https://github.com/Lightning-AI/pytorch-lightning/pull/4313))
+- Fixed AMP unscale for `on_after_backward` ([#4439](https://github.com/Lightning-AI/pytorch-lightning/pull/4439))
+- Fixed TorchScript export when module includes Metrics ([#4428](https://github.com/Lightning-AI/pytorch-lightning/pull/4428))
+- Fixed TorchScript trace method's data to device and docstring ([#4360](https://github.com/Lightning-AI/pytorch-lightning/pull/4360))
+- Fixed CSV logger warning ([#4419](https://github.com/Lightning-AI/pytorch-lightning/pull/4419))
+- Fixed skip DDP parameter sync ([#4301](https://github.com/Lightning-AI/pytorch-lightning/pull/4301))
+- Fixed `WandbLogger` `_sanitize_callable` function ([#4422](https://github.com/Lightning-AI/pytorch-lightning/pull/4422))
+- Fixed `AMP Native` `_unscale` gradient ([#4441](https://github.com/Lightning-AI/pytorch-lightning/pull/4441))
 ## [1.0.4] - 2020-10-27
 ### Added
-- Added `dirpath` and `filename` parameter in `ModelCheckpoint` ([#4213](https://github.com/Lightning-AI/lightning/pull/4213))
-- Added plugins docs and DDPPlugin to customize ddp across all accelerators ([#4258](https://github.com/Lightning-AI/lightning/pull/4285))
-- Added `strict` option to the scheduler dictionary ([#3586](https://github.com/Lightning-AI/lightning/pull/3586))
-- Added `fsspec` support for profilers ([#4162](https://github.com/Lightning-AI/lightning/pull/4162))
-- Added autogenerated helptext to `Trainer.add_argparse_args` ([#4344](https://github.com/Lightning-AI/lightning/pull/4344))
-- Added support for string values in `Trainer`'s `profiler` parameter ([#3656](https://github.com/Lightning-AI/lightning/pull/3656))
-- Added `optimizer_closure` to `optimizer.step` when supported ([#4190](https://github.com/Lightning-AI/lightning/pull/4190))
-- Added unification of regression metrics ([#4166](https://github.com/Lightning-AI/lightning/pull/4166))
-- Added checkpoint load from Bytes ([#4314](https://github.com/Lightning-AI/lightning/pull/4314))
+- Added `dirpath` and `filename` parameter in `ModelCheckpoint` ([#4213](https://github.com/Lightning-AI/pytorch-lightning/pull/4213))
+- Added plugins docs and DDPPlugin to customize ddp across all accelerators ([#4258](https://github.com/Lightning-AI/pytorch-lightning/pull/4285))
+- Added `strict` option to the scheduler dictionary ([#3586](https://github.com/Lightning-AI/pytorch-lightning/pull/3586))
+- Added `fsspec` support for profilers ([#4162](https://github.com/Lightning-AI/pytorch-lightning/pull/4162))
+- Added autogenerated helptext to `Trainer.add_argparse_args` ([#4344](https://github.com/Lightning-AI/pytorch-lightning/pull/4344))
+- Added support for string values in `Trainer`'s `profiler` parameter ([#3656](https://github.com/Lightning-AI/pytorch-lightning/pull/3656))
+- Added `optimizer_closure` to `optimizer.step` when supported ([#4190](https://github.com/Lightning-AI/pytorch-lightning/pull/4190))
+- Added unification of regression metrics ([#4166](https://github.com/Lightning-AI/pytorch-lightning/pull/4166))
+- Added checkpoint load from Bytes ([#4314](https://github.com/Lightning-AI/pytorch-lightning/pull/4314))
 ### Changed
-- Improved error messages for invalid `configure_optimizers` returns ([#3587](https://github.com/Lightning-AI/lightning/pull/3587))
-- Allow changing the logged step value in `validation_step` ([#4130](https://github.com/Lightning-AI/lightning/pull/4130))
-- Allow setting `replace_sampler_ddp=True` with a distributed sampler already added ([#4273](https://github.com/Lightning-AI/lightning/pull/4273))
-- Fixed sanitized parameters for `WandbLogger.log_hyperparams` ([#4320](https://github.com/Lightning-AI/lightning/pull/4320))
+- Improved error messages for invalid `configure_optimizers` returns ([#3587](https://github.com/Lightning-AI/pytorch-lightning/pull/3587))
+- Allow changing the logged step value in `validation_step` ([#4130](https://github.com/Lightning-AI/pytorch-lightning/pull/4130))
+- Allow setting `replace_sampler_ddp=True` with a distributed sampler already added ([#4273](https://github.com/Lightning-AI/pytorch-lightning/pull/4273))
+- Fixed sanitized parameters for `WandbLogger.log_hyperparams` ([#4320](https://github.com/Lightning-AI/pytorch-lightning/pull/4320))
 ### Deprecated
-- Deprecated `filepath` in `ModelCheckpoint` ([#4213](https://github.com/Lightning-AI/lightning/pull/4213))
-- Deprecated `reorder` parameter of the `auc` metric ([#4237](https://github.com/Lightning-AI/lightning/pull/4237))
-- Deprecated bool values in `Trainer`'s `profiler` parameter ([#3656](https://github.com/Lightning-AI/lightning/pull/3656))
+- Deprecated `filepath` in `ModelCheckpoint` ([#4213](https://github.com/Lightning-AI/pytorch-lightning/pull/4213))
+- Deprecated `reorder` parameter of the `auc` metric ([#4237](https://github.com/Lightning-AI/pytorch-lightning/pull/4237))
+- Deprecated bool values in `Trainer`'s `profiler` parameter ([#3656](https://github.com/Lightning-AI/pytorch-lightning/pull/3656))
 ### Fixed
-- Fixed setting device ids in DDP ([#4297](https://github.com/Lightning-AI/lightning/pull/4297))
-- Fixed synchronization of best model path in `ddp_accelerator` ([#4323](https://github.com/Lightning-AI/lightning/pull/4323))
-- Fixed `WandbLogger` not uploading checkpoint artifacts at the end of training ([#4341](https://github.com/Lightning-AI/lightning/pull/4341))
-- Fixed `FBeta` computation ([#4183](https://github.com/Lightning-AI/lightning/pull/4183))
-- Fixed `accumulation across batches` has completed `before breaking training loop` ([#4278](https://github.com/Lightning-AI/lightning/pull/4278))
-- Fixed `ModelCheckpoint` don't increase current_epoch and global_step when not training ([#4291](https://github.com/Lightning-AI/lightning/pull/4291))
-- Fixed `COMET_EXPERIMENT_KEY` environment variable usage in comet logger ([#4230](https://github.com/Lightning-AI/lightning/pull/4230))
+- Fixed setting device ids in DDP ([#4297](https://github.com/Lightning-AI/pytorch-lightning/pull/4297))
+- Fixed synchronization of best model path in `ddp_accelerator` ([#4323](https://github.com/Lightning-AI/pytorch-lightning/pull/4323))
+- Fixed `WandbLogger` not uploading checkpoint artifacts at the end of training ([#4341](https://github.com/Lightning-AI/pytorch-lightning/pull/4341))
+- Fixed `FBeta` computation ([#4183](https://github.com/Lightning-AI/pytorch-lightning/pull/4183))
+- Fixed `accumulation across batches` has completed `before breaking training loop` ([#4278](https://github.com/Lightning-AI/pytorch-lightning/pull/4278))
+- Fixed `ModelCheckpoint` doesn't increase current_epoch and global_step when not training ([#4291](https://github.com/Lightning-AI/pytorch-lightning/pull/4291))
+- Fixed `COMET_EXPERIMENT_KEY` environment variable usage in comet logger ([#4230](https://github.com/Lightning-AI/pytorch-lightning/pull/4230))
 ## [1.0.3] - 2020-10-20
 ### Added
-- Added persistent flag to `Metric.add_state` ([#4195](https://github.com/Lightning-AI/lightning/pull/4195))
+- Added persistent flag to `Metric.add_state` ([#4195](https://github.com/Lightning-AI/pytorch-lightning/pull/4195))
 ### Changed
-- Used `checkpoint_connector.hpc_save` in SLURM ([#4217](https://github.com/Lightning-AI/lightning/pull/4217))
-- Moved base req. to root ([#4219](https://github.com/Lightning-AI/lightning/pull/4219))
+- Used `checkpoint_connector.hpc_save` in SLURM ([#4217](https://github.com/Lightning-AI/pytorch-lightning/pull/4217))
+- Moved base req. to root ([#4219](https://github.com/Lightning-AI/pytorch-lightning/pull/4219))
 ### Fixed
-- Fixed `hparams` assign in init ([#4189](https://github.com/Lightning-AI/lightning/pull/4189))
-- Fixed overwrite check for model hooks ([#4010](https://github.com/Lightning-AI/lightning/pull/4010))
+- Fixed `hparams` assign in init ([#4189](https://github.com/Lightning-AI/pytorch-lightning/pull/4189))
+- Fixed overwrite check for model hooks ([#4010](https://github.com/Lightning-AI/pytorch-lightning/pull/4010))
 ## [1.0.2] - 2020-10-15
 ### Added
-- Added trace functionality to the function `to_torchscript` ([#4142](https://github.com/Lightning-AI/lightning/pull/4142))
+- Added trace functionality to the function `to_torchscript` ([#4142](https://github.com/Lightning-AI/pytorch-lightning/pull/4142))
 ### Changed
-- Called `on_load_checkpoint` before loading `state_dict` ([#4057](https://github.com/Lightning-AI/lightning/pull/4057))
+- Called `on_load_checkpoint` before loading `state_dict` ([#4057](https://github.com/Lightning-AI/pytorch-lightning/pull/4057))
 ### Removed
-- Removed duplicate metric vs step log for train loop ([#4173](https://github.com/Lightning-AI/lightning/pull/4173))
+- Removed duplicate metric vs step log for train loop ([#4173](https://github.com/Lightning-AI/pytorch-lightning/pull/4173))
 ### Fixed
-- Fixed the `self.log` problem in `validation_step()` ([#4169](https://github.com/Lightning-AI/lightning/pull/4169))
-- Fixed `hparams` saving - save the state when `save_hyperparameters()` is called [in `__init__`] ([#4163](https://github.com/Lightning-AI/lightning/pull/4163))
-- Fixed runtime failure while exporting `hparams` to yaml ([#4158](https://github.com/Lightning-AI/lightning/pull/4158))
+- Fixed the `self.log` problem in `validation_step()` ([#4169](https://github.com/Lightning-AI/pytorch-lightning/pull/4169))
+- Fixed `hparams` saving - save the state when `save_hyperparameters()` is called [in `__init__`] ([#4163](https://github.com/Lightning-AI/pytorch-lightning/pull/4163))
+- Fixed runtime failure while exporting `hparams` to yaml ([#4158](https://github.com/Lightning-AI/pytorch-lightning/pull/4158))
 ## [1.0.1] - 2020-10-14
 ### Added
-- Added getstate/setstate method for torch.save serialization ([#4127](https://github.com/Lightning-AI/lightning/pull/4127))
+- Added getstate/setstate method for torch.save serialization ([#4127](https://github.com/Lightning-AI/pytorch-lightning/pull/4127))
 ## [1.0.0] - 2020-10-13
 ### Added
-- Added Explained Variance Metric + metric fix ([#4013](https://github.com/Lightning-AI/lightning/pull/4013))
-- Added Metric <-> Lightning Module integration tests ([#4008](https://github.com/Lightning-AI/lightning/pull/4008))
-- Added parsing OS env vars in `Trainer` ([#4022](https://github.com/Lightning-AI/lightning/pull/4022))
-- Added classification metrics ([#4043](https://github.com/Lightning-AI/lightning/pull/4043))
-- Updated explained variance metric ([#4024](https://github.com/Lightning-AI/lightning/pull/4024))
-- Enabled plugins ([#4041](https://github.com/Lightning-AI/lightning/pull/4041))
-- Enabled custom clusters ([#4048](https://github.com/Lightning-AI/lightning/pull/4048))
-- Enabled passing in custom accelerators ([#4050](https://github.com/Lightning-AI/lightning/pull/4050))
-- Added `LightningModule.toggle_optimizer` ([#4058](https://github.com/Lightning-AI/lightning/pull/4058))
-- Added `LightningModule.manual_backward` ([#4063](https://github.com/Lightning-AI/lightning/pull/4063))
-- Added `output` argument to `*_batch_end` hooks ([#3965](https://github.com/Lightning-AI/lightning/pull/3965),
-  [#3966](https://github.com/Lightning-AI/lightning/pull/3966))
-- Added `output` argument to `*_epoch_end` hooks ([#3967](https://github.com/Lightning-AI/lightning/pull/3967))
+- Added Explained Variance Metric + metric fix ([#4013](https://github.com/Lightning-AI/pytorch-lightning/pull/4013))
+- Added Metric <-> Lightning Module integration tests ([#4008](https://github.com/Lightning-AI/pytorch-lightning/pull/4008))
+- Added parsing OS env vars in `Trainer` ([#4022](https://github.com/Lightning-AI/pytorch-lightning/pull/4022))
+- Added classification metrics ([#4043](https://github.com/Lightning-AI/pytorch-lightning/pull/4043))
+- Updated explained variance metric ([#4024](https://github.com/Lightning-AI/pytorch-lightning/pull/4024))
+- Enabled plugins ([#4041](https://github.com/Lightning-AI/pytorch-lightning/pull/4041))
+- Enabled custom clusters ([#4048](https://github.com/Lightning-AI/pytorch-lightning/pull/4048))
+- Enabled passing in custom accelerators ([#4050](https://github.com/Lightning-AI/pytorch-lightning/pull/4050))
+- Added `LightningModule.toggle_optimizer` ([#4058](https://github.com/Lightning-AI/pytorch-lightning/pull/4058))
+- Added `LightningModule.manual_backward` ([#4063](https://github.com/Lightning-AI/pytorch-lightning/pull/4063))
+- Added `output` argument to `*_batch_end` hooks ([#3965](https://github.com/Lightning-AI/pytorch-lightning/pull/3965),
+  [#3966](https://github.com/Lightning-AI/pytorch-lightning/pull/3966))
+- Added `output` argument to `*_epoch_end` hooks ([#3967](https://github.com/Lightning-AI/pytorch-lightning/pull/3967))
 ### Changed
-- Integrated metrics API with self.log ([#3961](https://github.com/Lightning-AI/lightning/pull/3961))
-- Decoupled Apex ([#4052](https://github.com/Lightning-AI/lightning/pull/4052),
-  [#4054](https://github.com/Lightning-AI/lightning/pull/4054),
-  [#4055](https://github.com/Lightning-AI/lightning/pull/4055),
-  [#4056](https://github.com/Lightning-AI/lightning/pull/4056),
-  [#4058](https://github.com/Lightning-AI/lightning/pull/4058),
-  [#4060](https://github.com/Lightning-AI/lightning/pull/4060),
-  [#4061](https://github.com/Lightning-AI/lightning/pull/4061),
-  [#4062](https://github.com/Lightning-AI/lightning/pull/4062),
-  [#4063](https://github.com/Lightning-AI/lightning/pull/4063),
-  [#4064](https://github.com/Lightning-AI/lightning/pull/4064),
-  [#4065](https://github.com/Lightning-AI/lightning/pull/4065))
-- Renamed all backends to `Accelerator` ([#4066](https://github.com/Lightning-AI/lightning/pull/4066))
-- Enabled manual returns ([#4089](https://github.com/Lightning-AI/lightning/pull/4089))
+- Integrated metrics API with self.log ([#3961](https://github.com/Lightning-AI/pytorch-lightning/pull/3961))
+- Decoupled Apex ([#4052](https://github.com/Lightning-AI/pytorch-lightning/pull/4052),
+  [#4054](https://github.com/Lightning-AI/pytorch-lightning/pull/4054),
+  [#4055](https://github.com/Lightning-AI/pytorch-lightning/pull/4055),
+  [#4056](https://github.com/Lightning-AI/pytorch-lightning/pull/4056),
+  [#4058](https://github.com/Lightning-AI/pytorch-lightning/pull/4058),
+  [#4060](https://github.com/Lightning-AI/pytorch-lightning/pull/4060),
+  [#4061](https://github.com/Lightning-AI/pytorch-lightning/pull/4061),
+  [#4062](https://github.com/Lightning-AI/pytorch-lightning/pull/4062),
+  [#4063](https://github.com/Lightning-AI/pytorch-lightning/pull/4063),
+  [#4064](https://github.com/Lightning-AI/pytorch-lightning/pull/4064),
+  [#4065](https://github.com/Lightning-AI/pytorch-lightning/pull/4065))
+- Renamed all backends to `Accelerator` ([#4066](https://github.com/Lightning-AI/pytorch-lightning/pull/4066))
+- Enabled manual returns ([#4089](https://github.com/Lightning-AI/pytorch-lightning/pull/4089))
 ### Removed
-- Removed support for EvalResult and TrainResult ([#3968](https://github.com/Lightning-AI/lightning/pull/3968))
-- Removed deprecated trainer flags: `overfit_pct`, `log_save_interval`, `row_log_interval` ([#3969](https://github.com/Lightning-AI/lightning/pull/3969))
-- Removed deprecated early_stop_callback ([#3982](https://github.com/Lightning-AI/lightning/pull/3982))
-- Removed deprecated model hooks ([#3980](https://github.com/Lightning-AI/lightning/pull/3980))
-- Removed deprecated callbacks ([#3979](https://github.com/Lightning-AI/lightning/pull/3979))
-- Removed `trainer` argument in `LightningModule.backward` [#4056](https://github.com/Lightning-AI/lightning/pull/4056))
+- Removed support for EvalResult and TrainResult ([#3968](https://github.com/Lightning-AI/pytorch-lightning/pull/3968))
+- Removed deprecated trainer flags: `overfit_pct`, `log_save_interval`, `row_log_interval` ([#3969](https://github.com/Lightning-AI/pytorch-lightning/pull/3969))
+- Removed deprecated early_stop_callback ([#3982](https://github.com/Lightning-AI/pytorch-lightning/pull/3982))
+- Removed deprecated model hooks ([#3980](https://github.com/Lightning-AI/pytorch-lightning/pull/3980))
+- Removed deprecated callbacks ([#3979](https://github.com/Lightning-AI/pytorch-lightning/pull/3979))
+- Removed `trainer` argument in `LightningModule.backward` ([#4056](https://github.com/Lightning-AI/pytorch-lightning/pull/4056))
 ### Fixed
-- Fixed `current_epoch` property update to reflect true epoch number inside `LightningDataModule`, when `reload_dataloaders_every_epoch=True`. ([#3974](https://github.com/Lightning-AI/lightning/pull/3974))
-- Fixed to print scaler value in progress bar ([#4053](https://github.com/Lightning-AI/lightning/pull/4053))
-- Fixed mismatch between docstring and code regarding when `on_load_checkpoint` hook is called ([#3996](https://github.com/Lightning-AI/lightning/pull/3996))
+- Fixed `current_epoch` property update to reflect true epoch number inside `LightningDataModule`, when `reload_dataloaders_every_epoch=True`. ([#3974](https://github.com/Lightning-AI/pytorch-lightning/pull/3974))
+- Fixed to print scaler value in progress bar ([#4053](https://github.com/Lightning-AI/pytorch-lightning/pull/4053))
+- Fixed mismatch between docstring and code regarding when `on_load_checkpoint` hook is called ([#3996](https://github.com/Lightning-AI/pytorch-lightning/pull/3996))
 ## [0.10.0] - 2020-10-07
 ### Added
-- Added new Metrics API. ([#3868](https://github.com/Lightning-AI/lightning/pull/3868), [#3921](https://github.com/Lightning-AI/lightning/pull/3921))
-- Enable PyTorch 1.7 compatibility ([#3541](https://github.com/Lightning-AI/lightning/pull/3541))
-- Added `LightningModule.to_torchscript` to support exporting as `ScriptModule` ([#3258](https://github.com/Lightning-AI/lightning/pull/3258))
-- Added warning when dropping unpicklable `hparams` ([#2874](https://github.com/Lightning-AI/lightning/pull/2874))
-- Added EMB similarity ([#3349](https://github.com/Lightning-AI/lightning/pull/3349))
-- Added `ModelCheckpoint.to_yaml` method ([#3048](https://github.com/Lightning-AI/lightning/pull/3048))
-- Allow `ModelCheckpoint` monitor to be `None`, meaning it will always save ([#3630](https://github.com/Lightning-AI/lightning/pull/3630))
-- Disabled optimizers setup during testing ([#3059](https://github.com/Lightning-AI/lightning/pull/3059))
-- Added support for datamodules to save and load checkpoints when training ([#3563](https://github.com/Lightning-AI/lightning/pull/3563))
-- Added support for datamodule in learning rate finder ([#3425](https://github.com/Lightning-AI/lightning/pull/3425))
-- Added gradient clip test for native AMP ([#3754](https://github.com/Lightning-AI/lightning/pull/3754))
-- Added dist lib to enable syncing anything across devices ([#3762](https://github.com/Lightning-AI/lightning/pull/3762))
-- Added `broadcast` to `TPUBackend` ([#3814](https://github.com/Lightning-AI/lightning/pull/3814))
-- Added `XLADeviceUtils` class to check XLA device type ([#3274](https://github.com/Lightning-AI/lightning/pull/3274))
+- Added new Metrics API. ([#3868](https://github.com/Lightning-AI/pytorch-lightning/pull/3868), [#3921](https://github.com/Lightning-AI/pytorch-lightning/pull/3921))
+- Enable PyTorch 1.7 compatibility ([#3541](https://github.com/Lightning-AI/pytorch-lightning/pull/3541))
+- Added `LightningModule.to_torchscript` to support exporting as `ScriptModule` ([#3258](https://github.com/Lightning-AI/pytorch-lightning/pull/3258))
+- Added warning when dropping unpicklable `hparams` ([#2874](https://github.com/Lightning-AI/pytorch-lightning/pull/2874))
+- Added EMB similarity ([#3349](https://github.com/Lightning-AI/pytorch-lightning/pull/3349))
+- Added `ModelCheckpoint.to_yaml` method ([#3048](https://github.com/Lightning-AI/pytorch-lightning/pull/3048))
+- Allow `ModelCheckpoint` monitor to be `None`, meaning it will always save ([#3630](https://github.com/Lightning-AI/pytorch-lightning/pull/3630))
+- Disabled optimizers setup during testing ([#3059](https://github.com/Lightning-AI/pytorch-lightning/pull/3059))
+- Added support for datamodules to save and load checkpoints when training ([#3563](https://github.com/Lightning-AI/pytorch-lightning/pull/3563))
+- Added support for datamodule in learning rate finder ([#3425](https://github.com/Lightning-AI/pytorch-lightning/pull/3425))
+- Added gradient clip test for native AMP ([#3754](https://github.com/Lightning-AI/pytorch-lightning/pull/3754))
+- Added dist lib to enable syncing anything across devices ([#3762](https://github.com/Lightning-AI/pytorch-lightning/pull/3762))
+- Added `broadcast` to `TPUBackend` ([#3814](https://github.com/Lightning-AI/pytorch-lightning/pull/3814))
+- Added `XLADeviceUtils` class to check XLA device type ([#3274](https://github.com/Lightning-AI/pytorch-lightning/pull/3274))
 ### Changed
 - Refactored accelerator backends:
-  * moved TPU `xxx_step` to backend ([#3118](https://github.com/Lightning-AI/lightning/pull/3118))
-  * refactored DDP backend `forward` ([#3119](https://github.com/Lightning-AI/lightning/pull/3119))
-  * refactored GPU backend `__step` ([#3120](https://github.com/Lightning-AI/lightning/pull/3120))
-  * refactored Horovod backend ([#3121](https://github.com/Lightning-AI/lightning/pull/3121),
-  [#3122](https://github.com/Lightning-AI/lightning/pull/3122))
-  * remove obscure forward call in eval + CPU backend `___step` ([#3123](https://github.com/Lightning-AI/lightning/pull/3123))
-  * reduced all simplified forward ([#3126](https://github.com/Lightning-AI/lightning/pull/3126))
-  * added hook base method ([#3127](https://github.com/Lightning-AI/lightning/pull/3127))
-  * refactor eval loop to use hooks - use `test_mode` for if so we can split later ([#3129](https://github.com/Lightning-AI/lightning/pull/3129))
-  * moved `___step_end` hooks ([#3130](https://github.com/Lightning-AI/lightning/pull/3130))
-  * training forward refactor ([#3134](https://github.com/Lightning-AI/lightning/pull/3134))
-  * training AMP scaling refactor ([#3135](https://github.com/Lightning-AI/lightning/pull/3135))
-  * eval step scaling factor ([#3136](https://github.com/Lightning-AI/lightning/pull/3136))
-  * add eval loop object to streamline eval loop ([#3138](https://github.com/Lightning-AI/lightning/pull/3138))
-  * refactored dataloader process hook ([#3139](https://github.com/Lightning-AI/lightning/pull/3139))
-  * refactored inner eval loop ([#3141](https://github.com/Lightning-AI/lightning/pull/3141))
-  * final inner eval loop hooks ([#3154](https://github.com/Lightning-AI/lightning/pull/3154))
-  * clean up hooks in `run_evaluation` ([#3156](https://github.com/Lightning-AI/lightning/pull/3156))
-  * clean up data reset ([#3161](https://github.com/Lightning-AI/lightning/pull/3161))
-  * expand eval loop out ([#3165](https://github.com/Lightning-AI/lightning/pull/3165))
-  * moved hooks around in eval loop ([#3195](https://github.com/Lightning-AI/lightning/pull/3195))
-  * remove `_evaluate` fx ([#3197](https://github.com/Lightning-AI/lightning/pull/3197))
-  * `Trainer.fit` hook clean up ([#3198](https://github.com/Lightning-AI/lightning/pull/3198))
-  * DDPs train hooks ([#3203](https://github.com/Lightning-AI/lightning/pull/3203))
-  * refactor DDP backend ([#3204](https://github.com/Lightning-AI/lightning/pull/3204),
-  [#3207](https://github.com/Lightning-AI/lightning/pull/3207),
-  [#3208](https://github.com/Lightning-AI/lightning/pull/3208),
-  [#3209](https://github.com/Lightning-AI/lightning/pull/3209),
-  [#3210](https://github.com/Lightning-AI/lightning/pull/3210))
-  * reduced accelerator selection ([#3211](https://github.com/Lightning-AI/lightning/pull/3211))
-  * group prepare data hook ([#3212](https://github.com/Lightning-AI/lightning/pull/3212))
-  * added data connector ([#3285](https://github.com/Lightning-AI/lightning/pull/3285))
-  * modular is_overridden ([#3290](https://github.com/Lightning-AI/lightning/pull/3290))
-  * adding `Trainer.tune()` ([#3293](https://github.com/Lightning-AI/lightning/pull/3293))
-  * move `run_pretrain_routine` -> `setup_training` ([#3294](https://github.com/Lightning-AI/lightning/pull/3294))
-  * move train outside of setup training ([#3297](https://github.com/Lightning-AI/lightning/pull/3297))
-  * move `prepare_data` to data connector ([#3307](https://github.com/Lightning-AI/lightning/pull/3307))
-  * moved accelerator router ([#3309](https://github.com/Lightning-AI/lightning/pull/3309))
-  * train loop refactor - moving train loop to own object ([#3310](https://github.com/Lightning-AI/lightning/pull/3310),
-  [#3312](https://github.com/Lightning-AI/lightning/pull/3312),
-  [#3313](https://github.com/Lightning-AI/lightning/pull/3313),
-  [#3314](https://github.com/Lightning-AI/lightning/pull/3314))
-  * duplicate data interface definition up into DataHooks class ([#3344](https://github.com/Lightning-AI/lightning/pull/3344))
-  * inner train loop ([#3359](https://github.com/Lightning-AI/lightning/pull/3359),
-  [#3361](https://github.com/Lightning-AI/lightning/pull/3361),
-  [#3362](https://github.com/Lightning-AI/lightning/pull/3362),
-  [#3363](https://github.com/Lightning-AI/lightning/pull/3363),
-  [#3365](https://github.com/Lightning-AI/lightning/pull/3365),
-  [#3366](https://github.com/Lightning-AI/lightning/pull/3366),
-  [#3367](https://github.com/Lightning-AI/lightning/pull/3367),
-  [#3368](https://github.com/Lightning-AI/lightning/pull/3368),
-  [#3369](https://github.com/Lightning-AI/lightning/pull/3369),
-  [#3370](https://github.com/Lightning-AI/lightning/pull/3370),
-  [#3371](https://github.com/Lightning-AI/lightning/pull/3371),
-  [#3372](https://github.com/Lightning-AI/lightning/pull/3372),
-  [#3373](https://github.com/Lightning-AI/lightning/pull/3373),
-  [#3374](https://github.com/Lightning-AI/lightning/pull/3374),
-  [#3375](https://github.com/Lightning-AI/lightning/pull/3375),
-  [#3376](https://github.com/Lightning-AI/lightning/pull/3376),
-  [#3385](https://github.com/Lightning-AI/lightning/pull/3385),
-  [#3388](https://github.com/Lightning-AI/lightning/pull/3388),
-  [#3397](https://github.com/Lightning-AI/lightning/pull/3397))
-  * all logging related calls in a connector ([#3395](https://github.com/Lightning-AI/lightning/pull/3395))
-  * device parser ([#3400](https://github.com/Lightning-AI/lightning/pull/3400),
-  [#3405](https://github.com/Lightning-AI/lightning/pull/3405))
-  * added model connector ([#3407](https://github.com/Lightning-AI/lightning/pull/3407))
-  * moved eval loop logging to loggers ([#3408](https://github.com/Lightning-AI/lightning/pull/3408))
-  * moved eval loop (#3412[#3408](https://github.com/Lightning-AI/lightning/pull/3408))
-  * trainer/separate argparse ([#3421](https://github.com/Lightning-AI/lightning/pull/3421),
-  [#3428](https://github.com/Lightning-AI/lightning/pull/3428),
-  [#3432](https://github.com/Lightning-AI/lightning/pull/3432))
-  * move `lr_finder` ([#3434](https://github.com/Lightning-AI/lightning/pull/3434))
-  * organize args (#[#3435](https://github.com/Lightning-AI/lightning/pull/3435),
-  [#3442](https://github.com/Lightning-AI/lightning/pull/3442),
-  [#3447](https://github.com/Lightning-AI/lightning/pull/3447),
-  [#3448](https://github.com/Lightning-AI/lightning/pull/3448),
-  [#3449](https://github.com/Lightning-AI/lightning/pull/3449),
-  [#3456](https://github.com/Lightning-AI/lightning/pull/3456))
-  * move specific accelerator code ([#3457](https://github.com/Lightning-AI/lightning/pull/3457))
-  * group connectors ([#3472](https://github.com/Lightning-AI/lightning/pull/3472))
-  * accelerator connector methods x/n ([#3469](https://github.com/Lightning-AI/lightning/pull/3469),
-  [#3470](https://github.com/Lightning-AI/lightning/pull/3470),
-  [#3474](https://github.com/Lightning-AI/lightning/pull/3474))
-  * merge backends x/n ([#3476](https://github.com/Lightning-AI/lightning/pull/3476),
-  [#3477](https://github.com/Lightning-AI/lightning/pull/3477),
-  [#3478](https://github.com/Lightning-AI/lightning/pull/3478),
-  [#3480](https://github.com/Lightning-AI/lightning/pull/3480),
-  [#3482](https://github.com/Lightning-AI/lightning/pull/3482))
-  * apex plugin ([#3502](https://github.com/Lightning-AI/lightning/pull/3502))
-  * precision plugins ([#3504](https://github.com/Lightning-AI/lightning/pull/3504))
-  * Result - make monitor default to `checkpoint_on` to simplify ([#3571](https://github.com/Lightning-AI/lightning/pull/3571))
-  * reference to the Trainer on the `LightningDataModule` ([#3684](https://github.com/Lightning-AI/lightning/pull/3684))
-  * add `.log` to lightning module ([#3686](https://github.com/Lightning-AI/lightning/pull/3686),
-  [#3699](https://github.com/Lightning-AI/lightning/pull/3699),
-  [#3701](https://github.com/Lightning-AI/lightning/pull/3701),
-  [#3704](https://github.com/Lightning-AI/lightning/pull/3704),
-  [#3715](https://github.com/Lightning-AI/lightning/pull/3715))
-  * enable tracking original metric when step and epoch are both true ([#3685](https://github.com/Lightning-AI/lightning/pull/3685))
-  * deprecated results obj, added support for simpler comms ([#3681](https://github.com/Lightning-AI/lightning/pull/3681))
-  * move backends back to individual files ([#3712](https://github.com/Lightning-AI/lightning/pull/3712))
-  * fixes logging for eval steps ([#3763](https://github.com/Lightning-AI/lightning/pull/3763))
-  * decoupled DDP, DDP spawn ([#3733](https://github.com/Lightning-AI/lightning/pull/3733),
-  [#3766](https://github.com/Lightning-AI/lightning/pull/3766),
-  [#3767](https://github.com/Lightning-AI/lightning/pull/3767),
-  [#3774](https://github.com/Lightning-AI/lightning/pull/3774),
-  [#3802](https://github.com/Lightning-AI/lightning/pull/3802),
-  [#3806](https://github.com/Lightning-AI/lightning/pull/3806),
-  [#3817](https://github.com/Lightning-AI/lightning/pull/3817),
-  [#3819](https://github.com/Lightning-AI/lightning/pull/3819),
-  [#3927](https://github.com/Lightning-AI/lightning/pull/3927))
-  * remove weight loading hack for ddp_cpu ([#3808](https://github.com/Lightning-AI/lightning/pull/3808))
-  * separate `torchelastic` from DDP ([#3810](https://github.com/Lightning-AI/lightning/pull/3810))
-  * separate SLURM from DDP ([#3809](https://github.com/Lightning-AI/lightning/pull/3809))
-  * decoupled DDP2 ([#3816](https://github.com/Lightning-AI/lightning/pull/3816))
-  * bug fix with logging val epoch end + monitor ([#3812](https://github.com/Lightning-AI/lightning/pull/3812))
-  * callback system and init DDP ([#3836](https://github.com/Lightning-AI/lightning/pull/3836))
-  * adding compute environments ([#3837](https://github.com/Lightning-AI/lightning/pull/3837), [#3842](https://github.com/Lightning-AI/lightning/pull/3842))
-  * epoch can now log independently ([#3843](https://github.com/Lightning-AI/lightning/pull/3843))
-  * test selecting the correct backend. temp backends while slurm and TorchElastic are decoupled ([#3848](https://github.com/Lightning-AI/lightning/pull/3848))
-  * fixed `init_slurm_connection` causing hostname errors ([#3856](https://github.com/Lightning-AI/lightning/pull/3856))
-  * moves init apex from LM to apex connector ([#3923](https://github.com/Lightning-AI/lightning/pull/3923))
-  * moves sync bn to each backend ([#3925](https://github.com/Lightning-AI/lightning/pull/3925))
-  * moves configure ddp to each backend ([#3924](https://github.com/Lightning-AI/lightning/pull/3924))
-- Deprecation warning ([#3844](https://github.com/Lightning-AI/lightning/pull/3844))
-- Changed `LearningRateLogger` to `LearningRateMonitor` ([#3251](https://github.com/Lightning-AI/lightning/pull/3251))
-- Used `fsspec` instead of `gfile` for all IO ([#3320](https://github.com/Lightning-AI/lightning/pull/3320))
-  * Swapped `torch.load` for `fsspec` load in DDP spawn backend ([#3787](https://github.com/Lightning-AI/lightning/pull/3787))
-  * Swapped `torch.load` for `fsspec` load in cloud_io loading ([#3692](https://github.com/Lightning-AI/lightning/pull/3692))
-  * Added support for `to_disk()` to use remote filepaths with `fsspec` ([#3930](https://github.com/Lightning-AI/lightning/pull/3930))
-  * Updated model_checkpoint's to_yaml to use `fsspec` open ([#3801](https://github.com/Lightning-AI/lightning/pull/3801))
-  * Fixed `fsspec` is inconsistent when doing `fs.ls` ([#3805](https://github.com/Lightning-AI/lightning/pull/3805))
-- Refactor `GPUStatsMonitor` to improve training speed ([#3257](https://github.com/Lightning-AI/lightning/pull/3257))
-- Changed IoU score behavior for classes absent in target and pred ([#3098](https://github.com/Lightning-AI/lightning/pull/3098))
-- Changed IoU `remove_bg` bool to `ignore_index` optional int ([#3098](https://github.com/Lightning-AI/lightning/pull/3098))
-- Changed defaults of `save_top_k` and `save_last` to `None` in ModelCheckpoint ([#3680](https://github.com/Lightning-AI/lightning/pull/3680))
-- `row_log_interval` and `log_save_interval` are now based on training loop's `global_step` instead of epoch-internal batch index ([#3667](https://github.com/Lightning-AI/lightning/pull/3667))
-- Silenced some warnings. verified ddp refactors ([#3483](https://github.com/Lightning-AI/lightning/pull/3483))
-- Cleaning up stale logger tests ([#3490](https://github.com/Lightning-AI/lightning/pull/3490))
-- Allow `ModelCheckpoint` monitor to be `None` ([#3633](https://github.com/Lightning-AI/lightning/pull/3633))
-- Enable `None` model checkpoint default ([#3669](https://github.com/Lightning-AI/lightning/pull/3669))
-- Skipped `best_model_path` if `checkpoint_callback` is `None` ([#2962](https://github.com/Lightning-AI/lightning/pull/2962))
-- Used `raise .. from ..` to explicitly chain exceptions ([#3750](https://github.com/Lightning-AI/lightning/pull/3750))
-- Mocking loggers ([#3596](https://github.com/Lightning-AI/lightning/pull/3596),
-  [#3617](https://github.com/Lightning-AI/lightning/pull/3617),
-  [#3851](https://github.com/Lightning-AI/lightning/pull/3851),
-  [#3859](https://github.com/Lightning-AI/lightning/pull/3859),
-  [#3884](https://github.com/Lightning-AI/lightning/pull/3884),
-  [#3853](https://github.com/Lightning-AI/lightning/pull/3853),
-  [#3910](https://github.com/Lightning-AI/lightning/pull/3910),
-  [#3889](https://github.com/Lightning-AI/lightning/pull/3889),
-  [#3926](https://github.com/Lightning-AI/lightning/pull/3926))
-- Write predictions in LightningModule instead of EvalResult [#3882](https://github.com/Lightning-AI/lightning/pull/3882)
+  * moved TPU `xxx_step` to backend ([#3118](https://github.com/Lightning-AI/pytorch-lightning/pull/3118))
+  * refactored DDP backend `forward` ([#3119](https://github.com/Lightning-AI/pytorch-lightning/pull/3119))
+  * refactored GPU backend `__step` ([#3120](https://github.com/Lightning-AI/pytorch-lightning/pull/3120))
+  * refactored Horovod backend ([#3121](https://github.com/Lightning-AI/pytorch-lightning/pull/3121),
+  [#3122](https://github.com/Lightning-AI/pytorch-lightning/pull/3122))
+  * remove obscure forward call in eval + CPU backend `___step` ([#3123](https://github.com/Lightning-AI/pytorch-lightning/pull/3123))
+  * reduced all simplified forward ([#3126](https://github.com/Lightning-AI/pytorch-lightning/pull/3126))
+  * added hook base method ([#3127](https://github.com/Lightning-AI/pytorch-lightning/pull/3127))
+  * refactor eval loop to use hooks - use `test_mode` for if so we can split later ([#3129](https://github.com/Lightning-AI/pytorch-lightning/pull/3129))
+  * moved `___step_end` hooks ([#3130](https://github.com/Lightning-AI/pytorch-lightning/pull/3130))
+  * training forward refactor ([#3134](https://github.com/Lightning-AI/pytorch-lightning/pull/3134))
+  * training AMP scaling refactor ([#3135](https://github.com/Lightning-AI/pytorch-lightning/pull/3135))
+  * eval step scaling factor ([#3136](https://github.com/Lightning-AI/pytorch-lightning/pull/3136))
+  * add eval loop object to streamline eval loop ([#3138](https://github.com/Lightning-AI/pytorch-lightning/pull/3138))
+  * refactored dataloader process hook ([#3139](https://github.com/Lightning-AI/pytorch-lightning/pull/3139))
+  * refactored inner eval loop ([#3141](https://github.com/Lightning-AI/pytorch-lightning/pull/3141))
+  * final inner eval loop hooks ([#3154](https://github.com/Lightning-AI/pytorch-lightning/pull/3154))
+  * clean up hooks in `run_evaluation` ([#3156](https://github.com/Lightning-AI/pytorch-lightning/pull/3156))
+  * clean up data reset ([#3161](https://github.com/Lightning-AI/pytorch-lightning/pull/3161))
+  * expand eval loop out ([#3165](https://github.com/Lightning-AI/pytorch-lightning/pull/3165))
+  * moved hooks around in eval loop ([#3195](https://github.com/Lightning-AI/pytorch-lightning/pull/3195))
+  * remove `_evaluate` fx ([#3197](https://github.com/Lightning-AI/pytorch-lightning/pull/3197))
+  * `Trainer.fit` hook clean up ([#3198](https://github.com/Lightning-AI/pytorch-lightning/pull/3198))
+  * DDPs train hooks ([#3203](https://github.com/Lightning-AI/pytorch-lightning/pull/3203))
+  * refactor DDP backend ([#3204](https://github.com/Lightning-AI/pytorch-lightning/pull/3204),
+  [#3207](https://github.com/Lightning-AI/pytorch-lightning/pull/3207),
+  [#3208](https://github.com/Lightning-AI/pytorch-lightning/pull/3208),
+  [#3209](https://github.com/Lightning-AI/pytorch-lightning/pull/3209),
+  [#3210](https://github.com/Lightning-AI/pytorch-lightning/pull/3210))
+  * reduced accelerator selection ([#3211](https://github.com/Lightning-AI/pytorch-lightning/pull/3211))
+  * group prepare data hook ([#3212](https://github.com/Lightning-AI/pytorch-lightning/pull/3212))
+  * added data connector ([#3285](https://github.com/Lightning-AI/pytorch-lightning/pull/3285))
+  * modular is_overridden ([#3290](https://github.com/Lightning-AI/pytorch-lightning/pull/3290))
+  * adding `Trainer.tune()` ([#3293](https://github.com/Lightning-AI/pytorch-lightning/pull/3293))
+  * move `run_pretrain_routine` -> `setup_training` ([#3294](https://github.com/Lightning-AI/pytorch-lightning/pull/3294))
+  * move train outside of setup training ([#3297](https://github.com/Lightning-AI/pytorch-lightning/pull/3297))
+  * move `prepare_data` to data connector ([#3307](https://github.com/Lightning-AI/pytorch-lightning/pull/3307))
+  * moved accelerator router ([#3309](https://github.com/Lightning-AI/pytorch-lightning/pull/3309))
+  * train loop refactor - moving train loop to own object ([#3310](https://github.com/Lightning-AI/pytorch-lightning/pull/3310),
+  [#3312](https://github.com/Lightning-AI/pytorch-lightning/pull/3312),
+  [#3313](https://github.com/Lightning-AI/pytorch-lightning/pull/3313),
+  [#3314](https://github.com/Lightning-AI/pytorch-lightning/pull/3314))
+  * duplicate data interface definition up into DataHooks class ([#3344](https://github.com/Lightning-AI/pytorch-lightning/pull/3344))
+  * inner train loop ([#3359](https://github.com/Lightning-AI/pytorch-lightning/pull/3359),
+  [#3361](https://github.com/Lightning-AI/pytorch-lightning/pull/3361),
+  [#3362](https://github.com/Lightning-AI/pytorch-lightning/pull/3362),
+  [#3363](https://github.com/Lightning-AI/pytorch-lightning/pull/3363),
+  [#3365](https://github.com/Lightning-AI/pytorch-lightning/pull/3365),
+  [#3366](https://github.com/Lightning-AI/pytorch-lightning/pull/3366),
+  [#3367](https://github.com/Lightning-AI/pytorch-lightning/pull/3367),
+  [#3368](https://github.com/Lightning-AI/pytorch-lightning/pull/3368),
+  [#3369](https://github.com/Lightning-AI/pytorch-lightning/pull/3369),
+  [#3370](https://github.com/Lightning-AI/pytorch-lightning/pull/3370),
+  [#3371](https://github.com/Lightning-AI/pytorch-lightning/pull/3371),
+  [#3372](https://github.com/Lightning-AI/pytorch-lightning/pull/3372),
+  [#3373](https://github.com/Lightning-AI/pytorch-lightning/pull/3373),
+  [#3374](https://github.com/Lightning-AI/pytorch-lightning/pull/3374),
+  [#3375](https://github.com/Lightning-AI/pytorch-lightning/pull/3375),
+  [#3376](https://github.com/Lightning-AI/pytorch-lightning/pull/3376),
+  [#3385](https://github.com/Lightning-AI/pytorch-lightning/pull/3385),
+  [#3388](https://github.com/Lightning-AI/pytorch-lightning/pull/3388),
+  [#3397](https://github.com/Lightning-AI/pytorch-lightning/pull/3397))
+  * all logging related calls in a connector ([#3395](https://github.com/Lightning-AI/pytorch-lightning/pull/3395))
+  * device parser ([#3400](https://github.com/Lightning-AI/pytorch-lightning/pull/3400),
+  [#3405](https://github.com/Lightning-AI/pytorch-lightning/pull/3405))
+  * added model connector ([#3407](https://github.com/Lightning-AI/pytorch-lightning/pull/3407))
+  * moved eval loop logging to loggers ([#3408](https://github.com/Lightning-AI/pytorch-lightning/pull/3408))
+  * moved eval loop
([#3412](https://github.com/Lightning-AI/pytorch-lightning/pull/3412)) + * trainer/separate argparse ([#3421](https://github.com/Lightning-AI/pytorch-lightning/pull/3421), + [#3428](https://github.com/Lightning-AI/pytorch-lightning/pull/3428), + [#3432](https://github.com/Lightning-AI/pytorch-lightning/pull/3432)) + * move `lr_finder` ([#3434](https://github.com/Lightning-AI/pytorch-lightning/pull/3434)) + * organize args ([#3435](https://github.com/Lightning-AI/pytorch-lightning/pull/3435), + [#3442](https://github.com/Lightning-AI/pytorch-lightning/pull/3442), + [#3447](https://github.com/Lightning-AI/pytorch-lightning/pull/3447), + [#3448](https://github.com/Lightning-AI/pytorch-lightning/pull/3448), + [#3449](https://github.com/Lightning-AI/pytorch-lightning/pull/3449), + [#3456](https://github.com/Lightning-AI/pytorch-lightning/pull/3456)) + * move specific accelerator code ([#3457](https://github.com/Lightning-AI/pytorch-lightning/pull/3457)) + * group connectors ([#3472](https://github.com/Lightning-AI/pytorch-lightning/pull/3472)) + * accelerator connector methods x/n ([#3469](https://github.com/Lightning-AI/pytorch-lightning/pull/3469), + [#3470](https://github.com/Lightning-AI/pytorch-lightning/pull/3470), + [#3474](https://github.com/Lightning-AI/pytorch-lightning/pull/3474)) + * merge backends x/n ([#3476](https://github.com/Lightning-AI/pytorch-lightning/pull/3476), + [#3477](https://github.com/Lightning-AI/pytorch-lightning/pull/3477), + [#3478](https://github.com/Lightning-AI/pytorch-lightning/pull/3478), + [#3480](https://github.com/Lightning-AI/pytorch-lightning/pull/3480), + [#3482](https://github.com/Lightning-AI/pytorch-lightning/pull/3482)) + * apex plugin ([#3502](https://github.com/Lightning-AI/pytorch-lightning/pull/3502)) + * precision plugins ([#3504](https://github.com/Lightning-AI/pytorch-lightning/pull/3504)) + * Result - make monitor default to `checkpoint_on` to simplify ([#3571](https://github.com/Lightning-AI/pytorch-lightning/pull/3571)) + * reference to the Trainer on the `LightningDataModule` ([#3684](https://github.com/Lightning-AI/pytorch-lightning/pull/3684)) + * add `.log` to lightning module ([#3686](https://github.com/Lightning-AI/pytorch-lightning/pull/3686), + [#3699](https://github.com/Lightning-AI/pytorch-lightning/pull/3699), + [#3701](https://github.com/Lightning-AI/pytorch-lightning/pull/3701), + [#3704](https://github.com/Lightning-AI/pytorch-lightning/pull/3704), + [#3715](https://github.com/Lightning-AI/pytorch-lightning/pull/3715)) + * enable tracking original metric when step and epoch are both true ([#3685](https://github.com/Lightning-AI/pytorch-lightning/pull/3685)) + * deprecated results obj, added support for simpler comms ([#3681](https://github.com/Lightning-AI/pytorch-lightning/pull/3681)) + * move backends back to individual files ([#3712](https://github.com/Lightning-AI/pytorch-lightning/pull/3712)) + * fixes logging for eval steps ([#3763](https://github.com/Lightning-AI/pytorch-lightning/pull/3763)) + * decoupled DDP, DDP spawn ([#3733](https://github.com/Lightning-AI/pytorch-lightning/pull/3733), + [#3766](https://github.com/Lightning-AI/pytorch-lightning/pull/3766), + [#3767](https://github.com/Lightning-AI/pytorch-lightning/pull/3767), + [#3774](https://github.com/Lightning-AI/pytorch-lightning/pull/3774), + [#3802](https://github.com/Lightning-AI/pytorch-lightning/pull/3802), + [#3806](https://github.com/Lightning-AI/pytorch-lightning/pull/3806), + 
[#3817](https://github.com/Lightning-AI/pytorch-lightning/pull/3817), + [#3819](https://github.com/Lightning-AI/pytorch-lightning/pull/3819), + [#3927](https://github.com/Lightning-AI/pytorch-lightning/pull/3927)) + * remove weight loading hack for ddp_cpu ([#3808](https://github.com/Lightning-AI/pytorch-lightning/pull/3808)) + * separate `torchelastic` from DDP ([#3810](https://github.com/Lightning-AI/pytorch-lightning/pull/3810)) + * separate SLURM from DDP ([#3809](https://github.com/Lightning-AI/pytorch-lightning/pull/3809)) + * decoupled DDP2 ([#3816](https://github.com/Lightning-AI/pytorch-lightning/pull/3816)) + * bug fix with logging val epoch end + monitor ([#3812](https://github.com/Lightning-AI/pytorch-lightning/pull/3812)) + * callback system and init DDP ([#3836](https://github.com/Lightning-AI/pytorch-lightning/pull/3836)) + * adding compute environments ([#3837](https://github.com/Lightning-AI/pytorch-lightning/pull/3837), [#3842](https://github.com/Lightning-AI/pytorch-lightning/pull/3842)) + * epoch can now log independently ([#3843](https://github.com/Lightning-AI/pytorch-lightning/pull/3843)) + * test selecting the correct backend. temp backends while slurm and TorchElastic are decoupled ([#3848](https://github.com/Lightning-AI/pytorch-lightning/pull/3848)) + * fixed `init_slurm_connection` causing hostname errors ([#3856](https://github.com/Lightning-AI/pytorch-lightning/pull/3856)) + * moves init apex from LM to apex connector ([#3923](https://github.com/Lightning-AI/pytorch-lightning/pull/3923)) + * moves sync bn to each backend ([#3925](https://github.com/Lightning-AI/pytorch-lightning/pull/3925)) + * moves configure ddp to each backend ([#3924](https://github.com/Lightning-AI/pytorch-lightning/pull/3924)) +- Deprecation warning ([#3844](https://github.com/Lightning-AI/pytorch-lightning/pull/3844)) +- Changed `LearningRateLogger` to `LearningRateMonitor` ([#3251](https://github.com/Lightning-AI/pytorch-lightning/pull/3251)) +- Used `fsspec` instead of `gfile` for all IO ([#3320](https://github.com/Lightning-AI/pytorch-lightning/pull/3320)) + * Swapped `torch.load` for `fsspec` load in DDP spawn backend ([#3787](https://github.com/Lightning-AI/pytorch-lightning/pull/3787)) + * Swapped `torch.load` for `fsspec` load in cloud_io loading ([#3692](https://github.com/Lightning-AI/pytorch-lightning/pull/3692)) + * Added support for `to_disk()` to use remote filepaths with `fsspec` ([#3930](https://github.com/Lightning-AI/pytorch-lightning/pull/3930)) + * Updated model_checkpoint's to_yaml to use `fsspec` open ([#3801](https://github.com/Lightning-AI/pytorch-lightning/pull/3801)) + * Fixed `fsspec` is inconsistent when doing `fs.ls` ([#3805](https://github.com/Lightning-AI/pytorch-lightning/pull/3805)) +- Refactor `GPUStatsMonitor` to improve training speed ([#3257](https://github.com/Lightning-AI/pytorch-lightning/pull/3257)) +- Changed IoU score behavior for classes absent in target and pred ([#3098](https://github.com/Lightning-AI/pytorch-lightning/pull/3098)) +- Changed IoU `remove_bg` bool to `ignore_index` optional int ([#3098](https://github.com/Lightning-AI/pytorch-lightning/pull/3098)) +- Changed defaults of `save_top_k` and `save_last` to `None` in ModelCheckpoint ([#3680](https://github.com/Lightning-AI/pytorch-lightning/pull/3680)) +- `row_log_interval` and `log_save_interval` are now based on training loop's `global_step` instead of epoch-internal batch index ([#3667](https://github.com/Lightning-AI/pytorch-lightning/pull/3667)) +- Silenced some warnings. 
verified ddp refactors ([#3483](https://github.com/Lightning-AI/pytorch-lightning/pull/3483)) +- Cleaning up stale logger tests ([#3490](https://github.com/Lightning-AI/pytorch-lightning/pull/3490)) +- Allow `ModelCheckpoint` monitor to be `None` ([#3633](https://github.com/Lightning-AI/pytorch-lightning/pull/3633)) +- Enable `None` model checkpoint default ([#3669](https://github.com/Lightning-AI/pytorch-lightning/pull/3669)) +- Skipped `best_model_path` if `checkpoint_callback` is `None` ([#2962](https://github.com/Lightning-AI/pytorch-lightning/pull/2962)) +- Used `raise .. from ..` to explicitly chain exceptions ([#3750](https://github.com/Lightning-AI/pytorch-lightning/pull/3750)) +- Mocking loggers ([#3596](https://github.com/Lightning-AI/pytorch-lightning/pull/3596), + [#3617](https://github.com/Lightning-AI/pytorch-lightning/pull/3617), + [#3851](https://github.com/Lightning-AI/pytorch-lightning/pull/3851), + [#3859](https://github.com/Lightning-AI/pytorch-lightning/pull/3859), + [#3884](https://github.com/Lightning-AI/pytorch-lightning/pull/3884), + [#3853](https://github.com/Lightning-AI/pytorch-lightning/pull/3853), + [#3910](https://github.com/Lightning-AI/pytorch-lightning/pull/3910), + [#3889](https://github.com/Lightning-AI/pytorch-lightning/pull/3889), + [#3926](https://github.com/Lightning-AI/pytorch-lightning/pull/3926)) +- Write predictions in LightningModule instead of EvalResult [#3882](https://github.com/Lightning-AI/pytorch-lightning/pull/3882) ### Deprecated -- Deprecated `TrainResult` and `EvalResult`, use `self.log` and `self.write` from the `LightningModule` to log metrics and write predictions. `training_step` can now only return a scalar (for the loss) or a dictionary with anything you want. ([#3681](https://github.com/Lightning-AI/lightning/pull/3681)) -- Deprecate `early_stop_callback` Trainer argument ([#3845](https://github.com/Lightning-AI/lightning/pull/3845)) -- Rename Trainer arguments `row_log_interval` >> `log_every_n_steps` and `log_save_interval` >> `flush_logs_every_n_steps` ([#3748](https://github.com/Lightning-AI/lightning/pull/3748)) +- Deprecated `TrainResult` and `EvalResult`, use `self.log` and `self.write` from the `LightningModule` to log metrics and write predictions. `training_step` can now only return a scalar (for the loss) or a dictionary with anything you want. 
([#3681](https://github.com/Lightning-AI/pytorch-lightning/pull/3681)) +- Deprecate `early_stop_callback` Trainer argument ([#3845](https://github.com/Lightning-AI/pytorch-lightning/pull/3845)) +- Rename Trainer arguments `row_log_interval` >> `log_every_n_steps` and `log_save_interval` >> `flush_logs_every_n_steps` ([#3748](https://github.com/Lightning-AI/pytorch-lightning/pull/3748)) ### Removed -- Removed experimental Metric API ([#3943](https://github.com/Lightning-AI/lightning/pull/3943), - [#3949](https://github.com/Lightning-AI/lightning/pull/3949), - [#3946](https://github.com/Lightning-AI/lightning/pull/3946)), listed changes before final removal: - * Added `EmbeddingSimilarity` metric ([#3349](https://github.com/Lightning-AI/lightning/pull/3349), [#3358](https://github.com/Lightning-AI/lightning/pull/3358)) - * Added hooks to metric module interface ([#2528](https://github.com/Lightning-AI/lightning/pull/2528)) - * Added error when AUROC metric is used for multiclass problems ([#3350](https://github.com/Lightning-AI/lightning/pull/3350)) - * Fixed `ModelCheckpoint` with `save_top_k=-1` option not tracking the best models when a monitor metric is available ([#3735](https://github.com/Lightning-AI/lightning/pull/3735)) - * Fixed counter-intuitive error being thrown in `Accuracy` metric for zero target tensor ([#3764](https://github.com/Lightning-AI/lightning/pull/3764)) - * Fixed aggregation of metrics ([#3517](https://github.com/Lightning-AI/lightning/pull/3517)) - * Fixed Metric aggregation ([#3321](https://github.com/Lightning-AI/lightning/pull/3321)) - * Fixed RMSLE metric ([#3188](https://github.com/Lightning-AI/lightning/pull/3188)) - * Renamed `reduction` to `class_reduction` in classification metrics ([#3322](https://github.com/Lightning-AI/lightning/pull/3322)) - * Changed `class_reduction` similar to sklearn for classification metrics ([#3322](https://github.com/Lightning-AI/lightning/pull/3322)) - * Renaming of precision recall metric ([#3308](https://github.com/Lightning-AI/lightning/pull/3308)) +- Removed experimental Metric API ([#3943](https://github.com/Lightning-AI/pytorch-lightning/pull/3943), + [#3949](https://github.com/Lightning-AI/pytorch-lightning/pull/3949), + [#3946](https://github.com/Lightning-AI/pytorch-lightning/pull/3946)), listed changes before final removal: + * Added `EmbeddingSimilarity` metric ([#3349](https://github.com/Lightning-AI/pytorch-lightning/pull/3349), [#3358](https://github.com/Lightning-AI/pytorch-lightning/pull/3358)) + * Added hooks to metric module interface ([#2528](https://github.com/Lightning-AI/pytorch-lightning/pull/2528)) + * Added error when AUROC metric is used for multiclass problems ([#3350](https://github.com/Lightning-AI/pytorch-lightning/pull/3350)) + * Fixed `ModelCheckpoint` with `save_top_k=-1` option not tracking the best models when a monitor metric is available ([#3735](https://github.com/Lightning-AI/pytorch-lightning/pull/3735)) + * Fixed counter-intuitive error being thrown in `Accuracy` metric for zero target tensor ([#3764](https://github.com/Lightning-AI/pytorch-lightning/pull/3764)) + * Fixed aggregation of metrics ([#3517](https://github.com/Lightning-AI/pytorch-lightning/pull/3517)) + * Fixed Metric aggregation ([#3321](https://github.com/Lightning-AI/pytorch-lightning/pull/3321)) + * Fixed RMSLE metric ([#3188](https://github.com/Lightning-AI/pytorch-lightning/pull/3188)) + * Renamed `reduction` to `class_reduction` in classification metrics 
([#3322](https://github.com/Lightning-AI/pytorch-lightning/pull/3322)) + * Changed `class_reduction` similar to sklearn for classification metrics ([#3322](https://github.com/Lightning-AI/pytorch-lightning/pull/3322)) + * Renaming of precision recall metric ([#3308](https://github.com/Lightning-AI/pytorch-lightning/pull/3308)) ### Fixed -- Fixed `on_train_batch_start` hook to end epoch early ([#3700](https://github.com/Lightning-AI/lightning/pull/3700)) -- Fixed `num_sanity_val_steps` is clipped to `limit_val_batches` ([#2917](https://github.com/Lightning-AI/lightning/pull/2917)) -- Fixed ONNX model save on GPU ([#3145](https://github.com/Lightning-AI/lightning/pull/3145)) -- Fixed `GpuUsageLogger` to work on different platforms ([#3008](https://github.com/Lightning-AI/lightning/pull/3008)) -- Fixed auto-scale batch size not dumping `auto_lr_find` parameter ([#3151](https://github.com/Lightning-AI/lightning/pull/3151)) -- Fixed `batch_outputs` with optimizer frequencies ([#3229](https://github.com/Lightning-AI/lightning/pull/3229)) -- Fixed setting batch size in `LightningModule.datamodule` when using `auto_scale_batch_size` ([#3266](https://github.com/Lightning-AI/lightning/pull/3266)) -- Fixed Horovod distributed backend compatibility with native AMP ([#3404](https://github.com/Lightning-AI/lightning/pull/3404)) -- Fixed batch size auto scaling exceeding the size of the dataset ([#3271](https://github.com/Lightning-AI/lightning/pull/3271)) -- Fixed getting `experiment_id` from MLFlow only once instead of each training loop ([#3394](https://github.com/Lightning-AI/lightning/pull/3394)) -- Fixed `overfit_batches` which now correctly disables shuffling for the training loader. ([#3501](https://github.com/Lightning-AI/lightning/pull/3501)) -- Fixed gradient norm tracking for `row_log_interval > 1` ([#3489](https://github.com/Lightning-AI/lightning/pull/3489)) -- Fixed `ModelCheckpoint` name formatting ([#3164](https://github.com/Lightning-AI/lightning/pull/3163)) -- Fixed example implementation of AutoEncoder ([#3190](https://github.com/Lightning-AI/lightning/pull/3190)) -- Fixed invalid paths when remote logging with TensorBoard ([#3236](https://github.com/Lightning-AI/lightning/pull/3236)) -- Fixed change `t()` to `transpose()` as XLA devices do not support `.t()` on 1-dim tensor ([#3252](https://github.com/Lightning-AI/lightning/pull/3252)) -- Fixed (weights only) checkpoints loading without PL ([#3287](https://github.com/Lightning-AI/lightning/pull/3287)) -- Fixed `gather_all_tensors` cross GPUs in DDP ([#3319](https://github.com/Lightning-AI/lightning/pull/3319)) -- Fixed CometML save dir ([#3419](https://github.com/Lightning-AI/lightning/pull/3419)) -- Fixed forward key metrics ([#3467](https://github.com/Lightning-AI/lightning/pull/3467)) -- Fixed normalize mode at confusion matrix (replace NaNs with zeros) ([#3465](https://github.com/Lightning-AI/lightning/pull/3465)) -- Fixed global step increment in training loop when `training_epoch_end` hook is used ([#3673](https://github.com/Lightning-AI/lightning/pull/3673)) -- Fixed dataloader shuffling not getting turned off with `overfit_batches > 0` and `distributed_backend = "ddp"` ([#3534](https://github.com/Lightning-AI/lightning/pull/3534)) -- Fixed determinism in `DDPSpawnBackend` when using `seed_everything` in main process ([#3335](https://github.com/Lightning-AI/lightning/pull/3335)) -- Fixed `ModelCheckpoint` `period` to actually save every `period` epochs ([#3630](https://github.com/Lightning-AI/lightning/pull/3630)) -- Fixed 
`val_progress_bar` total with `num_sanity_val_steps` ([#3751](https://github.com/Lightning-AI/lightning/pull/3751)) -- Fixed Tuner dump: add `current_epoch` to dumped_params ([#3261](https://github.com/Lightning-AI/lightning/pull/3261)) -- Fixed `current_epoch` and `global_step` properties mismatch between `Trainer` and `LightningModule` ([#3785](https://github.com/Lightning-AI/lightning/pull/3785)) -- Fixed learning rate scheduler for optimizers with internal state ([#3897](https://github.com/Lightning-AI/lightning/pull/3897)) -- Fixed `tbptt_reduce_fx` when non-floating tensors are logged ([#3796](https://github.com/Lightning-AI/lightning/pull/3796)) -- Fixed model checkpoint frequency ([#3852](https://github.com/Lightning-AI/lightning/pull/3852)) -- Fixed logging non-tensor scalar with result breaks subsequent epoch aggregation ([#3855](https://github.com/Lightning-AI/lightning/pull/3855)) -- Fixed `TrainerEvaluationLoopMixin` activates `model.train()` at the end ([#3858](https://github.com/Lightning-AI/lightning/pull/3858)) -- Fixed `overfit_batches` when using with multiple val/test_dataloaders ([#3857](https://github.com/Lightning-AI/lightning/pull/3857)) -- Fixed enables `training_step` to return `None` ([#3862](https://github.com/Lightning-AI/lightning/pull/3862)) -- Fixed init nan for checkpointing ([#3863](https://github.com/Lightning-AI/lightning/pull/3863)) -- Fixed for `load_from_checkpoint` ([#2776](https://github.com/Lightning-AI/lightning/pull/2776)) -- Fixes incorrect `batch_sizes` when Dataloader returns a dict with multiple tensors ([#3668](https://github.com/Lightning-AI/lightning/pull/3668)) -- Fixed unexpected signature for `validation_step` ([#3947](https://github.com/Lightning-AI/lightning/pull/3947)) +- Fixed `on_train_batch_start` hook to end epoch early ([#3700](https://github.com/Lightning-AI/pytorch-lightning/pull/3700)) +- Fixed `num_sanity_val_steps` is clipped to `limit_val_batches` ([#2917](https://github.com/Lightning-AI/pytorch-lightning/pull/2917)) +- Fixed ONNX model save on GPU ([#3145](https://github.com/Lightning-AI/pytorch-lightning/pull/3145)) +- Fixed `GpuUsageLogger` to work on different platforms ([#3008](https://github.com/Lightning-AI/pytorch-lightning/pull/3008)) +- Fixed auto-scale batch size not dumping `auto_lr_find` parameter ([#3151](https://github.com/Lightning-AI/pytorch-lightning/pull/3151)) +- Fixed `batch_outputs` with optimizer frequencies ([#3229](https://github.com/Lightning-AI/pytorch-lightning/pull/3229)) +- Fixed setting batch size in `LightningModule.datamodule` when using `auto_scale_batch_size` ([#3266](https://github.com/Lightning-AI/pytorch-lightning/pull/3266)) +- Fixed Horovod distributed backend compatibility with native AMP ([#3404](https://github.com/Lightning-AI/pytorch-lightning/pull/3404)) +- Fixed batch size auto scaling exceeding the size of the dataset ([#3271](https://github.com/Lightning-AI/pytorch-lightning/pull/3271)) +- Fixed getting `experiment_id` from MLFlow only once instead of each training loop ([#3394](https://github.com/Lightning-AI/pytorch-lightning/pull/3394)) +- Fixed `overfit_batches` which now correctly disables shuffling for the training loader. 
([#3501](https://github.com/Lightning-AI/pytorch-lightning/pull/3501)) +- Fixed gradient norm tracking for `row_log_interval > 1` ([#3489](https://github.com/Lightning-AI/pytorch-lightning/pull/3489)) +- Fixed `ModelCheckpoint` name formatting ([#3163](https://github.com/Lightning-AI/pytorch-lightning/pull/3163)) +- Fixed example implementation of AutoEncoder ([#3190](https://github.com/Lightning-AI/pytorch-lightning/pull/3190)) +- Fixed invalid paths when remote logging with TensorBoard ([#3236](https://github.com/Lightning-AI/pytorch-lightning/pull/3236)) +- Fixed change `t()` to `transpose()` as XLA devices do not support `.t()` on 1-dim tensor ([#3252](https://github.com/Lightning-AI/pytorch-lightning/pull/3252)) +- Fixed (weights only) checkpoints loading without PL ([#3287](https://github.com/Lightning-AI/pytorch-lightning/pull/3287)) +- Fixed `gather_all_tensors` cross GPUs in DDP ([#3319](https://github.com/Lightning-AI/pytorch-lightning/pull/3319)) +- Fixed CometML save dir ([#3419](https://github.com/Lightning-AI/pytorch-lightning/pull/3419)) +- Fixed forward key metrics ([#3467](https://github.com/Lightning-AI/pytorch-lightning/pull/3467)) +- Fixed normalize mode at confusion matrix (replace NaNs with zeros) ([#3465](https://github.com/Lightning-AI/pytorch-lightning/pull/3465)) +- Fixed global step increment in training loop when `training_epoch_end` hook is used ([#3673](https://github.com/Lightning-AI/pytorch-lightning/pull/3673)) +- Fixed dataloader shuffling not getting turned off with `overfit_batches > 0` and `distributed_backend = "ddp"` ([#3534](https://github.com/Lightning-AI/pytorch-lightning/pull/3534)) +- Fixed determinism in `DDPSpawnBackend` when using `seed_everything` in main process ([#3335](https://github.com/Lightning-AI/pytorch-lightning/pull/3335)) +- Fixed `ModelCheckpoint` `period` to actually save every `period` epochs ([#3630](https://github.com/Lightning-AI/pytorch-lightning/pull/3630)) +- Fixed `val_progress_bar` total with `num_sanity_val_steps` ([#3751](https://github.com/Lightning-AI/pytorch-lightning/pull/3751)) +- Fixed Tuner dump: add `current_epoch` to dumped_params ([#3261](https://github.com/Lightning-AI/pytorch-lightning/pull/3261)) +- Fixed `current_epoch` and `global_step` properties mismatch between `Trainer` and `LightningModule` ([#3785](https://github.com/Lightning-AI/pytorch-lightning/pull/3785)) +- Fixed learning rate scheduler for optimizers with internal state ([#3897](https://github.com/Lightning-AI/pytorch-lightning/pull/3897)) +- Fixed `tbptt_reduce_fx` when non-floating tensors are logged ([#3796](https://github.com/Lightning-AI/pytorch-lightning/pull/3796)) +- Fixed model checkpoint frequency ([#3852](https://github.com/Lightning-AI/pytorch-lightning/pull/3852)) +- Fixed logging non-tensor scalar with result breaks subsequent epoch aggregation ([#3855](https://github.com/Lightning-AI/pytorch-lightning/pull/3855)) +- Fixed `TrainerEvaluationLoopMixin` activates `model.train()` at the end ([#3858](https://github.com/Lightning-AI/pytorch-lightning/pull/3858)) +- Fixed `overfit_batches` when used with multiple val/test_dataloaders ([#3857](https://github.com/Lightning-AI/pytorch-lightning/pull/3857)) +- Fixed enabling `training_step` to return `None` ([#3862](https://github.com/Lightning-AI/pytorch-lightning/pull/3862)) +- Fixed init nan for checkpointing ([#3863](https://github.com/Lightning-AI/pytorch-lightning/pull/3863)) +- Fixed `load_from_checkpoint` ([#2776](https://github.com/Lightning-AI/pytorch-lightning/pull/2776)) 
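The 1.0.0 entries above retire the `TrainResult`/`EvalResult` objects in favor of `self.log` on the `LightningModule`. A minimal sketch of the replacement logging API, assuming an illustrative `LitClassifier` whose class name, layer shapes, and metric name are placeholders rather than anything taken from the changelog:

```python
import torch
import torch.nn.functional as F
import pytorch_lightning as pl


class LitClassifier(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(28 * 28, 10)

    def forward(self, x):
        return self.layer(x.view(x.size(0), -1))

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = F.cross_entropy(self(x), y)
        # `self.log` replaces the deprecated Result objects; logging with both
        # on_step and on_epoch keeps the per-step value tracked alongside the
        # epoch aggregate.
        self.log("train_loss", loss, on_step=True, on_epoch=True)
        # `training_step` may now return just the loss (or a dict containing it).
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)
```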
+- Fixes incorrect `batch_sizes` when Dataloader returns a dict with multiple tensors ([#3668](https://github.com/Lightning-AI/pytorch-lightning/pull/3668)) +- Fixed unexpected signature for `validation_step` ([#3947](https://github.com/Lightning-AI/pytorch-lightning/pull/3947)) ## [0.9.0] - 2020-08-20 ### Added -- Added SyncBN for DDP ([#2801](https://github.com/Lightning-AI/lightning/pull/2801), - [#2838](https://github.com/Lightning-AI/lightning/pull/2838)) -- Added basic `CSVLogger` ([#2721](https://github.com/Lightning-AI/lightning/pull/2721)) -- Added SSIM metrics ([#2671](https://github.com/Lightning-AI/lightning/pull/2671)) -- Added BLEU metrics ([#2535](https://github.com/Lightning-AI/lightning/pull/2535)) -- Added support to export a model to ONNX format ([#2596](https://github.com/Lightning-AI/lightning/pull/2596)) -- Added support for `Trainer(num_sanity_val_steps=-1)` to check all validation data before training ([#2246](https://github.com/Lightning-AI/lightning/pull/2246)) +- Added SyncBN for DDP ([#2801](https://github.com/Lightning-AI/pytorch-lightning/pull/2801), + [#2838](https://github.com/Lightning-AI/pytorch-lightning/pull/2838)) +- Added basic `CSVLogger` ([#2721](https://github.com/Lightning-AI/pytorch-lightning/pull/2721)) +- Added SSIM metrics ([#2671](https://github.com/Lightning-AI/pytorch-lightning/pull/2671)) +- Added BLEU metrics ([#2535](https://github.com/Lightning-AI/pytorch-lightning/pull/2535)) +- Added support to export a model to ONNX format ([#2596](https://github.com/Lightning-AI/pytorch-lightning/pull/2596)) +- Added support for `Trainer(num_sanity_val_steps=-1)` to check all validation data before training ([#2246](https://github.com/Lightning-AI/pytorch-lightning/pull/2246)) - Added struct. output: - * tests for val loop flow ([#2605](https://github.com/Lightning-AI/lightning/pull/2605)) - * `EvalResult` support for train and val. 
loop ([#2615](https://github.com/Lightning-AI/lightning/pull/2615), - [#2651](https://github.com/Lightning-AI/lightning/pull/2651)) - * weighted average in results obj ([#2930](https://github.com/Lightning-AI/lightning/pull/2930)) - * fix result obj DP auto reduce ([#3013](https://github.com/Lightning-AI/lightning/pull/3013)) -- Added class `LightningDataModule` ([#2668](https://github.com/Lightning-AI/lightning/pull/2668)) -- Added support for PyTorch 1.6 ([#2745](https://github.com/Lightning-AI/lightning/pull/2745)) -- Added call DataModule hooks implicitly in trainer ([#2755](https://github.com/Lightning-AI/lightning/pull/2755)) -- Added support for Mean in DDP Sync ([#2568](https://github.com/Lightning-AI/lightning/pull/2568)) -- Added remaining `sklearn` metrics: `AveragePrecision`, `BalancedAccuracy`, `CohenKappaScore`, `DCG`, `Hamming`, `Hinge`, `Jaccard`, `MeanAbsoluteError`, `MeanSquaredError`, `MeanSquaredLogError`, `MedianAbsoluteError`, `R2Score`, `MeanPoissonDeviance`, `MeanGammaDeviance`, `MeanTweedieDeviance`, `ExplainedVariance` ([#2562](https://github.com/Lightning-AI/lightning/pull/2562)) -- Added support for `limit_{mode}_batches (int)` to work with infinite dataloader (IterableDataset) ([#2840](https://github.com/Lightning-AI/lightning/pull/2840)) -- Added support returning python scalars in DP ([#1935](https://github.com/Lightning-AI/lightning/pull/1935)) -- Added support to Tensorboard logger for OmegaConf `hparams` ([#2846](https://github.com/Lightning-AI/lightning/pull/2846)) -- Added tracking of basic states in `Trainer` ([#2541](https://github.com/Lightning-AI/lightning/pull/2541)) -- Tracks all outputs including TBPTT and multiple optimizers ([#2890](https://github.com/Lightning-AI/lightning/pull/2890)) -- Added GPU Usage Logger ([#2932](https://github.com/Lightning-AI/lightning/pull/2932)) -- Added `strict=False` for `load_from_checkpoint` ([#2819](https://github.com/Lightning-AI/lightning/pull/2819)) -- Added saving test predictions on multiple GPUs ([#2926](https://github.com/Lightning-AI/lightning/pull/2926)) -- Auto log the computational graph for loggers that support this ([#3003](https://github.com/Lightning-AI/lightning/pull/3003)) -- Added warning when changing monitor and using results obj ([#3014](https://github.com/Lightning-AI/lightning/pull/3014)) -- Added a hook `transfer_batch_to_device` to the `LightningDataModule` ([#3038](https://github.com/Lightning-AI/lightning/pull/3038)) + * tests for val loop flow ([#2605](https://github.com/Lightning-AI/pytorch-lightning/pull/2605)) + * `EvalResult` support for train and val. 
loop ([#2615](https://github.com/Lightning-AI/pytorch-lightning/pull/2615), + [#2651](https://github.com/Lightning-AI/pytorch-lightning/pull/2651)) + * weighted average in results obj ([#2930](https://github.com/Lightning-AI/pytorch-lightning/pull/2930)) + * fix result obj DP auto reduce ([#3013](https://github.com/Lightning-AI/pytorch-lightning/pull/3013)) +- Added class `LightningDataModule` ([#2668](https://github.com/Lightning-AI/pytorch-lightning/pull/2668)) +- Added support for PyTorch 1.6 ([#2745](https://github.com/Lightning-AI/pytorch-lightning/pull/2745)) +- Added call DataModule hooks implicitly in trainer ([#2755](https://github.com/Lightning-AI/pytorch-lightning/pull/2755)) +- Added support for Mean in DDP Sync ([#2568](https://github.com/Lightning-AI/pytorch-lightning/pull/2568)) +- Added remaining `sklearn` metrics: `AveragePrecision`, `BalancedAccuracy`, `CohenKappaScore`, `DCG`, `Hamming`, `Hinge`, `Jaccard`, `MeanAbsoluteError`, `MeanSquaredError`, `MeanSquaredLogError`, `MedianAbsoluteError`, `R2Score`, `MeanPoissonDeviance`, `MeanGammaDeviance`, `MeanTweedieDeviance`, `ExplainedVariance` ([#2562](https://github.com/Lightning-AI/pytorch-lightning/pull/2562)) +- Added support for `limit_{mode}_batches (int)` to work with infinite dataloader (IterableDataset) ([#2840](https://github.com/Lightning-AI/pytorch-lightning/pull/2840)) +- Added support returning python scalars in DP ([#1935](https://github.com/Lightning-AI/pytorch-lightning/pull/1935)) +- Added support to Tensorboard logger for OmegaConf `hparams` ([#2846](https://github.com/Lightning-AI/pytorch-lightning/pull/2846)) +- Added tracking of basic states in `Trainer` ([#2541](https://github.com/Lightning-AI/pytorch-lightning/pull/2541)) +- Tracks all outputs including TBPTT and multiple optimizers ([#2890](https://github.com/Lightning-AI/pytorch-lightning/pull/2890)) +- Added GPU Usage Logger ([#2932](https://github.com/Lightning-AI/pytorch-lightning/pull/2932)) +- Added `strict=False` for `load_from_checkpoint` ([#2819](https://github.com/Lightning-AI/pytorch-lightning/pull/2819)) +- Added saving test predictions on multiple GPUs ([#2926](https://github.com/Lightning-AI/pytorch-lightning/pull/2926)) +- Auto log the computational graph for loggers that support this ([#3003](https://github.com/Lightning-AI/pytorch-lightning/pull/3003)) +- Added warning when changing monitor and using results obj ([#3014](https://github.com/Lightning-AI/pytorch-lightning/pull/3014)) +- Added a hook `transfer_batch_to_device` to the `LightningDataModule` ([#3038](https://github.com/Lightning-AI/pytorch-lightning/pull/3038)) ### Changed -- Truncated long version numbers in progress bar ([#2594](https://github.com/Lightning-AI/lightning/pull/2594)) -- Enabling val/test loop disabling ([#2692](https://github.com/Lightning-AI/lightning/pull/2692)) +- Truncated long version numbers in progress bar ([#2594](https://github.com/Lightning-AI/pytorch-lightning/pull/2594)) +- Enabling val/test loop disabling ([#2692](https://github.com/Lightning-AI/pytorch-lightning/pull/2692)) - Refactored into `accelerator` module: - * GPU training ([#2704](https://github.com/Lightning-AI/lightning/pull/2704)) - * TPU training ([#2708](https://github.com/Lightning-AI/lightning/pull/2708)) - * DDP(2) backend ([#2796](https://github.com/Lightning-AI/lightning/pull/2796)) - * Retrieve last logged val from result by key ([#3049](https://github.com/Lightning-AI/lightning/pull/3049)) -- Using `.comet.config` file for `CometLogger` 
([#1913](https://github.com/Lightning-AI/lightning/pull/1913)) -- Updated hooks arguments - breaking for `setup` and `teardown` ([#2850](https://github.com/Lightning-AI/lightning/pull/2850)) -- Using `gfile` to support remote directories ([#2164](https://github.com/Lightning-AI/lightning/pull/2164)) -- Moved optimizer creation after device placement for DDP backends ([#2904](https://github.com/Lightning-AI/lightning/pull/2904)) -- Support `**DictConfig` for `hparam` serialization ([#2519](https://github.com/Lightning-AI/lightning/pull/2519)) -- Removed callback metrics from test results obj ([#2994](https://github.com/Lightning-AI/lightning/pull/2994)) -- Re-enabled naming metrics in ckpt name ([#3060](https://github.com/Lightning-AI/lightning/pull/3060)) -- Changed progress bar epoch counting to start from 0 ([#3061](https://github.com/Lightning-AI/lightning/pull/3061)) + * GPU training ([#2704](https://github.com/Lightning-AI/pytorch-lightning/pull/2704)) + * TPU training ([#2708](https://github.com/Lightning-AI/pytorch-lightning/pull/2708)) + * DDP(2) backend ([#2796](https://github.com/Lightning-AI/pytorch-lightning/pull/2796)) + * Retrieve last logged val from result by key ([#3049](https://github.com/Lightning-AI/pytorch-lightning/pull/3049)) +- Using `.comet.config` file for `CometLogger` ([#1913](https://github.com/Lightning-AI/pytorch-lightning/pull/1913)) +- Updated hooks arguments - breaking for `setup` and `teardown` ([#2850](https://github.com/Lightning-AI/pytorch-lightning/pull/2850)) +- Using `gfile` to support remote directories ([#2164](https://github.com/Lightning-AI/pytorch-lightning/pull/2164)) +- Moved optimizer creation after device placement for DDP backends ([#2904](https://github.com/Lightning-AI/pytorch-lightning/pull/2904)) +- Support `**DictConfig` for `hparam` serialization ([#2519](https://github.com/Lightning-AI/pytorch-lightning/pull/2519)) +- Removed callback metrics from test results obj ([#2994](https://github.com/Lightning-AI/pytorch-lightning/pull/2994)) +- Re-enabled naming metrics in ckpt name ([#3060](https://github.com/Lightning-AI/pytorch-lightning/pull/3060)) +- Changed progress bar epoch counting to start from 0 ([#3061](https://github.com/Lightning-AI/pytorch-lightning/pull/3061)) ### Deprecated -- Deprecated Trainer attribute `ckpt_path`, which will now be set by `weights_save_path` ([#2681](https://github.com/Lightning-AI/lightning/pull/2681)) +- Deprecated Trainer attribute `ckpt_path`, which will now be set by `weights_save_path` ([#2681](https://github.com/Lightning-AI/pytorch-lightning/pull/2681)) ### Removed -- Removed deprecated: ([#2760](https://github.com/Lightning-AI/lightning/pull/2760)) +- Removed deprecated: ([#2760](https://github.com/Lightning-AI/pytorch-lightning/pull/2760)) * core decorator `data_loader` * Module hook `on_sanity_check_start` and loading `load_from_metrics` * package `pl.logging` @@ -3943,87 +3943,87 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
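Two of the 0.9.0 additions listed above lend themselves to a short example: `strict=False` on `load_from_checkpoint` and `Trainer(num_sanity_val_steps=-1)`. A sketch under the assumption that the illustrative `LitClassifier` from the earlier block exists and that the checkpoint path is hypothetical:

```python
import pytorch_lightning as pl

# strict=False tolerates missing or unexpected keys in the checkpoint's
# state_dict instead of raising, which helps after a model refactor.
model = LitClassifier.load_from_checkpoint(
    "checkpoints/last.ckpt",  # hypothetical path
    strict=False,
)

# num_sanity_val_steps=-1 runs the entire validation set as the pre-training
# sanity check instead of only a couple of batches.
trainer = pl.Trainer(num_sanity_val_steps=-1)
```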
### Fixed -- Fixed `accumulate_grad_batches` for last batch ([#2853](https://github.com/Lightning-AI/lightning/pull/2853)) -- Fixed setup call while testing ([#2624](https://github.com/Lightning-AI/lightning/pull/2624)) -- Fixed local rank zero casting ([#2640](https://github.com/Lightning-AI/lightning/pull/2640)) -- Fixed single scalar return from training ([#2587](https://github.com/Lightning-AI/lightning/pull/2587)) -- Fixed Horovod backend to scale LR schedlers with the optimizer ([#2626](https://github.com/Lightning-AI/lightning/pull/2626)) -- Fixed `dtype` and `device` properties not getting updated in submodules ([#2657](https://github.com/Lightning-AI/lightning/pull/2657)) -- Fixed `fast_dev_run` to run for all dataloaders ([#2581](https://github.com/Lightning-AI/lightning/pull/2581)) -- Fixed `save_dir` in loggers getting ignored by default value of `weights_save_path` when user did not specify `weights_save_path` ([#2681](https://github.com/Lightning-AI/lightning/pull/2681)) -- Fixed `weights_save_path` getting ignored when `logger=False` is passed to Trainer ([#2681](https://github.com/Lightning-AI/lightning/pull/2681)) -- Fixed TPU multi-core and Float16 ([#2632](https://github.com/Lightning-AI/lightning/pull/2632)) -- Fixed test metrics not being logged with `LoggerCollection` ([#2723](https://github.com/Lightning-AI/lightning/pull/2723)) -- Fixed data transfer to device when using `torchtext.data.Field` and `include_lengths is True` ([#2689](https://github.com/Lightning-AI/lightning/pull/2689)) -- Fixed shuffle argument for distributed sampler ([#2789](https://github.com/Lightning-AI/lightning/pull/2789)) -- Fixed logging interval ([#2694](https://github.com/Lightning-AI/lightning/pull/2694)) -- Fixed loss value in the progress bar is wrong when `accumulate_grad_batches > 1` ([#2738](https://github.com/Lightning-AI/lightning/pull/2738)) -- Fixed correct CWD for ddp sub-processes when using Hydra ([#2719](https://github.com/Lightning-AI/lightning/pull/2719)) -- Fixed selecting GPUs using `CUDA_VISIBLE_DEVICES` ([#2739](https://github.com/Lightning-AI/lightning/pull/2739)) -- Fixed false `num_classes` warning in metrics ([#2781](https://github.com/Lightning-AI/lightning/pull/2781)) -- Fixed shell injection vulnerability in subprocess call ([#2786](https://github.com/Lightning-AI/lightning/pull/2786)) -- Fixed LR finder and `hparams` compatibility ([#2821](https://github.com/Lightning-AI/lightning/pull/2821)) -- Fixed `ModelCheckpoint` not saving the latest information when `save_last=True` ([#2881](https://github.com/Lightning-AI/lightning/pull/2881)) -- Fixed ImageNet example: learning rate scheduler, number of workers and batch size when using DDP ([#2889](https://github.com/Lightning-AI/lightning/pull/2889)) -- Fixed apex gradient clipping ([#2829](https://github.com/Lightning-AI/lightning/pull/2829)) -- Fixed save apex scaler states ([#2828](https://github.com/Lightning-AI/lightning/pull/2828)) -- Fixed a model loading issue with inheritance and variable positional arguments ([#2911](https://github.com/Lightning-AI/lightning/pull/2911)) -- Fixed passing `non_blocking=True` when transferring a batch object that does not support it ([#2910](https://github.com/Lightning-AI/lightning/pull/2910)) -- Fixed checkpointing to remote file paths ([#2925](https://github.com/Lightning-AI/lightning/pull/2925)) -- Fixed adding val step argument to metrics ([#2986](https://github.com/Lightning-AI/lightning/pull/2986)) -- Fixed an issue that caused `Trainer.test()` to stall in ddp mode 
([#2997](https://github.com/Lightning-AI/lightning/pull/2997)) -- Fixed gathering of results with tensors of varying shape ([#3020](https://github.com/Lightning-AI/lightning/pull/3020)) -- Fixed batch size auto-scaling feature to set the new value on the correct model attribute ([#3043](https://github.com/Lightning-AI/lightning/pull/3043)) -- Fixed automatic batch scaling not working with half precision ([#3045](https://github.com/Lightning-AI/lightning/pull/3045)) -- Fixed setting device to root gpu ([#3042](https://github.com/Lightning-AI/lightning/pull/3042)) +- Fixed `accumulate_grad_batches` for last batch ([#2853](https://github.com/Lightning-AI/pytorch-lightning/pull/2853)) +- Fixed setup call while testing ([#2624](https://github.com/Lightning-AI/pytorch-lightning/pull/2624)) +- Fixed local rank zero casting ([#2640](https://github.com/Lightning-AI/pytorch-lightning/pull/2640)) +- Fixed single scalar return from training ([#2587](https://github.com/Lightning-AI/pytorch-lightning/pull/2587)) +- Fixed Horovod backend to scale LR schedulers with the optimizer ([#2626](https://github.com/Lightning-AI/pytorch-lightning/pull/2626)) +- Fixed `dtype` and `device` properties not getting updated in submodules ([#2657](https://github.com/Lightning-AI/pytorch-lightning/pull/2657)) +- Fixed `fast_dev_run` to run for all dataloaders ([#2581](https://github.com/Lightning-AI/pytorch-lightning/pull/2581)) +- Fixed `save_dir` in loggers getting ignored by default value of `weights_save_path` when user did not specify `weights_save_path` ([#2681](https://github.com/Lightning-AI/pytorch-lightning/pull/2681)) +- Fixed `weights_save_path` getting ignored when `logger=False` is passed to Trainer ([#2681](https://github.com/Lightning-AI/pytorch-lightning/pull/2681)) +- Fixed TPU multi-core and Float16 ([#2632](https://github.com/Lightning-AI/pytorch-lightning/pull/2632)) +- Fixed test metrics not being logged with `LoggerCollection` ([#2723](https://github.com/Lightning-AI/pytorch-lightning/pull/2723)) +- Fixed data transfer to device when using `torchtext.data.Field` and `include_lengths is True` ([#2689](https://github.com/Lightning-AI/pytorch-lightning/pull/2689)) +- Fixed shuffle argument for distributed sampler ([#2789](https://github.com/Lightning-AI/pytorch-lightning/pull/2789)) +- Fixed logging interval ([#2694](https://github.com/Lightning-AI/pytorch-lightning/pull/2694)) +- Fixed loss value in the progress bar being wrong when `accumulate_grad_batches > 1` ([#2738](https://github.com/Lightning-AI/pytorch-lightning/pull/2738)) +- Fixed correct CWD for ddp sub-processes when using Hydra ([#2719](https://github.com/Lightning-AI/pytorch-lightning/pull/2719)) +- Fixed selecting GPUs using `CUDA_VISIBLE_DEVICES` ([#2739](https://github.com/Lightning-AI/pytorch-lightning/pull/2739)) +- Fixed false `num_classes` warning in metrics ([#2781](https://github.com/Lightning-AI/pytorch-lightning/pull/2781)) +- Fixed shell injection vulnerability in subprocess call ([#2786](https://github.com/Lightning-AI/pytorch-lightning/pull/2786)) +- Fixed LR finder and `hparams` compatibility ([#2821](https://github.com/Lightning-AI/pytorch-lightning/pull/2821)) +- Fixed `ModelCheckpoint` not saving the latest information when `save_last=True` ([#2881](https://github.com/Lightning-AI/pytorch-lightning/pull/2881)) +- Fixed ImageNet example: learning rate scheduler, number of workers and batch size when using DDP ([#2889](https://github.com/Lightning-AI/pytorch-lightning/pull/2889)) +- Fixed apex gradient clipping 
([#2829](https://github.com/Lightning-AI/pytorch-lightning/pull/2829)) +- Fixed save apex scaler states ([#2828](https://github.com/Lightning-AI/pytorch-lightning/pull/2828)) +- Fixed a model loading issue with inheritance and variable positional arguments ([#2911](https://github.com/Lightning-AI/pytorch-lightning/pull/2911)) +- Fixed passing `non_blocking=True` when transferring a batch object that does not support it ([#2910](https://github.com/Lightning-AI/pytorch-lightning/pull/2910)) +- Fixed checkpointing to remote file paths ([#2925](https://github.com/Lightning-AI/pytorch-lightning/pull/2925)) +- Fixed adding val step argument to metrics ([#2986](https://github.com/Lightning-AI/pytorch-lightning/pull/2986)) +- Fixed an issue that caused `Trainer.test()` to stall in ddp mode ([#2997](https://github.com/Lightning-AI/pytorch-lightning/pull/2997)) +- Fixed gathering of results with tensors of varying shape ([#3020](https://github.com/Lightning-AI/pytorch-lightning/pull/3020)) +- Fixed batch size auto-scaling feature to set the new value on the correct model attribute ([#3043](https://github.com/Lightning-AI/pytorch-lightning/pull/3043)) +- Fixed automatic batch scaling not working with half precision ([#3045](https://github.com/Lightning-AI/pytorch-lightning/pull/3045)) +- Fixed setting device to root gpu ([#3042](https://github.com/Lightning-AI/pytorch-lightning/pull/3042)) ## [0.8.5] - 2020-07-09 ### Added -- Added a PSNR metric: peak signal-to-noise ratio ([#2483](https://github.com/Lightning-AI/lightning/pull/2483)) -- Added functional regression metrics ([#2492](https://github.com/Lightning-AI/lightning/pull/2492)) +- Added a PSNR metric: peak signal-to-noise ratio ([#2483](https://github.com/Lightning-AI/pytorch-lightning/pull/2483)) +- Added functional regression metrics ([#2492](https://github.com/Lightning-AI/pytorch-lightning/pull/2492)) ### Removed -- Removed auto val reduce ([#2462](https://github.com/Lightning-AI/lightning/pull/2462)) +- Removed auto val reduce ([#2462](https://github.com/Lightning-AI/pytorch-lightning/pull/2462)) ### Fixed -- Flattening Wandb Hyperparameters ([#2459](https://github.com/Lightning-AI/lightning/pull/2459)) -- Fixed using the same DDP python interpreter and actually running ([#2482](https://github.com/Lightning-AI/lightning/pull/2482)) -- Fixed model summary input type conversion for models that have input dtype different from model parameters ([#2510](https://github.com/Lightning-AI/lightning/pull/2510)) -- Made `TensorBoardLogger` and `CometLogger` pickleable ([#2518](https://github.com/Lightning-AI/lightning/pull/2518)) -- Fixed a problem with `MLflowLogger` creating multiple run folders ([#2502](https://github.com/Lightning-AI/lightning/pull/2502)) -- Fixed global_step increment ([#2455](https://github.com/Lightning-AI/lightning/pull/2455)) -- Fixed TPU hanging example ([#2488](https://github.com/Lightning-AI/lightning/pull/2488)) -- Fixed `argparse` default value bug ([#2526](https://github.com/Lightning-AI/lightning/pull/2526)) -- Fixed Dice and IoU to avoid NaN by adding small eps ([#2545](https://github.com/Lightning-AI/lightning/pull/2545)) -- Fixed accumulate gradients schedule at epoch 0 (continued) ([#2513](https://github.com/Lightning-AI/lightning/pull/2513)) -- Fixed Trainer `.fit()` returning last not best weights in "ddp_spawn" ([#2565](https://github.com/Lightning-AI/lightning/pull/2565)) -- Fixed passing (do not pass) TPU weights back on test ([#2566](https://github.com/Lightning-AI/lightning/pull/2566)) -- Fixed DDP tests 
and `.test()` ([#2512](https://github.com/Lightning-AI/lightning/pull/2512), - [#2570](https://github.com/Lightning-AI/lightning/pull/2570)) +- Flattening Wandb Hyperparameters ([#2459](https://github.com/Lightning-AI/pytorch-lightning/pull/2459)) +- Fixed using the same DDP python interpreter and actually running ([#2482](https://github.com/Lightning-AI/pytorch-lightning/pull/2482)) +- Fixed model summary input type conversion for models that have input dtype different from model parameters ([#2510](https://github.com/Lightning-AI/pytorch-lightning/pull/2510)) +- Made `TensorBoardLogger` and `CometLogger` pickleable ([#2518](https://github.com/Lightning-AI/pytorch-lightning/pull/2518)) +- Fixed a problem with `MLflowLogger` creating multiple run folders ([#2502](https://github.com/Lightning-AI/pytorch-lightning/pull/2502)) +- Fixed global_step increment ([#2455](https://github.com/Lightning-AI/pytorch-lightning/pull/2455)) +- Fixed TPU hanging example ([#2488](https://github.com/Lightning-AI/pytorch-lightning/pull/2488)) +- Fixed `argparse` default value bug ([#2526](https://github.com/Lightning-AI/pytorch-lightning/pull/2526)) +- Fixed Dice and IoU to avoid NaN by adding small eps ([#2545](https://github.com/Lightning-AI/pytorch-lightning/pull/2545)) +- Fixed accumulate gradients schedule at epoch 0 (continued) ([#2513](https://github.com/Lightning-AI/pytorch-lightning/pull/2513)) +- Fixed Trainer `.fit()` returning last not best weights in "ddp_spawn" ([#2565](https://github.com/Lightning-AI/pytorch-lightning/pull/2565)) +- Fixed passing (do not pass) TPU weights back on test ([#2566](https://github.com/Lightning-AI/pytorch-lightning/pull/2566)) +- Fixed DDP tests and `.test()` ([#2512](https://github.com/Lightning-AI/pytorch-lightning/pull/2512), + [#2570](https://github.com/Lightning-AI/pytorch-lightning/pull/2570)) ## [0.8.4] - 2020-07-01 ### Added -- Added reduce ddp results on eval ([#2434](https://github.com/Lightning-AI/lightning/pull/2434)) -- Added a warning when an `IterableDataset` has `__len__` defined ([#2437](https://github.com/Lightning-AI/lightning/pull/2437)) +- Added reduce ddp results on eval ([#2434](https://github.com/Lightning-AI/pytorch-lightning/pull/2434)) +- Added a warning when an `IterableDataset` has `__len__` defined ([#2437](https://github.com/Lightning-AI/pytorch-lightning/pull/2437)) ### Changed -- Enabled no returns from eval ([#2446](https://github.com/Lightning-AI/lightning/pull/2446)) +- Enabled no returns from eval ([#2446](https://github.com/Lightning-AI/pytorch-lightning/pull/2446)) ### Fixed -- Fixes train outputs ([#2428](https://github.com/Lightning-AI/lightning/pull/2428)) -- Fixes Conda dependencies ([#2412](https://github.com/Lightning-AI/lightning/pull/2412)) -- Fixed Apex scaling with decoupled backward ([#2433](https://github.com/Lightning-AI/lightning/pull/2433)) -- Fixed crashing or wrong displaying progressbar because of missing ipywidgets ([#2417](https://github.com/Lightning-AI/lightning/pull/2417)) +- Fixes train outputs ([#2428](https://github.com/Lightning-AI/pytorch-lightning/pull/2428)) +- Fixes Conda dependencies ([#2412](https://github.com/Lightning-AI/pytorch-lightning/pull/2412)) +- Fixed Apex scaling with decoupled backward ([#2433](https://github.com/Lightning-AI/pytorch-lightning/pull/2433)) +- Fixed crashing or wrong displaying progressbar because of missing ipywidgets ([#2417](https://github.com/Lightning-AI/pytorch-lightning/pull/2417)) - Fixed TPU saving dir 
([fc26078e](https://github.com/Lightning-AI/lightning/commit/fc26078e395f8a001f4c6dd7b3fe7ca202f914a3), [04e68f02](https://github.com/Lightning-AI/lightning/commit/04e68f022fc03dd5f1555ee86dea997d42a448ad)) -- Fixed logging on rank 0 only ([#2425](https://github.com/Lightning-AI/lightning/pull/2425)) +- Fixed logging on rank 0 only ([#2425](https://github.com/Lightning-AI/pytorch-lightning/pull/2425)) ## [0.8.3] - 2020-06-29 @@ -4037,115 +4037,115 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added -- Added TorchText support for moving data to GPU ([#2379](https://github.com/Lightning-AI/lightning/pull/2379)) +- Added TorchText support for moving data to GPU ([#2379](https://github.com/Lightning-AI/pytorch-lightning/pull/2379)) ### Changed -- Changed epoch indexing from 0 instead of 1 ([#2289](https://github.com/Lightning-AI/lightning/pull/2289)) -- Refactor Model `backward` ([#2276](https://github.com/Lightning-AI/lightning/pull/2276)) -- Refactored `training_batch` + tests to verify correctness ([#2327](https://github.com/Lightning-AI/lightning/pull/2327), - [#2328](https://github.com/Lightning-AI/lightning/pull/2328)) -- Refactored training loop ([#2336](https://github.com/Lightning-AI/lightning/pull/2336)) -- Made optimization steps for hooks ([#2363](https://github.com/Lightning-AI/lightning/pull/2363)) -- Changed default apex level to 'O2' ([#2362](https://github.com/Lightning-AI/lightning/pull/2362)) +- Changed epoch indexing from 0 instead of 1 ([#2289](https://github.com/Lightning-AI/pytorch-lightning/pull/2289)) +- Refactor Model `backward` ([#2276](https://github.com/Lightning-AI/pytorch-lightning/pull/2276)) +- Refactored `training_batch` + tests to verify correctness ([#2327](https://github.com/Lightning-AI/pytorch-lightning/pull/2327), + [#2328](https://github.com/Lightning-AI/pytorch-lightning/pull/2328)) +- Refactored training loop ([#2336](https://github.com/Lightning-AI/pytorch-lightning/pull/2336)) +- Made optimization steps for hooks ([#2363](https://github.com/Lightning-AI/pytorch-lightning/pull/2363)) +- Changed default apex level to 'O2' ([#2362](https://github.com/Lightning-AI/pytorch-lightning/pull/2362)) ### Removed -- Moved `TrainsLogger` to Bolts ([#2384](https://github.com/Lightning-AI/lightning/pull/2384)) +- Moved `TrainsLogger` to Bolts ([#2384](https://github.com/Lightning-AI/pytorch-lightning/pull/2384)) ### Fixed -- Fixed parsing TPU arguments and TPU tests ([#2094](https://github.com/Lightning-AI/lightning/pull/2094)) -- Fixed number batches in case of multiple dataloaders and `limit_{*}_batches` ([#1920](https://github.com/Lightning-AI/lightning/pull/1920), - [#2226](https://github.com/Lightning-AI/lightning/pull/2226)) -- Fixed an issue with forward hooks not being removed after model summary ([#2298](https://github.com/Lightning-AI/lightning/pull/2298)) -- Fix for `load_from_checkpoint()` not working with absolute path on Windows ([#2294](https://github.com/Lightning-AI/lightning/pull/2294)) -- Fixed an issue how _has_len handles `NotImplementedError` e.g. 
raised by `torchtext.data.Iterator` ([#2293](https://github.com/Lightning-AI/lightning/pull/2293)), ([#2307](https://github.com/Lightning-AI/lightning/pull/2307)) -- Fixed `average_precision` metric ([#2319](https://github.com/Lightning-AI/lightning/pull/2319)) -- Fixed ROC metric for CUDA tensors ([#2304](https://github.com/Lightning-AI/lightning/pull/2304)) -- Fixed lost compatibility with custom datatypes implementing `.to` ([#2335](https://github.com/Lightning-AI/lightning/pull/2335)) -- Fixed loading model with kwargs ([#2387](https://github.com/Lightning-AI/lightning/pull/2387)) -- Fixed sum(0) for `trainer.num_val_batches` ([#2268](https://github.com/Lightning-AI/lightning/pull/2268)) -- Fixed checking if the parameters are a `DictConfig` Object ([#2216](https://github.com/Lightning-AI/lightning/pull/2216)) -- Fixed SLURM weights saving ([#2341](https://github.com/Lightning-AI/lightning/pull/2341)) -- Fixed swaps LR scheduler order ([#2356](https://github.com/Lightning-AI/lightning/pull/2356)) -- Fixed adding tensorboard `hparams` logging test ([#2342](https://github.com/Lightning-AI/lightning/pull/2342)) -- Fixed use model ref for tear down ([#2360](https://github.com/Lightning-AI/lightning/pull/2360)) -- Fixed logger crash on DDP ([#2388](https://github.com/Lightning-AI/lightning/pull/2388)) -- Fixed several issues with early stopping and checkpoint callbacks ([#1504](https://github.com/Lightning-AI/lightning/pull/1504), - [#2391](https://github.com/Lightning-AI/lightning/pull/2391)) -- Fixed loading past checkpoints from v0.7.x ([#2405](https://github.com/Lightning-AI/lightning/pull/2405)) -- Fixed loading model without arguments ([#2403](https://github.com/Lightning-AI/lightning/pull/2403)) -- Fixed Windows compatibility issue ([#2358](https://github.com/Lightning-AI/lightning/pull/2358)) +- Fixed parsing TPU arguments and TPU tests ([#2094](https://github.com/Lightning-AI/pytorch-lightning/pull/2094)) +- Fixed number of batches in case of multiple dataloaders and `limit_{*}_batches` ([#1920](https://github.com/Lightning-AI/pytorch-lightning/pull/1920), + [#2226](https://github.com/Lightning-AI/pytorch-lightning/pull/2226)) +- Fixed an issue with forward hooks not being removed after model summary ([#2298](https://github.com/Lightning-AI/pytorch-lightning/pull/2298)) +- Fixed `load_from_checkpoint()` not working with absolute path on Windows ([#2294](https://github.com/Lightning-AI/pytorch-lightning/pull/2294)) +- Fixed an issue with how `_has_len` handles `NotImplementedError`, e.g. 
raised by `torchtext.data.Iterator` ([#2293](https://github.com/Lightning-AI/pytorch-lightning/pull/2293)), ([#2307](https://github.com/Lightning-AI/pytorch-lightning/pull/2307)) +- Fixed `average_precision` metric ([#2319](https://github.com/Lightning-AI/pytorch-lightning/pull/2319)) +- Fixed ROC metric for CUDA tensors ([#2304](https://github.com/Lightning-AI/pytorch-lightning/pull/2304)) +- Fixed lost compatibility with custom datatypes implementing `.to` ([#2335](https://github.com/Lightning-AI/pytorch-lightning/pull/2335)) +- Fixed loading model with kwargs ([#2387](https://github.com/Lightning-AI/pytorch-lightning/pull/2387)) +- Fixed sum(0) for `trainer.num_val_batches` ([#2268](https://github.com/Lightning-AI/pytorch-lightning/pull/2268)) +- Fixed checking if the parameters are a `DictConfig` object ([#2216](https://github.com/Lightning-AI/pytorch-lightning/pull/2216)) +- Fixed SLURM weights saving ([#2341](https://github.com/Lightning-AI/pytorch-lightning/pull/2341)) +- Fixed swapped LR scheduler order ([#2356](https://github.com/Lightning-AI/pytorch-lightning/pull/2356)) +- Fixed adding tensorboard `hparams` logging test ([#2342](https://github.com/Lightning-AI/pytorch-lightning/pull/2342)) +- Fixed using the model reference for teardown ([#2360](https://github.com/Lightning-AI/pytorch-lightning/pull/2360)) +- Fixed logger crash on DDP ([#2388](https://github.com/Lightning-AI/pytorch-lightning/pull/2388)) +- Fixed several issues with early stopping and checkpoint callbacks ([#1504](https://github.com/Lightning-AI/pytorch-lightning/pull/1504), + [#2391](https://github.com/Lightning-AI/pytorch-lightning/pull/2391)) +- Fixed loading past checkpoints from v0.7.x ([#2405](https://github.com/Lightning-AI/pytorch-lightning/pull/2405)) +- Fixed loading model without arguments ([#2403](https://github.com/Lightning-AI/pytorch-lightning/pull/2403)) +- Fixed Windows compatibility issue ([#2358](https://github.com/Lightning-AI/pytorch-lightning/pull/2358)) ## [0.8.1] - 2020-06-19 ### Fixed -- Fixed the `load_from_checkpoint` path detected as URL bug ([#2244](https://github.com/Lightning-AI/lightning/pull/2244)) -- Fixed hooks - added barrier ([#2245](https://github.com/Lightning-AI/lightning/pull/2245), - [#2257](https://github.com/Lightning-AI/lightning/pull/2257), - [#2260](https://github.com/Lightning-AI/lightning/pull/220)) -- Fixed `hparams` - remove frame inspection on `self.hparams` ([#2253](https://github.com/Lightning-AI/lightning/pull/2253)) -- Fixed setup and on fit calls ([#2252](https://github.com/Lightning-AI/lightning/pull/2252)) -- Fixed GPU template ([#2255](https://github.com/Lightning-AI/lightning/pull/2255)) +- Fixed the `load_from_checkpoint` path detected as URL bug ([#2244](https://github.com/Lightning-AI/pytorch-lightning/pull/2244)) +- Fixed hooks - added barrier ([#2245](https://github.com/Lightning-AI/pytorch-lightning/pull/2245), + [#2257](https://github.com/Lightning-AI/pytorch-lightning/pull/2257), + [#2260](https://github.com/Lightning-AI/pytorch-lightning/pull/2260)) +- Fixed `hparams` - remove frame inspection on `self.hparams` ([#2253](https://github.com/Lightning-AI/pytorch-lightning/pull/2253)) +- Fixed setup and on fit calls ([#2252](https://github.com/Lightning-AI/pytorch-lightning/pull/2252)) +- Fixed GPU template ([#2255](https://github.com/Lightning-AI/pytorch-lightning/pull/2255)) ## [0.8.0] - 2020-06-18 ### Added -- Added `overfit_batches`, `limit_{val|test}_batches` flags (overfit now uses training set for all three) 
([#2213](https://github.com/Lightning-AI/lightning/pull/2213)) +- Added `overfit_batches`, `limit_{val|test}_batches` flags (overfit now uses training set for all three) ([#2213](https://github.com/Lightning-AI/pytorch-lightning/pull/2213)) - Added metrics - * Base classes ([#1326](https://github.com/Lightning-AI/lightning/pull/1326), - [#1877](https://github.com/Lightning-AI/lightning/pull/1877)) - * Sklearn metrics classes ([#1327](https://github.com/Lightning-AI/lightning/pull/1327)) - * Native torch metrics ([#1488](https://github.com/Lightning-AI/lightning/pull/1488), - [#2062](https://github.com/Lightning-AI/lightning/pull/2062)) - * docs for all Metrics ([#2184](https://github.com/Lightning-AI/lightning/pull/2184), - [#2209](https://github.com/Lightning-AI/lightning/pull/2209)) - * Regression metrics ([#2221](https://github.com/Lightning-AI/lightning/pull/2221)) -- Allow dataloaders without sampler field present ([#1907](https://github.com/Lightning-AI/lightning/pull/1907)) -- Added option `save_last` to save the model at the end of every epoch in `ModelCheckpoint` ([#1908](https://github.com/Lightning-AI/lightning/pull/1908)) -- Early stopping checks `on_validation_end` ([#1458](https://github.com/Lightning-AI/lightning/pull/1458)) -- Speed up single-core TPU training by loading data using `ParallelLoader` ([#2033](https://github.com/Lightning-AI/lightning/pull/2033)) -- Added a model hook `transfer_batch_to_device` that enables moving custom data structures to the target device ([#1756](https://github.com/Lightning-AI/lightning/pull/1756)) -- Added [black](https://black.readthedocs.io/en/stable/) formatter for the code with code-checker on pull ([#1610](https://github.com/Lightning-AI/lightning/pull/1610)) -- Added back the slow spawn ddp implementation as `ddp_spawn` ([#2115](https://github.com/Lightning-AI/lightning/pull/2115)) -- Added loading checkpoints from URLs ([#1667](https://github.com/Lightning-AI/lightning/pull/1667)) -- Added a callback method `on_keyboard_interrupt` for handling KeyboardInterrupt events during training ([#2134](https://github.com/Lightning-AI/lightning/pull/2134)) -- Added a decorator `auto_move_data` that moves data to the correct device when using the LightningModule for inference ([#1905](https://github.com/Lightning-AI/lightning/pull/1905)) -- Added `ckpt_path` option to `LightningModule.test(...)` to load particular checkpoint ([#2190](https://github.com/Lightning-AI/lightning/pull/2190)) -- Added `setup` and `teardown` hooks for model ([#2229](https://github.com/Lightning-AI/lightning/pull/2229)) + * Base classes ([#1326](https://github.com/Lightning-AI/pytorch-lightning/pull/1326), + [#1877](https://github.com/Lightning-AI/pytorch-lightning/pull/1877)) + * Sklearn metrics classes ([#1327](https://github.com/Lightning-AI/pytorch-lightning/pull/1327)) + * Native torch metrics ([#1488](https://github.com/Lightning-AI/pytorch-lightning/pull/1488), + [#2062](https://github.com/Lightning-AI/pytorch-lightning/pull/2062)) + * docs for all Metrics ([#2184](https://github.com/Lightning-AI/pytorch-lightning/pull/2184), + [#2209](https://github.com/Lightning-AI/pytorch-lightning/pull/2209)) + * Regression metrics ([#2221](https://github.com/Lightning-AI/pytorch-lightning/pull/2221)) +- Allow dataloaders without sampler field present ([#1907](https://github.com/Lightning-AI/pytorch-lightning/pull/1907)) +- Added option `save_last` to save the model at the end of every epoch in `ModelCheckpoint` 
([#1908](https://github.com/Lightning-AI/pytorch-lightning/pull/1908)) +- Early stopping checks `on_validation_end` ([#1458](https://github.com/Lightning-AI/pytorch-lightning/pull/1458)) +- Speed up single-core TPU training by loading data using `ParallelLoader` ([#2033](https://github.com/Lightning-AI/pytorch-lightning/pull/2033)) +- Added a model hook `transfer_batch_to_device` that enables moving custom data structures to the target device ([#1756](https://github.com/Lightning-AI/pytorch-lightning/pull/1756)) +- Added [black](https://black.readthedocs.io/en/stable/) formatter for the code with code-checker on pull ([#1610](https://github.com/Lightning-AI/pytorch-lightning/pull/1610)) +- Added back the slow spawn ddp implementation as `ddp_spawn` ([#2115](https://github.com/Lightning-AI/pytorch-lightning/pull/2115)) +- Added loading checkpoints from URLs ([#1667](https://github.com/Lightning-AI/pytorch-lightning/pull/1667)) +- Added a callback method `on_keyboard_interrupt` for handling KeyboardInterrupt events during training ([#2134](https://github.com/Lightning-AI/pytorch-lightning/pull/2134)) +- Added a decorator `auto_move_data` that moves data to the correct device when using the LightningModule for inference ([#1905](https://github.com/Lightning-AI/pytorch-lightning/pull/1905)) +- Added `ckpt_path` option to `LightningModule.test(...)` to load particular checkpoint ([#2190](https://github.com/Lightning-AI/pytorch-lightning/pull/2190)) +- Added `setup` and `teardown` hooks for model ([#2229](https://github.com/Lightning-AI/pytorch-lightning/pull/2229)) ### Changed -- Allow user to select individual TPU core to train on ([#1729](https://github.com/Lightning-AI/lightning/pull/1729)) -- Removed non-finite values from loss in `LRFinder` ([#1862](https://github.com/Lightning-AI/lightning/pull/1862)) -- Allow passing model hyperparameters as complete kwarg list ([#1896](https://github.com/Lightning-AI/lightning/pull/1896)) -- Renamed `ModelCheckpoint`'s attributes `best` to `best_model_score` and `kth_best_model` to `kth_best_model_path` ([#1799](https://github.com/Lightning-AI/lightning/pull/1799)) -- Re-Enable Logger's `ImportError`s ([#1938](https://github.com/Lightning-AI/lightning/pull/1938)) -- Changed the default value of the Trainer argument `weights_summary` from `full` to `top` ([#2029](https://github.com/Lightning-AI/lightning/pull/2029)) -- Raise an error when lightning replaces an existing sampler ([#2020](https://github.com/Lightning-AI/lightning/pull/2020)) -- Enabled `prepare_data` from correct processes - clarify local vs global rank ([#2166](https://github.com/Lightning-AI/lightning/pull/2166)) -- Remove explicit flush from tensorboard logger ([#2126](https://github.com/Lightning-AI/lightning/pull/2126)) -- Changed epoch indexing from 1 instead of 0 ([#2206](https://github.com/Lightning-AI/lightning/pull/2206)) +- Allow user to select individual TPU core to train on ([#1729](https://github.com/Lightning-AI/pytorch-lightning/pull/1729)) +- Removed non-finite values from loss in `LRFinder` ([#1862](https://github.com/Lightning-AI/pytorch-lightning/pull/1862)) +- Allow passing model hyperparameters as complete kwarg list ([#1896](https://github.com/Lightning-AI/pytorch-lightning/pull/1896)) +- Renamed `ModelCheckpoint`'s attributes `best` to `best_model_score` and `kth_best_model` to `kth_best_model_path` ([#1799](https://github.com/Lightning-AI/pytorch-lightning/pull/1799)) +- Re-Enable Logger's `ImportError`s 
([#1938](https://github.com/Lightning-AI/pytorch-lightning/pull/1938)) +- Changed the default value of the Trainer argument `weights_summary` from `full` to `top` ([#2029](https://github.com/Lightning-AI/pytorch-lightning/pull/2029)) +- Raise an error when lightning replaces an existing sampler ([#2020](https://github.com/Lightning-AI/pytorch-lightning/pull/2020)) +- Enabled `prepare_data` from correct processes - clarify local vs global rank ([#2166](https://github.com/Lightning-AI/pytorch-lightning/pull/2166)) +- Remove explicit flush from tensorboard logger ([#2126](https://github.com/Lightning-AI/pytorch-lightning/pull/2126)) +- Changed epoch indexing to start from 1 instead of 0 ([#2206](https://github.com/Lightning-AI/pytorch-lightning/pull/2206)) ### Deprecated -- Deprecated flags: ([#2213](https://github.com/Lightning-AI/lightning/pull/2213)) +- Deprecated flags: ([#2213](https://github.com/Lightning-AI/pytorch-lightning/pull/2213)) * `overfit_pct` in favour of `overfit_batches` * `val_percent_check` in favour of `limit_val_batches` * `test_percent_check` in favour of `limit_test_batches` -- Deprecated `ModelCheckpoint`'s attributes `best` and `kth_best_model` ([#1799](https://github.com/Lightning-AI/lightning/pull/1799)) -- Dropped official support/testing for older PyTorch versions <1.3 ([#1917](https://github.com/Lightning-AI/lightning/pull/1917)) -- Deprecated Trainer `proc_rank` in favour of `global_rank` ([#2166](https://github.com/Lightning-AI/lightning/pull/2166), - [#2269](https://github.com/Lightning-AI/lightning/pull/2269)) +- Deprecated `ModelCheckpoint`'s attributes `best` and `kth_best_model` ([#1799](https://github.com/Lightning-AI/pytorch-lightning/pull/1799)) +- Dropped official support/testing for older PyTorch versions <1.3 ([#1917](https://github.com/Lightning-AI/pytorch-lightning/pull/1917)) +- Deprecated Trainer `proc_rank` in favour of `global_rank` ([#2166](https://github.com/Lightning-AI/pytorch-lightning/pull/2166), + [#2269](https://github.com/Lightning-AI/pytorch-lightning/pull/2269)) ### Removed -- Removed unintended Trainer argument `progress_bar_callback`, the callback should be passed in by `Trainer(callbacks=[...])` instead ([#1855](https://github.com/Lightning-AI/lightning/pull/1855)) -- Removed obsolete `self._device` in Trainer ([#1849](https://github.com/Lightning-AI/lightning/pull/1849)) -- Removed deprecated API ([#2073](https://github.com/Lightning-AI/lightning/pull/2073)) +- Removed unintended Trainer argument `progress_bar_callback`; the callback should be passed in via `Trainer(callbacks=[...])` instead ([#1855](https://github.com/Lightning-AI/pytorch-lightning/pull/1855)) +- Removed obsolete `self._device` in Trainer ([#1849](https://github.com/Lightning-AI/pytorch-lightning/pull/1849)) +- Removed deprecated API ([#2073](https://github.com/Lightning-AI/pytorch-lightning/pull/2073)) * Packages: `pl.pt_overrides`, `pl.root_module` * Modules: `pl.logging.comet_logger`, `pl.logging.mlflow_logger`, `pl.logging.test_tube_logger`, `pl.overrides.override_data_parallel`, `pl.core.model_saving`, `pl.core.root_module` * Trainer arguments: `add_row_log_interval`, `default_save_path`, `gradient_clip`, `nb_gpu_nodes`, `max_nb_epochs`, `min_nb_epochs`, `nb_sanity_val_steps` @@ -4153,386 +4153,386 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Fixed -- Run graceful training teardown on interpreter exit ([#1631](https://github.com/Lightning-AI/lightning/pull/1631)) -- Fixed user warning when apex was used together with learning rate schedulers ([#1873](https://github.com/Lightning-AI/lightning/pull/1873)) -- Fixed multiple calls of `EarlyStopping` callback ([#1863](https://github.com/Lightning-AI/lightning/pull/1863)) -- Fixed an issue with `Trainer.from_argparse_args` when passing in unknown Trainer args ([#1932](https://github.com/Lightning-AI/lightning/pull/1932)) -- Fixed bug related to logger not being reset correctly for model after tuner algorithms ([#1933](https://github.com/Lightning-AI/lightning/pull/1933)) -- Fixed root node resolution for SLURM cluster with dash in host name ([#1954](https://github.com/Lightning-AI/lightning/pull/1954)) -- Fixed `LearningRateLogger` in multi-scheduler setting ([#1944](https://github.com/Lightning-AI/lightning/pull/1944)) -- Fixed test configuration check and testing ([#1804](https://github.com/Lightning-AI/lightning/pull/1804)) -- Fixed an issue with Trainer constructor silently ignoring unknown/misspelled arguments ([#1820](https://github.com/Lightning-AI/lightning/pull/1820)) -- Fixed `save_weights_only` in ModelCheckpoint ([#1780](https://github.com/Lightning-AI/lightning/pull/1780)) -- Allow use of same `WandbLogger` instance for multiple training loops ([#2055](https://github.com/Lightning-AI/lightning/pull/2055)) -- Fixed an issue with `_auto_collect_arguments` collecting local variables that are not constructor arguments and not working for signatures that have the instance not named `self` ([#2048](https://github.com/Lightning-AI/lightning/pull/2048)) -- Fixed mistake in parameters' grad norm tracking ([#2012](https://github.com/Lightning-AI/lightning/pull/2012)) -- Fixed CPU and hanging GPU crash ([#2118](https://github.com/Lightning-AI/lightning/pull/2118)) -- Fixed an issue with the model summary and `example_input_array` depending on a specific ordering of the submodules in a LightningModule ([#1773](https://github.com/Lightning-AI/lightning/pull/1773)) -- Fixed Tpu logging ([#2230](https://github.com/Lightning-AI/lightning/pull/2230)) -- Fixed Pid port + duplicate `rank_zero` logging ([#2140](https://github.com/Lightning-AI/lightning/pull/2140), - [#2231](https://github.com/Lightning-AI/lightning/pull/2231)) +- Run graceful training teardown on interpreter exit ([#1631](https://github.com/Lightning-AI/pytorch-lightning/pull/1631)) +- Fixed user warning when apex was used together with learning rate schedulers ([#1873](https://github.com/Lightning-AI/pytorch-lightning/pull/1873)) +- Fixed multiple calls of `EarlyStopping` callback ([#1863](https://github.com/Lightning-AI/pytorch-lightning/pull/1863)) +- Fixed an issue with `Trainer.from_argparse_args` when passing in unknown Trainer args ([#1932](https://github.com/Lightning-AI/pytorch-lightning/pull/1932)) +- Fixed bug related to logger not being reset correctly for model after tuner algorithms ([#1933](https://github.com/Lightning-AI/pytorch-lightning/pull/1933)) +- Fixed root node resolution for SLURM cluster with dash in host name ([#1954](https://github.com/Lightning-AI/pytorch-lightning/pull/1954)) +- Fixed `LearningRateLogger` in multi-scheduler setting ([#1944](https://github.com/Lightning-AI/pytorch-lightning/pull/1944)) +- Fixed test configuration check and testing ([#1804](https://github.com/Lightning-AI/pytorch-lightning/pull/1804)) +- Fixed an issue with Trainer constructor silently ignoring 
unknown/misspelled arguments ([#1820](https://github.com/Lightning-AI/pytorch-lightning/pull/1820)) +- Fixed `save_weights_only` in ModelCheckpoint ([#1780](https://github.com/Lightning-AI/pytorch-lightning/pull/1780)) +- Allow use of same `WandbLogger` instance for multiple training loops ([#2055](https://github.com/Lightning-AI/pytorch-lightning/pull/2055)) +- Fixed an issue with `_auto_collect_arguments` collecting local variables that are not constructor arguments and not working for signatures that have the instance not named `self` ([#2048](https://github.com/Lightning-AI/pytorch-lightning/pull/2048)) +- Fixed mistake in parameters' grad norm tracking ([#2012](https://github.com/Lightning-AI/pytorch-lightning/pull/2012)) +- Fixed CPU and hanging GPU crash ([#2118](https://github.com/Lightning-AI/pytorch-lightning/pull/2118)) +- Fixed an issue with the model summary and `example_input_array` depending on a specific ordering of the submodules in a LightningModule ([#1773](https://github.com/Lightning-AI/pytorch-lightning/pull/1773)) +- Fixed TPU logging ([#2230](https://github.com/Lightning-AI/pytorch-lightning/pull/2230)) +- Fixed PID port + duplicate `rank_zero` logging ([#2140](https://github.com/Lightning-AI/pytorch-lightning/pull/2140), + [#2231](https://github.com/Lightning-AI/pytorch-lightning/pull/2231)) ## [0.7.6] - 2020-05-16 ### Added -- Added callback for logging learning rates ([#1498](https://github.com/Lightning-AI/lightning/pull/1498)) -- Added transfer learning example (for a binary classification task in computer vision) ([#1564](https://github.com/Lightning-AI/lightning/pull/1564)) -- Added type hints in `Trainer.fit()` and `Trainer.test()` to reflect that also a list of dataloaders can be passed in ([#1723](https://github.com/Lightning-AI/lightning/pull/1723)). -- Added auto scaling of batch size ([#1638](https://github.com/Lightning-AI/lightning/pull/1638)) -- The progress bar metrics now also get updated in `training_epoch_end` ([#1724](https://github.com/Lightning-AI/lightning/pull/1724)) -- Enable `NeptuneLogger` to work with `distributed_backend=ddp` ([#1753](https://github.com/Lightning-AI/lightning/pull/1753)) -- Added option to provide seed to random generators to ensure reproducibility ([#1572](https://github.com/Lightning-AI/lightning/pull/1572)) -- Added override for hparams in `load_from_ckpt` ([#1797](https://github.com/Lightning-AI/lightning/pull/1797)) -- Added support multi-node distributed execution under `torchelastic` ([#1811](https://github.com/Lightning-AI/lightning/pull/1811), - [#1818](https://github.com/Lightning-AI/lightning/pull/1818)) -- Added using `store_true` for bool args ([#1822](https://github.com/Lightning-AI/lightning/pull/1822), - [#1842](https://github.com/Lightning-AI/lightning/pull/1842)) -- Added dummy logger for internally disabling logging for some features ([#1836](https://github.com/Lightning-AI/lightning/pull/1836)) +- Added callback for logging learning rates ([#1498](https://github.com/Lightning-AI/pytorch-lightning/pull/1498)) +- Added transfer learning example (for a binary classification task in computer vision) ([#1564](https://github.com/Lightning-AI/pytorch-lightning/pull/1564)) +- Added type hints in `Trainer.fit()` and `Trainer.test()` to reflect that a list of dataloaders can also be passed in ([#1723](https://github.com/Lightning-AI/pytorch-lightning/pull/1723)). 
+- Added auto scaling of batch size ([#1638](https://github.com/Lightning-AI/pytorch-lightning/pull/1638)) +- The progress bar metrics now also get updated in `training_epoch_end` ([#1724](https://github.com/Lightning-AI/pytorch-lightning/pull/1724)) +- Enable `NeptuneLogger` to work with `distributed_backend=ddp` ([#1753](https://github.com/Lightning-AI/pytorch-lightning/pull/1753)) +- Added option to provide seed to random generators to ensure reproducibility ([#1572](https://github.com/Lightning-AI/pytorch-lightning/pull/1572)) +- Added override for hparams in `load_from_ckpt` ([#1797](https://github.com/Lightning-AI/pytorch-lightning/pull/1797)) +- Added support for multi-node distributed execution under `torchelastic` ([#1811](https://github.com/Lightning-AI/pytorch-lightning/pull/1811), + [#1818](https://github.com/Lightning-AI/pytorch-lightning/pull/1818)) +- Added using `store_true` for bool args ([#1822](https://github.com/Lightning-AI/pytorch-lightning/pull/1822), + [#1842](https://github.com/Lightning-AI/pytorch-lightning/pull/1842)) +- Added dummy logger for internally disabling logging for some features ([#1836](https://github.com/Lightning-AI/pytorch-lightning/pull/1836)) ### Changed -- Enable `non-blocking` for device transfers to GPU ([#1843](https://github.com/Lightning-AI/lightning/pull/1843)) -- Replace mata_tags.csv with hparams.yaml ([#1271](https://github.com/Lightning-AI/lightning/pull/1271)) -- Reduction when `batch_size < num_gpus` ([#1609](https://github.com/Lightning-AI/lightning/pull/1609)) -- Updated LightningTemplateModel to look more like Colab example ([#1577](https://github.com/Lightning-AI/lightning/pull/1577)) -- Don't convert `namedtuple` to `tuple` when transferring the batch to target device ([#1589](https://github.com/Lightning-AI/lightning/pull/1589)) -- Allow passing hparams as keyword argument to LightningModule when loading from checkpoint ([#1639](https://github.com/Lightning-AI/lightning/pull/1639)) -- Args should come after the last positional argument ([#1807](https://github.com/Lightning-AI/lightning/pull/1807)) -- Made ddp the default if no backend specified with multiple GPUs ([#1789](https://github.com/Lightning-AI/lightning/pull/1789)) +- Enable `non-blocking` for device transfers to GPU ([#1843](https://github.com/Lightning-AI/pytorch-lightning/pull/1843)) +- Replace meta_tags.csv with hparams.yaml ([#1271](https://github.com/Lightning-AI/pytorch-lightning/pull/1271)) +- Reduction when `batch_size < num_gpus` ([#1609](https://github.com/Lightning-AI/pytorch-lightning/pull/1609)) +- Updated LightningTemplateModel to look more like Colab example ([#1577](https://github.com/Lightning-AI/pytorch-lightning/pull/1577)) +- Don't convert `namedtuple` to `tuple` when transferring the batch to target device ([#1589](https://github.com/Lightning-AI/pytorch-lightning/pull/1589)) +- Allow passing hparams as keyword argument to LightningModule when loading from checkpoint ([#1639](https://github.com/Lightning-AI/pytorch-lightning/pull/1639)) +- Args should come after the last positional argument ([#1807](https://github.com/Lightning-AI/pytorch-lightning/pull/1807)) +- Made ddp the default if no backend is specified with multiple GPUs ([#1789](https://github.com/Lightning-AI/pytorch-lightning/pull/1789)) ### Deprecated -- Deprecated `tags_csv` in favor of `hparams_file` ([#1271](https://github.com/Lightning-AI/lightning/pull/1271)) +- Deprecated `tags_csv` in favor of `hparams_file` ([#1271](https://github.com/Lightning-AI/pytorch-lightning/pull/1271)) ### 
Fixed -- Fixed broken link in PR template ([#1675](https://github.com/Lightning-AI/lightning/pull/1675)) -- Fixed ModelCheckpoint not None checking filepath ([#1654](https://github.com/Lightning-AI/lightning/pull/1654)) -- Trainer now calls `on_load_checkpoint()` when resuming from a checkpoint ([#1666](https://github.com/Lightning-AI/lightning/pull/1666)) -- Fixed sampler logic for ddp with iterable dataset ([#1734](https://github.com/Lightning-AI/lightning/pull/1734)) -- Fixed `_reset_eval_dataloader()` for IterableDataset ([#1560](https://github.com/Lightning-AI/lightning/pull/1560)) -- Fixed Horovod distributed backend to set the `root_gpu` property ([#1669](https://github.com/Lightning-AI/lightning/pull/1669)) -- Fixed wandb logger `global_step` affects other loggers ([#1492](https://github.com/Lightning-AI/lightning/pull/1492)) -- Fixed disabling progress bar on non-zero ranks using Horovod backend ([#1709](https://github.com/Lightning-AI/lightning/pull/1709)) -- Fixed bugs that prevent lr finder to be used together with early stopping and validation dataloaders ([#1676](https://github.com/Lightning-AI/lightning/pull/1676)) -- Fixed a bug in Trainer that prepended the checkpoint path with `version_` when it shouldn't ([#1748](https://github.com/Lightning-AI/lightning/pull/1748)) -- Fixed lr key name in case of param groups in LearningRateLogger ([#1719](https://github.com/Lightning-AI/lightning/pull/1719)) -- Fixed accumulation parameter and suggestion method for learning rate finder ([#1801](https://github.com/Lightning-AI/lightning/pull/1801)) -- Fixed num processes wasn't being set properly and auto sampler was ddp failing ([#1819](https://github.com/Lightning-AI/lightning/pull/1819)) -- Fixed bugs in semantic segmentation example ([#1824](https://github.com/Lightning-AI/lightning/pull/1824)) -- Fixed saving native AMP scaler state ([#1777](https://github.com/Lightning-AI/lightning/pull/1777)) -- Fixed native amp + ddp ([#1788](https://github.com/Lightning-AI/lightning/pull/1788)) -- Fixed `hparam` logging with metrics ([#1647](https://github.com/Lightning-AI/lightning/pull/1647)) +- Fixed broken link in PR template ([#1675](https://github.com/Lightning-AI/pytorch-lightning/pull/1675)) +- Fixed ModelCheckpoint not checking `filepath` for None ([#1654](https://github.com/Lightning-AI/pytorch-lightning/pull/1654)) +- Trainer now calls `on_load_checkpoint()` when resuming from a checkpoint ([#1666](https://github.com/Lightning-AI/pytorch-lightning/pull/1666)) +- Fixed sampler logic for ddp with iterable dataset ([#1734](https://github.com/Lightning-AI/pytorch-lightning/pull/1734)) +- Fixed `_reset_eval_dataloader()` for IterableDataset ([#1560](https://github.com/Lightning-AI/pytorch-lightning/pull/1560)) +- Fixed Horovod distributed backend to set the `root_gpu` property ([#1669](https://github.com/Lightning-AI/pytorch-lightning/pull/1669)) +- Fixed wandb logger `global_step` affecting other loggers ([#1492](https://github.com/Lightning-AI/pytorch-lightning/pull/1492)) +- Fixed disabling progress bar on non-zero ranks using Horovod backend ([#1709](https://github.com/Lightning-AI/pytorch-lightning/pull/1709)) +- Fixed bugs that prevented the lr finder from being used together with early stopping and validation dataloaders ([#1676](https://github.com/Lightning-AI/pytorch-lightning/pull/1676)) +- Fixed a bug in Trainer that prepended the checkpoint path with `version_` when it shouldn't ([#1748](https://github.com/Lightning-AI/pytorch-lightning/pull/1748)) +- Fixed lr key name in case of param groups in 
LearningRateLogger ([#1719](https://github.com/Lightning-AI/pytorch-lightning/pull/1719)) +- Fixed accumulation parameter and suggestion method for learning rate finder ([#1801](https://github.com/Lightning-AI/pytorch-lightning/pull/1801)) +- Fixed num processes not being set properly and auto sampler failing with ddp ([#1819](https://github.com/Lightning-AI/pytorch-lightning/pull/1819)) +- Fixed bugs in semantic segmentation example ([#1824](https://github.com/Lightning-AI/pytorch-lightning/pull/1824)) +- Fixed saving native AMP scaler state ([#1777](https://github.com/Lightning-AI/pytorch-lightning/pull/1777)) +- Fixed native amp + ddp ([#1788](https://github.com/Lightning-AI/pytorch-lightning/pull/1788)) +- Fixed `hparam` logging with metrics ([#1647](https://github.com/Lightning-AI/pytorch-lightning/pull/1647)) ## [0.7.5] - 2020-04-27 ### Changed -- Allow logging of metrics together with `hparams` ([#1630](https://github.com/Lightning-AI/lightning/pull/1630)) +- Allow logging of metrics together with `hparams` ([#1630](https://github.com/Lightning-AI/pytorch-lightning/pull/1630)) ### Removed -- Removed Warning from trainer loop ([#1634](https://github.com/Lightning-AI/lightning/pull/1634)) +- Removed Warning from trainer loop ([#1634](https://github.com/Lightning-AI/pytorch-lightning/pull/1634)) ### Fixed -- Fixed ModelCheckpoint not being fixable ([#1632](https://github.com/Lightning-AI/lightning/pull/1632)) -- Fixed CPU DDP breaking change and DDP change ([#1635](https://github.com/Lightning-AI/lightning/pull/1635)) -- Tested pickling ([#1636](https://github.com/Lightning-AI/lightning/pull/1636)) +- Fixed ModelCheckpoint not being fixable ([#1632](https://github.com/Lightning-AI/pytorch-lightning/pull/1632)) +- Fixed CPU DDP breaking change and DDP change ([#1635](https://github.com/Lightning-AI/pytorch-lightning/pull/1635)) +- Tested pickling ([#1636](https://github.com/Lightning-AI/pytorch-lightning/pull/1636)) ## [0.7.4] - 2020-04-26 ### Added -- Added flag `replace_sampler_ddp` to manually disable sampler replacement in DDP ([#1513](https://github.com/Lightning-AI/lightning/pull/1513)) +- Added flag `replace_sampler_ddp` to manually disable sampler replacement in DDP ([#1513](https://github.com/Lightning-AI/pytorch-lightning/pull/1513)) - Added `auto_select_gpus` flag to trainer that enables automatic selection of available GPUs on exclusive mode systems. 
-- Added learning rate finder ([#1347](https://github.com/Lightning-AI/lightning/pull/1347)) -- Added support for DDP mode in clusters without SLURM ([#1387](https://github.com/Lightning-AI/lightning/pull/1387)) -- Added `test_dataloaders` parameter to `Trainer.test()` ([#1434](https://github.com/Lightning-AI/lightning/pull/1434)) -- Added `terminate_on_nan` flag to trainer that performs a NaN check with each training iteration when set to `True` ([#1475](https://github.com/Lightning-AI/lightning/pull/1475)) -- Added speed parity tests (max 1 sec difference per epoch)([#1482](https://github.com/Lightning-AI/lightning/pull/1482)) -- Added `ddp_cpu` backend for testing ddp without GPUs ([#1158](https://github.com/Lightning-AI/lightning/pull/1158)) -- Added [Horovod](http://horovod.ai) support as a distributed backend `Trainer(distributed_backend='horovod')` ([#1529](https://github.com/Lightning-AI/lightning/pull/1529)) -- Added support for 8 core distributed training on Kaggle TPU's ([#1568](https://github.com/Lightning-AI/lightning/pull/1568)) -- Added support for native AMP ([#1561](https://github.com/Lightning-AI/lightning/pull/1561), - [#1580](https://github.com/Lightning-AI/lightning/pull/1580)) +- Added learning rate finder ([#1347](https://github.com/Lightning-AI/pytorch-lightning/pull/1347)) +- Added support for DDP mode in clusters without SLURM ([#1387](https://github.com/Lightning-AI/pytorch-lightning/pull/1387)) +- Added `test_dataloaders` parameter to `Trainer.test()` ([#1434](https://github.com/Lightning-AI/pytorch-lightning/pull/1434)) +- Added `terminate_on_nan` flag to trainer that performs a NaN check with each training iteration when set to `True` ([#1475](https://github.com/Lightning-AI/pytorch-lightning/pull/1475)) +- Added speed parity tests (max 1 sec difference per epoch) ([#1482](https://github.com/Lightning-AI/pytorch-lightning/pull/1482)) +- Added `ddp_cpu` backend for testing ddp without GPUs ([#1158](https://github.com/Lightning-AI/pytorch-lightning/pull/1158)) +- Added [Horovod](http://horovod.ai) support as a distributed backend `Trainer(distributed_backend='horovod')` ([#1529](https://github.com/Lightning-AI/pytorch-lightning/pull/1529)) +- Added support for 8 core distributed training on Kaggle TPUs ([#1568](https://github.com/Lightning-AI/pytorch-lightning/pull/1568)) +- Added support for native AMP ([#1561](https://github.com/Lightning-AI/pytorch-lightning/pull/1561), + [#1580](https://github.com/Lightning-AI/pytorch-lightning/pull/1580)) ### Changed -- Changed the default behaviour to no longer include a NaN check with each training iteration ([#1475](https://github.com/Lightning-AI/lightning/pull/1475)) -- Decoupled the progress bar from trainer` it is a callback now and can be customized or even be replaced entirely ([#1450](https://github.com/Lightning-AI/lightning/pull/1450)). -- Changed lr schedule step interval behavior to update every backwards pass instead of every forwards pass ([#1477](https://github.com/Lightning-AI/lightning/pull/1477)) -- Defines shared proc. rank, remove rank from instances (e.g. 
loggers) ([#1408](https://github.com/Lightning-AI/lightning/pull/1408)) -- Updated semantic segmentation example with custom U-Net and logging ([#1371](https://github.com/Lightning-AI/lightning/pull/1371)) -- Disabled val and test shuffling ([#1600](https://github.com/Lightning-AI/lightning/pull/1600)) +- Changed the default behaviour to no longer include a NaN check with each training iteration ([#1475](https://github.com/Lightning-AI/pytorch-lightning/pull/1475)) +- Decoupled the progress bar from the trainer; it is a callback now and can be customized or even be replaced entirely ([#1450](https://github.com/Lightning-AI/pytorch-lightning/pull/1450)). +- Changed lr schedule step interval behavior to update every backwards pass instead of every forwards pass ([#1477](https://github.com/Lightning-AI/pytorch-lightning/pull/1477)) +- Defines shared proc. rank, remove rank from instances (e.g. loggers) ([#1408](https://github.com/Lightning-AI/pytorch-lightning/pull/1408)) +- Updated semantic segmentation example with custom U-Net and logging ([#1371](https://github.com/Lightning-AI/pytorch-lightning/pull/1371)) +- Disabled val and test shuffling ([#1600](https://github.com/Lightning-AI/pytorch-lightning/pull/1600)) ### Deprecated -- Deprecated `training_tqdm_dict` in favor of `progress_bar_dict` ([#1450](https://github.com/Lightning-AI/lightning/pull/1450)). +- Deprecated `training_tqdm_dict` in favor of `progress_bar_dict` ([#1450](https://github.com/Lightning-AI/pytorch-lightning/pull/1450)). ### Removed -- Removed `test_dataloaders` parameter from `Trainer.fit()` ([#1434](https://github.com/Lightning-AI/lightning/pull/1434)) +- Removed `test_dataloaders` parameter from `Trainer.fit()` ([#1434](https://github.com/Lightning-AI/pytorch-lightning/pull/1434)) ### Fixed -- Added the possibility to pass nested metrics dictionaries to loggers ([#1582](https://github.com/Lightning-AI/lightning/pull/1582)) -- Fixed memory leak from opt return ([#1528](https://github.com/Lightning-AI/lightning/pull/1528)) -- Fixed saving checkpoint before deleting old ones ([#1453](https://github.com/Lightning-AI/lightning/pull/1453)) -- Fixed loggers - flushing last logged metrics even before continue, e.g. `trainer.test()` results ([#1459](https://github.com/Lightning-AI/lightning/pull/1459)) -- Fixed optimizer configuration when `configure_optimizers` returns dict without `lr_scheduler` ([#1443](https://github.com/Lightning-AI/lightning/pull/1443)) -- Fixed `LightningModule` - mixing hparams and arguments in `LightningModule.__init__()` crashes load_from_checkpoint() ([#1505](https://github.com/Lightning-AI/lightning/pull/1505)) -- Added a missing call to the `on_before_zero_grad` model hook ([#1493](https://github.com/Lightning-AI/lightning/pull/1493)). -- Allow use of sweeps with `WandbLogger` ([#1512](https://github.com/Lightning-AI/lightning/pull/1512)) -- Fixed a bug that caused the `callbacks` Trainer argument to reference a global variable ([#1534](https://github.com/Lightning-AI/lightning/pull/1534)).
-- Fixed a bug that set all boolean CLI arguments from `Trainer.add_argparse_args` always to True ([#1571](https://github.com/Lightning-AI/lightning/pull/1571)) -- Fixed do not copy the batch when training on a single GPU ([#1576](https://github.com/Lightning-AI/lightning/pull/1576), - [#1579](https://github.com/Lightning-AI/lightning/pull/1579)) -- Fixed soft checkpoint removing on DDP ([#1408](https://github.com/Lightning-AI/lightning/pull/1408)) -- Fixed automatic parser bug ([#1585](https://github.com/Lightning-AI/lightning/pull/1585)) -- Fixed bool conversion from string ([#1606](https://github.com/Lightning-AI/lightning/pull/1606)) +- Added the possibility to pass nested metrics dictionaries to loggers ([#1582](https://github.com/Lightning-AI/pytorch-lightning/pull/1582)) +- Fixed memory leak from opt return ([#1528](https://github.com/Lightning-AI/pytorch-lightning/pull/1528)) +- Fixed saving checkpoint before deleting old ones ([#1453](https://github.com/Lightning-AI/pytorch-lightning/pull/1453)) +- Fixed loggers - flushing last logged metrics even before continue, e.g. `trainer.test()` results ([#1459](https://github.com/Lightning-AI/pytorch-lightning/pull/1459)) +- Fixed optimizer configuration when `configure_optimizers` returns dict without `lr_scheduler` ([#1443](https://github.com/Lightning-AI/pytorch-lightning/pull/1443)) +- Fixed `LightningModule` - mixing hparams and arguments in `LightningModule.__init__()` crashes load_from_checkpoint() ([#1505](https://github.com/Lightning-AI/pytorch-lightning/pull/1505)) +- Added a missing call to the `on_before_zero_grad` model hook ([#1493](https://github.com/Lightning-AI/pytorch-lightning/pull/1493)). +- Allow use of sweeps with `WandbLogger` ([#1512](https://github.com/Lightning-AI/pytorch-lightning/pull/1512)) +- Fixed a bug that caused the `callbacks` Trainer argument to reference a global variable ([#1534](https://github.com/Lightning-AI/pytorch-lightning/pull/1534)). 
+- Fixed a bug that set all boolean CLI arguments from `Trainer.add_argparse_args` always to True ([#1571](https://github.com/Lightning-AI/pytorch-lightning/pull/1571)) +- Fixed the batch being copied when training on a single GPU ([#1576](https://github.com/Lightning-AI/pytorch-lightning/pull/1576), + [#1579](https://github.com/Lightning-AI/pytorch-lightning/pull/1579)) +- Fixed soft checkpoint removing on DDP ([#1408](https://github.com/Lightning-AI/pytorch-lightning/pull/1408)) +- Fixed automatic parser bug ([#1585](https://github.com/Lightning-AI/pytorch-lightning/pull/1585)) +- Fixed bool conversion from string ([#1606](https://github.com/Lightning-AI/pytorch-lightning/pull/1606)) ## [0.7.3] - 2020-04-09 ### Added -- Added `rank_zero_warn` for warning only in rank 0 ([#1428](https://github.com/Lightning-AI/lightning/pull/1428)) +- Added `rank_zero_warn` for warning only in rank 0 ([#1428](https://github.com/Lightning-AI/pytorch-lightning/pull/1428)) ### Fixed -- Fixed default `DistributedSampler` for DDP training ([#1425](https://github.com/Lightning-AI/lightning/pull/1425)) -- Fixed workers warning not on windows ([#1430](https://github.com/Lightning-AI/lightning/pull/1430)) -- Fixed returning tuple from `run_training_batch` ([#1431](https://github.com/Lightning-AI/lightning/pull/1431)) -- Fixed gradient clipping ([#1438](https://github.com/Lightning-AI/lightning/pull/1438)) -- Fixed pretty print ([#1441](https://github.com/Lightning-AI/lightning/pull/1441)) +- Fixed default `DistributedSampler` for DDP training ([#1425](https://github.com/Lightning-AI/pytorch-lightning/pull/1425)) +- Fixed workers warning not on windows ([#1430](https://github.com/Lightning-AI/pytorch-lightning/pull/1430)) +- Fixed returning tuple from `run_training_batch` ([#1431](https://github.com/Lightning-AI/pytorch-lightning/pull/1431)) +- Fixed gradient clipping ([#1438](https://github.com/Lightning-AI/pytorch-lightning/pull/1438)) +- Fixed pretty print ([#1441](https://github.com/Lightning-AI/pytorch-lightning/pull/1441)) ## [0.7.2] - 2020-04-07 ### Added -- Added same step loggers' metrics aggregation ([#1278](https://github.com/Lightning-AI/lightning/pull/1278)) -- Added parity test between a vanilla MNIST model and lightning model ([#1284](https://github.com/Lightning-AI/lightning/pull/1284)) -- Added parity test between a vanilla RNN model and lightning model ([#1351](https://github.com/Lightning-AI/lightning/pull/1351)) -- Added Reinforcement Learning - Deep Q-network (DQN) lightning example ([#1232](https://github.com/Lightning-AI/lightning/pull/1232)) -- Added support for hierarchical `dict` ([#1152](https://github.com/Lightning-AI/lightning/pull/1152)) -- Added `TrainsLogger` class ([#1122](https://github.com/Lightning-AI/lightning/pull/1122)) -- Added type hints to `pl.core` ([#946](https://github.com/Lightning-AI/lightning/pull/946)) -- Added support for `IterableDataset` in validation and testing ([#1104](https://github.com/Lightning-AI/lightning/pull/1104)) -- Added support for non-primitive types in `hparams` for `TensorboardLogger` ([#1130](https://github.com/Lightning-AI/lightning/pull/1130)) -- Added a check that stops the training when loss or weights contain `NaN` or `inf` values. ([#1097](https://github.com/Lightning-AI/lightning/pull/1097)) -- Added support for `IterableDataset` when `val_check_interval=1.0` (default), this will trigger validation at the end of each epoch. ([#1283](https://github.com/Lightning-AI/lightning/pull/1283)) -- Added `summary` method to Profilers. 
([#1259](https://github.com/Lightning-AI/lightning/pull/1259)) -- Added informative errors if user defined dataloader has zero length ([#1280](https://github.com/Lightning-AI/lightning/pull/1280)) -- Added testing for python 3.8 ([#915](https://github.com/Lightning-AI/lightning/pull/915)) -- Added model configuration checking ([#1199](https://github.com/Lightning-AI/lightning/pull/1199)) -- Added support for optimizer frequencies through `LightningModule.configure_optimizers()` ([#1269](https://github.com/Lightning-AI/lightning/pull/1269)) -- Added option to run without an optimizer by returning `None` from `configure_optimizers`. ([#1279](https://github.com/Lightning-AI/lightning/pull/1279)) -- Added a warning when the number of data loader workers is small. ([#1378](https://github.com/Lightning-AI/lightning/pull/1378)) +- Added same step loggers' metrics aggregation ([#1278](https://github.com/Lightning-AI/pytorch-lightning/pull/1278)) +- Added parity test between a vanilla MNIST model and lightning model ([#1284](https://github.com/Lightning-AI/pytorch-lightning/pull/1284)) +- Added parity test between a vanilla RNN model and lightning model ([#1351](https://github.com/Lightning-AI/pytorch-lightning/pull/1351)) +- Added Reinforcement Learning - Deep Q-network (DQN) lightning example ([#1232](https://github.com/Lightning-AI/pytorch-lightning/pull/1232)) +- Added support for hierarchical `dict` ([#1152](https://github.com/Lightning-AI/pytorch-lightning/pull/1152)) +- Added `TrainsLogger` class ([#1122](https://github.com/Lightning-AI/pytorch-lightning/pull/1122)) +- Added type hints to `pl.core` ([#946](https://github.com/Lightning-AI/pytorch-lightning/pull/946)) +- Added support for `IterableDataset` in validation and testing ([#1104](https://github.com/Lightning-AI/pytorch-lightning/pull/1104)) +- Added support for non-primitive types in `hparams` for `TensorboardLogger` ([#1130](https://github.com/Lightning-AI/pytorch-lightning/pull/1130)) +- Added a check that stops the training when loss or weights contain `NaN` or `inf` values. ([#1097](https://github.com/Lightning-AI/pytorch-lightning/pull/1097)) +- Added support for `IterableDataset` when `val_check_interval=1.0` (default), this will trigger validation at the end of each epoch. ([#1283](https://github.com/Lightning-AI/pytorch-lightning/pull/1283)) +- Added `summary` method to Profilers. ([#1259](https://github.com/Lightning-AI/pytorch-lightning/pull/1259)) +- Added informative errors if user defined dataloader has zero length ([#1280](https://github.com/Lightning-AI/pytorch-lightning/pull/1280)) +- Added testing for python 3.8 ([#915](https://github.com/Lightning-AI/pytorch-lightning/pull/915)) +- Added model configuration checking ([#1199](https://github.com/Lightning-AI/pytorch-lightning/pull/1199)) +- Added support for optimizer frequencies through `LightningModule.configure_optimizers()` ([#1269](https://github.com/Lightning-AI/pytorch-lightning/pull/1269)) +- Added option to run without an optimizer by returning `None` from `configure_optimizers`. ([#1279](https://github.com/Lightning-AI/pytorch-lightning/pull/1279)) +- Added a warning when the number of data loader workers is small. ([#1378](https://github.com/Lightning-AI/pytorch-lightning/pull/1378)) ### Changed -- Changed (renamed and refatored) `TensorRunningMean` -> `TensorRunningAccum`: running accumulations were generalized. 
([#1278](https://github.com/Lightning-AI/lightning/pull/1278)) -- Changed `progress_bar_refresh_rate` trainer flag to disable progress bar when set to 0. ([#1108](https://github.com/Lightning-AI/lightning/pull/1108)) -- Enhanced `load_from_checkpoint` to also forward params to the model ([#1307](https://github.com/Lightning-AI/lightning/pull/1307)) -- Updated references to `self.forward()` to instead use the `__call__` interface. ([#1211](https://github.com/Lightning-AI/lightning/pull/1211)) -- Changed default behaviour of `configure_optimizers` to use no optimizer rather than Adam. ([#1279](https://github.com/Lightning-AI/lightning/pull/1279)) -- Allow to upload models on W&B ([#1339](https://github.com/Lightning-AI/lightning/pull/1339)) -- On DP and DDP2 unsqueeze is automated now ([#1319](https://github.com/Lightning-AI/lightning/pull/1319)) -- Did not always create a DataLoader during reinstantiation, but the same type as before (if subclass of DataLoader) ([#1346](https://github.com/Lightning-AI/lightning/pull/1346)) -- Did not interfere with a default sampler ([#1318](https://github.com/Lightning-AI/lightning/pull/1318)) -- Remove default Adam optimizer ([#1317](https://github.com/Lightning-AI/lightning/pull/1317)) -- Give warnings for unimplemented required lightning methods ([#1317](https://github.com/Lightning-AI/lightning/pull/1317)) -- Made `evaluate` method private >> `Trainer._evaluate(...)`. ([#1260](https://github.com/Lightning-AI/lightning/pull/1260)) -- Simplify the PL examples structure (shallower and more readable) ([#1247](https://github.com/Lightning-AI/lightning/pull/1247)) -- Changed min max gpu memory to be on their own plots ([#1358](https://github.com/Lightning-AI/lightning/pull/1358)) -- Remove `.item` which causes sync issues ([#1254](https://github.com/Lightning-AI/lightning/pull/1254)) -- Changed smoothing in TQDM to decrease variability of time remaining between training / eval ([#1194](https://github.com/Lightning-AI/lightning/pull/1194)) -- Change default logger to dedicated one ([#1064](https://github.com/Lightning-AI/lightning/pull/1064)) +- Changed (renamed and refactored) `TensorRunningMean` -> `TensorRunningAccum`: running accumulations were generalized. ([#1278](https://github.com/Lightning-AI/pytorch-lightning/pull/1278)) +- Changed `progress_bar_refresh_rate` trainer flag to disable progress bar when set to 0. ([#1108](https://github.com/Lightning-AI/pytorch-lightning/pull/1108)) +- Enhanced `load_from_checkpoint` to also forward params to the model ([#1307](https://github.com/Lightning-AI/pytorch-lightning/pull/1307)) +- Updated references to `self.forward()` to instead use the `__call__` interface. ([#1211](https://github.com/Lightning-AI/pytorch-lightning/pull/1211)) +- Changed default behaviour of `configure_optimizers` to use no optimizer rather than Adam. 
([#1279](https://github.com/Lightning-AI/pytorch-lightning/pull/1279)) +- Allow to upload models on W&B ([#1339](https://github.com/Lightning-AI/pytorch-lightning/pull/1339)) +- On DP and DDP2 unsqueeze is automated now ([#1319](https://github.com/Lightning-AI/pytorch-lightning/pull/1319)) +- Did not always create a DataLoader during reinstantiation, but the same type as before (if subclass of DataLoader) ([#1346](https://github.com/Lightning-AI/pytorch-lightning/pull/1346)) +- Did not interfere with a default sampler ([#1318](https://github.com/Lightning-AI/pytorch-lightning/pull/1318)) +- Remove default Adam optimizer ([#1317](https://github.com/Lightning-AI/pytorch-lightning/pull/1317)) +- Give warnings for unimplemented required lightning methods ([#1317](https://github.com/Lightning-AI/pytorch-lightning/pull/1317)) +- Made `evaluate` method private >> `Trainer._evaluate(...)`. ([#1260](https://github.com/Lightning-AI/pytorch-lightning/pull/1260)) +- Simplify the PL examples structure (shallower and more readable) ([#1247](https://github.com/Lightning-AI/pytorch-lightning/pull/1247)) +- Changed min max gpu memory to be on their own plots ([#1358](https://github.com/Lightning-AI/pytorch-lightning/pull/1358)) +- Remove `.item` which causes sync issues ([#1254](https://github.com/Lightning-AI/pytorch-lightning/pull/1254)) +- Changed smoothing in TQDM to decrease variability of time remaining between training / eval ([#1194](https://github.com/Lightning-AI/pytorch-lightning/pull/1194)) +- Change default logger to dedicated one ([#1064](https://github.com/Lightning-AI/pytorch-lightning/pull/1064)) ### Deprecated -- Deprecated Trainer argument `print_nan_grads` ([#1097](https://github.com/Lightning-AI/lightning/pull/1097)) -- Deprecated Trainer argument `show_progress_bar` ([#1108](https://github.com/Lightning-AI/lightning/pull/1108)) +- Deprecated Trainer argument `print_nan_grads` ([#1097](https://github.com/Lightning-AI/pytorch-lightning/pull/1097)) +- Deprecated Trainer argument `show_progress_bar` ([#1108](https://github.com/Lightning-AI/pytorch-lightning/pull/1108)) ### Removed -- Removed test for no test dataloader in .fit ([#1495](https://github.com/Lightning-AI/lightning/pull/1495)) -- Removed duplicated module `pl.utilities.arg_parse` for loading CLI arguments ([#1167](https://github.com/Lightning-AI/lightning/pull/1167)) -- Removed wandb logger's `finalize` method ([#1193](https://github.com/Lightning-AI/lightning/pull/1193)) -- Dropped `torchvision` dependency in tests and added own MNIST dataset class instead ([#986](https://github.com/Lightning-AI/lightning/pull/986)) +- Removed test for no test dataloader in .fit ([#1495](https://github.com/Lightning-AI/pytorch-lightning/pull/1495)) +- Removed duplicated module `pl.utilities.arg_parse` for loading CLI arguments ([#1167](https://github.com/Lightning-AI/pytorch-lightning/pull/1167)) +- Removed wandb logger's `finalize` method ([#1193](https://github.com/Lightning-AI/pytorch-lightning/pull/1193)) +- Dropped `torchvision` dependency in tests and added own MNIST dataset class instead ([#986](https://github.com/Lightning-AI/pytorch-lightning/pull/986)) ### Fixed -- Fixed `model_checkpoint` when saving all models ([#1359](https://github.com/Lightning-AI/lightning/pull/1359)) -- `Trainer.add_argparse_args` classmethod fixed. 
Now it adds a type for the arguments ([#1147](https://github.com/Lightning-AI/lightning/pull/1147)) -- Fixed bug related to type checking of `ReduceLROnPlateau` lr schedulers([#1126](https://github.com/Lightning-AI/lightning/pull/1126)) -- Fixed a bug to ensure lightning checkpoints to be backward compatible ([#1132](https://github.com/Lightning-AI/lightning/pull/1132)) -- Fixed a bug that created an extra dataloader with active `reload_dataloaders_every_epoch` ([#1196](https://github.com/Lightning-AI/lightning/pull/1196)) -- Fixed all warnings and errors in the docs build process ([#1191](https://github.com/Lightning-AI/lightning/pull/1191)) -- Fixed an issue where `val_percent_check=0` would not disable validation ([#1251](https://github.com/Lightning-AI/lightning/pull/1251)) -- Fixed average of incomplete `TensorRunningMean` ([#1309](https://github.com/Lightning-AI/lightning/pull/1309)) -- Fixed `WandbLogger.watch` with `wandb.init()` ([#1311](https://github.com/Lightning-AI/lightning/pull/1311)) -- Fixed an issue with early stopping that would prevent it from monitoring training metrics when validation is disabled / not implemented ([#1235](https://github.com/Lightning-AI/lightning/pull/1235)). -- Fixed a bug that would cause `trainer.test()` to run on the validation set when overloading `validation_epoch_end` and `test_end` ([#1353](https://github.com/Lightning-AI/lightning/pull/1353)) -- Fixed `WandbLogger.watch` - use of the watch method without importing `wandb` ([#1311](https://github.com/Lightning-AI/lightning/pull/1311)) -- Fixed `WandbLogger` to be used with 'ddp' - allow reinits in sub-processes ([#1149](https://github.com/Lightning-AI/lightning/pull/1149), - [#1360](https://github.com/Lightning-AI/lightning/pull/1360)) -- Made `training_epoch_end` behave like `validation_epoch_end` ([#1357](https://github.com/Lightning-AI/lightning/pull/1357)) -- Fixed `fast_dev_run` running validation twice ([#1365](https://github.com/Lightning-AI/lightning/pull/1365)) -- Fixed pickle error from quick patch `__code__` ([#1352](https://github.com/Lightning-AI/lightning/pull/1352)) -- Fixed memory leak on GPU0 ([#1094](https://github.com/Lightning-AI/lightning/pull/1094), - [#1349](https://github.com/Lightning-AI/lightning/pull/1349)) -- Fixed checkpointing interval ([#1272](https://github.com/Lightning-AI/lightning/pull/1272)) -- Fixed validation and training loops run the partial dataset ([#1192](https://github.com/Lightning-AI/lightning/pull/1192)) -- Fixed running `on_validation_end` only on main process in DDP ([#1125](https://github.com/Lightning-AI/lightning/pull/1125)) -- Fixed `load_spawn_weights` only in proc rank 0 ([#1385](https://github.com/Lightning-AI/lightning/pull/1385)) -- Fixes using deprecated `use_amp` attribute ([#1145](https://github.com/Lightning-AI/lightning/pull/1145)) -- Fixed Tensorboard logger error: lightning_logs directory not exists in multi-node DDP on nodes with rank != 0 ([#1377](https://github.com/Lightning-AI/lightning/pull/1377)) -- Fixed `Unimplemented backend XLA` error on TPU ([#1387](https://github.com/Lightning-AI/lightning/pull/1387)) +- Fixed `model_checkpoint` when saving all models ([#1359](https://github.com/Lightning-AI/pytorch-lightning/pull/1359)) +- `Trainer.add_argparse_args` classmethod fixed. 
Now it adds a type for the arguments ([#1147](https://github.com/Lightning-AI/pytorch-lightning/pull/1147)) +- Fixed bug related to type checking of `ReduceLROnPlateau` lr schedulers([#1126](https://github.com/Lightning-AI/pytorch-lightning/pull/1126)) +- Fixed a bug to ensure lightning checkpoints to be backward compatible ([#1132](https://github.com/Lightning-AI/pytorch-lightning/pull/1132)) +- Fixed a bug that created an extra dataloader with active `reload_dataloaders_every_epoch` ([#1196](https://github.com/Lightning-AI/pytorch-lightning/pull/1196)) +- Fixed all warnings and errors in the docs build process ([#1191](https://github.com/Lightning-AI/pytorch-lightning/pull/1191)) +- Fixed an issue where `val_percent_check=0` would not disable validation ([#1251](https://github.com/Lightning-AI/pytorch-lightning/pull/1251)) +- Fixed average of incomplete `TensorRunningMean` ([#1309](https://github.com/Lightning-AI/pytorch-lightning/pull/1309)) +- Fixed `WandbLogger.watch` with `wandb.init()` ([#1311](https://github.com/Lightning-AI/pytorch-lightning/pull/1311)) +- Fixed an issue with early stopping that would prevent it from monitoring training metrics when validation is disabled / not implemented ([#1235](https://github.com/Lightning-AI/pytorch-lightning/pull/1235)). +- Fixed a bug that would cause `trainer.test()` to run on the validation set when overloading `validation_epoch_end` and `test_end` ([#1353](https://github.com/Lightning-AI/pytorch-lightning/pull/1353)) +- Fixed `WandbLogger.watch` - use of the watch method without importing `wandb` ([#1311](https://github.com/Lightning-AI/pytorch-lightning/pull/1311)) +- Fixed `WandbLogger` to be used with 'ddp' - allow reinits in sub-processes ([#1149](https://github.com/Lightning-AI/pytorch-lightning/pull/1149), + [#1360](https://github.com/Lightning-AI/pytorch-lightning/pull/1360)) +- Made `training_epoch_end` behave like `validation_epoch_end` ([#1357](https://github.com/Lightning-AI/pytorch-lightning/pull/1357)) +- Fixed `fast_dev_run` running validation twice ([#1365](https://github.com/Lightning-AI/pytorch-lightning/pull/1365)) +- Fixed pickle error from quick patch `__code__` ([#1352](https://github.com/Lightning-AI/pytorch-lightning/pull/1352)) +- Fixed memory leak on GPU0 ([#1094](https://github.com/Lightning-AI/pytorch-lightning/pull/1094), + [#1349](https://github.com/Lightning-AI/pytorch-lightning/pull/1349)) +- Fixed checkpointing interval ([#1272](https://github.com/Lightning-AI/pytorch-lightning/pull/1272)) +- Fixed validation and training loops run the partial dataset ([#1192](https://github.com/Lightning-AI/pytorch-lightning/pull/1192)) +- Fixed running `on_validation_end` only on main process in DDP ([#1125](https://github.com/Lightning-AI/pytorch-lightning/pull/1125)) +- Fixed `load_spawn_weights` only in proc rank 0 ([#1385](https://github.com/Lightning-AI/pytorch-lightning/pull/1385)) +- Fixes using deprecated `use_amp` attribute ([#1145](https://github.com/Lightning-AI/pytorch-lightning/pull/1145)) +- Fixed Tensorboard logger error: lightning_logs directory not exists in multi-node DDP on nodes with rank != 0 ([#1377](https://github.com/Lightning-AI/pytorch-lightning/pull/1377)) +- Fixed `Unimplemented backend XLA` error on TPU ([#1387](https://github.com/Lightning-AI/pytorch-lightning/pull/1387)) ## [0.7.1] - 2020-03-07 ### Fixed -- Fixes `print` issues and `data_loader` ([#1080](https://github.com/Lightning-AI/lightning/pull/1080)) +- Fixes `print` issues and `data_loader` 
([#1080](https://github.com/Lightning-AI/pytorch-lightning/pull/1080)) ## [0.7.0] - 2020-03-06 ### Added -- Added automatic sampler setup. Depending on DDP or TPU, lightning configures the sampler correctly (user needs to do nothing) ([#926](https://github.com/Lightning-AI/lightning/pull/926)) -- Added `reload_dataloaders_every_epoch=False` flag for trainer. Some users require reloading data every epoch ([#926](https://github.com/Lightning-AI/lightning/pull/926)) -- Added `progress_bar_refresh_rate=50` flag for trainer. Throttle refresh rate on notebooks ([#926](https://github.com/Lightning-AI/lightning/pull/926)) +- Added automatic sampler setup. Depending on DDP or TPU, lightning configures the sampler correctly (user needs to do nothing) ([#926](https://github.com/Lightning-AI/pytorch-lightning/pull/926)) +- Added `reload_dataloaders_every_epoch=False` flag for trainer. Some users require reloading data every epoch ([#926](https://github.com/Lightning-AI/pytorch-lightning/pull/926)) +- Added `progress_bar_refresh_rate=50` flag for trainer. Throttle refresh rate on notebooks ([#926](https://github.com/Lightning-AI/pytorch-lightning/pull/926)) - Updated governance docs -- Added a check to ensure that the metric used for early stopping exists before training commences ([#542](https://github.com/Lightning-AI/lightning/pull/542)) -- Added `optimizer_idx` argument to `backward` hook ([#733](https://github.com/Lightning-AI/lightning/pull/733)) -- Added `entity` argument to `WandbLogger` to be passed to `wandb.init` ([#783](https://github.com/Lightning-AI/lightning/pull/783)) -- Added a tool for profiling training runs ([#782](https://github.com/Lightning-AI/lightning/pull/782)) -- Improved flexibility for naming of TensorBoard logs, can now set `version` to a `str` to just save to that directory, and use `name=''` to prevent experiment-name directory ([#804](https://github.com/Lightning-AI/lightning/pull/804)) -- Added option to specify `step` key when logging metrics ([#808](https://github.com/Lightning-AI/lightning/pull/808)) -- Added `train_dataloader`, `val_dataloader` and `test_dataloader` arguments to `Trainer.fit()`, for alternative data parsing ([#759](https://github.com/Lightning-AI/lightning/pull/759)) -- Added Tensor Processing Unit (TPU) support ([#868](https://github.com/Lightning-AI/lightning/pull/868)) -- Added semantic segmentation example ([#751](https://github.com/Lightning-AI/lightning/pull/751),[#876](https://github.com/Lightning-AI/lightning/pull/876), - [#881](https://github.com/Lightning-AI/lightning/pull/881)) -- Split callbacks in multiple files ([#849](https://github.com/Lightning-AI/lightning/pull/849)) -- Support for user defined callbacks ([#889](https://github.com/Lightning-AI/lightning/pull/889) and [#950](https://github.com/Lightning-AI/lightning/pull/950)) -- Added support for multiple loggers to be passed to `Trainer` as an iterable (e.g. list, tuple, etc.) ([#903](https://github.com/Lightning-AI/lightning/pull/903)) -- Added support for step-based learning rate scheduling ([#941](https://github.com/Lightning-AI/lightning/pull/941)) -- Added support for logging `hparams` as dict ([#1029](https://github.com/Lightning-AI/lightning/pull/1029)) -- Checkpoint and early stopping now work without val. 
step ([#1041](https://github.com/Lightning-AI/lightning/pull/1041)) -- Support graceful training cleanup after Keyboard Interrupt ([#856](https://github.com/Lightning-AI/lightning/pull/856), - [#1019](https://github.com/Lightning-AI/lightning/pull/1019)) -- Added type hints for function arguments ([#912](https://github.com/Lightning-AI/lightning/pull/912), ) -- Added default `argparser` for `Trainer` ([#952](https://github.com/Lightning-AI/lightning/pull/1023), - [#1023](https://github.com/Lightning-AI/lightning/pull/1023)) -- Added TPU gradient clipping ([#963](https://github.com/Lightning-AI/lightning/pull/963)) -- Added max/min number of steps in `Trainer` ([#728](https://github.com/Lightning-AI/lightning/pull/728)) +- Added a check to ensure that the metric used for early stopping exists before training commences ([#542](https://github.com/Lightning-AI/pytorch-lightning/pull/542)) +- Added `optimizer_idx` argument to `backward` hook ([#733](https://github.com/Lightning-AI/pytorch-lightning/pull/733)) +- Added `entity` argument to `WandbLogger` to be passed to `wandb.init` ([#783](https://github.com/Lightning-AI/pytorch-lightning/pull/783)) +- Added a tool for profiling training runs ([#782](https://github.com/Lightning-AI/pytorch-lightning/pull/782)) +- Improved flexibility for naming of TensorBoard logs, can now set `version` to a `str` to just save to that directory, and use `name=''` to prevent experiment-name directory ([#804](https://github.com/Lightning-AI/pytorch-lightning/pull/804)) +- Added option to specify `step` key when logging metrics ([#808](https://github.com/Lightning-AI/pytorch-lightning/pull/808)) +- Added `train_dataloader`, `val_dataloader` and `test_dataloader` arguments to `Trainer.fit()`, for alternative data parsing ([#759](https://github.com/Lightning-AI/pytorch-lightning/pull/759)) +- Added Tensor Processing Unit (TPU) support ([#868](https://github.com/Lightning-AI/pytorch-lightning/pull/868)) +- Added semantic segmentation example ([#751](https://github.com/Lightning-AI/pytorch-lightning/pull/751),[#876](https://github.com/Lightning-AI/pytorch-lightning/pull/876), + [#881](https://github.com/Lightning-AI/pytorch-lightning/pull/881)) +- Split callbacks in multiple files ([#849](https://github.com/Lightning-AI/pytorch-lightning/pull/849)) +- Support for user defined callbacks ([#889](https://github.com/Lightning-AI/pytorch-lightning/pull/889) and [#950](https://github.com/Lightning-AI/pytorch-lightning/pull/950)) +- Added support for multiple loggers to be passed to `Trainer` as an iterable (e.g. list, tuple, etc.) ([#903](https://github.com/Lightning-AI/pytorch-lightning/pull/903)) +- Added support for step-based learning rate scheduling ([#941](https://github.com/Lightning-AI/pytorch-lightning/pull/941)) +- Added support for logging `hparams` as dict ([#1029](https://github.com/Lightning-AI/pytorch-lightning/pull/1029)) +- Checkpoint and early stopping now work without val. 
step ([#1041](https://github.com/Lightning-AI/pytorch-lightning/pull/1041)) +- Support graceful training cleanup after Keyboard Interrupt ([#856](https://github.com/Lightning-AI/pytorch-lightning/pull/856), + [#1019](https://github.com/Lightning-AI/pytorch-lightning/pull/1019)) +- Added type hints for function arguments ([#912](https://github.com/Lightning-AI/pytorch-lightning/pull/912), ) +- Added default `argparser` for `Trainer` ([#952](https://github.com/Lightning-AI/pytorch-lightning/pull/1023), + [#1023](https://github.com/Lightning-AI/pytorch-lightning/pull/1023)) +- Added TPU gradient clipping ([#963](https://github.com/Lightning-AI/pytorch-lightning/pull/963)) +- Added max/min number of steps in `Trainer` ([#728](https://github.com/Lightning-AI/pytorch-lightning/pull/728)) ### Changed -- Improved `NeptuneLogger` by adding `close_after_fit` argument to allow logging after training([#908](https://github.com/Lightning-AI/lightning/pull/1084)) -- Changed default TQDM to use `tqdm.auto` for prettier outputs in IPython notebooks ([#752](https://github.com/Lightning-AI/lightning/pull/752)) -- Changed `pl.logging` to `pl.loggers` ([#767](https://github.com/Lightning-AI/lightning/pull/767)) -- Moved the default `tqdm_dict` definition from Trainer to `LightningModule`, so it can be overridden by the user ([#749](https://github.com/Lightning-AI/lightning/pull/749)) -- Moved functionality of `LightningModule.load_from_metrics` into `LightningModule.load_from_checkpoint` ([#995](https://github.com/Lightning-AI/lightning/pull/995)) -- Changed Checkpoint path parameter from `filepath` to `dirpath` ([#1016](https://github.com/Lightning-AI/lightning/pull/1016)) -- Freezed models `hparams` as `Namespace` property ([#1029](https://github.com/Lightning-AI/lightning/pull/1029)) -- Dropped `logging` config in package init ([#1015](https://github.com/Lightning-AI/lightning/pull/1015)) -- Renames model steps ([#1051](https://github.com/Lightning-AI/lightning/pull/1051)) +- Improved `NeptuneLogger` by adding `close_after_fit` argument to allow logging after training([#908](https://github.com/Lightning-AI/pytorch-lightning/pull/1084)) +- Changed default TQDM to use `tqdm.auto` for prettier outputs in IPython notebooks ([#752](https://github.com/Lightning-AI/pytorch-lightning/pull/752)) +- Changed `pl.logging` to `pl.loggers` ([#767](https://github.com/Lightning-AI/pytorch-lightning/pull/767)) +- Moved the default `tqdm_dict` definition from Trainer to `LightningModule`, so it can be overridden by the user ([#749](https://github.com/Lightning-AI/pytorch-lightning/pull/749)) +- Moved functionality of `LightningModule.load_from_metrics` into `LightningModule.load_from_checkpoint` ([#995](https://github.com/Lightning-AI/pytorch-lightning/pull/995)) +- Changed Checkpoint path parameter from `filepath` to `dirpath` ([#1016](https://github.com/Lightning-AI/pytorch-lightning/pull/1016)) +- Freezed models `hparams` as `Namespace` property ([#1029](https://github.com/Lightning-AI/pytorch-lightning/pull/1029)) +- Dropped `logging` config in package init ([#1015](https://github.com/Lightning-AI/pytorch-lightning/pull/1015)) +- Renames model steps ([#1051](https://github.com/Lightning-AI/pytorch-lightning/pull/1051)) - `training_end` >> `training_epoch_end` - `validation_end` >> `validation_epoch_end` - `test_end` >> `test_epoch_end` -- Refactor dataloading, supports infinite dataloader ([#955](https://github.com/Lightning-AI/lightning/pull/955)) -- Create single file in `TensorBoardLogger` 
([#777](https://github.com/Lightning-AI/lightning/pull/777)) +- Refactor dataloading, supports infinite dataloader ([#955](https://github.com/Lightning-AI/pytorch-lightning/pull/955)) +- Create single file in `TensorBoardLogger` ([#777](https://github.com/Lightning-AI/pytorch-lightning/pull/777)) ### Deprecated -- Deprecated `pl.logging` ([#767](https://github.com/Lightning-AI/lightning/pull/767)) -- Deprecated `LightningModule.load_from_metrics` in favour of `LightningModule.load_from_checkpoint` ([#995](https://github.com/Lightning-AI/lightning/pull/995), - [#1079](https://github.com/Lightning-AI/lightning/pull/1079)) -- Deprecated `@data_loader` decorator ([#926](https://github.com/Lightning-AI/lightning/pull/926)) -- Deprecated model steps `training_end`, `validation_end` and `test_end` ([#1051](https://github.com/Lightning-AI/lightning/pull/1051), - [#1056](https://github.com/Lightning-AI/lightning/pull/1056)) +- Deprecated `pl.logging` ([#767](https://github.com/Lightning-AI/pytorch-lightning/pull/767)) +- Deprecated `LightningModule.load_from_metrics` in favour of `LightningModule.load_from_checkpoint` ([#995](https://github.com/Lightning-AI/pytorch-lightning/pull/995), + [#1079](https://github.com/Lightning-AI/pytorch-lightning/pull/1079)) +- Deprecated `@data_loader` decorator ([#926](https://github.com/Lightning-AI/pytorch-lightning/pull/926)) +- Deprecated model steps `training_end`, `validation_end` and `test_end` ([#1051](https://github.com/Lightning-AI/pytorch-lightning/pull/1051), + [#1056](https://github.com/Lightning-AI/pytorch-lightning/pull/1056)) ### Removed -- Removed dependency on `pandas` ([#736](https://github.com/Lightning-AI/lightning/pull/736)) -- Removed dependency on `torchvision` ([#797](https://github.com/Lightning-AI/lightning/pull/797)) -- Removed dependency on `scikit-learn` ([#801](https://github.com/Lightning-AI/lightning/pull/801)) +- Removed dependency on `pandas` ([#736](https://github.com/Lightning-AI/pytorch-lightning/pull/736)) +- Removed dependency on `torchvision` ([#797](https://github.com/Lightning-AI/pytorch-lightning/pull/797)) +- Removed dependency on `scikit-learn` ([#801](https://github.com/Lightning-AI/pytorch-lightning/pull/801)) ### Fixed -- Fixed a bug where early stopping `on_end_epoch` would be called inconsistently when `check_val_every_n_epoch == 0` ([#743](https://github.com/Lightning-AI/lightning/pull/743)) -- Fixed a bug where the model checkpointer didn't write to the same directory as the logger ([#771](https://github.com/Lightning-AI/lightning/pull/771)) -- Fixed a bug where the `TensorBoardLogger` class would create an additional empty log file during fitting ([#777](https://github.com/Lightning-AI/lightning/pull/777)) -- Fixed a bug where `global_step` was advanced incorrectly when using `accumulate_grad_batches > 1` ([#832](https://github.com/Lightning-AI/lightning/pull/832)) -- Fixed a bug when calling `self.logger.experiment` with multiple loggers ([#1009](https://github.com/Lightning-AI/lightning/pull/1009)) -- Fixed a bug when calling `logger.append_tags` on a `NeptuneLogger` with a single tag ([#1009](https://github.com/Lightning-AI/lightning/pull/1009)) -- Fixed sending back data from `.spawn` by saving and loading the trained model in/out of the process ([#1017](https://github.com/Lightning-AI/lightning/pull/1017) -- Fixed port collision on DDP ([#1010](https://github.com/Lightning-AI/lightning/pull/1010)) -- Fixed/tested pass overrides ([#918](https://github.com/Lightning-AI/lightning/pull/918)) -- Fixed comet 
logger to log after train ([#892](https://github.com/Lightning-AI/lightning/pull/892))
-- Remove deprecated args to learning rate step function ([#890](https://github.com/Lightning-AI/lightning/pull/890))
+- Fixed a bug where early stopping `on_end_epoch` would be called inconsistently when `check_val_every_n_epoch == 0` ([#743](https://github.com/Lightning-AI/pytorch-lightning/pull/743))
+- Fixed a bug where the model checkpointer didn't write to the same directory as the logger ([#771](https://github.com/Lightning-AI/pytorch-lightning/pull/771))
+- Fixed a bug where the `TensorBoardLogger` class would create an additional empty log file during fitting ([#777](https://github.com/Lightning-AI/pytorch-lightning/pull/777))
+- Fixed a bug where `global_step` was advanced incorrectly when using `accumulate_grad_batches > 1` ([#832](https://github.com/Lightning-AI/pytorch-lightning/pull/832))
+- Fixed a bug when calling `self.logger.experiment` with multiple loggers ([#1009](https://github.com/Lightning-AI/pytorch-lightning/pull/1009))
+- Fixed a bug when calling `logger.append_tags` on a `NeptuneLogger` with a single tag ([#1009](https://github.com/Lightning-AI/pytorch-lightning/pull/1009))
+- Fixed sending back data from `.spawn` by saving and loading the trained model in/out of the process ([#1017](https://github.com/Lightning-AI/pytorch-lightning/pull/1017))
+- Fixed port collision on DDP ([#1010](https://github.com/Lightning-AI/pytorch-lightning/pull/1010))
+- Fixed/tested pass overrides ([#918](https://github.com/Lightning-AI/pytorch-lightning/pull/918))
+- Fixed comet logger to log after train ([#892](https://github.com/Lightning-AI/pytorch-lightning/pull/892))
+- Remove deprecated args to learning rate step function ([#890](https://github.com/Lightning-AI/pytorch-lightning/pull/890))

 ## [0.6.0] - 2020-01-21

 ### Added

-- Added support for resuming from a specific checkpoint via `resume_from_checkpoint` argument ([#516](https://github.com/Lightning-AI/lightning/pull/516))
-- Added support for `ReduceLROnPlateau` scheduler ([#320](https://github.com/Lightning-AI/lightning/pull/320))
-- Added support for Apex mode `O2` in conjunction with Data Parallel ([#493](https://github.com/Lightning-AI/lightning/pull/493))
-- Added option (`save_top_k`) to save the top k models in the `ModelCheckpoint` class ([#128](https://github.com/Lightning-AI/lightning/pull/128))
-- Added `on_train_start` and `on_train_end` hooks to `ModelHooks` ([#598](https://github.com/Lightning-AI/lightning/pull/598))
-- Added `TensorBoardLogger` ([#607](https://github.com/Lightning-AI/lightning/pull/607))
-- Added support for weight summary of model with multiple inputs ([#543](https://github.com/Lightning-AI/lightning/pull/543))
-- Added `map_location` argument to `load_from_metrics` and `load_from_checkpoint` ([#625](https://github.com/Lightning-AI/lightning/pull/625))
-- Added option to disable validation by setting `val_percent_check=0` ([#649](https://github.com/Lightning-AI/lightning/pull/649))
-- Added `NeptuneLogger` class ([#648](https://github.com/Lightning-AI/lightning/pull/648))
-- Added `WandbLogger` class ([#627](https://github.com/Lightning-AI/lightning/pull/627))
+- Added support for resuming from a specific checkpoint via `resume_from_checkpoint` argument ([#516](https://github.com/Lightning-AI/pytorch-lightning/pull/516))
+- Added support for `ReduceLROnPlateau` scheduler ([#320](https://github.com/Lightning-AI/pytorch-lightning/pull/320))
+- Added support for Apex mode `O2` in conjunction with Data Parallel
([#493](https://github.com/Lightning-AI/pytorch-lightning/pull/493)) +- Added option (`save_top_k`) to save the top k models in the `ModelCheckpoint` class ([#128](https://github.com/Lightning-AI/pytorch-lightning/pull/128)) +- Added `on_train_start` and `on_train_end` hooks to `ModelHooks` ([#598](https://github.com/Lightning-AI/pytorch-lightning/pull/598)) +- Added `TensorBoardLogger` ([#607](https://github.com/Lightning-AI/pytorch-lightning/pull/607)) +- Added support for weight summary of model with multiple inputs ([#543](https://github.com/Lightning-AI/pytorch-lightning/pull/543)) +- Added `map_location` argument to `load_from_metrics` and `load_from_checkpoint` ([#625](https://github.com/Lightning-AI/pytorch-lightning/pull/625)) +- Added option to disable validation by setting `val_percent_check=0` ([#649](https://github.com/Lightning-AI/pytorch-lightning/pull/649)) +- Added `NeptuneLogger` class ([#648](https://github.com/Lightning-AI/pytorch-lightning/pull/648)) +- Added `WandbLogger` class ([#627](https://github.com/Lightning-AI/pytorch-lightning/pull/627)) ### Changed -- Changed the default progress bar to print to stdout instead of stderr ([#531](https://github.com/Lightning-AI/lightning/pull/531)) -- Renamed `step_idx` to `step`, `epoch_idx` to `epoch`, `max_num_epochs` to `max_epochs` and `min_num_epochs` to `min_epochs` ([#589](https://github.com/Lightning-AI/lightning/pull/589)) -- Renamed `total_batch_nb` to `total_batches`, `nb_val_batches` to `num_val_batches`, `nb_training_batches` to `num_training_batches`, `max_nb_epochs` to `max_epochs`, `min_nb_epochs` to `min_epochs`, `nb_test_batches` to `num_test_batches`, and `nb_val_batches` to `num_val_batches` ([#567](https://github.com/Lightning-AI/lightning/pull/567)) -- Changed gradient logging to use parameter names instead of indexes ([#660](https://github.com/Lightning-AI/lightning/pull/660)) -- Changed the default logger to `TensorBoardLogger` ([#609](https://github.com/Lightning-AI/lightning/pull/609)) -- Changed the directory for tensorboard logging to be the same as model checkpointing ([#706](https://github.com/Lightning-AI/lightning/pull/706)) +- Changed the default progress bar to print to stdout instead of stderr ([#531](https://github.com/Lightning-AI/pytorch-lightning/pull/531)) +- Renamed `step_idx` to `step`, `epoch_idx` to `epoch`, `max_num_epochs` to `max_epochs` and `min_num_epochs` to `min_epochs` ([#589](https://github.com/Lightning-AI/pytorch-lightning/pull/589)) +- Renamed `total_batch_nb` to `total_batches`, `nb_val_batches` to `num_val_batches`, `nb_training_batches` to `num_training_batches`, `max_nb_epochs` to `max_epochs`, `min_nb_epochs` to `min_epochs`, `nb_test_batches` to `num_test_batches`, and `nb_val_batches` to `num_val_batches` ([#567](https://github.com/Lightning-AI/pytorch-lightning/pull/567)) +- Changed gradient logging to use parameter names instead of indexes ([#660](https://github.com/Lightning-AI/pytorch-lightning/pull/660)) +- Changed the default logger to `TensorBoardLogger` ([#609](https://github.com/Lightning-AI/pytorch-lightning/pull/609)) +- Changed the directory for tensorboard logging to be the same as model checkpointing ([#706](https://github.com/Lightning-AI/pytorch-lightning/pull/706)) ### Deprecated -- Deprecated `max_nb_epochs` and `min_nb_epochs` ([#567](https://github.com/Lightning-AI/lightning/pull/567)) -- Deprecated the `on_sanity_check_start` hook in `ModelHooks` ([#598](https://github.com/Lightning-AI/lightning/pull/598)) +- Deprecated `max_nb_epochs` and 
`min_nb_epochs` ([#567](https://github.com/Lightning-AI/pytorch-lightning/pull/567)) +- Deprecated the `on_sanity_check_start` hook in `ModelHooks` ([#598](https://github.com/Lightning-AI/pytorch-lightning/pull/598)) ### Removed -- Removed the `save_best_only` argument from `ModelCheckpoint`, use `save_top_k=1` instead ([#128](https://github.com/Lightning-AI/lightning/pull/128)) +- Removed the `save_best_only` argument from `ModelCheckpoint`, use `save_top_k=1` instead ([#128](https://github.com/Lightning-AI/pytorch-lightning/pull/128)) ### Fixed -- Fixed a bug which occurred when using Adagrad with cuda ([#554](https://github.com/Lightning-AI/lightning/pull/554)) -- Fixed a bug where training would be on the GPU despite setting `gpus=0` or `gpus=[]` ([#561](https://github.com/Lightning-AI/lightning/pull/561)) -- Fixed an error with `print_nan_gradients` when some parameters do not require gradient ([#579](https://github.com/Lightning-AI/lightning/pull/579)) -- Fixed a bug where the progress bar would show an incorrect number of total steps during the validation sanity check when using multiple validation data loaders ([#597](https://github.com/Lightning-AI/lightning/pull/597)) -- Fixed support for PyTorch 1.1.0 ([#552](https://github.com/Lightning-AI/lightning/pull/552)) -- Fixed an issue with early stopping when using a `val_check_interval < 1.0` in `Trainer` ([#492](https://github.com/Lightning-AI/lightning/pull/492)) -- Fixed bugs relating to the `CometLogger` object that would cause it to not work properly ([#481](https://github.com/Lightning-AI/lightning/pull/481)) -- Fixed a bug that would occur when returning `-1` from `on_batch_start` following an early exit or when the batch was `None` ([#509](https://github.com/Lightning-AI/lightning/pull/509)) -- Fixed a potential race condition with several processes trying to create checkpoint directories ([#530](https://github.com/Lightning-AI/lightning/pull/530)) -- Fixed a bug where batch 'segments' would remain on the GPU when using `truncated_bptt > 1` ([#532](https://github.com/Lightning-AI/lightning/pull/532)) -- Fixed a bug when using `IterableDataset` ([#547](https://github.com/Lightning-AI/lightning/pull/547)) -- Fixed a bug where `.item` was called on non-tensor objects ([#602](https://github.com/Lightning-AI/lightning/pull/602)) -- Fixed a bug where `Trainer.train` would crash on an uninitialized variable if the trainer was run after resuming from a checkpoint that was already at `max_epochs` ([#608](https://github.com/Lightning-AI/lightning/pull/608)) -- Fixed a bug where early stopping would begin two epochs early ([#617](https://github.com/Lightning-AI/lightning/pull/617)) -- Fixed a bug where `num_training_batches` and `num_test_batches` would sometimes be rounded down to zero ([#649](https://github.com/Lightning-AI/lightning/pull/649)) -- Fixed a bug where an additional batch would be processed when manually setting `num_training_batches` ([#653](https://github.com/Lightning-AI/lightning/pull/653)) -- Fixed a bug when batches did not have a `.copy` method ([#701](https://github.com/Lightning-AI/lightning/pull/701)) -- Fixed a bug when using `log_gpu_memory=True` in Python 3.6 ([#715](https://github.com/Lightning-AI/lightning/pull/715)) -- Fixed a bug where checkpoint writing could exit before completion, giving incomplete checkpoints ([#689](https://github.com/Lightning-AI/lightning/pull/689)) -- Fixed a bug where `on_train_end` was not called when ealy stopping ([#723](https://github.com/Lightning-AI/lightning/pull/723)) +- 
Fixed a bug which occurred when using Adagrad with cuda ([#554](https://github.com/Lightning-AI/pytorch-lightning/pull/554))
+- Fixed a bug where training would be on the GPU despite setting `gpus=0` or `gpus=[]` ([#561](https://github.com/Lightning-AI/pytorch-lightning/pull/561))
+- Fixed an error with `print_nan_gradients` when some parameters do not require gradient ([#579](https://github.com/Lightning-AI/pytorch-lightning/pull/579))
+- Fixed a bug where the progress bar would show an incorrect number of total steps during the validation sanity check when using multiple validation data loaders ([#597](https://github.com/Lightning-AI/pytorch-lightning/pull/597))
+- Fixed support for PyTorch 1.1.0 ([#552](https://github.com/Lightning-AI/pytorch-lightning/pull/552))
+- Fixed an issue with early stopping when using a `val_check_interval < 1.0` in `Trainer` ([#492](https://github.com/Lightning-AI/pytorch-lightning/pull/492))
+- Fixed bugs relating to the `CometLogger` object that would cause it to not work properly ([#481](https://github.com/Lightning-AI/pytorch-lightning/pull/481))
+- Fixed a bug that would occur when returning `-1` from `on_batch_start` following an early exit or when the batch was `None` ([#509](https://github.com/Lightning-AI/pytorch-lightning/pull/509))
+- Fixed a potential race condition with several processes trying to create checkpoint directories ([#530](https://github.com/Lightning-AI/pytorch-lightning/pull/530))
+- Fixed a bug where batch 'segments' would remain on the GPU when using `truncated_bptt > 1` ([#532](https://github.com/Lightning-AI/pytorch-lightning/pull/532))
+- Fixed a bug when using `IterableDataset` ([#547](https://github.com/Lightning-AI/pytorch-lightning/pull/547))
+- Fixed a bug where `.item` was called on non-tensor objects ([#602](https://github.com/Lightning-AI/pytorch-lightning/pull/602))
+- Fixed a bug where `Trainer.train` would crash on an uninitialized variable if the trainer was run after resuming from a checkpoint that was already at `max_epochs` ([#608](https://github.com/Lightning-AI/pytorch-lightning/pull/608))
+- Fixed a bug where early stopping would begin two epochs early ([#617](https://github.com/Lightning-AI/pytorch-lightning/pull/617))
+- Fixed a bug where `num_training_batches` and `num_test_batches` would sometimes be rounded down to zero ([#649](https://github.com/Lightning-AI/pytorch-lightning/pull/649))
+- Fixed a bug where an additional batch would be processed when manually setting `num_training_batches` ([#653](https://github.com/Lightning-AI/pytorch-lightning/pull/653))
+- Fixed a bug when batches did not have a `.copy` method ([#701](https://github.com/Lightning-AI/pytorch-lightning/pull/701))
+- Fixed a bug when using `log_gpu_memory=True` in Python 3.6 ([#715](https://github.com/Lightning-AI/pytorch-lightning/pull/715))
+- Fixed a bug where checkpoint writing could exit before completion, giving incomplete checkpoints ([#689](https://github.com/Lightning-AI/pytorch-lightning/pull/689))
+- Fixed a bug where `on_train_end` was not called when early stopping ([#723](https://github.com/Lightning-AI/pytorch-lightning/pull/723))

 ## [0.5.3] - 2019-11-06

diff --git a/src/lightning/pytorch/demos/mnist_datamodule.py b/src/lightning/pytorch/demos/mnist_datamodule.py
index 73f46d4dc0986..9ecc5411ae974 100644
--- a/src/lightning/pytorch/demos/mnist_datamodule.py
+++ b/src/lightning/pytorch/demos/mnist_datamodule.py
@@ -36,7 +36,7 @@ class _MNIST(Dataset):
     """Carbon copy of ``tests_pytorch.helpers.datasets.MNIST``.
We cannot import the tests as they are not distributed with the package.

-    See https://github.com/Lightning-AI/lightning/pull/7614#discussion_r671183652 for more context.
+    See https://github.com/Lightning-AI/pytorch-lightning/pull/7614#discussion_r671183652 for more context.

     .. warning::  This is meant for testing/debugging and is experimental.

diff --git a/src/lightning/pytorch/loops/utilities.py b/src/lightning/pytorch/loops/utilities.py
index 2aaf877c8913d..8e20f485828f0 100644
--- a/src/lightning/pytorch/loops/utilities.py
+++ b/src/lightning/pytorch/loops/utilities.py
@@ -163,7 +163,7 @@ def _decorator(self: _Loop, *args: Any, **kwargs: Any) -> Any:
         context_manager: type[AbstractContextManager]
         if _distributed_is_initialized() and dist.get_backend() == "gloo":
             # gloo backend does not work properly.
-            # https://github.com/Lightning-AI/lightning/pull/12715/files#r854569110
+            # https://github.com/Lightning-AI/pytorch-lightning/pull/12715/files#r854569110
             # TODO: explore why and possibly open an issue in PyTorch repository
             context_manager = torch.no_grad
         elif isinstance(self.trainer.accelerator, XLAAccelerator):
diff --git a/src/lightning/pytorch/strategies/xla.py b/src/lightning/pytorch/strategies/xla.py
index faffb30d6256f..cb70871c83e91 100644
--- a/src/lightning/pytorch/strategies/xla.py
+++ b/src/lightning/pytorch/strategies/xla.py
@@ -279,7 +279,7 @@ def setup_distributed(self) -> None:
         assert self.parallel_devices is not None
         if len(self.parallel_devices) == 1:
             # spawning only 1 device with PjRT is not supported:
-            # https://github.com/Lightning-AI/lightning/pull/17408#discussion_r1170671732
+            # https://github.com/Lightning-AI/pytorch-lightning/pull/17408#discussion_r1170671732
             raise NotImplementedError(
                 "The `XLAStrategy` does not support running on a single device with the PjRT runtime."
                 " Try using all devices or the `SingleDeviceXLAStrategy` strategy"
diff --git a/src/lightning/pytorch/trainer/configuration_validator.py b/src/lightning/pytorch/trainer/configuration_validator.py
index 23c04523cd8f8..cd163b31c8a66 100644
--- a/src/lightning/pytorch/trainer/configuration_validator.py
+++ b/src/lightning/pytorch/trainer/configuration_validator.py
@@ -78,14 +78,14 @@ def __verify_train_val_loop_configuration(trainer: "pl.Trainer", model: "pl.Ligh
             f"Support for `training_epoch_end` has been removed in v2.0.0. `{type(model).__name__}` implements this"
             " method. You can use the `on_train_epoch_end` hook instead. To access outputs, save them in-memory as"
             " instance attributes."
-            " You can find migration examples in https://github.com/Lightning-AI/lightning/pull/16520."
+            " You can find migration examples in https://github.com/Lightning-AI/pytorch-lightning/pull/16520."
         )
     if callable(getattr(model, "validation_epoch_end", None)):
         raise NotImplementedError(
             f"Support for `validation_epoch_end` has been removed in v2.0.0. `{type(model).__name__}` implements this"
             " method. You can use the `on_validation_epoch_end` hook instead. To access outputs, save them in-memory as"
             " instance attributes."
-            " You can find migration examples in https://github.com/Lightning-AI/lightning/pull/16520."
+            " You can find migration examples in https://github.com/Lightning-AI/pytorch-lightning/pull/16520."
         )


@@ -112,7 +112,7 @@ def __verify_eval_loop_configuration(model: "pl.LightningModule", stage: str) ->
             f"Support for `{epoch_end_name}` has been removed in v2.0.0. `{type(model).__name__}` implements this"
             f" method. You can use the `on_{epoch_end_name}` hook instead. To access outputs, save them in-memory"
             " as instance attributes."
-            " You can find migration examples in https://github.com/Lightning-AI/lightning/pull/16520."
+            " You can find migration examples in https://github.com/Lightning-AI/pytorch-lightning/pull/16520."
         )
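For context, the migration these error messages point readers to (PR #16520) boils down to one pattern: accumulate per-step outputs on an instance attribute and reduce them in the matching `on_*_epoch_end` hook, since the new hooks take no `outputs` argument. A minimal sketch, assuming a hypothetical `compute_loss` helper:

    import torch
    import lightning.pytorch as pl

    class LitModel(pl.LightningModule):
        def __init__(self):
            super().__init__()
            # replaces the `outputs` list that `validation_epoch_end` used to receive
            self.validation_step_outputs = []

        def validation_step(self, batch, batch_idx):
            loss = self.compute_loss(batch)  # hypothetical helper
            self.validation_step_outputs.append(loss)
            return loss

        def on_validation_epoch_end(self):
            # reduce the manually collected outputs, then clear them to free memory
            mean_loss = torch.stack(self.validation_step_outputs).mean()
            self.log("val_loss", mean_loss)
            self.validation_step_outputs.clear()

The same pattern carries over to `training_epoch_end` / `on_train_epoch_end` and `test_epoch_end` / `on_test_epoch_end`.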
diff --git a/src/lightning/pytorch/utilities/migration/utils.py b/src/lightning/pytorch/utilities/migration/utils.py
index 2c5656e1f1016..42074a3735c87 100644
--- a/src/lightning/pytorch/utilities/migration/utils.py
+++ b/src/lightning/pytorch/utilities/migration/utils.py
@@ -81,7 +81,7 @@ class pl_legacy_patch:
     unpickling old checkpoints. The following patches apply.

     1. ``lightning.pytorch.utilities.argparse._gpus_arg_default``: Applies to all checkpoints saved prior to
-       version 1.2.8. See: https://github.com/Lightning-AI/lightning/pull/6898
+       version 1.2.8. See: https://github.com/Lightning-AI/pytorch-lightning/pull/6898
     2. ``lightning.pytorch.utilities.argparse_utils``: A module that was deprecated in 1.2 and removed in 1.4,
        but still needs to be available for import for legacy checkpoints.
     3. ``lightning.pytorch.utilities.enums._FaultTolerantMode``: This enum was removed in 2.0 but was pickled
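The docstring in the hunk above explains why `pl_legacy_patch` exists; for readers deserializing very old checkpoints by hand, a minimal sketch of how the context manager is meant to wrap the load (the checkpoint path is a placeholder; `Trainer` performs this patching itself when you pass `ckpt_path`):

    import torch
    from lightning.pytorch.utilities.migration import pl_legacy_patch

    # temporarily re-register legacy import paths so unpickling the old checkpoint succeeds
    with pl_legacy_patch():
        checkpoint = torch.load("path/to/old.ckpt")  # placeholder path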
diff --git a/tests/tests_fabric/strategies/test_xla.py b/tests/tests_fabric/strategies/test_xla.py
index a260b3f231e1d..9ca21d8d8b894 100644
--- a/tests/tests_fabric/strategies/test_xla.py
+++ b/tests/tests_fabric/strategies/test_xla.py
@@ -31,7 +31,7 @@
 def wrap_launch_function(fn, strategy, *args, **kwargs):
     # the launcher does not manage this automatically. explanation available in:
-    # https://github.com/Lightning-AI/lightning/pull/14926#discussion_r982976718
+    # https://github.com/Lightning-AI/pytorch-lightning/pull/14926#discussion_r982976718
     strategy.setup_environment()
     return fn(*args, **kwargs)

diff --git a/tests/tests_fabric/utilities/test_distributed.py b/tests/tests_fabric/utilities/test_distributed.py
index 9282f00f1ffb6..fa9cc0ed40e93 100644
--- a/tests/tests_fabric/utilities/test_distributed.py
+++ b/tests/tests_fabric/utilities/test_distributed.py
@@ -30,7 +30,7 @@
 def wrap_launch_function(fn, strategy, *args, **kwargs):
     # the launcher does not manage this automatically. explanation available in:
-    # https://github.com/Lightning-AI/lightning/pull/14926#discussion_r982976718
+    # https://github.com/Lightning-AI/pytorch-lightning/pull/14926#discussion_r982976718
     strategy.setup_environment()
     return fn(*args, **kwargs)

diff --git a/tests/tests_pytorch/models/test_tpu.py b/tests/tests_pytorch/models/test_tpu.py
index 8067fd63b6562..e74778ee32f4e 100644
--- a/tests/tests_pytorch/models/test_tpu.py
+++ b/tests/tests_pytorch/models/test_tpu.py
@@ -270,7 +270,7 @@ def test_if_test_works_with_checkpoint_false(tmp_path):
 def wrap_launch_function(fn, strategy, *args, **kwargs):
     # the launcher does not manage this automatically. explanation available in:
-    # https://github.com/Lightning-AI/lightning/pull/14926#discussion_r982976718
+    # https://github.com/Lightning-AI/pytorch-lightning/pull/14926#discussion_r982976718
     strategy.setup_environment()
     return fn(*args, **kwargs)

From 23260c3c0b650cfd2c8d2e28bf0d269eae747ca0 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Mon, 28 Apr 2025 14:32:35 +0200
Subject: [PATCH 036/112] drop mergify (#20770)

* drop mergify

* label conflicts

(cherry picked from commit 749dc90f1b19aa0c2cefcaf8fa798c22304d6618)
---
 .github/mergify.yml                   | 70 ---------------------------
 .github/workflows/label-conflicts.yml | 22 +++++++++
 2 files changed, 22 insertions(+), 70 deletions(-)
 delete mode 100644 .github/mergify.yml
 create mode 100644 .github/workflows/label-conflicts.yml

diff --git a/.github/mergify.yml b/.github/mergify.yml
deleted file mode 100644
index e75925a1ad664..0000000000000
--- a/.github/mergify.yml
+++ /dev/null
@@ -1,70 +0,0 @@
-# Copyright The Lightning AI team.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-pull_request_rules:
-  - name: warn on conflicts
-    conditions:
-      - conflict
-      - -draft # filter-out GH draft PRs
-      - -label="has conflicts"
-    actions:
-      # comment:
-      #   message: This pull request is now in conflict...
:( - label: - add: ["has conflicts"] - - - name: resolved conflicts - conditions: - - -conflict - - label="has conflicts" - - -draft # filter-out GH draft PRs - - -merged # not merged yet - - -closed - actions: - label: - remove: ["has conflicts"] - - - name: Ready to Go - conditions: - - -conflict - - -draft # filter-out GH draft PRs - - -title~=(?i)wip # skip all PR that title contains “WIP” (ignoring case) - - "#approved-reviews-by>=2" # number of review approvals - - "#changes-requested-reviews-by=0" # no requested changes - actions: - label: - add: ["ready"] - - - name: Not ready yet - conditions: - - or: - - draft # filter-out GH draft PRs - - label="has conflicts" - - "#approved-reviews-by=0" # number of review approvals - - "#changes-requested-reviews-by>=1" # no requested changes - actions: - label: - remove: ["ready"] - - - name: add core reviewer - conditions: - - -conflict # skip if conflict - - -draft # filter-out GH draft PRs - - label="ready" - - "#approved-reviews-by<2" # number of review approvals - - "#review-requested<2" # number of requested reviews - actions: - request_reviews: - teams: - - "@Lightning-AI/core-lightning" diff --git a/.github/workflows/label-conflicts.yml b/.github/workflows/label-conflicts.yml new file mode 100644 index 0000000000000..06a809c48b541 --- /dev/null +++ b/.github/workflows/label-conflicts.yml @@ -0,0 +1,22 @@ +name: Label conflicts + +on: + push: + branches: ["main"] + pull_request_target: + types: ["synchronize", "reopened", "opened"] + +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: false + +jobs: + triage-conflicts: + runs-on: ubuntu-latest + steps: + - uses: mschilde/auto-label-merge-conflicts@591722e97f3c4142df3eca156ed0dcf2bcd362bd # Oct 25, 2021 + with: + CONFLICT_LABEL_NAME: "has conflicts" + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + MAX_RETRIES: 3 + WAIT_MS: 5000 From 420ca46a9449840eca6362dce3706bbeba7a006d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Apr 2025 14:33:12 +0200 Subject: [PATCH 037/112] build(deps): bump mypy from 1.11.0 to 1.15.0 in /requirements (#20759) * build(deps): bump mypy from 1.11.0 to 1.15.0 in /requirements Bumps [mypy](https://github.com/python/mypy) from 1.11.0 to 1.15.0. - [Changelog](https://github.com/python/mypy/blob/master/CHANGELOG.md) - [Commits](https://github.com/python/mypy/compare/v1.11...v1.15.0) --- updated-dependencies: - dependency-name: mypy dependency-version: 1.15.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * fixing * ignore --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka B Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 6da480d5e3304b91ac188b849ee06e369e878460) --- requirements/typing.txt | 2 +- src/lightning/fabric/fabric.py | 3 +-- src/lightning/pytorch/callbacks/lr_monitor.py | 6 +++--- src/lightning/pytorch/core/saving.py | 2 +- .../pytorch/trainer/connectors/logger_connector/result.py | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/requirements/typing.txt b/requirements/typing.txt index dfdb747eb8a66..becbd1e76c11d 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,4 +1,4 @@ -mypy==1.11.0 +mypy==1.15.0 torch==2.7.0 types-Markdown diff --git a/src/lightning/fabric/fabric.py b/src/lightning/fabric/fabric.py index 058e5e7c40751..36ffc0c1c7772 100644 --- a/src/lightning/fabric/fabric.py +++ b/src/lightning/fabric/fabric.py @@ -367,8 +367,7 @@ def setup_dataloaders( ) for dataloader in dataloaders ] - dataloaders = dataloaders[0] if len(dataloaders) == 1 else dataloaders - return dataloaders # type: ignore[return-value] + return dataloaders[0] if len(dataloaders) == 1 else dataloaders def _setup_dataloader( self, dataloader: DataLoader, use_distributed_sampler: bool = True, move_to_device: bool = True diff --git a/src/lightning/pytorch/callbacks/lr_monitor.py b/src/lightning/pytorch/callbacks/lr_monitor.py index ca2b4a866ee50..36241b9e678e5 100644 --- a/src/lightning/pytorch/callbacks/lr_monitor.py +++ b/src/lightning/pytorch/callbacks/lr_monitor.py @@ -251,13 +251,13 @@ def _remap_keys(self, names: list[list[str]], token: str = "/pg1") -> None: elif new_name not in self.lrs: self.lrs[new_name] = [] - def _extract_momentum(self, param_group: dict[str, list], name: str, use_betas: bool) -> dict[str, float]: + def _extract_momentum(self, param_group: dict[str, list[float]], name: str, use_betas: bool) -> dict[str, float]: if not self.log_momentum: return {} momentum = param_group["betas"][0] if use_betas else param_group.get("momentum", 0) - self.last_momentum_values[name] = momentum - return {name: momentum} + self.last_momentum_values[name] = momentum # type: ignore[assignment] + return {name: momentum} # type: ignore[dict-item] def _extract_weight_decay(self, param_group: dict[str, Any], name: str) -> dict[str, Any]: """Extracts the weight decay statistics from a parameter group.""" diff --git a/src/lightning/pytorch/core/saving.py b/src/lightning/pytorch/core/saving.py index 09d888c56bdcd..21fd3912f7849 100644 --- a/src/lightning/pytorch/core/saving.py +++ b/src/lightning/pytorch/core/saving.py @@ -184,7 +184,7 @@ def _load_state( obj.on_load_checkpoint(checkpoint) # load the state_dict on the model automatically - keys = obj.load_state_dict(checkpoint["state_dict"], strict=strict) + keys = obj.load_state_dict(checkpoint["state_dict"], strict=strict) # type: ignore[arg-type] if not strict: if keys.missing_keys: diff --git a/src/lightning/pytorch/trainer/connectors/logger_connector/result.py b/src/lightning/pytorch/trainer/connectors/logger_connector/result.py index 0881ac0b3fa08..90ae28bb8c7ee 100644 --- a/src/lightning/pytorch/trainer/connectors/logger_connector/result.py +++ b/src/lightning/pytorch/trainer/connectors/logger_connector/result.py @@ -92,7 +92,7 @@ def _generate_sync_fn(self) -> None: fn = self.no_op if self.fn is None or not self.should or 
self.rank_zero_only else self.fn # save the function as `_fn` as the meta are being re-created and the object references need to match. # ignore typing, bad support for `partial`: mypy/issues/1484 - self._fn: Callable = partial(fn, reduce_op=self.op, group=self.group) # type: ignore[arg-type,operator,misc] + self._fn: Callable = partial(fn, reduce_op=self.op, group=self.group) # type: ignore[unused-ignore] @property def __call__(self) -> Any: From a42181e4ce386f19f07a99bea141a1dc04fa01dc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 May 2025 10:30:44 +0200 Subject: [PATCH 038/112] build(deps): bump pytest-timeout from 2.1.0 to 2.3.1 in /requirements (#20787) Bumps [pytest-timeout](https://github.com/pytest-dev/pytest-timeout) from 2.1.0 to 2.3.1. - [Commits](https://github.com/pytest-dev/pytest-timeout/compare/2.1.0...2.3.1) --- updated-dependencies: - dependency-name: pytest-timeout dependency-version: 2.3.1 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit ec6e2e08ee45387299dc9c06a5d7e64977d1306e) --- requirements/fabric/test.txt | 2 +- requirements/pytorch/test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index 00496eb58d4d0..b4ccede64e8db 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -2,7 +2,7 @@ coverage ==7.8.0 numpy >=1.17.2, <1.27.0 pytest ==8.3.5 pytest-cov ==6.1.1 -pytest-timeout ==2.1.0 +pytest-timeout ==2.3.1 pytest-rerunfailures ==12.0 pytest-random-order ==1.1.0 click ==8.1.7 diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index 7c526b93d3ad8..4271e6a91b02d 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -1,7 +1,7 @@ coverage ==7.8.0 pytest ==8.3.5 pytest-cov ==6.1.1 -pytest-timeout ==2.1.0 +pytest-timeout ==2.3.1 pytest-rerunfailures ==12.0 pytest-random-order ==1.1.0 From 27bb9e25b2c18444d4085c235a706f179baa1aee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 May 2025 10:30:53 +0200 Subject: [PATCH 039/112] build(deps): update ipython[notebook] requirement from <8.7.0 to <8.19.0 in /requirements (#20788) build(deps): update ipython[notebook] requirement in /requirements Updates the requirements on [ipython[notebook]](https://github.com/ipython/ipython) to permit the latest version. - [Release notes](https://github.com/ipython/ipython/releases) - [Commits](https://github.com/ipython/ipython/compare/rel-0.8.4...8.18.1) --- updated-dependencies: - dependency-name: ipython[notebook] dependency-version: 8.18.1 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 156ccd974104c1819a3a97d82354a547c2bb07a7) --- requirements/pytorch/docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/pytorch/docs.txt b/requirements/pytorch/docs.txt index 21287196933ea..77c1c5ac97c9c 100644 --- a/requirements/pytorch/docs.txt +++ b/requirements/pytorch/docs.txt @@ -1,7 +1,7 @@ -r ../docs.txt nbformat # used for generate empty notebook -ipython[notebook] <8.7.0 +ipython[notebook] <8.19.0 setuptools<58.0 # workaround for `error in ipython setup command: use_2to3 is invalid.` -r ../../_notebooks/.actions/requires.txt From b7700ee18370552cf17d3d57c7f879cbf23d1585 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 May 2025 10:31:03 +0200 Subject: [PATCH 040/112] build(deps): bump pkginfo from 1.12.0 to 1.12.1.2 in /requirements (#20789) Bumps [pkginfo](https://code.launchpad.net/~tseaver/pkginfo/trunk) from 1.12.0 to 1.12.1.2. --- updated-dependencies: - dependency-name: pkginfo dependency-version: 1.12.1.2 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 30df45cfed0a02a80b9e9cd8064eabb52e2444c8) --- requirements/ci.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/ci.txt b/requirements/ci.txt index f4f38126f440f..c1ca0030fb86a 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -4,5 +4,5 @@ awscli >=1.30.0, <1.41.0 twine ==6.0.1 importlib-metadata <8.0.0 wget -pkginfo ==1.12.0 +pkginfo ==1.12.1.2 packaging <25.1 From 33cdd3eede982bb9481d32fd75dc2a960a929df4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 May 2025 10:31:21 +0200 Subject: [PATCH 041/112] build(deps): update importlib-metadata requirement from <8.0.0 to <9.0.0 in /requirements (#20790) build(deps): update importlib-metadata requirement in /requirements Updates the requirements on [importlib-metadata](https://github.com/python/importlib_metadata) to permit the latest version. - [Release notes](https://github.com/python/importlib_metadata/releases) - [Changelog](https://github.com/python/importlib_metadata/blob/main/NEWS.rst) - [Commits](https://github.com/python/importlib_metadata/compare/0.1...v8.7.0) --- updated-dependencies: - dependency-name: importlib-metadata dependency-version: 8.7.0 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 2f676784a7011eb254215856badb618db50d5894) --- requirements/ci.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/ci.txt b/requirements/ci.txt index c1ca0030fb86a..8054e2f2aacae 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -2,7 +2,7 @@ setuptools <70.1.1 wheel <0.46.0 awscli >=1.30.0, <1.41.0 twine ==6.0.1 -importlib-metadata <8.0.0 +importlib-metadata <9.0.0 wget pkginfo ==1.12.1.2 packaging <25.1 From ea49307c424b4900d4fcdfed280acda44cac80b3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 May 2025 10:31:31 +0200 Subject: [PATCH 042/112] build(deps): update psutil requirement from <5.9.6 to <7.0.1 in /requirements (#20791) build(deps): update psutil requirement in /requirements Updates the requirements on [psutil](https://github.com/giampaolo/psutil) to permit the latest version. - [Changelog](https://github.com/giampaolo/psutil/blob/master/HISTORY.rst) - [Commits](https://github.com/giampaolo/psutil/compare/release-0.1.0...release-7.0.0) --- updated-dependencies: - dependency-name: psutil dependency-version: 7.0.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit c3ac7abcb47024728eabe71e5bb5d7ecab9a06fc) --- requirements/pytorch/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index 4271e6a91b02d..2219e5e13116a 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -11,7 +11,7 @@ scikit-learn >0.22.1, <1.7.0 numpy >=1.17.2, <1.27.0 onnx >=1.12.0, <1.17.0 onnxruntime >=1.12.0, <1.21.0 -psutil <5.9.6 # for `DeviceStatsMonitor` +psutil <7.0.1 # for `DeviceStatsMonitor` pandas >1.0, <2.3.0 # needed in benchmarks fastapi # for `ServableModuleValidator` # not setting version as re-defined in App uvicorn # for `ServableModuleValidator` # not setting version as re-defined in App From 7d75a11b436034a87d79fcea8504d7ab884eca8b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 May 2025 10:31:40 +0200 Subject: [PATCH 043/112] build(deps): update docutils requirement from <0.21,>=0.16 to >=0.16,<0.22 in /requirements (#20792) build(deps): update docutils requirement in /requirements Updates the requirements on [docutils](https://docutils.sourceforge.io) to permit the latest version. --- updated-dependencies: - dependency-name: docutils dependency-version: 0.21.2 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 585396c5efddc51c42e9dc918615fbb5925dcdf0) --- requirements/docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index c1b93d987a6e9..6b65450094164 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -3,7 +3,7 @@ myst-parser >=0.18.1, <3.0.0 nbsphinx >=0.8.5, <=0.9.2 nbconvert <7.14 # temporary fix for https://github.com/jupyter/nbconvert/issues/2092 pandoc >=1.0, <=2.3 -docutils >=0.16, <0.21 +docutils >=0.16, <0.22 sphinxcontrib-fulltoc >=1.0, <=1.2.0 sphinxcontrib-mockautodoc sphinx-autobuild From 489a602a5f3631e4856de69acc2118c0da13b7c5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 May 2025 10:31:48 +0200 Subject: [PATCH 044/112] build(deps): update typing-extensions requirement from <4.11.0,>=4.4.0 to >=4.4.0,<4.14.0 in /requirements (#20794) build(deps): update typing-extensions requirement in /requirements Updates the requirements on [typing-extensions](https://github.com/python/typing_extensions) to permit the latest version. - [Release notes](https://github.com/python/typing_extensions/releases) - [Changelog](https://github.com/python/typing_extensions/blob/main/CHANGELOG.md) - [Commits](https://github.com/python/typing_extensions/compare/4.4.0...4.13.2) --- updated-dependencies: - dependency-name: typing-extensions dependency-version: 4.13.2 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 5886b5d1b5467e2949069ed16022dfc69af563c7) --- requirements/fabric/base.txt | 2 +- requirements/pytorch/base.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt index 3fe9168c48e11..2561b2324b772 100644 --- a/requirements/fabric/base.txt +++ b/requirements/fabric/base.txt @@ -4,5 +4,5 @@ torch >=2.1.0, <2.8.0 fsspec[http] >=2022.5.0, <2025.4.0 packaging >=20.0, <=25.0 -typing-extensions >=4.4.0, <4.11.0 +typing-extensions >=4.4.0, <4.14.0 lightning-utilities >=0.10.0, <0.15.0 diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt index 7bc20cec191d7..e9632eb51ca4e 100644 --- a/requirements/pytorch/base.txt +++ b/requirements/pytorch/base.txt @@ -7,5 +7,5 @@ PyYAML >=5.4, <6.1.0 fsspec[http] >=2022.5.0, <2025.4.0 torchmetrics >=0.7.0, <1.8.0 packaging >=20.0, <=25.0 -typing-extensions >=4.4.0, <4.11.0 +typing-extensions >=4.4.0, <4.14.0 lightning-utilities >=0.10.0, <0.15.0 From d2fae010699ca015de1038d4a3b6c40483c62655 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 May 2025 10:31:56 +0200 Subject: [PATCH 045/112] build(deps): update onnx requirement from <1.17.0,>=1.12.0 to >=1.12.0,<1.18.0 in /requirements (#20795) build(deps): update onnx requirement in /requirements Updates the requirements on [onnx](https://github.com/onnx/onnx) to permit the latest version. - [Release notes](https://github.com/onnx/onnx/releases) - [Changelog](https://github.com/onnx/onnx/blob/main/docs/Changelog-ml.md) - [Commits](https://github.com/onnx/onnx/compare/v1.12.0...v1.17.0) --- updated-dependencies: - dependency-name: onnx dependency-version: 1.17.0 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 23f02ce9794b4fd6f6f53c3c2bb8605cad1b4bac) --- requirements/pytorch/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index 2219e5e13116a..412a8f270bf47 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -9,7 +9,7 @@ pytest-random-order ==1.1.0 cloudpickle >=1.3, <3.2.0 scikit-learn >0.22.1, <1.7.0 numpy >=1.17.2, <1.27.0 -onnx >=1.12.0, <1.17.0 +onnx >=1.12.0, <1.18.0 onnxruntime >=1.12.0, <1.21.0 psutil <7.0.1 # for `DeviceStatsMonitor` pandas >1.0, <2.3.0 # needed in benchmarks From f8cefd525af5b50afc8d7e33a703c2a7a5058b00 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 May 2025 20:47:48 +0200 Subject: [PATCH 046/112] build(deps): bump pytest-random-order from 1.1.0 to 1.1.1 in /requirements (#20793) build(deps): bump pytest-random-order in /requirements Bumps [pytest-random-order](https://github.com/jbasko/pytest-random-order) from 1.1.0 to 1.1.1. - [Release notes](https://github.com/jbasko/pytest-random-order/releases) - [Commits](https://github.com/jbasko/pytest-random-order/compare/v1.1.0...v1.1.1) --- updated-dependencies: - dependency-name: pytest-random-order dependency-version: 1.1.1 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 4323cae1a3a430165b50d123da0197fa7fda538d) --- requirements/fabric/test.txt | 2 +- requirements/pytorch/test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index b4ccede64e8db..105e3d42416ed 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -4,6 +4,6 @@ pytest ==8.3.5 pytest-cov ==6.1.1 pytest-timeout ==2.3.1 pytest-rerunfailures ==12.0 -pytest-random-order ==1.1.0 +pytest-random-order ==1.1.1 click ==8.1.7 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index 412a8f270bf47..fe39a58733eeb 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -3,7 +3,7 @@ pytest ==8.3.5 pytest-cov ==6.1.1 pytest-timeout ==2.3.1 pytest-rerunfailures ==12.0 -pytest-random-order ==1.1.0 +pytest-random-order ==1.1.1 # needed in tests cloudpickle >=1.3, <3.2.0 From 3d425ba1f0a20c2a932c29375e8b2560e032ee20 Mon Sep 17 00:00:00 2001 From: Mauricio Villegas <5780272+mauvilsa@users.noreply.github.com> Date: Tue, 6 May 2025 10:24:11 +0200 Subject: [PATCH 047/112] Remove LightningCLI `python>=3.11.9` xfail, since issue was resolved long ago (#20800) (cherry picked from commit 9caebba81e4e8c728a15e979c6b2126537f2dea3) --- requirements/pytorch/extra.txt | 2 +- tests/tests_pytorch/loggers/test_wandb.py | 2 -- tests/tests_pytorch/test_cli.py | 26 ----------------------- 3 files changed, 1 insertion(+), 29 deletions(-) diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt index e14cb38297caa..139513fb22fd9 100644 --- a/requirements/pytorch/extra.txt +++ b/requirements/pytorch/extra.txt @@ -5,7 +5,7 @@ matplotlib>3.1, <3.9.0 omegaconf >=2.2.3, <2.4.0 hydra-core >=1.2.0, <1.4.0 -jsonargparse[signatures] >=4.27.7, <=4.35.0 
+jsonargparse[signatures] >=4.28.0, <=4.40.0 rich >=12.3.0, <13.6.0 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute bitsandbytes >=0.45.2,<0.45.3; platform_system != "Darwin" diff --git a/tests/tests_pytorch/loggers/test_wandb.py b/tests/tests_pytorch/loggers/test_wandb.py index 35c1917983dcf..e9b9e9a8090b0 100644 --- a/tests/tests_pytorch/loggers/test_wandb.py +++ b/tests/tests_pytorch/loggers/test_wandb.py @@ -26,7 +26,6 @@ from lightning.pytorch.demos.boring_classes import BoringModel from lightning.pytorch.loggers import TensorBoardLogger, WandbLogger from lightning.pytorch.utilities.exceptions import MisconfigurationException -from tests_pytorch.test_cli import _xfail_python_ge_3_11_9 def test_wandb_project_name(wandb_mock): @@ -645,7 +644,6 @@ def test_wandb_logger_download_artifact(wandb_mock, tmp_path): wandb_mock.Api().artifact.assert_called_once_with("test_artifact", type="model") -@_xfail_python_ge_3_11_9 @pytest.mark.parametrize(("log_model", "expected"), [("True", True), ("False", False), ("all", "all")]) def test_wandb_logger_cli_integration(log_model, expected, wandb_mock, monkeypatch, tmp_path): """Test that the WandbLogger can be used with the LightningCLI.""" diff --git a/tests/tests_pytorch/test_cli.py b/tests/tests_pytorch/test_cli.py index 5c33a8539b693..7658894b37414 100644 --- a/tests/tests_pytorch/test_cli.py +++ b/tests/tests_pytorch/test_cli.py @@ -29,7 +29,6 @@ import yaml from lightning_utilities import compare_version from lightning_utilities.test.warning import no_warning_call -from packaging.version import Version from tensorboard.backend.event_processing import event_accumulator from tensorboard.plugins.hparams.plugin_data_pb2 import HParamsPluginData from torch.optim import SGD @@ -65,14 +64,6 @@ def lazy_instance(*args, **kwargs): return None -_xfail_python_ge_3_11_9 = pytest.mark.xfail( - # https://github.com/omni-us/jsonargparse/issues/484 - Version(f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}") >= Version("3.11.9"), - strict=False, - reason="jsonargparse + Python 3.11.9 compatibility issue", -) - - @contextmanager def mock_subclasses(baseclass, *subclasses): """Mocks baseclass so that it only has the given child subclasses.""" @@ -356,7 +347,6 @@ def test_save_to_log_dir_false_error(): ) -@_xfail_python_ge_3_11_9 def test_lightning_cli_logger_save_config(cleandir): class LoggerSaveConfigCallback(SaveConfigCallback): def __init__(self, *args, **kwargs) -> None: @@ -753,7 +743,6 @@ def add_arguments_to_parser(self, parser): assert cli.trainer.lr_scheduler_configs[0].scheduler.step_size == 50 -@_xfail_python_ge_3_11_9 @RunIf(min_torch="2.2") @pytest.mark.parametrize("use_generic_base_class", [False, True]) def test_lightning_cli_optimizers_and_lr_scheduler_with_link_to(use_generic_base_class): @@ -801,7 +790,6 @@ def __init__(self, optim1: dict, optim2: dict, scheduler: dict): assert isinstance(cli.model.scheduler, torch.optim.lr_scheduler.ExponentialLR) -@_xfail_python_ge_3_11_9 @RunIf(min_torch="2.2") def test_lightning_cli_optimizers_and_lr_scheduler_with_callable_type(): class TestModel(BoringModel): @@ -1118,7 +1106,6 @@ def __init__(self, foo, bar=5): self.bar = bar -@_xfail_python_ge_3_11_9 def test_lightning_cli_model_short_arguments(): with ( mock.patch("sys.argv", ["any.py", "fit", "--model=BoringModel"]), @@ -1146,7 +1133,6 @@ def __init__(self, foo, bar=5): self.bar = bar -@_xfail_python_ge_3_11_9 def test_lightning_cli_datamodule_short_arguments(): # with set model with ( @@ 
-1200,7 +1186,6 @@ def test_lightning_cli_datamodule_short_arguments(): assert cli.parser.groups["data"].group_class is BoringDataModule -@_xfail_python_ge_3_11_9 @pytest.mark.parametrize("use_class_path_callbacks", [False, True]) def test_callbacks_append(use_class_path_callbacks): """This test validates registries are used when simplified command line are being used.""" @@ -1244,7 +1229,6 @@ def test_callbacks_append(use_class_path_callbacks): assert all(t in callback_types for t in expected) -@_xfail_python_ge_3_11_9 def test_optimizers_and_lr_schedulers_reload(cleandir): base = ["any.py", "--trainer.max_epochs=1"] input = base + [ @@ -1276,7 +1260,6 @@ def test_optimizers_and_lr_schedulers_reload(cleandir): LightningCLI(BoringModel, run=False) -@_xfail_python_ge_3_11_9 def test_optimizers_and_lr_schedulers_add_arguments_to_parser_implemented_reload(cleandir): class TestLightningCLI(LightningCLI): def __init__(self, *args): @@ -1540,7 +1523,6 @@ def test_cli_help_message(): assert "Implements Adam" in shorthand_help.getvalue() -@_xfail_python_ge_3_11_9 def test_cli_reducelronplateau(): with mock.patch( "sys.argv", ["any.py", "--optimizer=Adam", "--lr_scheduler=ReduceLROnPlateau", "--lr_scheduler.monitor=foo"] @@ -1551,7 +1533,6 @@ def test_cli_reducelronplateau(): assert config["lr_scheduler"]["scheduler"].monitor == "foo" -@_xfail_python_ge_3_11_9 def test_cli_configureoptimizers_can_be_overridden(): class MyCLI(LightningCLI): def __init__(self): @@ -1596,7 +1577,6 @@ def __init__(self, activation: torch.nn.Module = lazy_instance(torch.nn.LeakyReL assert cli.model.activation is not model.activation -@_xfail_python_ge_3_11_9 def test_ddpstrategy_instantiation_and_find_unused_parameters(mps_count_0): strategy_default = lazy_instance(DDPStrategy, find_unused_parameters=True) with mock.patch("sys.argv", ["any.py", "--trainer.strategy.process_group_backend=group"]): @@ -1612,7 +1592,6 @@ def test_ddpstrategy_instantiation_and_find_unused_parameters(mps_count_0): assert strategy_default is not cli.config_init.trainer.strategy -@_xfail_python_ge_3_11_9 def test_cli_logger_shorthand(): with mock.patch("sys.argv", ["any.py"]): cli = LightningCLI(TestModel, run=False, trainer_defaults={"logger": False}) @@ -1643,7 +1622,6 @@ def _test_logger_init_args(logger_name, init, unresolved=None): assert data["dict_kwargs"] == unresolved -@_xfail_python_ge_3_11_9 def test_comet_logger_init_args(): _test_logger_init_args( "CometLogger", @@ -1664,7 +1642,6 @@ def test_comet_logger_init_args(): strict=False, reason="TypeError on Windows when parsing", ) -@_xfail_python_ge_3_11_9 def test_neptune_logger_init_args(): _test_logger_init_args( "NeptuneLogger", @@ -1673,7 +1650,6 @@ def test_neptune_logger_init_args(): ) -@_xfail_python_ge_3_11_9 def test_tensorboard_logger_init_args(): _test_logger_init_args( "TensorBoardLogger", @@ -1685,7 +1661,6 @@ def test_tensorboard_logger_init_args(): ) -@_xfail_python_ge_3_11_9 def test_wandb_logger_init_args(): _test_logger_init_args( "WandbLogger", @@ -1770,7 +1745,6 @@ def __init__(self, a_func: Callable = torch.nn.Softmax): assert "a_func: torch.nn.Softmax" in out.getvalue() -@_xfail_python_ge_3_11_9 def test_pytorch_profiler_init_args(): from lightning.pytorch.profilers import Profiler, PyTorchProfiler From 60de1cfc86b75592577e515dbda7e0e241b80eb3 Mon Sep 17 00:00:00 2001 From: PL Ghost <75324987+pl-ghost@users.noreply.github.com> Date: Tue, 6 May 2025 11:05:19 +0200 Subject: [PATCH 048/112] docs: update ref to latest tutorials (#20785) update tutorials to `fd70f511` 
Co-authored-by: Borda (cherry picked from commit d5208f7052252baa3f2c5b59184b6cd38f0c80c4) --- _notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_notebooks b/_notebooks index 1c1160e543bb5..fd70f5114b21f 160000 --- a/_notebooks +++ b/_notebooks @@ -1 +1 @@ -Subproject commit 1c1160e543bb56760886a45dcb7e1e03a22f634c +Subproject commit fd70f5114b21f7f970bd5587b1d3def689507069 From 9b21c5543f99d8369114e475a2ce63bc40213eab Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Thu, 8 May 2025 16:17:25 +0200 Subject: [PATCH 049/112] updating issue template (#20720) * updating issue template * Update .github/ISSUE_TEMPLATE/1_bug_report.yaml * Update .github/ISSUE_TEMPLATE/1_bug_report.yaml * Apply suggestions from code review * Apply suggestions from code review --------- Co-authored-by: Ali Alshaarawy <45029495+ali-alshaar7@users.noreply.github.com> (cherry picked from commit 1b12c4bdd3dd0e221af03dd3deca18e21153cf47) --- .github/ISSUE_TEMPLATE/1_bug_report.yaml | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_bug_report.yaml b/.github/ISSUE_TEMPLATE/1_bug_report.yaml index 423de06f191bc..917a0fcdaf09e 100644 --- a/.github/ISSUE_TEMPLATE/1_bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/1_bug_report.yaml @@ -46,12 +46,22 @@ body: attributes: value: "**Note: The rest of this form is optional, but filling it out may help us to provide better support.**" + - type: input + attributes: + label: Reproduced in studio + description: > + Create a new Lightning Studio with code that reproduces the issue and share the link. + Also include all the relevant files and data required to reproduce shared issue. + In case the code does not crash, please add assert statements to show what is the real and expected output. + A simple guide on how to create such a studio can be found [here](https://www.youtube.com/watch?v=YcW-2Zt_bFg&ab_channel=LightningAI). + placeholder: https://lightning.ai/live-session/... + validations: + required: false - type: textarea attributes: label: How to reproduce the bug description: > - Provide steps and example code here. - You can also paste a link to Google Colab (see our [Colab bug report template](https://colab.research.google.com/github/Lightning-AI/lightning/blob/master/examples/pytorch/bug_report/bug_report_model.ipynb)) or adapt this minimal [snippet](https://github.com/Lightning-AI/lightning/blob/master/examples/pytorch/bug_report/bug_report_model.py). + In the special case when the issue can't be reproduced in a studio, provide steps and example code here. placeholder: | ```python # Sample code to reproduce the problem From 05612d2c9f487b52e16286f1353901ef353a27a3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 09:18:31 +0200 Subject: [PATCH 050/112] build(deps): bump sphinx-toolbox from 3.5.0 to 3.10.0 in /requirements (#20814) Bumps [sphinx-toolbox](https://github.com/sphinx-toolbox/sphinx-toolbox) from 3.5.0 to 3.10.0. - [Release notes](https://github.com/sphinx-toolbox/sphinx-toolbox/releases) - [Changelog](https://github.com/sphinx-toolbox/sphinx-toolbox/blob/master/doc-source/changelog.rst) - [Commits](https://github.com/sphinx-toolbox/sphinx-toolbox/compare/v3.5.0...v3.10.0) --- updated-dependencies: - dependency-name: sphinx-toolbox dependency-version: 3.10.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 7c418d6e9d603654348519545bd7c3c9c5f4ef35) --- requirements/docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index 6b65450094164..2239744917261 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -12,7 +12,7 @@ sphinx-paramlinks >=0.5.1, <=0.6.0 sphinx-togglebutton >=0.2, <=0.3.2 sphinx-copybutton >=0.3, <=0.5.2 sphinx-multiproject -sphinx-toolbox ==3.5.0 +sphinx-toolbox ==3.10.0 sphinx-rtd-dark-mode sphinxcontrib-video ==0.2.0 jinja2 <3.2.0 From 6a91b591b8af994e41d6d72996351a08f1994dc8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 09:18:39 +0200 Subject: [PATCH 051/112] build(deps): update nbsphinx requirement from <=0.9.2,>=0.8.5 to >=0.8.5,<=0.9.7 in /requirements (#20815) build(deps): update nbsphinx requirement in /requirements Updates the requirements on [nbsphinx](https://github.com/spatialaudio/nbsphinx) to permit the latest version. - [Release notes](https://github.com/spatialaudio/nbsphinx/releases) - [Changelog](https://github.com/spatialaudio/nbsphinx/blob/master/NEWS.rst) - [Commits](https://github.com/spatialaudio/nbsphinx/compare/0.8.5...0.9.7) --- updated-dependencies: - dependency-name: nbsphinx dependency-version: 0.9.7 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 3dc7470430b1ec6cdc4b5a5abe5aa2016e5511c6) --- requirements/docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index 2239744917261..cd4bde334799e 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,6 +1,6 @@ sphinx >5.0, <6.0 myst-parser >=0.18.1, <3.0.0 -nbsphinx >=0.8.5, <=0.9.2 +nbsphinx >=0.8.5, <=0.9.7 nbconvert <7.14 # temporary fix for https://github.com/jupyter/nbconvert/issues/2092 pandoc >=1.0, <=2.3 docutils >=0.16, <0.22 From d33230515b6408cdc1a0fcc4b722f9f586dfbf20 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 09:18:48 +0200 Subject: [PATCH 052/112] build(deps): bump twine from 6.0.1 to 6.1.0 in /requirements (#20816) Bumps [twine](https://github.com/pypa/twine) from 6.0.1 to 6.1.0. - [Release notes](https://github.com/pypa/twine/releases) - [Changelog](https://github.com/pypa/twine/blob/main/docs/changelog.rst) - [Commits](https://github.com/pypa/twine/compare/6.0.1...6.1.0) --- updated-dependencies: - dependency-name: twine dependency-version: 6.1.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 90b8b59feb82968e3e632ebd0d01b12edc7e9a5b) --- requirements/ci.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/ci.txt b/requirements/ci.txt index 8054e2f2aacae..8149e3eb896c8 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -1,7 +1,7 @@ setuptools <70.1.1 wheel <0.46.0 awscli >=1.30.0, <1.41.0 -twine ==6.0.1 +twine ==6.1.0 importlib-metadata <9.0.0 wget pkginfo ==1.12.1.2 From 4392dd76c0e7a2de01e59d73b29cb717436cb210 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 09:18:56 +0200 Subject: [PATCH 053/112] build(deps): update tqdm requirement from <4.67.0,>=4.57.0 to >=4.57.0,<4.68.0 in /requirements (#20817) build(deps): update tqdm requirement in /requirements Updates the requirements on [tqdm](https://github.com/tqdm/tqdm) to permit the latest version. - [Release notes](https://github.com/tqdm/tqdm/releases) - [Commits](https://github.com/tqdm/tqdm/compare/v4.57.0...v4.67.1) --- updated-dependencies: - dependency-name: tqdm dependency-version: 4.67.1 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 08abb526ae1dc5cfa376ed577bfd8b4c3149a47c) --- requirements/pytorch/base.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt index e9632eb51ca4e..00889581f6407 100644 --- a/requirements/pytorch/base.txt +++ b/requirements/pytorch/base.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment torch >=2.1.0, <2.8.0 -tqdm >=4.57.0, <4.67.0 +tqdm >=4.57.0, <4.68.0 PyYAML >=5.4, <6.1.0 fsspec[http] >=2022.5.0, <2025.4.0 torchmetrics >=0.7.0, <1.8.0 From af1e871e2d498d82816601eec23b9ebb1523e811 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 09:19:03 +0200 Subject: [PATCH 054/112] build(deps): bump sphinxcontrib-video from 0.2.0 to 0.4.1 in /requirements (#20818) build(deps): bump sphinxcontrib-video in /requirements Bumps [sphinxcontrib-video](https://github.com/sphinx-contrib/video) from 0.2.0 to 0.4.1. - [Release notes](https://github.com/sphinx-contrib/video/releases) - [Commits](https://github.com/sphinx-contrib/video/compare/v0.2.0...v0.4.1) --- updated-dependencies: - dependency-name: sphinxcontrib-video dependency-version: 0.4.1 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit da5ce90131fcfdee8ac2544642a001b01106d1b7) --- requirements/docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index cd4bde334799e..85141a80dcc7d 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -14,7 +14,7 @@ sphinx-copybutton >=0.3, <=0.5.2 sphinx-multiproject sphinx-toolbox ==3.10.0 sphinx-rtd-dark-mode -sphinxcontrib-video ==0.2.0 +sphinxcontrib-video ==0.4.1 jinja2 <3.2.0 lightning-utilities >=0.11.1, <0.15.0 From a7ad5cce95bddff2c51e5065f2209d3ea7a31931 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 09:19:11 +0200 Subject: [PATCH 055/112] build(deps): update rich requirement from <13.6.0,>=12.3.0 to >=12.3.0,<14.1.0 in /requirements (#20819) build(deps): update rich requirement in /requirements Updates the requirements on [rich](https://github.com/Textualize/rich) to permit the latest version. - [Release notes](https://github.com/Textualize/rich/releases) - [Changelog](https://github.com/Textualize/rich/blob/master/CHANGELOG.md) - [Commits](https://github.com/Textualize/rich/compare/v12.3.0...v14.0.0) --- updated-dependencies: - dependency-name: rich dependency-version: 14.0.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 86aed16253776bd4d488bf56acc0a9f6b896c696) --- requirements/pytorch/extra.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt index 139513fb22fd9..21c38b1b5b121 100644 --- a/requirements/pytorch/extra.txt +++ b/requirements/pytorch/extra.txt @@ -6,6 +6,6 @@ matplotlib>3.1, <3.9.0 omegaconf >=2.2.3, <2.4.0 hydra-core >=1.2.0, <1.4.0 jsonargparse[signatures] >=4.28.0, <=4.40.0 -rich >=12.3.0, <13.6.0 +rich >=12.3.0, <14.1.0 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute bitsandbytes >=0.45.2,<0.45.3; platform_system != "Darwin" From e691c78ceb4de976133b10d06ed1462354986c37 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 09:19:18 +0200 Subject: [PATCH 056/112] build(deps): bump click from 8.1.7 to 8.1.8 in /requirements (#20820) Bumps [click](https://github.com/pallets/click) from 8.1.7 to 8.1.8. - [Release notes](https://github.com/pallets/click/releases) - [Changelog](https://github.com/pallets/click/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/click/compare/8.1.7...8.1.8) --- updated-dependencies: - dependency-name: click dependency-version: 8.1.8 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit c431887efd06bd5bb1904f88e44fec2853eaf3ef) --- requirements/fabric/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index 105e3d42416ed..7ad26cef308b0 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -5,5 +5,5 @@ pytest-cov ==6.1.1 pytest-timeout ==2.3.1 pytest-rerunfailures ==12.0 pytest-random-order ==1.1.1 -click ==8.1.7 +click ==8.1.8 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute From a991b76d0e6f77bfe1f3b3b509dd6e6cf033a406 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 09:19:26 +0200 Subject: [PATCH 057/112] build(deps): update setuptools requirement from <70.1.1 to <80.4.1 in /requirements (#20821) build(deps): update setuptools requirement in /requirements Updates the requirements on [setuptools](https://github.com/pypa/setuptools) to permit the latest version. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/0.6...v80.4.0) --- updated-dependencies: - dependency-name: setuptools dependency-version: 80.4.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 68f2adc8803a94facf4f30f366bc3a2e7d9c520c) --- requirements/ci.txt | 2 +- requirements/pytorch/docs.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/ci.txt b/requirements/ci.txt index 8149e3eb896c8..206501c3b21e5 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -1,4 +1,4 @@ -setuptools <70.1.1 +setuptools <80.4.1 wheel <0.46.0 awscli >=1.30.0, <1.41.0 twine ==6.1.0 diff --git a/requirements/pytorch/docs.txt b/requirements/pytorch/docs.txt index 77c1c5ac97c9c..1f4e0cb8031c4 100644 --- a/requirements/pytorch/docs.txt +++ b/requirements/pytorch/docs.txt @@ -2,6 +2,6 @@ nbformat # used for generate empty notebook ipython[notebook] <8.19.0 -setuptools<58.0 # workaround for `error in ipython setup command: use_2to3 is invalid.` +setuptools<81.0 # workaround for `error in ipython setup command: use_2to3 is invalid.` -r ../../_notebooks/.actions/requires.txt From 56da2662f1ca35c714c9d59dff46beec618bafe4 Mon Sep 17 00:00:00 2001 From: Kavyansh Tyagi <142140238+KAVYANSHTYAGI@users.noreply.github.com> Date: Mon, 12 May 2025 15:52:10 +0530 Subject: [PATCH 058/112] Fix: `TransformerEnginePrecision` conversion for layers with `bias=False` (#20805) * Update transformer_engine.py * Update test_transformer_engine.py --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 01ba7a1489498858617690ab921e84d5479c2eb1) --- .../plugins/precision/transformer_engine.py | 4 ++- .../precision/test_transformer_engine.py | 32 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/lightning/fabric/plugins/precision/transformer_engine.py b/src/lightning/fabric/plugins/precision/transformer_engine.py index c3ef84a453e73..bf1e51ea6b2b0 100644 --- a/src/lightning/fabric/plugins/precision/transformer_engine.py +++ b/src/lightning/fabric/plugins/precision/transformer_engine.py @@ -171,7 +171,9 
@@ def _convert_layers(module: torch.nn.Module) -> None: elif isinstance(child, torch.nn.LayerNorm): replacement = te.LayerNorm(child.normalized_shape[0], eps=child.eps) replacement.weight.data = child.weight.data.clone() - replacement.bias.data = child.bias.data.clone() + # Check if bias exists before attempting to clone its data + if child.bias is not None and replacement.bias is not None: + replacement.bias.data = child.bias.data.clone() log.debug(f"Replacing layer {name!r} with Transformer Engine equivalent") module.__setattr__(name, replacement) else: diff --git a/tests/tests_fabric/plugins/precision/test_transformer_engine.py b/tests/tests_fabric/plugins/precision/test_transformer_engine.py index 033484aca9c90..ed7c984b1ae64 100644 --- a/tests/tests_fabric/plugins/precision/test_transformer_engine.py +++ b/tests/tests_fabric/plugins/precision/test_transformer_engine.py @@ -115,3 +115,35 @@ class TELayerNormMock(Mock): ... assert isinstance(model.l1, TELinearMock) assert isinstance(model.l2, TELayerNormMock) assert isinstance(model.l3.l, TELinearMock) + + +def test_convert_module_handles_linear_without_bias(monkeypatch): + module = lightning.fabric.plugins.precision.transformer_engine # Set up mock transformer_engine + monkeypatch.setattr(module, "_TRANSFORMER_ENGINE_AVAILABLE", lambda: True) + + transformer_engine_mock = Mock() + monkeypatch.setitem(sys.modules, "transformer_engine", transformer_engine_mock) + monkeypatch.setitem(sys.modules, "transformer_engine.pytorch", transformer_engine_mock.pytorch) + monkeypatch.setitem(sys.modules, "transformer_engine.common.recipe", transformer_engine_mock.recipe) + + class TELinearMock(torch.nn.Linear): # Mock the Linear replacement class + def __init__(self, in_features, out_features, bias=True): + super().__init__(in_features, out_features, bias) + + transformer_engine_mock.pytorch.Linear = TELinearMock + transformer_engine_mock.pytorch.LayerNorm = torch.nn.LayerNorm + transformer_engine_mock.recipe.DelayedScaling.return_value = None + + class BiaslessModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.linear = torch.nn.Linear(16, 32, bias=False) # This was causing the bug + + model = BiaslessModel() + precision = TransformerEnginePrecision(weights_dtype=torch.float16) + precision.replace_layers = True + + precision.convert_module(model) # This should no longer raise AttributeError + + assert isinstance(model.linear, TELinearMock) + assert model.linear.bias is None From 431ee9de747f1bd8f54c96d610e203af33790426 Mon Sep 17 00:00:00 2001 From: Alexander Zhipa Date: Fri, 16 May 2025 16:24:37 +0200 Subject: [PATCH 059/112] fix: convert step to int when logging (#20830) fix: convert step to int when logging (#20692) Co-authored-by: Alexander Zhipa (cherry picked from commit 3d398240d2f62f2ad05e9eff557d2d5cb44f235c) --- src/lightning/pytorch/CHANGELOG.md | 24 ++++++++ .../logger_connector/logger_connector.py | 13 ++-- .../connectors/test_logger_connector.py | 61 +++++++++++++++++++ 3 files changed, 92 insertions(+), 6 deletions(-) create mode 100644 tests/tests_pytorch/trainer/connectors/test_logger_connector.py diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index a24ff385cb12c..4b71ca14466f2 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -5,6 +5,30 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
+## [unreleased] - YYYY-MM-DD + +### Added + +- + + +### Changed + +- + + +### Removed + +- + + +### Fixed + +- Fixed `logger_connector` has edge case where step can be a float ([#20692](https://github.com/Lightning-AI/pytorch-lightning/issues/20692)) + + +--- + ## [2.5.1] - 2025-03-18 ### Changed diff --git a/src/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py b/src/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py index ffc99a9772469..09addf5a5a58c 100644 --- a/src/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py +++ b/src/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py @@ -106,12 +106,13 @@ def log_metrics(self, metrics: _OUT_DICT, step: Optional[int] = None) -> None: scalar_metrics = convert_tensors_to_scalars(metrics) if step is None: - step = scalar_metrics.pop("step", None) - - if step is None: - # added metrics for convenience - scalar_metrics.setdefault("epoch", self.trainer.current_epoch) - step = self.trainer.fit_loop.epoch_loop._batches_that_stepped + step_metric = scalar_metrics.pop("step", None) + if step_metric is not None: + step = int(step_metric) + else: + # added metrics for convenience + scalar_metrics.setdefault("epoch", self.trainer.current_epoch) + step = self.trainer.fit_loop.epoch_loop._batches_that_stepped # log actual metrics for logger in self.trainer.loggers: diff --git a/tests/tests_pytorch/trainer/connectors/test_logger_connector.py b/tests/tests_pytorch/trainer/connectors/test_logger_connector.py new file mode 100644 index 0000000000000..7a89efd133235 --- /dev/null +++ b/tests/tests_pytorch/trainer/connectors/test_logger_connector.py @@ -0,0 +1,61 @@ +from unittest.mock import MagicMock, patch + +from lightning.pytorch import Trainer +from lightning.pytorch.loggers import Logger +from lightning.pytorch.trainer.connectors.logger_connector import _LoggerConnector + + +@patch("lightning.pytorch.trainer.connectors.logger_connector.logger_connector.convert_tensors_to_scalars") +def test_uses_provided_step(mock_convert): + """Test that the LoggerConnector uses explicitly provided step to log metrics.""" + + trainer = MagicMock(spec=Trainer) + trainer.loggers = [logger := MagicMock(spec=Logger)] + connector = _LoggerConnector(trainer) + mock_convert.return_value.pop.return_value = step = 42 + + connector.log_metrics((metrics := {"some_metric": 123}), step=step) + + assert connector._logged_metrics == metrics + mock_convert.assert_called_once_with(metrics) + logger.log_metrics.assert_called_once_with(metrics=mock_convert.return_value, step=step) + logger.save.assert_called_once_with() + + +@patch("lightning.pytorch.trainer.connectors.logger_connector.logger_connector.convert_tensors_to_scalars") +def test_uses_step_metric(mock_convert): + """Test that the LoggerConnector uses explicitly provided step metric to log metrics.""" + + trainer = MagicMock(spec=Trainer) + trainer.loggers = [logger := MagicMock(spec=Logger)] + connector = _LoggerConnector(trainer) + mock_convert.return_value.pop.return_value = step = 42.0 + + metrics = {"some_metric": 123} + connector.log_metrics(logged_metrics := {**metrics, "step": step}) + + assert connector._logged_metrics == logged_metrics + mock_convert.assert_called_once_with(logged_metrics) + logger.log_metrics.assert_called_once_with(metrics=mock_convert.return_value, step=int(step)) + logger.save.assert_called_once_with() + + 
+@patch("lightning.pytorch.trainer.connectors.logger_connector.logger_connector.convert_tensors_to_scalars") +def test_uses_batches_that_stepped(mock_convert): + """Test that the LoggerConnector uses implicitly provided batches_that_stepped to log metrics.""" + + trainer = MagicMock(spec=Trainer) + trainer.fit_loop = MagicMock() + trainer.loggers = [logger := MagicMock(spec=Logger)] + connector = _LoggerConnector(trainer) + mock_convert.return_value.pop.return_value = None + + connector.log_metrics(metrics := {"some_metric": 123}) + + assert connector._logged_metrics == metrics + mock_convert.assert_called_once_with(metrics) + logger.log_metrics.assert_called_once_with( + metrics=mock_convert.return_value, step=trainer.fit_loop.epoch_loop._batches_that_stepped + ) + logger.save.assert_called_once_with() + mock_convert.return_value.setdefault.assert_called_once_with("epoch", trainer.current_epoch) From 6d9c9f13e9378e29dee5bf6eb3f3d66f793c67e9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 09:30:28 +0200 Subject: [PATCH 060/112] build(deps): update myst-parser requirement from <3.0.0,>=0.18.1 to >=0.18.1,<4.0.0 in /requirements (#20835) build(deps): update myst-parser requirement in /requirements Updates the requirements on [myst-parser](https://github.com/executablebooks/MyST-Parser) to permit the latest version. - [Release notes](https://github.com/executablebooks/MyST-Parser/releases) - [Changelog](https://github.com/executablebooks/MyST-Parser/blob/master/CHANGELOG.md) - [Commits](https://github.com/executablebooks/MyST-Parser/compare/v0.18.1...v3.0.1) --- updated-dependencies: - dependency-name: myst-parser dependency-version: 3.0.1 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 673d9d0ffbdff4b8e93e8872a6b0a98092933771) --- requirements/docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index 85141a80dcc7d..07fec592921f4 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,5 +1,5 @@ sphinx >5.0, <6.0 -myst-parser >=0.18.1, <3.0.0 +myst-parser >=0.18.1, <4.0.0 nbsphinx >=0.8.5, <=0.9.7 nbconvert <7.14 # temporary fix for https://github.com/jupyter/nbconvert/issues/2092 pandoc >=1.0, <=2.3 From cac8cd487b58b891f28b124b6e5acc91072ab063 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 09:30:36 +0200 Subject: [PATCH 061/112] build(deps): bump sphinx-toolbox from 3.10.0 to 4.0.0 in /requirements (#20836) Bumps [sphinx-toolbox](https://github.com/sphinx-toolbox/sphinx-toolbox) from 3.10.0 to 4.0.0. - [Release notes](https://github.com/sphinx-toolbox/sphinx-toolbox/releases) - [Changelog](https://github.com/sphinx-toolbox/sphinx-toolbox/blob/master/doc-source/changelog.rst) - [Commits](https://github.com/sphinx-toolbox/sphinx-toolbox/compare/v3.10.0...v4.0.0) --- updated-dependencies: - dependency-name: sphinx-toolbox dependency-version: 4.0.0 dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 089b2f6cf0e1e88851afbc87ae4a1e41463d0ffe) --- requirements/docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index 07fec592921f4..1cd331da55dd4 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -12,7 +12,7 @@ sphinx-paramlinks >=0.5.1, <=0.6.0 sphinx-togglebutton >=0.2, <=0.3.2 sphinx-copybutton >=0.3, <=0.5.2 sphinx-multiproject -sphinx-toolbox ==3.10.0 +sphinx-toolbox ==4.0.0 sphinx-rtd-dark-mode sphinxcontrib-video ==0.4.1 jinja2 <3.2.0 From f277cb86760d91f81417eba7d28c7783b6b4bcb3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 09:30:44 +0200 Subject: [PATCH 062/112] build(deps): update setuptools requirement from <80.4.1 to <80.7.2 in /requirements (#20837) build(deps): update setuptools requirement in /requirements Updates the requirements on [setuptools](https://github.com/pypa/setuptools) to permit the latest version. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/0.6...v80.7.1) --- updated-dependencies: - dependency-name: setuptools dependency-version: 80.7.1 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 33693b391a94c2e222f8ce05046f3e44d098fe9d) --- requirements/ci.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/ci.txt b/requirements/ci.txt index 206501c3b21e5..df50862ebb111 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -1,4 +1,4 @@ -setuptools <80.4.1 +setuptools <80.7.2 wheel <0.46.0 awscli >=1.30.0, <1.41.0 twine ==6.1.0 From 29b36ae9fa070f0a6949e588e89dac2ea72ee467 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 09:30:52 +0200 Subject: [PATCH 063/112] build(deps): bump pytest-rerunfailures from 12.0 to 15.1 in /requirements (#20838) build(deps): bump pytest-rerunfailures in /requirements Bumps [pytest-rerunfailures](https://github.com/pytest-dev/pytest-rerunfailures) from 12.0 to 15.1. - [Changelog](https://github.com/pytest-dev/pytest-rerunfailures/blob/master/CHANGES.rst) - [Commits](https://github.com/pytest-dev/pytest-rerunfailures/compare/12.0...15.1) --- updated-dependencies: - dependency-name: pytest-rerunfailures dependency-version: '15.1' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit c3c69c554fd444c9a6ded64db6df7620def82071) --- requirements/fabric/test.txt | 2 +- requirements/pytorch/test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index 7ad26cef308b0..efb18148387f3 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -3,7 +3,7 @@ numpy >=1.17.2, <1.27.0 pytest ==8.3.5 pytest-cov ==6.1.1 pytest-timeout ==2.3.1 -pytest-rerunfailures ==12.0 +pytest-rerunfailures ==15.1 pytest-random-order ==1.1.1 click ==8.1.8 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index fe39a58733eeb..022097a641432 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -2,7 +2,7 @@ coverage ==7.8.0 pytest ==8.3.5 pytest-cov ==6.1.1 pytest-timeout ==2.3.1 -pytest-rerunfailures ==12.0 +pytest-rerunfailures ==15.1 pytest-random-order ==1.1.1 # needed in tests From a83845dd348cdbed38408623292ecda73f64a022 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 09:31:02 +0200 Subject: [PATCH 064/112] build(deps): update matplotlib requirement from <3.9.0,>3.1 to >3.1,<3.10.0 in /requirements (#20839) build(deps): update matplotlib requirement in /requirements Updates the requirements on [matplotlib](https://github.com/matplotlib/matplotlib) to permit the latest version. - [Release notes](https://github.com/matplotlib/matplotlib/releases) - [Commits](https://github.com/matplotlib/matplotlib/compare/v3.1.1...v3.9.4) --- updated-dependencies: - dependency-name: matplotlib dependency-version: 3.9.4 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 857b0eb1e8472645e5d4de615d0b9a6b900fbc93) --- requirements/pytorch/extra.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt index 21c38b1b5b121..edba87bdfb82d 100644 --- a/requirements/pytorch/extra.txt +++ b/requirements/pytorch/extra.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment # extended list of package dependencies to reach full functionality -matplotlib>3.1, <3.9.0 +matplotlib>3.1, <3.10.0 omegaconf >=2.2.3, <2.4.0 hydra-core >=1.2.0, <1.4.0 jsonargparse[signatures] >=4.28.0, <=4.40.0 From d4ebb0ccbe0d54b4164ad9e15163c9b524330853 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 09:47:30 +0200 Subject: [PATCH 065/112] build(deps): update nbconvert requirement from <7.14 to <7.17 in /requirements (#20834) * build(deps): update nbconvert requirement in /requirements Updates the requirements on [nbconvert](https://github.com/jupyter/nbconvert) to permit the latest version. - [Release notes](https://github.com/jupyter/nbconvert/releases) - [Changelog](https://github.com/jupyter/nbconvert/blob/main/CHANGELOG.md) - [Commits](https://github.com/jupyter/nbconvert/compare/4.0.0...v7.16.6) --- updated-dependencies: - dependency-name: nbconvert dependency-version: 7.16.6 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] * Apply suggestions from code review --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 760ae74bc33032d636b404b2ac2853c7431b9d47) --- requirements/docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index 1cd331da55dd4..6d1d609d55ba2 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,7 +1,7 @@ sphinx >5.0, <6.0 myst-parser >=0.18.1, <4.0.0 nbsphinx >=0.8.5, <=0.9.7 -nbconvert <7.14 # temporary fix for https://github.com/jupyter/nbconvert/issues/2092 +nbconvert >7.14, <7.17 pandoc >=1.0, <=2.3 docutils >=0.16, <0.22 sphinxcontrib-fulltoc >=1.0, <=1.2.0 From 73fb5b59ed4aef71668024610251d7d0dbee6e6e Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Mon, 19 May 2025 11:53:17 +0200 Subject: [PATCH 066/112] docker: extend building base docker images for litGPT (#20842) (cherry picked from commit bef53a86586077c70e8bbd178b341f892c50031d) --- .github/workflows/docker-build.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index fe0c9c0d560bd..586109e0fe123 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -94,6 +94,11 @@ jobs: strategy: fail-fast: false matrix: + # adding dome more images as Thunder mainly using python 3.10, + # and we need to support integrations as for example LitGPT + python_version: ["3.10"] + pytorch_version: ["2.3.1", "2.4.1", "2.5.1", "2.6.0", "2.7.0"] + cuda_version: ["12.4.1"] include: # These are the base images for PL release docker images. # Make sure the matrix here matches the one above. From 2cae6fd417d7b116549db9b795931f732948725b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 14:47:46 +0200 Subject: [PATCH 067/112] build(deps): update onnx requirement from <1.18.0,>=1.12.0 to >=1.12.0,<1.19.0 in /requirements (#20841) build(deps): update onnx requirement in /requirements Updates the requirements on [onnx](https://github.com/onnx/onnx) to permit the latest version. - [Release notes](https://github.com/onnx/onnx/releases) - [Changelog](https://github.com/onnx/onnx/blob/main/docs/Changelog-ml.md) - [Commits](https://github.com/onnx/onnx/compare/v1.12.0...v1.18.0) --- updated-dependencies: - dependency-name: onnx dependency-version: 1.18.0 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 97177875e26e9dbd39bff4dc677d5eb1658ee2a4) --- requirements/pytorch/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index 022097a641432..98f3216ba5527 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -9,7 +9,7 @@ pytest-random-order ==1.1.1 cloudpickle >=1.3, <3.2.0 scikit-learn >0.22.1, <1.7.0 numpy >=1.17.2, <1.27.0 -onnx >=1.12.0, <1.18.0 +onnx >=1.12.0, <1.19.0 onnxruntime >=1.12.0, <1.21.0 psutil <7.0.1 # for `DeviceStatsMonitor` pandas >1.0, <2.3.0 # needed in benchmarks From 23612cc583de4bfa16ed576801489dd36ee0174f Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Mon, 19 May 2025 14:48:11 +0200 Subject: [PATCH 068/112] ci: skip failing run on master (#20843) * disabled (cherry picked from commit 9d93d7734ea06cda10a870586f6e2d5d423b7c36) --- .github/workflows/_legacy-checkpoints.yml | 2 +- ...points.yml => ci-checkpoints.yml.disabled} | 0 .github/workflows/release-pkg.yml | 20 +++++++++---------- 3 files changed, 11 insertions(+), 11 deletions(-) rename .github/workflows/{ci-checkpoints.yml => ci-checkpoints.yml.disabled} (100%) diff --git a/.github/workflows/_legacy-checkpoints.yml b/.github/workflows/_legacy-checkpoints.yml index 9306170f549d3..f1f51818ba9b7 100644 --- a/.github/workflows/_legacy-checkpoints.yml +++ b/.github/workflows/_legacy-checkpoints.yml @@ -113,7 +113,7 @@ jobs: - run: pip install -r requirements/ci.txt - name: Upload checkpoints to S3 - if: secrets.AWS_REGION != '' + if: ${{ secrets[AWS_REGION] != '' }} working-directory: ${{ env.LEGACY_FOLDER }} env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY }} diff --git a/.github/workflows/ci-checkpoints.yml b/.github/workflows/ci-checkpoints.yml.disabled similarity index 100% rename from .github/workflows/ci-checkpoints.yml rename to .github/workflows/ci-checkpoints.yml.disabled diff --git a/.github/workflows/release-pkg.yml b/.github/workflows/release-pkg.yml index 9786c2f57b3c7..944e69476a358 100644 --- a/.github/workflows/release-pkg.yml +++ b/.github/workflows/release-pkg.yml @@ -13,7 +13,7 @@ on: - "requirements/ci.txt" - ".github/actions/pkg-check/*" - ".github/actions/pkg-publish/*" - - ".github/workflows/_legacy-checkpoints.yml.yml" + - ".github/workflows/_legacy-checkpoints.yml" - ".github/workflows/_build-packages.yml" - ".github/workflows/release-pypi.yml" @@ -179,12 +179,12 @@ jobs: with: pkg-folder: dist/${{ steps.folder.outputs.pkg }} pypi-token: ${{ secrets[format('PYPI_TOKEN_{0}', matrix.name)] }} - - legacy-checkpoints: - needs: [build-packages] - uses: ./.github/workflows/_legacy-checkpoints.yml - with: - push_to_s3: ${{ startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' }} - upload_local: ${{ startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' }} - create_pr: ${{ startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' }} - secrets: inherit +# FIXME: this is not working suddenly, Unrecognized named-value: 'secrets' +# legacy-checkpoints: +# needs: [build-packages] +# uses: ./.github/workflows/_legacy-checkpoints.yml +# with: +# push_to_s3: ${{ startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' }} +# upload_local: ${{ startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' }} +# 
create_pr: ${{ startsWith(github.event.ref, 'refs/tags') || github.event_name == 'release' }} +# secrets: inherit From 2178d0aa674f79ebda2746f3f8a71e18f8b8822e Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Mon, 19 May 2025 14:52:13 +0200 Subject: [PATCH 069/112] CLI: resolve jsonargparse deprecation warning (#20802) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 4b47e0fcdedae40d6e83d40cd4e636595b59528f) --- src/lightning/pytorch/cli.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/lightning/pytorch/cli.py b/src/lightning/pytorch/cli.py index 75a6347c95356..8e64b8aba1b73 100644 --- a/src/lightning/pytorch/cli.py +++ b/src/lightning/pytorch/cli.py @@ -45,11 +45,18 @@ Namespace, class_from_function, register_unresolvable_import_paths, - set_config_read_mode, ) register_unresolvable_import_paths(torch) # Required until fix https://github.com/pytorch/pytorch/issues/74483 - set_config_read_mode(fsspec_enabled=True) + + try: + from jsonargparse import set_parsing_settings + + set_parsing_settings(config_read_mode_fsspec_enabled=True) + except ImportError: + from jsonargparse import set_config_read_mode + + set_config_read_mode(fsspec_enabled=True) else: locals()["ArgumentParser"] = object locals()["Namespace"] = object From 766f7f8bc972a0fb486d0ff217928ab632688f4f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 15:10:18 +0200 Subject: [PATCH 070/112] build(deps): bump pytest-timeout from 2.3.1 to 2.4.0 in /requirements (#20840) Bumps [pytest-timeout](https://github.com/pytest-dev/pytest-timeout) from 2.3.1 to 2.4.0. - [Commits](https://github.com/pytest-dev/pytest-timeout/compare/2.3.1...2.4.0) --- updated-dependencies: - dependency-name: pytest-timeout dependency-version: 2.4.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 29f0b13379b6be1432b44b42a7b77dc0f231cf75) --- requirements/fabric/test.txt | 2 +- requirements/pytorch/test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index efb18148387f3..34571a9ee977e 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -2,7 +2,7 @@ coverage ==7.8.0 numpy >=1.17.2, <1.27.0 pytest ==8.3.5 pytest-cov ==6.1.1 -pytest-timeout ==2.3.1 +pytest-timeout ==2.4.0 pytest-rerunfailures ==15.1 pytest-random-order ==1.1.1 click ==8.1.8 diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index 98f3216ba5527..e27b8aa33b3ea 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -1,7 +1,7 @@ coverage ==7.8.0 pytest ==8.3.5 pytest-cov ==6.1.1 -pytest-timeout ==2.3.1 +pytest-timeout ==2.4.0 pytest-rerunfailures ==15.1 pytest-random-order ==1.1.1 From 55d48f0af517d4066d780205df5c0b73a7d98d93 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Mon, 19 May 2025 15:25:20 +0200 Subject: [PATCH 071/112] docker: update building base docker images for last CUDA & py3.10 (#20844) docker: extend building base docker images for last CUDA (cherry picked from commit 3b37c3e6ebd2ecd3b346fb20ecae7449d13bfdda) --- .github/workflows/docker-build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 586109e0fe123..c7f86a264116c 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -97,8 +97,8 @@ jobs: # adding dome more images as Thunder mainly using python 3.10, # and we need to support integrations as for example LitGPT python_version: ["3.10"] - pytorch_version: ["2.3.1", "2.4.1", "2.5.1", "2.6.0", "2.7.0"] - cuda_version: ["12.4.1"] + pytorch_version: ["2.6.0", "2.7.0"] + cuda_version: ["12.6.3"] include: # These are the base images for PL release docker images. # Make sure the matrix here matches the one above. 
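Patches 066 and 071 both reshape the `strategy.matrix` in `docker-build.yml`. Under standard GitHub Actions semantics the top-level axes expand as a cartesian product, and each `include:` row is then appended to (or, when its keys match an existing combination, merged into) the job list — so after patch 071 the extra Thunder/litGPT images reduce to py3.10 × torch {2.6.0, 2.7.0} × CUDA 12.6.3 on top of the release base images. A minimal sketch of that expansion, assuming plain cartesian-plus-append semantics and using illustrative stand-ins for the release `include:` rows:

```python
from itertools import product

axes = {  # top-level axes after patch 071
    "python_version": ["3.10"],
    "pytorch_version": ["2.6.0", "2.7.0"],
    "cuda_version": ["12.6.3"],
}
include = [  # illustrative stand-ins for the release base-image rows
    {"python_version": "3.12", "pytorch_version": "2.6.0", "cuda_version": "12.4.1"},
    {"python_version": "3.12", "pytorch_version": "2.7.0", "cuda_version": "12.6.3"},
]

# cartesian product of the axes, then the include rows appended
combos = [dict(zip(axes, values)) for values in product(*axes.values())]
combos.extend(include)
for c in combos:
    print(f"py{c['python_version']}-torch{c['pytorch_version']}-cuda{c['cuda_version']}")
```

Real `include:` matching rules are more nuanced than a plain append, but the sketch shows why trimming the extra axes from five PyTorch versions to two cuts the number of base-image builds per run.
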
From d567dcc331edd5643145ca45075181e5b4735d9e Mon Sep 17 00:00:00 2001 From: Deependu Date: Tue, 20 May 2025 13:08:50 +0530 Subject: [PATCH 072/112] Remove legacy version check for `lightning_utilities >= 0.10` (#20823) (cherry picked from commit dd2912a045f1a1a0c87528b41c9854fc7454d4b0) --- .gitignore | 2 +- src/lightning/fabric/utilities/imports.py | 2 -- src/lightning/fabric/utilities/rank_zero.py | 34 ++------------------- 3 files changed, 3 insertions(+), 35 deletions(-) diff --git a/.gitignore b/.gitignore index cf5face7db3d4..42c36e534d5ef 100644 --- a/.gitignore +++ b/.gitignore @@ -198,7 +198,7 @@ node_modules/ **/events.out.tfevents.* examples/**/*.png -# instalation artifacts +# installation artifacts requirements/base.txt # CI diff --git a/src/lightning/fabric/utilities/imports.py b/src/lightning/fabric/utilities/imports.py index 5a9ec1edc1ca8..a618371d7f2b4 100644 --- a/src/lightning/fabric/utilities/imports.py +++ b/src/lightning/fabric/utilities/imports.py @@ -37,5 +37,3 @@ _TORCH_LESS_EQUAL_2_6 = compare_version("torch", operator.le, "2.6.0") _PYTHON_GREATER_EQUAL_3_10_0 = (sys.version_info.major, sys.version_info.minor) >= (3, 10) - -_UTILITIES_GREATER_EQUAL_0_10 = compare_version("lightning_utilities", operator.ge, "0.10.0") diff --git a/src/lightning/fabric/utilities/rank_zero.py b/src/lightning/fabric/utilities/rank_zero.py index bd9b65e3b897d..d34e19430b107 100644 --- a/src/lightning/fabric/utilities/rank_zero.py +++ b/src/lightning/fabric/utilities/rank_zero.py @@ -15,8 +15,7 @@ import logging import os -from functools import wraps -from typing import Callable, Optional, TypeVar, overload +from typing import Optional import lightning_utilities.core.rank_zero as rank_zero_module @@ -29,9 +28,6 @@ rank_zero_info, rank_zero_warn, ) -from typing_extensions import ParamSpec - -from lightning.fabric.utilities.imports import _UTILITIES_GREATER_EQUAL_0_10 rank_zero_module.log = logging.getLogger(__name__) @@ -48,33 +44,7 @@ def _get_rank() -> Optional[int]: return None -if not _UTILITIES_GREATER_EQUAL_0_10: - T = TypeVar("T") - P = ParamSpec("P") - - @overload - def rank_zero_only(fn: Callable[P, T]) -> Callable[P, Optional[T]]: - """Rank zero only.""" - - @overload - def rank_zero_only(fn: Callable[P, T], default: T) -> Callable[P, T]: - """Rank zero only.""" - - def rank_zero_only(fn: Callable[P, T], default: Optional[T] = None) -> Callable[P, Optional[T]]: - @wraps(fn) - def wrapped_fn(*args: P.args, **kwargs: P.kwargs) -> Optional[T]: - rank = getattr(rank_zero_only, "rank", None) - if rank is None: - raise RuntimeError("The `rank_zero_only.rank` needs to be set before use") - if rank == 0: - return fn(*args, **kwargs) - return default - - return wrapped_fn - - rank_zero_module.rank_zero_only.rank = getattr(rank_zero_module.rank_zero_only, "rank", _get_rank() or 0) -else: - rank_zero_only = rank_zero_module.rank_zero_only +rank_zero_only = rank_zero_module.rank_zero_only # add the attribute to the function but don't overwrite in case Trainer has already set it rank_zero_only.rank = getattr(rank_zero_only, "rank", _get_rank() or 0) From 019e1e9319cc1ec4097a715433756755fac89d4b Mon Sep 17 00:00:00 2001 From: Tomas Zbk <85851175+tomaszbk@users.noreply.github.com> Date: Fri, 23 May 2025 11:47:52 -0300 Subject: [PATCH 073/112] add python 3.12 in setup.py (#20850) (cherry picked from commit e5b5f3c34502fadb3ba721cfa04800ce42ee36e5) --- src/lightning_fabric/__setup__.py | 1 + src/pytorch_lightning/__setup__.py | 1 + 2 files changed, 2 insertions(+) diff --git 
a/src/lightning_fabric/__setup__.py b/src/lightning_fabric/__setup__.py index 36dbae53ef171..a12b5b7597447 100644 --- a/src/lightning_fabric/__setup__.py +++ b/src/lightning_fabric/__setup__.py @@ -108,5 +108,6 @@ def _setup_args() -> dict[str, Any]: "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], } diff --git a/src/pytorch_lightning/__setup__.py b/src/pytorch_lightning/__setup__.py index 97250404230b6..212e8a0fdbebb 100644 --- a/src/pytorch_lightning/__setup__.py +++ b/src/pytorch_lightning/__setup__.py @@ -110,5 +110,6 @@ def _setup_args() -> dict[str, Any]: "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ], } From cb1d016cebe6006f186dced2b7d1a83f4fbe0811 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 27 May 2025 11:00:44 +0200 Subject: [PATCH 074/112] build(deps): update setuptools requirement from <80.7.2 to <80.8.1 in /requirements (#20857) build(deps): update setuptools requirement in /requirements Updates the requirements on [setuptools](https://github.com/pypa/setuptools) to permit the latest version. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/0.6...v80.8.0) --- updated-dependencies: - dependency-name: setuptools dependency-version: 80.8.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 6977650b1defc369e89cba02f6290ddbf7ee5dee) --- requirements/ci.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/ci.txt b/requirements/ci.txt index df50862ebb111..4dab6e5a0dac8 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -1,4 +1,4 @@ -setuptools <80.7.2 +setuptools <80.8.1 wheel <0.46.0 awscli >=1.30.0, <1.41.0 twine ==6.1.0 From df870a57828c51ce03b395558457804758860b26 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 27 May 2025 11:45:31 +0200 Subject: [PATCH 075/112] build(deps): update fsspec[http] requirement from <2025.4.0,>=2022.5.0 to >=2022.5.0,<2025.6.0 in /requirements (#20859) build(deps): update fsspec[http] requirement in /requirements Updates the requirements on [fsspec[http]](https://github.com/fsspec/filesystem_spec) to permit the latest version. - [Commits](https://github.com/fsspec/filesystem_spec/compare/2022.5.0...2025.5.1) --- updated-dependencies: - dependency-name: fsspec[http] dependency-version: 2025.5.1 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit e15cd6733a767cd8cb879aa29808e9fa83eaffcc) --- requirements/fabric/base.txt | 2 +- requirements/pytorch/base.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt index 2561b2324b772..335742103d078 100644 --- a/requirements/fabric/base.txt +++ b/requirements/fabric/base.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment torch >=2.1.0, <2.8.0 -fsspec[http] >=2022.5.0, <2025.4.0 +fsspec[http] >=2022.5.0, <2025.6.0 packaging >=20.0, <=25.0 typing-extensions >=4.4.0, <4.14.0 lightning-utilities >=0.10.0, <0.15.0 diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt index 00889581f6407..1b1f743a618b9 100644 --- a/requirements/pytorch/base.txt +++ b/requirements/pytorch/base.txt @@ -4,7 +4,7 @@ torch >=2.1.0, <2.8.0 tqdm >=4.57.0, <4.68.0 PyYAML >=5.4, <6.1.0 -fsspec[http] >=2022.5.0, <2025.4.0 +fsspec[http] >=2022.5.0, <2025.6.0 torchmetrics >=0.7.0, <1.8.0 packaging >=20.0, <=25.0 typing-extensions >=4.4.0, <4.14.0 From 41cd7ce15792d224b4d16e74eb41df17e8dd70e3 Mon Sep 17 00:00:00 2001 From: Siem de Jong <28396796+siemdejong@users.noreply.github.com> Date: Tue, 27 May 2025 14:20:18 +0200 Subject: [PATCH 076/112] Allow cross-device local checkpoints with `fsspec>=2025.5.0` (#20780) Cross-device transactions via fsspec (used for example in ModelCheckpoint) resulted in permission errors. The permission errors were caused by attempts to change file modes on different filesystem. This was fixed in fsspec 2025.3.3. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit a83fcef17bbf198d0411e4ff553e8e3bf0b4f97f) --- src/lightning/fabric/utilities/cloud_io.py | 15 +++++++--- src/lightning/pytorch/CHANGELOG.md | 3 +- .../pytorch/callbacks/model_checkpoint.py | 4 +++ .../connectors/test_checkpoint_connector.py | 28 +++++++++++++++++++ 4 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/lightning/fabric/utilities/cloud_io.py b/src/lightning/fabric/utilities/cloud_io.py index 9d0a33afd0b77..637dfcd9b1671 100644 --- a/src/lightning/fabric/utilities/cloud_io.py +++ b/src/lightning/fabric/utilities/cloud_io.py @@ -13,6 +13,7 @@ # limitations under the License. 
"""Utilities related to data saving/loading.""" +import errno import io import logging from pathlib import Path @@ -84,10 +85,16 @@ def _atomic_save(checkpoint: dict[str, Any], filepath: Union[str, Path]) -> None log.debug(f"Saving checkpoint: {filepath}") torch.save(checkpoint, bytesbuffer) - # We use a transaction here to avoid file corruption if the save gets interrupted - fs, urlpath = fsspec.core.url_to_fs(str(filepath)) - with fs.transaction, fs.open(urlpath, "wb") as f: - f.write(bytesbuffer.getvalue()) + try: + # We use a transaction here to avoid file corruption if the save gets interrupted + fs, urlpath = fsspec.core.url_to_fs(str(filepath)) + with fs.transaction, fs.open(urlpath, "wb") as f: + f.write(bytesbuffer.getvalue()) + except PermissionError as e: + if isinstance(e.__context__, OSError) and getattr(e.__context__, "errno", None) == errno.EXDEV: + raise RuntimeError( + 'Upgrade fsspec to enable cross-device local checkpoints: pip install "fsspec[http]>=2025.5.0"', + ) from e def _is_object_storage(fs: AbstractFileSystem) -> bool: diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index 4b71ca14466f2..e8775d3c800b2 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -11,12 +11,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - +- For cross-device local checkpoints, instruct users to install `fsspec>=2025.5.0` if unavailable ([#20780](https://github.com/Lightning-AI/pytorch-lightning/pull/20780)) + ### Changed - - ### Removed - diff --git a/src/lightning/pytorch/callbacks/model_checkpoint.py b/src/lightning/pytorch/callbacks/model_checkpoint.py index 85bfb65c0ea6e..6b7b2831a2e04 100644 --- a/src/lightning/pytorch/callbacks/model_checkpoint.py +++ b/src/lightning/pytorch/callbacks/model_checkpoint.py @@ -155,6 +155,10 @@ class ModelCheckpoint(Checkpoint): If the checkpoint's ``dirpath`` changed from what it was before while resuming the training, only ``best_model_path`` will be reloaded and a warning will be issued. + If you provide a ``filename`` on a mounted device where changing permissions is not allowed (causing ``chmod`` + to raise a ``PermissionError``), install `fsspec>=2025.5.0`. Then the error is caught, the file's permissions + remain unchanged, and the checkpoint is still saved. Otherwise, no checkpoint will be saved and training stops. + Raises: MisconfigurationException: If ``save_top_k`` is smaller than ``-1``, diff --git a/tests/tests_pytorch/trainer/connectors/test_checkpoint_connector.py b/tests/tests_pytorch/trainer/connectors/test_checkpoint_connector.py index 722742a3ccae0..662fd99d1b12c 100644 --- a/tests/tests_pytorch/trainer/connectors/test_checkpoint_connector.py +++ b/tests/tests_pytorch/trainer/connectors/test_checkpoint_connector.py @@ -11,10 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import errno import os +import re from unittest import mock from unittest.mock import ANY, Mock +import fsspec import pytest import torch @@ -105,6 +108,31 @@ def test_hpc_max_ckpt_version(tmp_path): ) +def test_local_cross_device_checkpoint(tmpdir): + """Test that the _CheckpointConnector can write local cross-device files or raises an error if fsspec<2025.5.0.""" + model = BoringModel() + # hardcoding dir since `tmp_path` can be windows path + trainer = Trainer( + default_root_dir="memory://test_ckpt_for_fsspec", limit_train_batches=1, limit_val_batches=1, max_epochs=1 + ) + trainer.fit(model) + # Simulate the behavior of fsspec when writing to a local file system but other device. + with ( + mock.patch("os.rename", side_effect=OSError(errno.EXDEV, "Invalid cross-device link")), + mock.patch("os.chmod", side_effect=PermissionError("Operation not permitted")), + ): + if fsspec.__version__ < "2025.5.0": + with pytest.raises( + RuntimeError, + match=re.escape( + 'Upgrade fsspec to enable cross-device local checkpoints: pip install "fsspec[http]>=2025.5.0"' + ), + ): + trainer.save_checkpoint(tmpdir + "/test_ckpt_for_fsspec/hpc_ckpt.ckpt") + else: + trainer.save_checkpoint(tmpdir + "/test_ckpt_for_fsspec/hpc_ckpt.ckpt") + + def test_ckpt_for_fsspec(): """Test that the _CheckpointConnector is able to write to fsspec file systems.""" model = BoringModel() From dafa2b48d6ed2b920b2068731158ed7979629f3b Mon Sep 17 00:00:00 2001 From: omahs <73983677+omahs@users.noreply.github.com> Date: Tue, 27 May 2025 15:35:42 +0200 Subject: [PATCH 077/112] docs: fix typos (#20847) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 470e82c26fa2fe6fda029c1d235ac4447abe1ee1) --- docs/source-fabric/advanced/model_parallel/tp_fsdp.rst | 2 +- docs/source-pytorch/advanced/compile.rst | 4 ++-- docs/source-pytorch/advanced/model_parallel/tp_fsdp.rst | 2 +- docs/source-pytorch/advanced/post_training_quantization.rst | 4 ++-- docs/source-pytorch/advanced/pruning_quantization.rst | 4 ++-- docs/source-pytorch/advanced/training_tricks.rst | 2 +- src/lightning/fabric/connector.py | 2 +- src/lightning/fabric/plugins/precision/bitsandbytes.py | 2 +- .../pytorch/trainer/connectors/accelerator_connector.py | 2 +- tests/tests_fabric/utilities/test_data.py | 5 +++-- 10 files changed, 15 insertions(+), 14 deletions(-) diff --git a/docs/source-fabric/advanced/model_parallel/tp_fsdp.rst b/docs/source-fabric/advanced/model_parallel/tp_fsdp.rst index f87645e0c11c6..454ebdacbb9d9 100644 --- a/docs/source-fabric/advanced/model_parallel/tp_fsdp.rst +++ b/docs/source-fabric/advanced/model_parallel/tp_fsdp.rst @@ -276,7 +276,7 @@ Next steps .. displayitem:: :header: Pipeline Parallelism - :description: Coming sooon + :description: Coming soon :col_css: col-md-4 :height: 160 :tag: advanced diff --git a/docs/source-pytorch/advanced/compile.rst b/docs/source-pytorch/advanced/compile.rst index 16fe91ca282df..90a5a1f508189 100644 --- a/docs/source-pytorch/advanced/compile.rst +++ b/docs/source-pytorch/advanced/compile.rst @@ -262,7 +262,7 @@ Avoid graph breaks When ``torch.compile`` looks at the code in your model's ``forward()`` or ``*_step()`` method, it will try to compile as much of the code as possible. If there are regions in the code that it doesn't understand, it will introduce a so-called "graph break" that essentially splits the code in optimized and unoptimized parts. Graph breaks aren't a deal breaker, since the optimized parts should still run faster. 
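The compile.rst passage above explains graph breaks, and the doc line that follows in this hunk recommends `fullgraph=True` for surfacing them. A self-contained toy illustration; the function is hypothetical and not taken from the docs:

```python
import torch


def f(x):
    print("side effect")  # builtin print() forces a graph break under dynamo
    return x * 2


compiled = torch.compile(f, fullgraph=True)  # error out instead of splitting the graph
try:
    compiled(torch.randn(3))
except Exception as err:  # torch._dynamo raises on the first break
    print(f"graph break surfaced as {type(err).__name__}")
```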
-But if you want to get the most out of ``torch.compile``, you might want to invest rewriting the problematic section of the code that produce the breaks. +But if you want to get the most out of ``torch.compile``, you might want to invest rewriting the problematic section of the code that produces the breaks. You can check whether your model produces graph breaks by calling ``torch.compile`` with ``fullgraph=True``: @@ -332,7 +332,7 @@ Enabling CUDA Graphs often results in a significant speedup, but sometimes also **Shape padding:** The specific shape/size of the tensors involved in the computation of your model (input, activations, weights, gradients, etc.) can have an impact on the performance. With shape padding enabled, ``torch.compile`` can extend the tensors by padding to a size that gives a better memory alignment. -Naturally, the tradoff here is that it will consume a bit more memory. +Naturally, the tradeoff here is that it will consume a bit more memory. .. code-block:: python diff --git a/docs/source-pytorch/advanced/model_parallel/tp_fsdp.rst b/docs/source-pytorch/advanced/model_parallel/tp_fsdp.rst index dae23bd4ee0c0..550a0a0fb26ae 100644 --- a/docs/source-pytorch/advanced/model_parallel/tp_fsdp.rst +++ b/docs/source-pytorch/advanced/model_parallel/tp_fsdp.rst @@ -282,7 +282,7 @@ Next steps .. displayitem:: :header: Pipeline Parallelism - :description: Coming sooon + :description: Coming soon :col_css: col-md-4 :height: 160 :tag: advanced diff --git a/docs/source-pytorch/advanced/post_training_quantization.rst b/docs/source-pytorch/advanced/post_training_quantization.rst index f925c6ccd47b4..60755593f015e 100644 --- a/docs/source-pytorch/advanced/post_training_quantization.rst +++ b/docs/source-pytorch/advanced/post_training_quantization.rst @@ -106,7 +106,7 @@ The "approach" parameter in PostTrainingQuantConfig is defined by the user to ma Quantize the model ================== -The model can be qutized by Intel® Neural Compressor with: +The model can be quantized by Intel® Neural Compressor with: .. code-block:: python @@ -126,7 +126,7 @@ At last, the quantized model can be saved by: Hands-on Examples ***************** -Based on the `given example code `_, we show how Intel Neural Compressor conduct model quantization on PyTorch Lightning. We first define the basic config of the quantization process. +Based on the `given example code `_, we show how Intel Neural Compressor conducts model quantization on PyTorch Lightning. We first define the basic config of the quantization process. .. code-block:: python diff --git a/docs/source-pytorch/advanced/pruning_quantization.rst b/docs/source-pytorch/advanced/pruning_quantization.rst index f8b099652a381..5c703de20fe3e 100644 --- a/docs/source-pytorch/advanced/pruning_quantization.rst +++ b/docs/source-pytorch/advanced/pruning_quantization.rst @@ -32,7 +32,7 @@ You can also perform iterative pruning, apply the `lottery ticket hypothesis ` for advanced use-cases. +Read more about :ref:`Configuring Gradient Clipping ` for advanced use cases. ---------- diff --git a/src/lightning/fabric/connector.py b/src/lightning/fabric/connector.py index 85d30a07ce207..0e0e86ee7c63e 100644 --- a/src/lightning/fabric/connector.py +++ b/src/lightning/fabric/connector.py @@ -239,7 +239,7 @@ def _check_config_and_set_final_flags( else: raise TypeError( f"Found invalid type for plugin {plugin}. Expected one of: Precision, " - "CheckpointIO, ClusterEnviroment." + "CheckpointIO, ClusterEnvironment." 
) duplicated_plugin_key = [k for k, v in plugins_flags_types.items() if v > 1] diff --git a/src/lightning/fabric/plugins/precision/bitsandbytes.py b/src/lightning/fabric/plugins/precision/bitsandbytes.py index b78157d1c4074..646df2028672e 100644 --- a/src/lightning/fabric/plugins/precision/bitsandbytes.py +++ b/src/lightning/fabric/plugins/precision/bitsandbytes.py @@ -403,7 +403,7 @@ class _NF4DQLinear(_Linear4bit): def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, quant_type="nf4", compress_statistics=True, **kwargs) - # these classes are defined programatically like this to avoid importing bitsandbytes in environments that have + # these classes are defined programmatically like this to avoid importing bitsandbytes in environments that have # it available but will not use it classes = { "_Linear8bitLt": _Linear8bitLt, diff --git a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py index 40ee0eef4de33..603aedfc94589 100644 --- a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py +++ b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py @@ -248,7 +248,7 @@ def _check_config_and_set_final_flags( else: raise MisconfigurationException( f"Found invalid type for plugin {plugin}. Expected one of: Precision, " - "CheckpointIO, ClusterEnviroment, or LayerSync." + "CheckpointIO, ClusterEnvironment, or LayerSync." ) duplicated_plugin_key = [k for k, v in plugins_flags_types.items() if v > 1] diff --git a/tests/tests_fabric/utilities/test_data.py b/tests/tests_fabric/utilities/test_data.py index faff6e182a06f..91b0a4e47b8b0 100644 --- a/tests/tests_fabric/utilities/test_data.py +++ b/tests/tests_fabric/utilities/test_data.py @@ -53,8 +53,9 @@ def test_has_len(): def test_replace_dunder_methods_multiple_loaders_without_init(): """In case of a class, that inherits from a class that we are patching, but doesn't define its own `__init__` method (the one we are wrapping), it can happen, that `hasattr(cls, "__old__init__")` is True because of parent - class, but it is impossible to delete, because that method is owned by parent class. Furthermore, the error occured - only sometimes because it depends on the order in which we are iterating over a set of classes we are patching. + class, but it is impossible to delete, because that method is owned by parent class. Furthermore, the error + occurred only sometimes because it depends on the order in which we are iterating over a set of classes we are + patching. This test simulates the behavior by generating sufficient number of dummy classes, which do not define `__init__` and are children of `DataLoader`. 
We are testing that a) context manager `_replace_dunder_method` exits cleanly, and From 0cb95a5df23ff74ad545b87094f6efc4cca30a1f Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Tue, 27 May 2025 15:55:56 +0200 Subject: [PATCH 078/112] drop deprecated dependabot reviewers (#20860) (cherry picked from commit 7b8ff1de2c5f75f0e76f33e2eaae2c57c89a0bb4) --- .github/dependabot.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 421314b211deb..3b2ab99d015c1 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -19,8 +19,6 @@ updates: separator: "-" # Allow up to 5 open pull requests for pip dependencies open-pull-requests-limit: 10 - reviewers: - - "Lightning-AI/teams/core-lightning" # Enable version updates for GitHub Actions - package-ecosystem: "github-actions" @@ -37,5 +35,3 @@ updates: separator: "-" # Allow up to 5 open pull requests for GitHub Actions open-pull-requests-limit: 10 - reviewers: - - "Lightning-AI/teams/core-lightning" From 4ba96a27e7be9e4d883970b99deed41896e1e46e Mon Sep 17 00:00:00 2001 From: Kavyansh Tyagi <142140238+KAVYANSHTYAGI@users.noreply.github.com> Date: Wed, 28 May 2025 14:42:33 +0530 Subject: [PATCH 079/112] Fix: Synchronize SIGTERM Handling in DDP to Prevent Deadlocks (#20825) * Update signal_connector.py * Update training_epoch_loop.py * Create test_ddp_sigterm_handling.py * update + chlog * Apply suggestions from code review --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jirka B Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 989b759325c50c20f2ea63952d73844d69c06dff) --- src/lightning/pytorch/CHANGELOG.md | 3 + .../pytorch/loops/training_epoch_loop.py | 24 ++++++ .../trainer/connectors/callback_connector.py | 5 +- .../trainer/connectors/signal_connector.py | 11 ++- .../trainer/test_ddp_sigterm_handling.py | 80 +++++++++++++++++++ 5 files changed, 119 insertions(+), 4 deletions(-) create mode 100644 tests/tests_pytorch/trainer/test_ddp_sigterm_handling.py diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index e8775d3c800b2..b1fabb97f6d19 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -28,6 +28,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed `logger_connector` has edge case where step can be a float ([#20692](https://github.com/Lightning-AI/pytorch-lightning/issues/20692)) +- Fix: Synchronize SIGTERM Handling in DDP to Prevent Deadlocks ([#20825](https://github.com/Lightning-AI/pytorch-lightning/pull/20825)) + + --- ## [2.5.1] - 2025-03-18 diff --git a/src/lightning/pytorch/loops/training_epoch_loop.py b/src/lightning/pytorch/loops/training_epoch_loop.py index 7cdf7888bbfe2..599eccdc8ca91 100644 --- a/src/lightning/pytorch/loops/training_epoch_loop.py +++ b/src/lightning/pytorch/loops/training_epoch_loop.py @@ -11,11 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
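The `training_epoch_loop.py` hunk that follows adds `_broadcast_sigterm_tensor`, and the `signal_connector.py` hunk further down mirrors it on the sending side. Stripped of Trainer plumbing, the core idea is a one-element flag tensor broadcast from rank 0 so that every rank reaches the same abort decision; a hedged sketch, assuming an already-initialized process group and treating `device` as a placeholder:

```python
import torch
import torch.distributed as dist


def sigterm_seen_everywhere(received_sigterm: bool, device: torch.device) -> bool:
    # Only rank 0 is guaranteed to receive the launcher's SIGTERM, so its value wins.
    flag = torch.tensor([1 if received_sigterm else 0], device=device)
    dist.broadcast(flag, src=0)
    return bool(flag.item())  # identical on all ranks after the broadcast
```

Because all ranks decide together at the same point in `advance`, no rank is left blocking in a collective while another has already raised `SIGTERMException`, which is the deadlock the patch title refers to.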
+import contextlib import math from collections import OrderedDict from dataclasses import dataclass from typing import Any, Optional, Union +import torch from typing_extensions import override import lightning.pytorch as pl @@ -249,6 +251,21 @@ def _on_before_fetch(self) -> None: def _on_after_fetch(self) -> None: self.trainer.profiler.stop(f"[{self.__class__.__name__}].train_dataloader_next") + def _broadcast_sigterm_tensor(self) -> None: + try: + sigterm_tensor = torch.tensor( + [1 if getattr(self.trainer, "received_sigterm", False) else 0], + device=self.trainer.strategy.root_device, + ) + torch.distributed.broadcast(sigterm_tensor, src=0) + except Exception: + sigterm_tensor = torch.tensor([0], device=self.trainer.strategy.root_device) + + if sigterm_tensor.item() == 1: + with contextlib.suppress(Exception): + torch.distributed.barrier() # prevent deadlocks by syncing all ranks before exit + raise SIGTERMException() + def advance(self, data_fetcher: _DataFetcher) -> None: """Runs a single training batch. @@ -272,6 +289,13 @@ def advance(self, data_fetcher: _DataFetcher) -> None: # we are going to train first so the val loop does not need to restart self.val_loop.restarting = False + # ===================================================================== + + if torch.distributed.is_available() and torch.distributed.is_initialized() and self.trainer.world_size > 1: + self._broadcast_sigterm_tensor() + + # ===================================================================== + if using_dataloader_iter := isinstance(data_fetcher, _DataLoaderIterDataFetcher): dataloader_iter = next(data_fetcher) # hook's batch_idx and dataloader_idx arguments correctness cannot be guaranteed in this setting diff --git a/src/lightning/pytorch/trainer/connectors/callback_connector.py b/src/lightning/pytorch/trainer/connectors/callback_connector.py index 8d67081db8638..5c351aeebc564 100644 --- a/src/lightning/pytorch/trainer/connectors/callback_connector.py +++ b/src/lightning/pytorch/trainer/connectors/callback_connector.py @@ -106,8 +106,9 @@ def _configure_checkpoint_callbacks(self, enable_checkpointing: bool) -> None: model_checkpoint = LitModelCheckpoint(model_registry=self.trainer._model_registry) else: rank_zero_info( - "Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable" - " `LitModelCheckpoint` for automatic upload to the Lightning model registry." + "💡 Tip: For seamless cloud uploads and versioning," + " try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint," + " which syncs automatically with the Lightning model registry." 
) model_checkpoint = ModelCheckpoint() self.trainer.callbacks.append(model_checkpoint) diff --git a/src/lightning/pytorch/trainer/connectors/signal_connector.py b/src/lightning/pytorch/trainer/connectors/signal_connector.py index e63fecd3897f2..ece7e902c5f5f 100644 --- a/src/lightning/pytorch/trainer/connectors/signal_connector.py +++ b/src/lightning/pytorch/trainer/connectors/signal_connector.py @@ -7,6 +7,9 @@ from types import FrameType from typing import Any, Callable, Union +import torch +import torch.distributed as dist + import lightning.pytorch as pl from lightning.fabric.plugins.environments import SLURMEnvironment from lightning.fabric.utilities.imports import _IS_WINDOWS @@ -104,12 +107,16 @@ def _slurm_sigusr_handler_fn(self, signum: _SIGNUM, _: FrameType) -> None: def _sigterm_notifier_fn(self, signum: _SIGNUM, _: FrameType) -> None: log.info(rank_prefixed_message(f"Received SIGTERM: {signum}", self.trainer.local_rank)) - # subprocesses killing the parent process is not supported, only the parent (rank 0) does it if not self.received_sigterm: - # send the same signal to the subprocesses launcher = self.trainer.strategy.launcher if launcher is not None: launcher.kill(signum) + + # New broadcast logic + if dist.is_available() and dist.is_initialized() and self.trainer.world_size > 1: + sigterm_tensor = torch.tensor([1], device=self.trainer.strategy.root_device) + dist.broadcast(sigterm_tensor, src=0) + self.received_sigterm = True def _sigterm_handler_fn(self, signum: _SIGNUM, _: FrameType) -> None: diff --git a/tests/tests_pytorch/trainer/test_ddp_sigterm_handling.py b/tests/tests_pytorch/trainer/test_ddp_sigterm_handling.py new file mode 100644 index 0000000000000..0e4e5210db60c --- /dev/null +++ b/tests/tests_pytorch/trainer/test_ddp_sigterm_handling.py @@ -0,0 +1,80 @@ +import os +import signal +import time + +import pytest +import torch +import torch.multiprocessing as mp + +from lightning.pytorch import LightningModule, Trainer, seed_everything +from lightning.pytorch.demos.boring_classes import BoringDataModule +from lightning.pytorch.strategies.ddp import DDPStrategy +from lightning.pytorch.utilities.exceptions import SIGTERMException + +# Skip the test if DDP or multiple devices are not available + +pytestmark = pytest.mark.skipif( + not torch.distributed.is_available() or torch.cuda.device_count() < 2, + reason="Requires torch.distributed and at least 2 CUDA devices", +) + + +class DummyModel(LightningModule): + def training_step(self, batch, batch_idx): + # Simulate SIGTERM in rank 0 at batch 2 + if self.trainer.global_rank == 0 and batch_idx == 2: + time.sleep(3) # Let other ranks proceed to the next batch + os.kill(os.getpid(), signal.SIGTERM) + return super().training_step(batch, batch_idx) + + +def run_ddp_sigterm(rank, world_size, tmpdir): + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + os.environ["RANK"] = str(rank) + os.environ["WORLD_SIZE"] = str(world_size) + + seed_everything(42) + + torch.cuda.set_device(rank) if torch.cuda.is_available() else None + + model = DummyModel() + datamodule = BoringDataModule() + + trainer = Trainer( + accelerator="cuda" if torch.cuda.is_available() else "cpu", + strategy=DDPStrategy(find_unused_parameters=False), + devices=world_size, + num_nodes=1, + max_epochs=3, + default_root_dir=tmpdir, + enable_checkpointing=False, + enable_progress_bar=False, + enable_model_summary=False, + logger=False, + ) + + try: + trainer.fit(model, datamodule=datamodule) + except SIGTERMException: + # Test passed: 
SIGTERM was properly raised and caught + print(f"[Rank {rank}] Caught SIGTERMException successfully.") + except Exception as e: + pytest.fail(f"[Rank {rank}] Unexpected exception: {e}") + + +def test_ddp_sigterm_handling(tmp_path): + world_size = 2 + mp.spawn(run_ddp_sigterm, args=(world_size, tmp_path), nprocs=world_size, join=True) + + +@pytest.mark.skipif( + not torch.distributed.is_available(), + reason="Requires torch.distributed", +) +@pytest.mark.skipif( + torch.cuda.is_available() and torch.cuda.device_count() < 2, + reason="Requires >=2 CUDA devices or use CPU", +) +def test_sigterm_handling_ddp(tmp_path): + test_ddp_sigterm_handling(tmp_path) From f1edfd832115cdebf12ed6547de0ba311b3a3d32 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 28 May 2025 13:39:22 +0200 Subject: [PATCH 080/112] build(deps): update setuptools requirement from <80.8.1 to <80.9.1 in /requirements (#20861) build(deps): update setuptools requirement in /requirements Updates the requirements on [setuptools](https://github.com/pypa/setuptools) to permit the latest version. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/0.6...v80.9.0) --- updated-dependencies: - dependency-name: setuptools dependency-version: 80.9.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 39854146083ec165fc5e4ab61dc8a1f14150b0c2) --- requirements/ci.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/ci.txt b/requirements/ci.txt index 4dab6e5a0dac8..6b879f4f3fbb1 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -1,4 +1,4 @@ -setuptools <80.8.1 +setuptools <80.9.1 wheel <0.46.0 awscli >=1.30.0, <1.41.0 twine ==6.1.0 From e5ceb7ba514f6c4f3d991c288f06adf3dcf233d6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 29 May 2025 14:02:08 +0200 Subject: [PATCH 081/112] build(deps): bump coverage from 7.8.0 to 7.8.2 in /requirements (#20858) * build(deps): bump coverage from 7.8.0 to 7.8.2 in /requirements Bumps [coverage](https://github.com/nedbat/coveragepy) from 7.8.0 to 7.8.2. - [Release notes](https://github.com/nedbat/coveragepy/releases) - [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst) - [Commits](https://github.com/nedbat/coveragepy/compare/7.8.0...7.8.2) --- updated-dependencies: - dependency-name: coverage dependency-version: 7.8.2 dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] * test_torchscript_input_output --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka B Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit fafc2395884877f94cbc2bf83a83e3fc0f1d5c2d) --- requirements/fabric/test.txt | 2 +- requirements/pytorch/test.txt | 2 +- tests/tests_pytorch/models/test_torchscript.py | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index 34571a9ee977e..9035164ea1462 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -1,4 +1,4 @@ -coverage ==7.8.0 +coverage ==7.8.2 numpy >=1.17.2, <1.27.0 pytest ==8.3.5 pytest-cov ==6.1.1 diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index e27b8aa33b3ea..fd4237ef74e66 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -1,4 +1,4 @@ -coverage ==7.8.0 +coverage ==7.8.2 pytest ==8.3.5 pytest-cov ==6.1.1 pytest-timeout ==2.4.0 diff --git a/tests/tests_pytorch/models/test_torchscript.py b/tests/tests_pytorch/models/test_torchscript.py index 8f9151265d21a..10a19974971eb 100644 --- a/tests/tests_pytorch/models/test_torchscript.py +++ b/tests/tests_pytorch/models/test_torchscript.py @@ -44,7 +44,9 @@ def test_torchscript_input_output(modelclass): model_output = model(model.example_input_array) script_output = script(model.example_input_array) - assert torch.allclose(script_output, model_output) + assert torch.allclose(script_output, model_output, rtol=1e-5, atol=1e-8), ( + f"Scripted output {script_output} does not match model output {model_output}." + ) @pytest.mark.skipif(_IS_WINDOWS and _TORCH_GREATER_EQUAL_2_4, reason="not close on Windows + PyTorch 2.4") From 1c1c11098866ec4225064a82c6d1576ba259e38d Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Thu, 5 Jun 2025 18:50:13 +0200 Subject: [PATCH 082/112] bump: PyTorch to be latest `2.7.1` (#20877) (cherry picked from commit 821611b5b3b17422ae0c025dad905ff9d76d6b52) --- .github/workflows/docker-build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index c7f86a264116c..ef75788679990 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -97,7 +97,7 @@ jobs: # adding dome more images as Thunder mainly using python 3.10, # and we need to support integrations as for example LitGPT python_version: ["3.10"] - pytorch_version: ["2.6.0", "2.7.0"] + pytorch_version: ["2.6.0", "2.7.1"] cuda_version: ["12.6.3"] include: # These are the base images for PL release docker images. 
@@ -108,7 +108,7 @@ jobs: - { python_version: "3.11", pytorch_version: "2.4.1", cuda_version: "12.1.1" } - { python_version: "3.12", pytorch_version: "2.5.1", cuda_version: "12.1.1" } - { python_version: "3.12", pytorch_version: "2.6.0", cuda_version: "12.4.1" } - - { python_version: "3.12", pytorch_version: "2.7.0", cuda_version: "12.6.3" } + - { python_version: "3.12", pytorch_version: "2.7.1", cuda_version: "12.6.3" } steps: - uses: actions/checkout@v4 - uses: docker/setup-buildx-action@v3 From 7c1d74eaf60185b77736c0d7776a0a432608107b Mon Sep 17 00:00:00 2001 From: Arman Naseri Date: Thu, 5 Jun 2025 19:05:38 +0200 Subject: [PATCH 083/112] Ensure correct device is used for autocast when mps is selected as Fabric accelerator (#20876) * Make sure MPS is used when chosen as accelerator in Fabric * Added mps tests to connector and Fabric --------- Co-authored-by: Haga Device Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 14b6c3e24f22aa85dceae48527ab86fdee310973) --- src/lightning/fabric/connector.py | 2 +- tests/tests_fabric/test_connector.py | 7 +++++++ tests/tests_fabric/test_fabric.py | 17 +++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/lightning/fabric/connector.py b/src/lightning/fabric/connector.py index 0e0e86ee7c63e..55b4af2728e6f 100644 --- a/src/lightning/fabric/connector.py +++ b/src/lightning/fabric/connector.py @@ -492,7 +492,7 @@ def _check_and_init_precision(self) -> Precision: if self._precision_input == "16-mixed" else "Using bfloat16 Automatic Mixed Precision (AMP)" ) - device = "cpu" if self._accelerator_flag == "cpu" else "cuda" + device = self._accelerator_flag if self._accelerator_flag in ("cpu", "mps") else "cuda" return MixedPrecision(precision=self._precision_input, device=device) # type: ignore[arg-type] raise RuntimeError("No precision set") diff --git a/tests/tests_fabric/test_connector.py b/tests/tests_fabric/test_connector.py index 9bb9fa1d7d145..c6bef5943a30f 100644 --- a/tests/tests_fabric/test_connector.py +++ b/tests/tests_fabric/test_connector.py @@ -405,6 +405,13 @@ def test_unsupported_strategy_types_on_cpu_and_fallback(): assert isinstance(connector.strategy, DDPStrategy) +@RunIf(mps=True) +@pytest.mark.parametrize("precision", ["16-mixed", "bf16-mixed"]) +def test_mps_enabled_with_float16_or_bfloat16_precision(precision): + connector = _Connector(accelerator="mps", precision=precision) + assert connector.precision.device == "mps" + + def test_invalid_accelerator_choice(): with pytest.raises(ValueError, match="You selected an invalid accelerator name: `accelerator='cocofruit'`"): _Connector(accelerator="cocofruit") diff --git a/tests/tests_fabric/test_fabric.py b/tests/tests_fabric/test_fabric.py index ee002b5d8061c..dc0203dc067e3 100644 --- a/tests/tests_fabric/test_fabric.py +++ b/tests/tests_fabric/test_fabric.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
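The one-line `connector.py` change above is easy to miss: previously any non-CPU accelerator fell back to `device="cuda"` for the autocast context, so `16-mixed` on Apple silicon produced CUDA-related warnings. The selection logic in isolation; this sketch assumes an Apple-silicon machine with a torch build that supports MPS autocast, and uses placeholder tensors:

```python
import torch

accelerator = "mps"  # what the user passed to Fabric(accelerator=...)
device_type = accelerator if accelerator in ("cpu", "mps") else "cuda"

with torch.autocast(device_type=device_type, dtype=torch.float16):
    a = torch.randn(2, 2, device=accelerator)
    b = torch.randn(2, 2, device=accelerator)
    out = a @ b  # runs in float16 on the MPS backend, no CUDA involved
```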
import os +import warnings from contextlib import nullcontext from re import escape from unittest import mock @@ -735,6 +736,22 @@ def test_autocast(): fabric._precision.forward_context().__exit__.assert_called() +@RunIf(mps=True) +@pytest.mark.parametrize("precision", ["16-mixed", "bf16-mixed"]) +def test_autocast_does_not_use_cuda_on_mps(precision): + """Ensure Fabric.autocast on MPS does not fall back to CUDA when using (bf)16-mixed precision.""" + fabric = Fabric(accelerator="mps", precision=precision) + fabric.launch() + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + with fabric.autocast(): + pass + + for warning in w: + assert "device_type of 'cuda'" not in str(warning.message) + + def test_no_backward_sync(): """Test that `Fabric.no_backward_sync()` validates the strategy and model is compatible.""" fabric = Fabric(devices=1) From 368c1a440d7f123688f1e6993749c3ac0789da6f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 Jun 2025 19:05:55 +0200 Subject: [PATCH 084/112] build(deps): bump torch from 2.7.0 to 2.7.1 in /requirements (#20878) Bumps [torch](https://github.com/pytorch/pytorch) from 2.7.0 to 2.7.1. - [Release notes](https://github.com/pytorch/pytorch/releases) - [Changelog](https://github.com/pytorch/pytorch/blob/main/RELEASE.md) - [Commits](https://github.com/pytorch/pytorch/compare/v2.7.0...v2.7.1) --- updated-dependencies: - dependency-name: torch dependency-version: 2.7.1 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> (cherry picked from commit 980ec509cb4dc9aa51fe6965e879b0c9800c0a0a) --- requirements/typing.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/typing.txt b/requirements/typing.txt index becbd1e76c11d..e050157f8dbac 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,5 +1,5 @@ mypy==1.15.0 -torch==2.7.0 +torch==2.7.1 types-Markdown types-PyYAML From 083d62a6f876985048a8966f2a05cce3184e7308 Mon Sep 17 00:00:00 2001 From: GdoongMathew Date: Fri, 6 Jun 2025 17:24:16 +0800 Subject: [PATCH 085/112] fix: move `check_inputs` to target device if available during `to_torchscript`. 
(#20873) (cherry picked from commit ab7b1181ac0c489826a2383e5855edd9584bd134) --- src/lightning/pytorch/core/module.py | 4 ++++ .../tests_pytorch/models/test_torchscript.py | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/lightning/pytorch/core/module.py b/src/lightning/pytorch/core/module.py index b8624daac3fa3..8b2387fcea481 100644 --- a/src/lightning/pytorch/core/module.py +++ b/src/lightning/pytorch/core/module.py @@ -1472,6 +1472,10 @@ def forward(self, x): ) example_inputs = self.example_input_array + if kwargs.get("check_inputs") is not None: + kwargs["check_inputs"] = self._on_before_batch_transfer(kwargs["check_inputs"]) + kwargs["check_inputs"] = self._apply_batch_transfer_handler(kwargs["check_inputs"]) + # automatically send example inputs to the right device and use trace example_inputs = self._on_before_batch_transfer(example_inputs) example_inputs = self._apply_batch_transfer_handler(example_inputs) diff --git a/tests/tests_pytorch/models/test_torchscript.py b/tests/tests_pytorch/models/test_torchscript.py index 10a19974971eb..29f251044c0b5 100644 --- a/tests/tests_pytorch/models/test_torchscript.py +++ b/tests/tests_pytorch/models/test_torchscript.py @@ -105,6 +105,26 @@ def test_torchscript_device(device_str): assert script_output.device == device +@pytest.mark.parametrize( + "device_str", + [ + "cpu", + pytest.param("cuda:0", marks=RunIf(min_cuda_gpus=1)), + pytest.param("mps:0", marks=RunIf(mps=True)), + ], +) +def test_torchscript_device_with_check_inputs(device_str): + """Test that scripted module is on the correct device.""" + device = torch.device(device_str) + model = BoringModel().to(device) + model.example_input_array = torch.randn(5, 32) + + check_inputs = torch.rand(5, 32) + + script = model.to_torchscript(method="trace", check_inputs=check_inputs) + assert isinstance(script, torch.jit.ScriptModule) + + def test_torchscript_retain_training_state(): """Test that torchscript export does not alter the training mode of original model.""" model = BoringModel() From 967d8b79549f34a5402f51a8863c84c5f48c1da3 Mon Sep 17 00:00:00 2001 From: Kavyansh Tyagi <142140238+KAVYANSHTYAGI@users.noreply.github.com> Date: Tue, 10 Jun 2025 13:40:14 +0530 Subject: [PATCH 086/112] Use lazy string formatting in logging statement in setup.py (#20886) (cherry picked from commit 3b2a73fcd5d8b0e46546e7cefcbadf3e93cee83b) --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 92f0265eafb9f..b432033a0d8c4 100755 --- a/setup.py +++ b/setup.py @@ -110,7 +110,8 @@ def _set_manifest_path(manifest_dir: str, aggregate: bool = False, mapping: Mapp assert os.path.exists(manifest_path) # avoid error: setup script specifies an absolute path manifest_path = os.path.relpath(manifest_path, _PATH_ROOT) - logging.info("Set manifest path to", manifest_path) + # Use lazy logging formatting + logging.info("Set manifest path to %s", manifest_path) setuptools.command.egg_info.manifest_maker.template = manifest_path yield # cleanup From 12aeefbcd893126502ffdc9d2c88fab858d67687 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 10 Jun 2025 12:02:44 +0200 Subject: [PATCH 087/112] build(deps): bump pytest from 8.3.5 to 8.4.0 in /requirements (#20887) * build(deps): bump pytest from 8.3.5 to 8.4.0 in /requirements Bumps [pytest](https://github.com/pytest-dev/pytest) from 8.3.5 to 8.4.0. 
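Returning to the `to_torchscript` fix above (#20873): `check_inputs` now passes through the same batch-transfer hooks as `example_input_array`, so tracing a model that lives on an accelerator no longer trips over CPU-resident check inputs. A usage sketch distilled from the new test, with `BoringModel` standing in for any LightningModule:

```python
import torch
from lightning.pytorch.demos.boring_classes import BoringModel

model = BoringModel().to("cpu")  # swap in "cuda:0" or "mps:0" where available
model.example_input_array = torch.randn(5, 32)

# check_inputs is created on the CPU; the hook chain moves it to model.device.
script = model.to_torchscript(method="trace", check_inputs=torch.rand(5, 32))
assert isinstance(script, torch.jit.ScriptModule)
```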
- [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/8.3.5...8.4.0) --- updated-dependencies: - dependency-name: pytest dependency-version: 8.4.0 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * assert raw_checkpoint_path --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka B (cherry picked from commit 7433dc2e3c59d5bcdb7136c7d331674426626519) --- requirements/doctests.txt | 2 +- requirements/fabric/test.txt | 2 +- requirements/pytorch/test.txt | 2 +- tests/tests_pytorch/models/test_hparams.py | 3 +-- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/requirements/doctests.txt b/requirements/doctests.txt index 96794a9afb460..919ee2d3a2672 100644 --- a/requirements/doctests.txt +++ b/requirements/doctests.txt @@ -1,2 +1,2 @@ -pytest ==8.3.5 +pytest ==8.4.0 pytest-doctestplus ==1.4.0 diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index 9035164ea1462..4d57af004884d 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -1,6 +1,6 @@ coverage ==7.8.2 numpy >=1.17.2, <1.27.0 -pytest ==8.3.5 +pytest ==8.4.0 pytest-cov ==6.1.1 pytest-timeout ==2.4.0 pytest-rerunfailures ==15.1 diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index fd4237ef74e66..2abb8cadd6772 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -1,5 +1,5 @@ coverage ==7.8.2 -pytest ==8.3.5 +pytest ==8.4.0 pytest-cov ==6.1.1 pytest-timeout ==2.4.0 pytest-rerunfailures ==15.1 diff --git a/tests/tests_pytorch/models/test_hparams.py b/tests/tests_pytorch/models/test_hparams.py index 3c7838f11a85a..f14d62b6befb4 100644 --- a/tests/tests_pytorch/models/test_hparams.py +++ b/tests/tests_pytorch/models/test_hparams.py @@ -250,8 +250,7 @@ def __init__(self, test_arg, test_arg2): model = LocalModel.load_from_checkpoint(raw_checkpoint_path, test_arg2=123) assert model.hparams.test_arg == 14 assert "test_arg2" not in model.hparams # test_arg2 is not registered in class init - - return raw_checkpoint_path + assert raw_checkpoint_path # ------------------------- From 0668c8729fbff099b99c982f93bd0e8db4e29701 Mon Sep 17 00:00:00 2001 From: Pooja <59486401+bandpooja@users.noreply.github.com> Date: Tue, 10 Jun 2025 07:12:50 -0400 Subject: [PATCH 088/112] Fix progress bar display to correctly handle iterable dataset and max_steps during training (#20869) * changes to show correct progress bar numbers when using max_steps --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 1776963f4bfef2310203566d1062fdb3eb143b21) --- .../callbacks/progress/progress_bar.py | 3 + .../tests_pytorch/loops/test_training_loop.py | 71 +++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/src/lightning/pytorch/callbacks/progress/progress_bar.py b/src/lightning/pytorch/callbacks/progress/progress_bar.py index 7cf6993b4414b..4c965038cb294 100644 --- a/src/lightning/pytorch/callbacks/progress/progress_bar.py +++ b/src/lightning/pytorch/callbacks/progress/progress_bar.py @@ -85,6 +85,9 @@ def total_train_batches(self) -> Union[int, float]: dataloader is of infinite size. 
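The docstring context above stops where the interesting lines begin: the hunk continues below by clamping `total_train_batches` to the steps actually remaining. The arithmetic in isolation, with placeholder values:

```python
max_steps, global_step = 20, 13       # trainer state when the bar is (re)built
num_training_batches = float("inf")   # iterable dataset without __len__
remaining_steps = max_steps - global_step
total = min(num_training_batches, remaining_steps)
assert total == 7                     # the bar shows 7 instead of an unknown total
```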
""" + if self.trainer.max_epochs == -1 and self.trainer.max_steps is not None and self.trainer.max_steps > 0: + remaining_steps = self.trainer.max_steps - self.trainer.global_step + return min(self.trainer.num_training_batches, remaining_steps) return self.trainer.num_training_batches @property diff --git a/tests/tests_pytorch/loops/test_training_loop.py b/tests/tests_pytorch/loops/test_training_loop.py index 29afd1ba1a250..e3a4c37f6a284 100644 --- a/tests/tests_pytorch/loops/test_training_loop.py +++ b/tests/tests_pytorch/loops/test_training_loop.py @@ -11,11 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import itertools import logging from unittest.mock import Mock import pytest import torch +from torch.utils.data import DataLoader from lightning.pytorch import Trainer, seed_everything from lightning.pytorch.demos.boring_classes import BoringModel @@ -206,3 +208,72 @@ def test_should_stop_early_stopping_conditions_met( assert (message in caplog.text) is raise_debug_msg assert trainer.fit_loop._can_stop_early is early_stop + + +@pytest.mark.parametrize("max_steps", [7, 20]) +def test_tqdm_total_steps_with_iterator_no_length(tmp_path, max_steps): + """Test trainer with infinite iterator (no __len__)""" + + batch_size = 4 + model = BoringModel() + + # Infinite generator (no __len__) + # NOTE: 32 for BoringModel + infinite_iter = (torch.randn(batch_size, 32, dtype=torch.float32) for _ in itertools.count(0)) + + trainer = Trainer( + default_root_dir=tmp_path, + max_steps=max_steps, + max_epochs=-1, + limit_val_batches=0, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + ) + + # Override train_dataloader with infinite iterator + model.train_dataloader = lambda: infinite_iter + pbar = trainer.progress_bar_callback + trainer.fit(model) + + # assert progress bar callback uses correct total steps + assert pbar.train_progress_bar.total == max_steps + + +@pytest.mark.parametrize("max_steps", [10, 15]) +def test_progress_bar_steps(tmp_path, max_steps): + batch_size = 4 + + model = BoringModel() + # Create dataloader here, outside the model + # NOTE: 32 for boring model + x = torch.randn(100, 32) + + class SingleTensorDataset(torch.utils.data.IterableDataset): + def __init__(self, data): + super().__init__() + self.data = data + + def __iter__(self): + yield from self.data # yield just a tensor, not a tuple + + dataset = SingleTensorDataset(x) + dataloader = DataLoader(dataset, batch_size=batch_size) + + # Patch model's train_dataloader method to return this dataloader + model.train_dataloader = lambda: dataloader + + trainer = Trainer( + default_root_dir=tmp_path, + max_steps=max_steps, + max_epochs=-1, + limit_val_batches=0, + enable_progress_bar=True, + enable_model_summary=False, + accelerator="cpu", + ) + pbar = trainer.progress_bar_callback + trainer.fit(model) + + # assert progress bar callback uses correct total steps + assert pbar.train_progress_bar.total == max_steps From f0024a2669037ac7e8055b9522c128a2bfa1800c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 10 Jun 2025 13:14:28 +0200 Subject: [PATCH 089/112] build(deps): update pandas requirement from <2.3.0,>1.0 to >1.0,<2.4.0 in /requirements (#20888) * build(deps): update pandas requirement in /requirements Updates the requirements on [pandas](https://github.com/pandas-dev/pandas) to permit the latest 
version. - [Release notes](https://github.com/pandas-dev/pandas/releases) - [Commits](https://github.com/pandas-dev/pandas/compare/v1.0.1...v2.3.0) --- updated-dependencies: - dependency-name: pandas dependency-version: 2.3.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] * Apply suggestions from code review --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 9ddb4180d82595f91ab7e2255f42938d5ca27687) --- requirements/pytorch/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt index 2abb8cadd6772..43aaf999a25ce 100644 --- a/requirements/pytorch/test.txt +++ b/requirements/pytorch/test.txt @@ -12,7 +12,7 @@ numpy >=1.17.2, <1.27.0 onnx >=1.12.0, <1.19.0 onnxruntime >=1.12.0, <1.21.0 psutil <7.0.1 # for `DeviceStatsMonitor` -pandas >1.0, <2.3.0 # needed in benchmarks +pandas >2.0, <2.4.0 # needed in benchmarks fastapi # for `ServableModuleValidator` # not setting version as re-defined in App uvicorn # for `ServableModuleValidator` # not setting version as re-defined in App From b5caa2d4ff8f1d0096c2ca3dcf38be6da0793cdd Mon Sep 17 00:00:00 2001 From: Rustam Zhumagambetov Date: Tue, 10 Jun 2025 18:41:37 +0200 Subject: [PATCH 090/112] refactor: add `toggled_optimizer` context manager (#20771) * Apply suggestions from code review --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit d195d2b8bdcfdf7ec5c8195636e5b28702098b1a) --- docs/source-pytorch/conf.py | 1 + .../model/manual_optimization.rst | 2 +- src/lightning/pytorch/CHANGELOG.md | 4 +++ src/lightning/pytorch/core/module.py | 26 +++++++++++++++++++ .../core/test_lightning_module.py | 16 ++++++++++++ 5 files changed, 48 insertions(+), 1 deletion(-) diff --git a/docs/source-pytorch/conf.py b/docs/source-pytorch/conf.py index 90400b1df491d..62cd21fc127f4 100644 --- a/docs/source-pytorch/conf.py +++ b/docs/source-pytorch/conf.py @@ -487,6 +487,7 @@ def _load_py_module(name: str, location: str) -> ModuleType: ("py:meth", "setup"), ("py:meth", "test_step"), ("py:meth", "toggle_optimizer"), + ("py:meth", "toggled_optimizer"), ("py:class", "torch.ScriptModule"), ("py:class", "torch.distributed.fsdp.fully_sharded_data_parallel.CPUOffload"), ("py:class", "torch.distributed.fsdp.fully_sharded_data_parallel.MixedPrecision"), diff --git a/docs/source-pytorch/model/manual_optimization.rst b/docs/source-pytorch/model/manual_optimization.rst index 150f04793eae6..4c7400c0457ca 100644 --- a/docs/source-pytorch/model/manual_optimization.rst +++ b/docs/source-pytorch/model/manual_optimization.rst @@ -17,7 +17,7 @@ To manually optimize, do the following: * ``optimizer.zero_grad()`` to clear the gradients from the previous training step * ``self.manual_backward(loss)`` instead of ``loss.backward()`` * ``optimizer.step()`` to update your model parameters - * ``self.toggle_optimizer()`` and ``self.untoggle_optimizer()`` if needed + * ``self.toggle_optimizer()`` and ``self.untoggle_optimizer()``, or ``self.toggled_optimizer()`` if needed Here is a minimal example of manual optimization. 
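The manual-optimization example that this sentence introduces lies outside the hunk; what follows is a minimal sketch of how the new context manager slots into such an example, mirroring the docstring added to `core/module.py` in the next diff. `ManualOptModel` is illustrative:

```python
import torch
from lightning.pytorch import LightningModule


class ManualOptModel(LightningModule):
    def __init__(self):
        super().__init__()
        self.automatic_optimization = False
        self.layer = torch.nn.Linear(32, 2)

    def training_step(self, batch, batch_idx):
        opt = self.optimizers()
        # toggle_optimizer + untoggle_optimizer in one exception-safe block
        with self.toggled_optimizer(opt):
            loss = self.layer(batch).sum()
            opt.zero_grad()
            self.manual_backward(loss)
            opt.step()

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)
```

Relative to calling the toggle pair by hand, the `try`/`finally` inside the context manager guarantees the requires-grad state is restored even if the backward raises.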
diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index b1fabb97f6d19..ed8435d194960 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -11,6 +11,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - + +- Add `toggled_optimizer(optimizer)` method to the LightningModule, which is a context manager version of `toggle_optimize` and `untoggle_optimizer` ([#20771](https://github.com/Lightning-AI/pytorch-lightning/pull/20771)) + + - For cross-device local checkpoints, instruct users to install `fsspec>=2025.5.0` if unavailable ([#20780](https://github.com/Lightning-AI/pytorch-lightning/pull/20780)) diff --git a/src/lightning/pytorch/core/module.py b/src/lightning/pytorch/core/module.py index 8b2387fcea481..c484a95c6c632 100644 --- a/src/lightning/pytorch/core/module.py +++ b/src/lightning/pytorch/core/module.py @@ -1141,6 +1141,32 @@ def untoggle_optimizer(self, optimizer: Union[Optimizer, LightningOptimizer]) -> # save memory self._param_requires_grad_state = {} + @contextmanager + def toggled_optimizer(self, optimizer: Union[Optimizer, LightningOptimizer]) -> Generator: + """Makes sure only the gradients of the current optimizer's parameters are calculated in the training step to + prevent dangling gradients in multiple-optimizer setup. Combines :meth:`toggle_optimizer` and + :meth:`untoggle_optimizer` into context manager. + + Args: + optimizer: The optimizer to toggle. + + Example:: + + def training_step(...): + opt = self.optimizers() + with self.toggled_optimizer(opt): + loss = ... + opt.zero_grad() + self.manual_backward(loss) + opt.step() + + """ + self.toggle_optimizer(optimizer) + try: + yield + finally: + self.untoggle_optimizer(optimizer) + def clip_gradients( self, optimizer: Optimizer, diff --git a/tests/tests_pytorch/core/test_lightning_module.py b/tests/tests_pytorch/core/test_lightning_module.py index 2036014762ebf..c33488a4f2626 100644 --- a/tests/tests_pytorch/core/test_lightning_module.py +++ b/tests/tests_pytorch/core/test_lightning_module.py @@ -119,6 +119,22 @@ def test_1_optimizer_toggle_model(): assert not model._param_requires_grad_state +def test_optimizer_toggle_model_context_manager(): + """Test toggle_model runs when only one optimizer is used.""" + model = BoringModel() + trainer = Mock() + model.trainer = trainer + params = model.parameters() + optimizer = torch.optim.SGD(params, lr=0.1) + trainer.optimizers = [optimizer] + + assert not model._param_requires_grad_state + # toggle optimizer was failing with a single optimizer + with model.toggled_optimizer(optimizer): + assert model._param_requires_grad_state + assert not model._param_requires_grad_state + + def test_toggle_untoggle_2_optimizers_no_shared_parameters(tmp_path): class TestModel(BoringModel): def __init__(self): From 71dd1130002a60ce1c6619266446f86997e2ae09 Mon Sep 17 00:00:00 2001 From: littlebullGit Date: Wed, 11 Jun 2025 02:56:20 -0400 Subject: [PATCH 091/112] refactor: use __all__ in accelerators/__init__.py (#20889) (cherry picked from commit cce06eceb2d6b3bee7d4e9a5b2faec263cdb42ef) --- .../pytorch/accelerators/__init__.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/lightning/pytorch/accelerators/__init__.py b/src/lightning/pytorch/accelerators/__init__.py index 4cadee51f64c7..d7c2197aa5ed4 100644 --- a/src/lightning/pytorch/accelerators/__init__.py +++ b/src/lightning/pytorch/accelerators/__init__.py @@ -10,16 +10,26 @@ # WITHOUT WARRANTIES 
OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +__all__ = [ + "Accelerator", + "CPUAccelerator", + "CUDAAccelerator", + "MPSAccelerator", + "XLAAccelerator", + "find_usable_cuda_devices", +] + import sys -from lightning.fabric.accelerators import find_usable_cuda_devices # noqa: F401 +from lightning.fabric.accelerators import find_usable_cuda_devices from lightning.fabric.accelerators.registry import _AcceleratorRegistry from lightning.fabric.utilities.registry import _register_classes from lightning.pytorch.accelerators.accelerator import Accelerator -from lightning.pytorch.accelerators.cpu import CPUAccelerator # noqa: F401 -from lightning.pytorch.accelerators.cuda import CUDAAccelerator # noqa: F401 -from lightning.pytorch.accelerators.mps import MPSAccelerator # noqa: F401 -from lightning.pytorch.accelerators.xla import XLAAccelerator # noqa: F401 +from lightning.pytorch.accelerators.cpu import CPUAccelerator +from lightning.pytorch.accelerators.cuda import CUDAAccelerator +from lightning.pytorch.accelerators.mps import MPSAccelerator +from lightning.pytorch.accelerators.xla import XLAAccelerator AcceleratorRegistry = _AcceleratorRegistry() _register_classes(AcceleratorRegistry, "register_accelerators", sys.modules[__name__], Accelerator) From 5974232199017e45243d0688e5b2114dbcd44530 Mon Sep 17 00:00:00 2001 From: Mauricio Villegas <5780272+mauvilsa@users.noreply.github.com> Date: Wed, 11 Jun 2025 08:58:35 +0200 Subject: [PATCH 092/112] LightningCLI instantiator receives values applied by instantiation links to set in hparams (#20777) * Instantiator receives values applied by instantiation links to set in hparams (#20311). * Add cleandir to test_lightning_cli_link_arguments * fix install... 
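For orientation before the diffs below: the bug being fixed is that `save_hyperparameters` recorded incorrect values for parameters filled in by `link_arguments(..., apply_on="instantiate")`, since those values only exist once the source module has been built. The linking pattern in question, adapted from the updated test further down; class names are illustrative:

```python
from lightning.pytorch.cli import LightningCLI


class MyLightningCLI(LightningCLI):
    def add_arguments_to_parser(self, parser):
        # parse-time link: the value is known before anything is instantiated
        parser.link_arguments("data.batch_size", "model.batch_size")
        # instantiation-time link: num_classes exists only on the built datamodule,
        # yet the model's save_hyperparameters() must still record it
        parser.link_arguments("data.num_classes", "model.num_classes", apply_on="instantiate")
```

The new `applied_instantiation_links` argument threaded into `_InstantiatorFn` below is how those late-bound values are written back into `hparams` before `save_hyperparameters` captures them.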
--------- Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> Co-authored-by: Jirka B (cherry picked from commit d03238814125934ddae695b17d34d2bdf3aa9753) --- .azure/gpu-tests-pytorch.yml | 1 - dockers/base-cuda/Dockerfile | 2 +- requirements/pytorch/extra.txt | 2 +- src/lightning/pytorch/CHANGELOG.md | 7 ++- src/lightning/pytorch/cli.py | 40 +++++++++++++-- tests/tests_pytorch/test_cli.py | 79 ++++++++++++++++++++++++++++-- 6 files changed, 118 insertions(+), 13 deletions(-) diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 803460c770c13..eb76cd49e3f94 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -117,7 +117,6 @@ jobs: set -e extra=$(python -c "print({'lightning': 'pytorch-'}.get('$(PACKAGE_NAME)', ''))") pip install -e ".[${extra}dev]" pytest-timeout -U --extra-index-url="${TORCH_URL}" - pip install setuptools==75.6.0 jsonargparse==4.35.0 displayName: "Install package & dependencies" - bash: pip uninstall -y lightning diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 0da0cf9b2de9f..bf493ad47e51a 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -34,7 +34,7 @@ ENV \ MAKEFLAGS="-j2" RUN \ - apt-get update && apt-get install -y wget && \ + apt-get update --fix-missing && apt-get install -y wget && \ apt-get update -qq --fix-missing && \ NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \ CUDA_VERSION_MM=${CUDA_VERSION%.*} && \ diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt index edba87bdfb82d..5694bcfe9b595 100644 --- a/requirements/pytorch/extra.txt +++ b/requirements/pytorch/extra.txt @@ -5,7 +5,7 @@ matplotlib>3.1, <3.10.0 omegaconf >=2.2.3, <2.4.0 hydra-core >=1.2.0, <1.4.0 -jsonargparse[signatures] >=4.28.0, <=4.40.0 +jsonargparse[signatures] >=4.39.0, <4.40.0 rich >=12.3.0, <14.1.0 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute bitsandbytes >=0.45.2,<0.45.3; platform_system != "Darwin" diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index ed8435d194960..c388d46f315fe 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -9,7 +9,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Added -- +- Add enable_autolog_hparams argument to Trainer ([#20593](https://github.com/Lightning-AI/pytorch-lightning/pull/20593)) - Add `toggled_optimizer(optimizer)` method to the LightningModule, which is a context manager version of `toggle_optimize` and `untoggle_optimizer` ([#20771](https://github.com/Lightning-AI/pytorch-lightning/pull/20771)) @@ -29,7 +29,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Fixed -- Fixed `logger_connector` has edge case where step can be a float ([#20692](https://github.com/Lightning-AI/pytorch-lightning/issues/20692)) +- Fixed `save_hyperparameters` not working correctly with `LightningCLI` when there are parsing links applied on instantiation ([#20777](https://github.com/Lightning-AI/pytorch-lightning/pull/20777)) + + +- Fixed `logger_connector` edge case where step can be a float ([#20692](https://github.com/Lightning-AI/pytorch-lightning/pull/20692)) - Fix: Synchronize SIGTERM Handling in DDP to Prevent Deadlocks ([#20825](https://github.com/Lightning-AI/pytorch-lightning/pull/20825)) diff --git a/src/lightning/pytorch/cli.py index 8e64b8aba1b73..2a6226c42cde1 100644 --- a/src/lightning/pytorch/cli.py +++ b/src/lightning/pytorch/cli.py @@ -327,6 +327,7 @@ def __init__( args: ArgsType = None, run: bool = True, auto_configure_optimizers: bool = True, + load_from_checkpoint_support: bool = True, ) -> None: """Receives as input pytorch-lightning classes (or callables which return pytorch-lightning classes), which are called / instantiated using a parsed configuration file and / or command line args. @@ -367,6 +368,11 @@ def __init__( ``dict`` or ``jsonargparse.Namespace``. run: Whether subcommands should be added to run a :class:`~lightning.pytorch.trainer.trainer.Trainer` method. If set to ``False``, the trainer and model classes will be instantiated only. + auto_configure_optimizers: Whether to automatically add default optimizer and lr_scheduler arguments. + load_from_checkpoint_support: Whether ``save_hyperparameters`` should save the original parsed + hyperparameters (instead of what ``__init__`` receives), such that it is possible for + ``load_from_checkpoint`` to correctly instantiate classes even when using complex nesting and + dependency injection.
""" self.save_config_callback = save_config_callback @@ -396,7 +402,8 @@ def __init__( self._set_seed() - self._add_instantiators() + if load_from_checkpoint_support: + self._add_instantiators() self.before_instantiate_classes() self.instantiate_classes() self.after_instantiate_classes() @@ -544,11 +551,14 @@ def parse_arguments(self, parser: LightningArgumentParser, args: ArgsType) -> No else: self.config = parser.parse_args(args) - def _add_instantiators(self) -> None: + def _dump_config(self) -> None: + if hasattr(self, "config_dump"): + return self.config_dump = yaml.safe_load(self.parser.dump(self.config, skip_link_targets=False, skip_none=False)) if "subcommand" in self.config: self.config_dump = self.config_dump[self.config.subcommand] + def _add_instantiators(self) -> None: self.parser.add_instantiator( _InstantiatorFn(cli=self, key="model"), _get_module_type(self._model_class), @@ -799,12 +809,27 @@ def _get_module_type(value: Union[Callable, type]) -> type: return value +def _set_dict_nested(data: dict, key: str, value: Any) -> None: + keys = key.split(".") + for k in keys[:-1]: + assert k in data, f"Expected key {key} to be in data" + data = data[k] + data[keys[-1]] = value + + class _InstantiatorFn: def __init__(self, cli: LightningCLI, key: str) -> None: self.cli = cli self.key = key - def __call__(self, class_type: type[ModuleType], *args: Any, **kwargs: Any) -> ModuleType: + def __call__( + self, + class_type: type[ModuleType], + *args: Any, + applied_instantiation_links: dict, + **kwargs: Any, + ) -> ModuleType: + self.cli._dump_config() hparams = self.cli.config_dump.get(self.key, {}) if "class_path" in hparams: # To make hparams backwards compatible, and so that it is the same irrespective of subclass_mode, the @@ -815,6 +840,15 @@ def __call__(self, class_type: type[ModuleType], *args: Any, **kwargs: Any) -> M **hparams.get("init_args", {}), **hparams.get("dict_kwargs", {}), } + # get instantiation link target values from kwargs + for key, value in applied_instantiation_links.items(): + if not key.startswith(f"{self.key}."): + continue + key = key[len(f"{self.key}.") :] + if key.startswith("init_args."): + key = key[len("init_args.") :] + _set_dict_nested(hparams, key, value) + with _given_hyperparameters_context( hparams=hparams, instantiator="lightning.pytorch.cli.instantiate_module", diff --git a/tests/tests_pytorch/test_cli.py b/tests/tests_pytorch/test_cli.py index 7658894b37414..9cf81aa5a739c 100644 --- a/tests/tests_pytorch/test_cli.py +++ b/tests/tests_pytorch/test_cli.py @@ -550,6 +550,7 @@ def __init__(self, activation: torch.nn.Module = None, transform: Optional[list[ class BoringModelRequiredClasses(BoringModel): def __init__(self, num_classes: int, batch_size: int = 8): super().__init__() + self.save_hyperparameters() self.num_classes = num_classes self.batch_size = batch_size @@ -561,13 +562,13 @@ def __init__(self, batch_size: int = 8): self.num_classes = 5 # only available after instantiation -def test_lightning_cli_link_arguments(): +def test_lightning_cli_link_arguments(cleandir): class MyLightningCLI(LightningCLI): def add_arguments_to_parser(self, parser): parser.link_arguments("data.batch_size", "model.batch_size") parser.link_arguments("data.num_classes", "model.num_classes", apply_on="instantiate") - cli_args = ["--data.batch_size=12"] + cli_args = ["--data.batch_size=12", "--trainer.max_epochs=1"] with mock.patch("sys.argv", ["any.py"] + cli_args): cli = MyLightningCLI(BoringModelRequiredClasses, BoringDataModuleBatchSizeAndClasses, run=False) @@ 
-575,21 +576,89 @@ def add_arguments_to_parser(self, parser): assert cli.model.batch_size == 12 assert cli.model.num_classes == 5 - class MyLightningCLI(LightningCLI): + cli.trainer.fit(cli.model) + hparams_path = Path(cli.trainer.log_dir) / "hparams.yaml" + assert hparams_path.is_file() + hparams = yaml.safe_load(hparams_path.read_text()) + + hparams.pop("_instantiator") + assert hparams == {"batch_size": 12, "num_classes": 5} + + class MyLightningCLI2(LightningCLI): def add_arguments_to_parser(self, parser): parser.link_arguments("data.batch_size", "model.init_args.batch_size") parser.link_arguments("data.num_classes", "model.init_args.num_classes", apply_on="instantiate") - cli_args[-1] = "--model=tests_pytorch.test_cli.BoringModelRequiredClasses" + cli_args[0] = "--model=tests_pytorch.test_cli.BoringModelRequiredClasses" with mock.patch("sys.argv", ["any.py"] + cli_args): - cli = MyLightningCLI( + cli = MyLightningCLI2( BoringModelRequiredClasses, BoringDataModuleBatchSizeAndClasses, subclass_mode_model=True, run=False ) assert cli.model.batch_size == 8 assert cli.model.num_classes == 5 + cli.trainer.fit(cli.model) + hparams_path = Path(cli.trainer.log_dir) / "hparams.yaml" + assert hparams_path.is_file() + hparams = yaml.safe_load(hparams_path.read_text()) + + hparams.pop("_instantiator") + assert hparams == {"batch_size": 8, "num_classes": 5} + + +class CustomAdam(torch.optim.Adam): + def __init__(self, params, num_classes: Optional[int] = None, **kwargs): + super().__init__(params, **kwargs) + + +class DeepLinkTargetModel(BoringModel): + def __init__( + self, + optimizer: OptimizerCallable = torch.optim.Adam, + ): + super().__init__() + self.save_hyperparameters() + self.optimizer = optimizer + + def configure_optimizers(self): + optimizer = self.optimizer(self.parameters()) + return {"optimizer": optimizer} + + +def test_lightning_cli_link_arguments_subcommands_nested_target(cleandir): + class MyLightningCLI(LightningCLI): + def add_arguments_to_parser(self, parser): + parser.link_arguments( + "data.num_classes", + "model.init_args.optimizer.init_args.num_classes", + apply_on="instantiate", + ) + + cli_args = [ + "fit", + "--data.batch_size=12", + "--trainer.max_epochs=1", + "--model=tests_pytorch.test_cli.DeepLinkTargetModel", + "--model.optimizer=tests_pytorch.test_cli.CustomAdam", + ] + + with mock.patch("sys.argv", ["any.py"] + cli_args): + cli = MyLightningCLI( + DeepLinkTargetModel, + BoringDataModuleBatchSizeAndClasses, + subclass_mode_model=True, + auto_configure_optimizers=False, + ) + + hparams_path = Path(cli.trainer.log_dir) / "hparams.yaml" + assert hparams_path.is_file() + hparams = yaml.safe_load(hparams_path.read_text()) + + assert hparams["optimizer"]["class_path"] == "tests_pytorch.test_cli.CustomAdam" + assert hparams["optimizer"]["init_args"]["num_classes"] == 5 + class EarlyExitTestModel(BoringModel): def on_fit_start(self): From cb240073f61619716c2b39c56d275fbe0b1e86dd Mon Sep 17 00:00:00 2001 From: Kavyansh Tyagi <142140238+KAVYANSHTYAGI@users.noreply.github.com> Date: Wed, 11 Jun 2025 12:38:41 +0530 Subject: [PATCH 093/112] Implement todos tensorboard (#20874) * test: enhance tensorboard log graph * Update test_tensorboard.py --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit 43691d4d8f799a902d064feea54a78a8450ec8ea) --- .../tests_fabric/loggers/test_tensorboard.py | 41 +++++++++++++++---- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git 
a/tests/tests_fabric/loggers/test_tensorboard.py b/tests/tests_fabric/loggers/test_tensorboard.py index 4dcb86f0e7406..9cd61ef2e131b 100644 --- a/tests/tests_fabric/loggers/test_tensorboard.py +++ b/tests/tests_fabric/loggers/test_tensorboard.py @@ -147,29 +147,52 @@ def test_tensorboard_log_hparams_and_metrics(tmp_path): @pytest.mark.parametrize("example_input_array", [None, torch.rand(2, 32)]) -def test_tensorboard_log_graph(tmp_path, example_input_array): - """Test that log graph works with both model.example_input_array and if array is passed externally.""" - # TODO(fabric): Test both nn.Module and LightningModule - # TODO(fabric): Assert _apply_batch_transfer_handler is calling the batch transfer hooks +def test_tensorboard_log_graph_plain_module(tmp_path, example_input_array): model = BoringModel() - if example_input_array is not None: - model.example_input_array = None - logger = TensorBoardLogger(tmp_path) logger._experiment = Mock() + logger.log_graph(model, example_input_array) if example_input_array is not None: logger.experiment.add_graph.assert_called_with(model, example_input_array) + else: + logger.experiment.add_graph.assert_not_called() + logger._experiment.reset_mock() - # model wrapped in `FabricModule` wrapped = _FabricModule(model, strategy=Mock()) logger.log_graph(wrapped, example_input_array) if example_input_array is not None: logger.experiment.add_graph.assert_called_with(model, example_input_array) -@pytest.mark.skipif(not _TENSORBOARD_AVAILABLE, reason=str(_TENSORBOARD_AVAILABLE)) +@pytest.mark.parametrize("example_input_array", [None, torch.rand(2, 32)]) +def test_tensorboard_log_graph_with_batch_transfer_hooks(tmp_path, example_input_array): + model = pytest.importorskip("lightning.pytorch.demos.boring_classes").BoringModel() + logger = TensorBoardLogger(tmp_path) + logger._experiment = Mock() + + with ( + mock.patch.object(model, "_on_before_batch_transfer", return_value=example_input_array) as before_mock, + mock.patch.object(model, "_apply_batch_transfer_handler", return_value=example_input_array) as transfer_mock, + ): + logger.log_graph(model, example_input_array) + logger._experiment.reset_mock() + + wrapped = _FabricModule(model, strategy=Mock()) + logger.log_graph(wrapped, example_input_array) + + if example_input_array is not None: + assert before_mock.call_count == 2 + assert transfer_mock.call_count == 2 + logger.experiment.add_graph.assert_called_with(model, example_input_array) + else: + before_mock.assert_not_called() + transfer_mock.assert_not_called() + logger.experiment.add_graph.assert_not_called() + + +@pytest.mark.skipif(not _TENSORBOARD_AVAILABLE, reason="tensorboard is required") def test_tensorboard_log_graph_warning_no_example_input_array(tmp_path): """Test that log graph throws warning if model.example_input_array is None.""" model = BoringModel() From c3ba585b67def43f591fdce928b6ac5571ed526d Mon Sep 17 00:00:00 2001 From: leopardracer <136604165+leopardracer@users.noreply.github.com> Date: Wed, 11 Jun 2025 13:18:19 +0300 Subject: [PATCH 094/112] Fix typos: "reparametrization" → "reparameterization" and "recommed" → "recommend" (#20892) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update pruning.py * Update connector.py (cherry picked from commit 64b2b6adad2ceabe410c488185e86390ddb9fc0c) --- src/lightning/fabric/connector.py | 2 +- src/lightning/pytorch/callbacks/pruning.py | 2 +- 2 files changed, 2 insertions(+), 2
deletions(-) diff --git a/src/lightning/fabric/connector.py b/src/lightning/fabric/connector.py index 55b4af2728e6f..ac3cc7c13851c 100644 --- a/src/lightning/fabric/connector.py +++ b/src/lightning/fabric/connector.py @@ -428,7 +428,7 @@ def _check_strategy_and_fallback(self) -> None: if strategy_flag in _DDP_FORK_ALIASES and "fork" not in torch.multiprocessing.get_all_start_methods(): raise ValueError( f"You selected `Fabric(strategy='{strategy_flag}')` but process forking is not supported on this" - f" platform. We recommed `Fabric(strategy='ddp_spawn')` instead." + f" platform. We recommend `Fabric(strategy='ddp_spawn')` instead." ) if ( strategy_flag in _FSDP_ALIASES or type(self._strategy_flag) is FSDPStrategy diff --git a/src/lightning/pytorch/callbacks/pruning.py b/src/lightning/pytorch/callbacks/pruning.py index 1517ef6920b0d..a55c21a5c6ed1 100644 --- a/src/lightning/pytorch/callbacks/pruning.py +++ b/src/lightning/pytorch/callbacks/pruning.py @@ -129,7 +129,7 @@ def __init__( - ``bool``. Always apply it or not. - ``Callable[[epoch], bool]``. For dynamic values. Will be called every epoch. - make_pruning_permanent: Whether to remove all reparametrization pre-hooks and apply masks + make_pruning_permanent: Whether to remove all reparameterization pre-hooks and apply masks when training ends or the model is saved. use_lottery_ticket_hypothesis: See `The lottery ticket hypothesis `_: From 33c88dee264628826543cb300099a13043e6e86b Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Thu, 12 Jun 2025 14:01:46 +0200 Subject: [PATCH 095/112] fixing various typos (#20893) * fixing various typos * flaky test_async_checkpoint_plugin * Apply suggestions from code review (cherry picked from commit aebf3f43579ae554166d50ee020c5d3fd52875ff) --- .actions/assistant.py | 28 +++++++++---------- dockers/base-cuda/Dockerfile | 9 +++--- .../fabric/build_your_own_trainer/trainer.py | 2 +- examples/fabric/meta_learning/train_fabric.py | 2 +- examples/fabric/meta_learning/train_torch.py | 2 +- .../domain_templates/reinforce_learn_ppo.py | 2 +- pyproject.toml | 2 +- setup.py | 2 +- src/lightning/__version__.py | 4 +-- src/lightning/fabric/connector.py | 2 +- src/lightning/fabric/fabric.py | 2 +- src/lightning/fabric/strategies/parallel.py | 2 +- .../callbacks/progress/rich_progress.py | 2 +- src/lightning/pytorch/core/module.py | 2 +- src/lightning/pytorch/demos/transformer.py | 2 +- src/lightning/pytorch/strategies/parallel.py | 2 +- .../connectors/accelerator_connector.py | 2 +- src/lightning_fabric/__version__.py | 4 +-- src/pytorch_lightning/__version__.py | 4 +-- tests/parity_fabric/test_parity_ddp.py | 2 +- .../plugins/environments/test_slurm.py | 2 +- .../strategies/test_ddp_integration.py | 2 +- .../strategies/test_fsdp_integration.py | 2 +- tests/tests_fabric/test_connector.py | 12 ++++---- .../callbacks/test_throughput_monitor.py | 2 +- .../checkpointing/test_model_checkpoint.py | 4 +-- .../core/test_lightning_optimizer.py | 2 +- tests/tests_pytorch/models/test_cpu.py | 2 +- tests/tests_pytorch/models/test_restore.py | 4 +-- .../plugins/test_checkpoint_io_plugin.py | 2 ++ .../launchers/test_multiprocessing.py | 2 +- tests/tests_pytorch/strategies/test_fsdp.py | 4 +-- tests/tests_pytorch/test_cli.py | 2 +- .../connectors/test_accelerator_connector.py | 12 ++++---- 34 files changed, 68 insertions(+), 65 deletions(-) diff --git a/.actions/assistant.py b/.actions/assistant.py index 47a0543f228ad..7b2d49423d622 100644 --- a/.actions/assistant.py +++ 
b/.actions/assistant.py @@ -154,8 +154,8 @@ def load_readme_description(path_dir: str, homepage: str, version: str) -> str: """ path_readme = os.path.join(path_dir, "README.md") - with open(path_readme, encoding="utf-8") as fo: - text = fo.read() + with open(path_readme, encoding="utf-8") as fopen: + text = fopen.read() # drop images from readme text = text.replace( @@ -308,17 +308,17 @@ def copy_replace_imports( if ext in (".pyc",): continue # Try to parse everything else - with open(fp, encoding="utf-8") as fo: + with open(fp, encoding="utf-8") as fopen: try: - lines = fo.readlines() + lines = fopen.readlines() except UnicodeDecodeError: # a binary file, skip print(f"Skipped replacing imports for {fp}") continue lines = _replace_imports(lines, list(zip(source_imports, target_imports)), lightning_by=lightning_by) os.makedirs(os.path.dirname(fp_new), exist_ok=True) - with open(fp_new, "w", encoding="utf-8") as fo: - fo.writelines(lines) + with open(fp_new, "w", encoding="utf-8") as fopen: + fopen.writelines(lines) def create_mirror_package(source_dir: str, package_mapping: dict[str, str]) -> None: @@ -370,10 +370,10 @@ def _prune_packages(req_file: str, packages: Sequence[str]) -> None: @staticmethod def _replace_min(fname: str) -> None: - with open(fname, encoding="utf-8") as fo: - req = fo.read().replace(">=", "==") - with open(fname, "w", encoding="utf-8") as fw: - fw.write(req) + with open(fname, encoding="utf-8") as fopen: + req = fopen.read().replace(">=", "==") + with open(fname, "w", encoding="utf-8") as fwrite: + fwrite.write(req) @staticmethod def replace_oldest_ver(requirement_fnames: Sequence[str] = REQUIREMENT_FILES_ALL) -> None: @@ -471,15 +471,15 @@ def convert_version2nightly(ver_file: str = "src/version.info") -> None: """Load the actual version and convert it to the nightly version.""" from datetime import datetime - with open(ver_file) as fo: - version = fo.read().strip() + with open(ver_file) as fopen: + version = fopen.read().strip() # parse X.Y.Z version and prune any suffix vers = re.match(r"(\d+)\.(\d+)\.(\d+).*", version) # create timestamp YYYYMMDD timestamp = datetime.now().strftime("%Y%m%d") version = f"{'.'.join(vers.groups())}.dev{timestamp}" - with open(ver_file, "w") as fo: - fo.write(version + os.linesep) + with open(ver_file, "w") as fopen: + fopen.write(version + os.linesep) @staticmethod def generate_docker_tags( diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index bf493ad47e51a..2fe1e57e95a77 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -34,11 +34,12 @@ ENV \ MAKEFLAGS="-j2" RUN \ - apt-get update --fix-missing && apt-get install -y wget && \ - apt-get update -qq --fix-missing && \ - NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \ CUDA_VERSION_MM=${CUDA_VERSION%.*} && \ + apt-get update -qq --fix-missing && apt-get install -y wget && \ + NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \ + echo "NCCL version found: $NCCL_VER" && \ TO_INSTALL_NCCL=$(echo -e "$MAX_ALLOWED_NCCL\n$NCCL_VER" | sort -V | head -n1)-1+cuda${CUDA_VERSION_MM} && \ + echo "NCCL version to install: $TO_INSTALL_NCCL" && \ apt-get install -y --no-install-recommends --allow-downgrades --allow-change-held-packages \ build-essential \ pkg-config \ @@ -96,7 +97,7 @@ RUN \ --extra-index-url="https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM//'.'/''}/" RUN \ - # Show what we 
have + # Show what we have \ pip --version && \ pip list && \ python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \ diff --git a/examples/fabric/build_your_own_trainer/trainer.py b/examples/fabric/build_your_own_trainer/trainer.py index d9d081a2aea69..ef7c3f4f53534 100644 --- a/examples/fabric/build_your_own_trainer/trainer.py +++ b/examples/fabric/build_your_own_trainer/trainer.py @@ -418,7 +418,7 @@ def load(self, state: Optional[Mapping], path: str) -> None: """Loads a checkpoint from a given file into state. Args: - state: a mapping contaning model, optimizer and lr scheduler + state: a mapping containing model, optimizer and lr scheduler path: the path to load the checkpoint from """ diff --git a/examples/fabric/meta_learning/train_fabric.py b/examples/fabric/meta_learning/train_fabric.py index 203155f7b2ada..779ede1632d06 100644 --- a/examples/fabric/meta_learning/train_fabric.py +++ b/examples/fabric/meta_learning/train_fabric.py @@ -30,7 +30,7 @@ def accuracy(predictions, targets): def fast_adapt(batch, learner, loss, adaptation_steps, shots, ways): data, labels = batch - # Separate data into adaptation/evalutation sets + # Separate data into adaptation/evaluation sets adaptation_indices = torch.zeros(data.size(0), dtype=bool) adaptation_indices[torch.arange(shots * ways) * 2] = True evaluation_indices = ~adaptation_indices diff --git a/examples/fabric/meta_learning/train_torch.py b/examples/fabric/meta_learning/train_torch.py index 1e3666755704b..99357ebd5e8b4 100644 --- a/examples/fabric/meta_learning/train_torch.py +++ b/examples/fabric/meta_learning/train_torch.py @@ -34,7 +34,7 @@ def fast_adapt(batch, learner, loss, adaptation_steps, shots, ways, device): data, labels = batch data, labels = data.to(device), labels.to(device) - # Separate data into adaptation/evalutation sets + # Separate data into adaptation/evaluation sets adaptation_indices = torch.zeros(data.size(0), dtype=bool) adaptation_indices[torch.arange(shots * ways) * 2] = True evaluation_indices = ~adaptation_indices diff --git a/examples/pytorch/domain_templates/reinforce_learn_ppo.py b/examples/pytorch/domain_templates/reinforce_learn_ppo.py index af503dbb925cd..55581c1b68088 100644 --- a/examples/pytorch/domain_templates/reinforce_learn_ppo.py +++ b/examples/pytorch/domain_templates/reinforce_learn_ppo.py @@ -353,7 +353,7 @@ def generate_trajectory_samples(self) -> tuple[list[torch.Tensor], list[torch.Te # logging self.avg_reward = sum(self.epoch_rewards) / self.steps_per_epoch - # if epoch ended abruptly, exlude last cut-short episode to prevent stats skewness + # if epoch ended abruptly, exclude last cut-short episode to prevent stats skewness epoch_rewards = self.epoch_rewards if not done: epoch_rewards = epoch_rewards[:-1] diff --git a/pyproject.toml b/pyproject.toml index 48439bee75332..b45f60489c6fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ blank = true [tool.codespell] # Todo: enable also python files in a next step -skip = '*.py' +#skip = '*.py' quiet-level = 3 # comma separated list of words; waiting for: # https://github.com/codespell-project/codespell/issues/2839#issuecomment-1731601603 diff --git a/setup.py b/setup.py index b432033a0d8c4..fffb38f9a578b 100755 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ - for `pytorch-lightning` use `export PACKAGE_NAME=pytorch ; pip install .` - for `lightning-fabric` use `export PACKAGE_NAME=fabric ; pip install .` -3. 
Building packages as sdist or binary wheel and installing or publish to PyPI afterwords you use command +3. Building packages as sdist or binary wheel and installing or publishing to PyPI afterwards you use the command `python setup.py sdist` or `python setup.py bdist_wheel` accordingly. In case you want to build just a particular package you want to set an environment variable: `PACKAGE_NAME=lightning|pytorch|fabric python setup.py sdist|bdist_wheel` diff --git a/src/lightning/__version__.py index 1491508baf4b3..862b5f95d8845 100644 --- a/src/lightning/__version__.py +++ b/src/lightning/__version__.py @@ -5,5 +5,5 @@ if not os.path.exists(_VERSION_PATH): # relevant for `bdist_wheel` _VERSION_PATH = os.path.join(_PACKAGE_ROOT, "version.info") -with open(_VERSION_PATH, encoding="utf-8") as fo: - version = fo.readlines()[0].strip() +with open(_VERSION_PATH, encoding="utf-8") as fopen: + version = fopen.readlines()[0].strip() diff --git a/src/lightning/fabric/connector.py index ac3cc7c13851c..b3289debbd522 100644 --- a/src/lightning/fabric/connector.py +++ b/src/lightning/fabric/connector.py @@ -83,7 +83,7 @@ class _Connector: 1. strategy class 2. strategy str registered with STRATEGY_REGISTRY 3. strategy str in _strategy_type enum which listed in each strategy as - backend (registed these too, and _strategy_type could be deprecated) + backend (registered these too, and _strategy_type could be deprecated) C. plugins flag could be: 1. precision class (should be removed, and precision flag should allow user pass classes) diff --git a/src/lightning/fabric/fabric.py index 36ffc0c1c7772..92870fcd7afb2 100644 --- a/src/lightning/fabric/fabric.py +++ b/src/lightning/fabric/fabric.py @@ -327,7 +327,7 @@ def setup_optimizers(self, *optimizers: Optimizer) -> Union[_FabricOptimizer, tu ``.setup(model, optimizer, ...)`` instead to jointly set them up. Args: - *optimizers: One or more optmizers to set up. + *optimizers: One or more optimizers to set up. Returns: The wrapped optimizer(s). diff --git a/src/lightning/fabric/strategies/parallel.py index d9bc1a03d1bb5..327cfc016d4ef 100644 --- a/src/lightning/fabric/strategies/parallel.py +++ b/src/lightning/fabric/strategies/parallel.py @@ -87,7 +87,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo @override def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool: - """Reduces a boolean decision over distributed processes. By default is analagous to ``all`` from the standard + """Reduces a boolean decision over distributed processes. By default is analogous to ``all`` from the standard library, returning ``True`` only if all input decisions evaluate to ``True``. If ``all`` is set to ``False``, it behaves like ``any`` instead.
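The `reduce_boolean_decision` docstring above describes an all/any reduction across ranks; a minimal standalone sketch of those semantics with plain `torch.distributed` (assuming an already-initialized process group; this is an illustration, not the strategy's actual code path):

import torch
import torch.distributed as dist

def reduce_boolean_decision(decision: bool, all_ranks: bool = True) -> bool:
    # each rank contributes 1.0 for a True vote, 0.0 for False
    votes = torch.tensor(float(decision))
    dist.all_reduce(votes, op=dist.ReduceOp.SUM)
    if all_ranks:
        # ``all`` semantics: every rank must have voted True
        return votes.item() == dist.get_world_size()
    # ``any`` semantics: a single True vote suffices
    return votes.item() > 0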
diff --git a/src/lightning/pytorch/callbacks/progress/rich_progress.py index 0a51d99ccb676..7bb98e8a9058c 100644 --- a/src/lightning/pytorch/callbacks/progress/rich_progress.py +++ b/src/lightning/pytorch/callbacks/progress/rich_progress.py @@ -430,7 +430,7 @@ def on_validation_batch_start( if self.val_progress_bar_id is not None: self.progress.update(self.val_progress_bar_id, advance=0, visible=False) - # TODO: remove old tasks when new onces are created + # TODO: remove old tasks when new ones are created self.val_progress_bar_id = self._add_task( self.total_val_batches_current_dataloader, self.validation_description, diff --git a/src/lightning/pytorch/core/module.py index c484a95c6c632..8108100be6dc4 100644 --- a/src/lightning/pytorch/core/module.py +++ b/src/lightning/pytorch/core/module.py @@ -262,7 +262,7 @@ def current_epoch(self) -> int: def global_step(self) -> int: """Total training batches seen across all epochs. - If no Trainer is attached, this propery is 0. + If no Trainer is attached, this property is 0. """ return self.trainer.global_step if self._trainer else 0 diff --git a/src/lightning/pytorch/demos/transformer.py index eca86b4cb4dc7..fefa073fbd310 100644 --- a/src/lightning/pytorch/demos/transformer.py +++ b/src/lightning/pytorch/demos/transformer.py @@ -84,7 +84,7 @@ def __init__(self, dim: int, dropout: float = 0.1, max_len: int = 5000) -> None: def forward(self, x: Tensor) -> Tensor: if self.pe is None: # 1) can't use buffer, see https://github.com/pytorch/pytorch/issues/68407 - # 2) can't use parameter becauses pe gets sliced and DDP requires all params to participate in forward + # 2) can't use parameter because pe gets sliced and DDP requires all params to participate in forward # TODO: Could make this a `nn.Parameter` with `requires_grad=False` self.pe = self._init_pos_encoding(device=x.device) diff --git a/src/lightning/pytorch/strategies/parallel.py index 285d40706a5a9..dbd8e2962b230 100644 --- a/src/lightning/pytorch/strategies/parallel.py +++ b/src/lightning/pytorch/strategies/parallel.py @@ -93,7 +93,7 @@ def all_gather(self, tensor: Tensor, group: Optional[Any] = None, sync_grads: bo @override def reduce_boolean_decision(self, decision: bool, all: bool = True) -> bool: - """Reduces a boolean decision over distributed processes. By default is analagous to ``all`` from the standard + """Reduces a boolean decision over distributed processes. By default is analogous to ``all`` from the standard library, returning ``True`` only if all input decisions evaluate to ``True``. If ``all`` is set to ``False``, it behaves like ``any`` instead. diff --git a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py index 603aedfc94589..1423c1aeeafe4 100644 --- a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py +++ b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py @@ -467,7 +467,7 @@ def _check_strategy_and_fallback(self) -> None: if strategy_flag in _DDP_FORK_ALIASES and "fork" not in torch.multiprocessing.get_all_start_methods(): raise ValueError( f"You selected `Trainer(strategy='{strategy_flag}')` but process forking is not supported on this" - f" platform. We recommed `Trainer(strategy='ddp_spawn')` instead." + f" platform.
We recommend `Trainer(strategy='ddp_spawn')` instead." ) if strategy_flag: self._strategy_flag = strategy_flag diff --git a/src/lightning_fabric/__version__.py index 1491508baf4b3..862b5f95d8845 100644 --- a/src/lightning_fabric/__version__.py +++ b/src/lightning_fabric/__version__.py @@ -5,5 +5,5 @@ if not os.path.exists(_VERSION_PATH): # relevant for `bdist_wheel` _VERSION_PATH = os.path.join(_PACKAGE_ROOT, "version.info") -with open(_VERSION_PATH, encoding="utf-8") as fo: - version = fo.readlines()[0].strip() +with open(_VERSION_PATH, encoding="utf-8") as fopen: + version = fopen.readlines()[0].strip() diff --git a/src/pytorch_lightning/__version__.py index 1491508baf4b3..862b5f95d8845 100644 --- a/src/pytorch_lightning/__version__.py +++ b/src/pytorch_lightning/__version__.py @@ -5,5 +5,5 @@ if not os.path.exists(_VERSION_PATH): # relevant for `bdist_wheel` _VERSION_PATH = os.path.join(_PACKAGE_ROOT, "version.info") -with open(_VERSION_PATH, encoding="utf-8") as fo: - version = fo.readlines()[0].strip() +with open(_VERSION_PATH, encoding="utf-8") as fopen: + version = fopen.readlines()[0].strip() diff --git a/tests/parity_fabric/test_parity_ddp.py index d30d2b6233886..4fc78d384de45 100644 --- a/tests/parity_fabric/test_parity_ddp.py +++ b/tests/parity_fabric/test_parity_ddp.py @@ -126,7 +126,7 @@ def train_fabric_ddp(fabric): def run_parity_test(accelerator: str = "cpu", devices: int = 2, tolerance: float = 0.02): cuda_reset() - # Launch processes with Fabric and re-use them for the PyTorch training for convenience + # Launch processes with Fabric and reuse them for the PyTorch training for convenience fabric = Fabric(accelerator=accelerator, strategy="ddp", devices=devices) fabric.launch() diff --git a/tests/tests_fabric/plugins/environments/test_slurm.py index 75ca43577d579..b907c287faa5f 100644 --- a/tests/tests_fabric/plugins/environments/test_slurm.py +++ b/tests/tests_fabric/plugins/environments/test_slurm.py @@ -174,7 +174,7 @@ def test_validate_user_settings(): with pytest.raises(ValueError, match="the number of nodes configured in SLURM .* does not match"): env.validate_settings(num_devices=4, num_nodes=1) - # in interactive mode, validation is skipped becauses processes get launched by Fabric/Trainer, not SLURM + # in interactive mode, validation is skipped because processes get launched by Fabric/Trainer, not SLURM with mock.patch( "lightning.fabric.plugins.environments.slurm.SLURMEnvironment.job_name", return_value="interactive" ): diff --git a/tests/tests_fabric/strategies/test_ddp_integration.py index 3ed76211e5d6d..9d43724228cd2 100644 --- a/tests/tests_fabric/strategies/test_ddp_integration.py +++ b/tests/tests_fabric/strategies/test_ddp_integration.py @@ -85,7 +85,7 @@ def test_reapply_compile(): fabric.launch() model = BoringModel() - # currently (PyTorch 2.6) using ruduce-overhead here casues a RuntimeError: + # currently (PyTorch 2.6) using reduce-overhead here causes a RuntimeError: # Error: accessing tensor output of CUDAGraphs that has been overwritten by a subsequent run.
compile_kwargs = {"mode": "reduce-overhead"} if _TORCH_LESS_EQUAL_2_6 else {} compiled_model = torch.compile(model, **compile_kwargs) diff --git a/tests/tests_fabric/strategies/test_fsdp_integration.py index 576a0df38b966..5da9b50399a94 100644 --- a/tests/tests_fabric/strategies/test_fsdp_integration.py +++ b/tests/tests_fabric/strategies/test_fsdp_integration.py @@ -412,7 +412,7 @@ def test_reapply_compile(): fabric.launch() model = BoringModel() - # currently (PyTorch 2.6) using ruduce-overhead here casues a RuntimeError: + # currently (PyTorch 2.6) using reduce-overhead here causes a RuntimeError: # Error: accessing tensor output of CUDAGraphs that has been overwritten by a subsequent run. compile_kwargs = {"mode": "reduce-overhead"} if _TORCH_LESS_EQUAL_2_6 else {} compiled_model = torch.compile(model, **compile_kwargs) diff --git a/tests/tests_fabric/test_connector.py index c6bef5943a30f..1074789e71055 100644 --- a/tests/tests_fabric/test_connector.py +++ b/tests/tests_fabric/test_connector.py @@ -194,23 +194,23 @@ def name() -> str: class Prec(Precision): pass - class Strat(SingleDeviceStrategy): + class TestStrategy(SingleDeviceStrategy): pass - strategy = Strat(device=torch.device("cpu"), accelerator=Accel(), precision=Prec()) + strategy = TestStrategy(device=torch.device("cpu"), accelerator=Accel(), precision=Prec()) connector = _Connector(strategy=strategy, devices=2) assert isinstance(connector.accelerator, Accel) - assert isinstance(connector.strategy, Strat) + assert isinstance(connector.strategy, TestStrategy) assert isinstance(connector.precision, Prec) assert connector.strategy is strategy - class Strat(DDPStrategy): + class TestStrategy(DDPStrategy): pass - strategy = Strat(accelerator=Accel(), precision=Prec()) + strategy = TestStrategy(accelerator=Accel(), precision=Prec()) connector = _Connector(strategy=strategy, devices=2) assert isinstance(connector.accelerator, Accel) - assert isinstance(connector.strategy, Strat) + assert isinstance(connector.strategy, TestStrategy) assert isinstance(connector.precision, Prec) assert connector.strategy is strategy diff --git a/tests/tests_pytorch/callbacks/test_throughput_monitor.py index 9f77e4371e69e..83bcb16c81797 100644 --- a/tests/tests_pytorch/callbacks/test_throughput_monitor.py +++ b/tests/tests_pytorch/callbacks/test_throughput_monitor.py @@ -303,7 +303,7 @@ def test_throughput_monitor_eval(tmp_path, fn): assert logger_mock.log_metrics.mock_calls == [ call(metrics={**expected, f"{fn}|batches": 3, f"{fn}|samples": 9}, step=3), call(metrics={**expected, f"{fn}|batches": 6, f"{fn}|samples": 18}, step=6), - # the step doesnt repeat + # the step doesn't repeat call(metrics={**expected, f"{fn}|batches": 9, f"{fn}|samples": 27}, step=9), call(metrics={**expected, f"{fn}|batches": 12, f"{fn}|samples": 36}, step=12), ] diff --git a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py index 1907a5fb35799..7b17498865889 100644 --- a/tests/tests_pytorch/checkpointing/test_model_checkpoint.py +++ b/tests/tests_pytorch/checkpointing/test_model_checkpoint.py @@ -326,8 +326,8 @@ def test_model_checkpoint_to_yaml(tmp_path, save_top_k: int): path_yaml = tmp_path / "best_k_models.yaml" checkpoint.to_yaml(path_yaml) - with open(path_yaml) as fo: - d = yaml.full_load(fo) + with open(path_yaml) as fopen: + d =
yaml.full_load(fopen) best_k = dict(checkpoint.best_k_models.items()) assert d == best_k diff --git a/tests/tests_pytorch/core/test_lightning_optimizer.py b/tests/tests_pytorch/core/test_lightning_optimizer.py index ed1ca2b4db03f..042532f968e7d 100644 --- a/tests/tests_pytorch/core/test_lightning_optimizer.py +++ b/tests/tests_pytorch/core/test_lightning_optimizer.py @@ -45,7 +45,7 @@ def configure_optimizers(self): def test_init_optimizers_resets_lightning_optimizers(tmp_path): - """Test that the Trainer resets the `lightning_optimizers` list everytime new optimizers get initialized.""" + """Test that the Trainer resets the `lightning_optimizers` list every time new optimizers get initialized.""" def compare_optimizers(): assert trainer.strategy._lightning_optimizers[0].optimizer is trainer.optimizers[0] diff --git a/tests/tests_pytorch/models/test_cpu.py b/tests/tests_pytorch/models/test_cpu.py index a2d38aca7c56c..38e37effe9600 100644 --- a/tests/tests_pytorch/models/test_cpu.py +++ b/tests/tests_pytorch/models/test_cpu.py @@ -49,7 +49,7 @@ def test_cpu_slurm_save_load(_, tmp_path): trainer.fit(model) real_global_step = trainer.global_step - # traning complete + # training complete assert trainer.state.finished, "cpu model failed to complete" # predict with trained model before saving diff --git a/tests/tests_pytorch/models/test_restore.py b/tests/tests_pytorch/models/test_restore.py index 099493890831d..e651e4729a72f 100644 --- a/tests/tests_pytorch/models/test_restore.py +++ b/tests/tests_pytorch/models/test_restore.py @@ -547,7 +547,7 @@ def test_strict_model_load_more_params(monkeypatch, tmp_path, tmpdir_server, url ) trainer.fit(model) - # traning complete + # training complete assert trainer.state.finished, f"Training failed with {trainer.state}" # save model @@ -587,7 +587,7 @@ def test_strict_model_load_less_params(monkeypatch, tmp_path, tmpdir_server, url ) trainer.fit(model) - # traning complete + # training complete assert trainer.state.finished, f"Training failed with {trainer.state}" # save model diff --git a/tests/tests_pytorch/plugins/test_checkpoint_io_plugin.py b/tests/tests_pytorch/plugins/test_checkpoint_io_plugin.py index cae26fc1fe775..0f62eeae69ef8 100644 --- a/tests/tests_pytorch/plugins/test_checkpoint_io_plugin.py +++ b/tests/tests_pytorch/plugins/test_checkpoint_io_plugin.py @@ -16,6 +16,7 @@ from typing import Any, Optional from unittest.mock import MagicMock, Mock +import pytest import torch from lightning.fabric.plugins import CheckpointIO, TorchCheckpointIO @@ -97,6 +98,7 @@ def test_checkpoint_plugin_called(tmp_path): checkpoint_plugin.load_checkpoint.assert_called_with(str(tmp_path / "last-v1.ckpt")) +@pytest.mark.flaky(reruns=3) def test_async_checkpoint_plugin(tmp_path): """Ensure that the custom checkpoint IO plugin and torch checkpoint IO plugin is called when async saving and loading.""" diff --git a/tests/tests_pytorch/strategies/launchers/test_multiprocessing.py b/tests/tests_pytorch/strategies/launchers/test_multiprocessing.py index d26f6c4d2c3ef..d0b4ab617df66 100644 --- a/tests/tests_pytorch/strategies/launchers/test_multiprocessing.py +++ b/tests/tests_pytorch/strategies/launchers/test_multiprocessing.py @@ -230,7 +230,7 @@ def test_fit_twice_raises(mps_count_0): barebones=True, ) trainer.fit(model) - trainer.test(model) # make sure testing in between doesnt impact the result + trainer.test(model) # make sure testing in between doesn't impact the result trainer.fit_loop.max_epochs += 1 with pytest.raises(NotImplementedError, match=r"twice.*is 
not supported"): trainer.fit(model) diff --git a/tests/tests_pytorch/strategies/test_fsdp.py index f3e88ca356764..560ab19f823ca 100644 --- a/tests/tests_pytorch/strategies/test_fsdp.py +++ b/tests/tests_pytorch/strategies/test_fsdp.py @@ -110,7 +110,7 @@ def __init__(self, wrap_min_params: int = 2): self.should_be_wrapped = [wrap_min_params < (32 * 32 + 32), None, wrap_min_params < (32 * 2 + 2)] def configure_optimizers(self): - # SGD's FSDP optimier state is fixed in https://github.com/pytorch/pytorch/pull/99214 + # SGD's FSDP optimizer state is fixed in https://github.com/pytorch/pytorch/pull/99214 return torch.optim.AdamW(self.parameters(), lr=0.1) @@ -808,7 +808,7 @@ def __init__(self, params_to_compare=None): self.params_to_compare = params_to_compare def configure_optimizers(self): - # SGD's FSDP optimier state is fixed in https://github.com/pytorch/pytorch/pull/99214 + # SGD's FSDP optimizer state is fixed in https://github.com/pytorch/pytorch/pull/99214 return torch.optim.AdamW(self.parameters(), lr=0.1) def on_train_start(self): diff --git a/tests/tests_pytorch/test_cli.py index 9cf81aa5a739c..59ce4cfe4bb71 100644 --- a/tests/tests_pytorch/test_cli.py +++ b/tests/tests_pytorch/test_cli.py @@ -1251,7 +1251,7 @@ def test_lightning_cli_datamodule_short_arguments(): with mock.patch("sys.argv", ["any.py"]): cli = LightningCLI(BoringModel, BoringDataModule, run=False) - # since we are passing the DataModule, that's whats added to the parser + # since we are passing the DataModule, that's what's added to the parser assert cli.parser.groups["data"].group_class is BoringDataModule diff --git a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py index b8517a0303015..3877d6c051017 100644 --- a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py +++ b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py @@ -207,23 +207,23 @@ def name() -> str: class Prec(Precision): pass - class Strat(SingleDeviceStrategy): + class TestStrategy(SingleDeviceStrategy): pass - strategy = Strat(device=torch.device("cpu"), accelerator=Accel(), precision_plugin=Prec()) + strategy = TestStrategy(device=torch.device("cpu"), accelerator=Accel(), precision_plugin=Prec()) trainer = Trainer(strategy=strategy, fast_dev_run=True, devices=2) assert isinstance(trainer.accelerator, Accel) - assert isinstance(trainer.strategy, Strat) + assert isinstance(trainer.strategy, TestStrategy) assert isinstance(trainer.precision_plugin, Prec) assert trainer._accelerator_connector.strategy is strategy - class Strat(DDPStrategy): + class TestStrategy(DDPStrategy): pass - strategy = Strat(accelerator=Accel(), precision_plugin=Prec()) + strategy = TestStrategy(accelerator=Accel(), precision_plugin=Prec()) trainer = Trainer(strategy=strategy, fast_dev_run=True, devices=2) assert isinstance(trainer.accelerator, Accel) - assert isinstance(trainer.strategy, Strat) + assert isinstance(trainer.strategy, TestStrategy) assert isinstance(trainer.precision_plugin, Prec) assert trainer._accelerator_connector.strategy is strategy From e011d4513f818b786e9c50a2eb12d49df6fa6e3c Mon Sep 17 00:00:00 2001 From: kilavvy <140459108+kilavvy@users.noreply.github.com> Date: Thu, 12 Jun 2025 20:02:18 +0200 Subject: [PATCH 096/112] Fix Typo in TBPTT Documentation and Improve Trainer Docstring (#20897) (cherry picked from commit
ab619cd0e073df678ba6ba8bfbee7c3b4c67b3cb) --- docs/source-pytorch/common/tbptt.rst | 2 +- examples/fabric/build_your_own_trainer/trainer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source-pytorch/common/tbptt.rst b/docs/source-pytorch/common/tbptt.rst index 04b8ea33b9235..8f448e9001145 100644 --- a/docs/source-pytorch/common/tbptt.rst +++ b/docs/source-pytorch/common/tbptt.rst @@ -2,7 +2,7 @@ Truncated Backpropagation Through Time (TBPTT) ############################################## -Truncated Backpropagation Through Time (TBPTT) performs backpropogation every k steps of +Truncated Backpropagation Through Time (TBPTT) performs backpropagation every k steps of a much longer sequence. This is made possible by passing training batches split along the time-dimensions into splits of size k to the ``training_step``. In order to keep the same forward propagation behavior, all diff --git a/examples/fabric/build_your_own_trainer/trainer.py b/examples/fabric/build_your_own_trainer/trainer.py index ef7c3f4f53534..c9f0740152445 100644 --- a/examples/fabric/build_your_own_trainer/trainer.py +++ b/examples/fabric/build_your_own_trainer/trainer.py @@ -35,7 +35,7 @@ def __init__( checkpoint_dir: str = "./checkpoints", checkpoint_frequency: int = 1, ) -> None: - """Exemplary Trainer with Fabric. This is a very simple trainer focused on readablity but with reduced + """Exemplary Trainer with Fabric. This is a very simple trainer focused on readability but with reduced featureset. As a trainer with more included features, we recommend using the :class:`lightning.pytorch.Trainer`. From d5bd7c7e159ce23639ad183d2546881ecc0f692d Mon Sep 17 00:00:00 2001 From: Rittik Panda Date: Fri, 13 Jun 2025 16:03:21 +0530 Subject: [PATCH 097/112] chore: bump `mypy` from 1.15.0 to 1.16.0 and resolve typing issues (#20900) * build(deps): bump mypy from 1.15.0 to 1.16.0 in /requirements Bumps [mypy](https://github.com/python/mypy) from 1.15.0 to 1.16.0. - [Changelog](https://github.com/python/mypy/blob/master/CHANGELOG.md) - [Commits](https://github.com/python/mypy/compare/v1.15.0...v1.16.0) --- updated-dependencies: - dependency-name: mypy dependency-version: 1.16.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * fix typing * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update fsdp.py --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> (cherry picked from commit b6442c5eaa35e1ac81a074591fac9cb111a00f67) --- requirements/typing.txt | 2 +- src/lightning/fabric/plugins/precision/bitsandbytes.py | 4 ++-- src/lightning/fabric/strategies/fsdp.py | 2 +- src/lightning/fabric/strategies/single_xla.py | 6 +++--- src/lightning/fabric/strategies/strategy.py | 2 +- src/lightning/fabric/strategies/xla.py | 6 +++--- src/lightning/fabric/strategies/xla_fsdp.py | 6 +++--- src/lightning/pytorch/core/module.py | 2 +- src/lightning/pytorch/serve/servable_module_validator.py | 2 +- src/lightning/pytorch/strategies/fsdp.py | 2 +- src/lightning/pytorch/strategies/single_xla.py | 6 +++--- src/lightning/pytorch/strategies/strategy.py | 2 +- src/lightning/pytorch/strategies/xla.py | 6 +++--- src/lightning/pytorch/trainer/trainer.py | 2 +- 14 files changed, 25 insertions(+), 25 deletions(-) diff --git a/requirements/typing.txt b/requirements/typing.txt index e050157f8dbac..940534fc729bb 100644 --- a/requirements/typing.txt +++ b/requirements/typing.txt @@ -1,4 +1,4 @@ -mypy==1.15.0 +mypy==1.16.0 torch==2.7.1 types-Markdown diff --git a/src/lightning/fabric/plugins/precision/bitsandbytes.py b/src/lightning/fabric/plugins/precision/bitsandbytes.py index 646df2028672e..8bda93b84e243 100644 --- a/src/lightning/fabric/plugins/precision/bitsandbytes.py +++ b/src/lightning/fabric/plugins/precision/bitsandbytes.py @@ -226,7 +226,7 @@ class _Linear8bitLt(bnb.nn.Linear8bitLt): def __init__(self, *args: Any, device: Optional[_DEVICE] = None, threshold: float = 6.0, **kwargs: Any) -> None: super().__init__(*args, device=device, threshold=threshold, **kwargs) self.weight = cast(bnb.nn.Int8Params, self.weight) # type: ignore[has-type] - self.bias = cast(Optional[torch.nn.Parameter], self.bias) # type: ignore[has-type] + self.bias: Optional[torch.nn.Parameter] = self.bias # if the device is CUDA or we are under a CUDA context manager, quantize the weight here, so we don't end up # filling the device memory with float32 weights which could lead to OOM if torch.tensor(0, device=device).device.type == "cuda": @@ -310,7 +310,7 @@ class _Linear4bit(bnb.nn.Linear4bit): def __init__(self, *args: Any, device: Optional[_DEVICE] = None, **kwargs: Any) -> None: super().__init__(*args, device=device, **kwargs) self.weight = cast(bnb.nn.Params4bit, self.weight) # type: ignore[has-type] - self.bias = cast(Optional[torch.nn.Parameter], self.bias) # type: ignore[has-type] + self.bias: Optional[torch.nn.Parameter] = self.bias # if the device is CUDA or we are under a CUDA context manager, quantize the weight here, so we don't end up # filling the device memory with float32 weights which could lead to OOM if torch.tensor(0, device=device).device.type == "cuda": diff --git a/src/lightning/fabric/strategies/fsdp.py b/src/lightning/fabric/strategies/fsdp.py index 9dd5b2c62d4c9..74af1dd0e8f43 100644 --- a/src/lightning/fabric/strategies/fsdp.py +++ b/src/lightning/fabric/strategies/fsdp.py @@ -237,7 +237,7 @@ def precision(self) -> FSDPPrecision: @precision.setter @override - def precision(self, precision: Optional[FSDPPrecision]) -> None: + def precision(self, precision: Optional[Precision]) -> None: if 
precision is not None and not isinstance(precision, FSDPPrecision): raise TypeError(f"The FSDP strategy can only work with the `FSDPPrecision` plugin, found {precision}") self._precision = precision diff --git a/src/lightning/fabric/strategies/single_xla.py b/src/lightning/fabric/strategies/single_xla.py index 3db361a2eaa99..ba2fce91f1146 100644 --- a/src/lightning/fabric/strategies/single_xla.py +++ b/src/lightning/fabric/strategies/single_xla.py @@ -18,7 +18,7 @@ from lightning.fabric.accelerators import Accelerator from lightning.fabric.accelerators.xla import _XLA_AVAILABLE -from lightning.fabric.plugins import XLAPrecision +from lightning.fabric.plugins import CheckpointIO, Precision, XLAPrecision from lightning.fabric.plugins.io.xla import XLACheckpointIO from lightning.fabric.strategies import _StrategyRegistry from lightning.fabric.strategies.single_device import SingleDeviceStrategy @@ -61,7 +61,7 @@ def checkpoint_io(self) -> XLACheckpointIO: @checkpoint_io.setter @override - def checkpoint_io(self, io: Optional[XLACheckpointIO]) -> None: + def checkpoint_io(self, io: Optional[CheckpointIO]) -> None: if io is not None and not isinstance(io, XLACheckpointIO): raise TypeError(f"The XLA strategy can only work with the `XLACheckpointIO` plugin, found {io}") self._checkpoint_io = io @@ -77,7 +77,7 @@ def precision(self) -> XLAPrecision: @precision.setter @override - def precision(self, precision: Optional[XLAPrecision]) -> None: + def precision(self, precision: Optional[Precision]) -> None: if precision is not None and not isinstance(precision, XLAPrecision): raise TypeError(f"The XLA strategy can only work with the `XLAPrecision` plugin, found {precision}") self._precision = precision diff --git a/src/lightning/fabric/strategies/strategy.py b/src/lightning/fabric/strategies/strategy.py index 4daad9b954b2f..e0100bb148dd3 100644 --- a/src/lightning/fabric/strategies/strategy.py +++ b/src/lightning/fabric/strategies/strategy.py @@ -52,7 +52,7 @@ def __init__( self._checkpoint_io: Optional[CheckpointIO] = checkpoint_io self._precision: Optional[Precision] = None # Call the precision setter for input validation - self.precision = precision # type: ignore[assignment] + self.precision = precision self._launcher: Optional[_Launcher] = None self._backward_sync_control: Optional[_BackwardSyncControl] = None diff --git a/src/lightning/fabric/strategies/xla.py b/src/lightning/fabric/strategies/xla.py index 5fd6651b12710..3a571fef37f00 100644 --- a/src/lightning/fabric/strategies/xla.py +++ b/src/lightning/fabric/strategies/xla.py @@ -23,7 +23,7 @@ from lightning.fabric.accelerators import Accelerator from lightning.fabric.accelerators.xla import _XLA_GREATER_EQUAL_2_1 -from lightning.fabric.plugins import XLAPrecision +from lightning.fabric.plugins import CheckpointIO, Precision, XLAPrecision from lightning.fabric.plugins.environments import XLAEnvironment from lightning.fabric.plugins.io.xla import XLACheckpointIO from lightning.fabric.strategies import ParallelStrategy, _StrategyRegistry @@ -83,7 +83,7 @@ def checkpoint_io(self) -> XLACheckpointIO: @checkpoint_io.setter @override - def checkpoint_io(self, io: Optional[XLACheckpointIO]) -> None: + def checkpoint_io(self, io: Optional[CheckpointIO]) -> None: if io is not None and not isinstance(io, XLACheckpointIO): raise TypeError(f"The XLA strategy can only work with the `XLACheckpointIO` plugin, found {io}") self._checkpoint_io = io @@ -99,7 +99,7 @@ def precision(self) -> XLAPrecision: @precision.setter @override - def precision(self, 
precision: Optional[XLAPrecision]) -> None:
+    def precision(self, precision: Optional[Precision]) -> None:
         if precision is not None and not isinstance(precision, XLAPrecision):
             raise TypeError(f"The XLA strategy can only work with the `XLAPrecision` plugin, found {precision}")
         self._precision = precision
diff --git a/src/lightning/fabric/strategies/xla_fsdp.py b/src/lightning/fabric/strategies/xla_fsdp.py
index 8e19fc032e910..c4f8820cd2895 100644
--- a/src/lightning/fabric/strategies/xla_fsdp.py
+++ b/src/lightning/fabric/strategies/xla_fsdp.py
@@ -26,7 +26,7 @@
 from lightning.fabric.accelerators import Accelerator
 from lightning.fabric.accelerators.xla import _XLA_AVAILABLE
-from lightning.fabric.plugins import XLAPrecision
+from lightning.fabric.plugins import CheckpointIO, Precision, XLAPrecision
 from lightning.fabric.plugins.environments import XLAEnvironment
 from lightning.fabric.plugins.io.xla import XLACheckpointIO
 from lightning.fabric.strategies import ParallelStrategy, _StrategyRegistry
@@ -134,7 +134,7 @@ def checkpoint_io(self) -> XLACheckpointIO:
 
     @checkpoint_io.setter
     @override
-    def checkpoint_io(self, io: Optional[XLACheckpointIO]) -> None:
+    def checkpoint_io(self, io: Optional[CheckpointIO]) -> None:
         if io is not None and not isinstance(io, XLACheckpointIO):
             raise TypeError(f"The XLA strategy can only work with the `XLACheckpointIO` plugin, found {io}")
         self._checkpoint_io = io
@@ -150,7 +150,7 @@ def precision(self) -> XLAPrecision:
 
     @precision.setter
     @override
-    def precision(self, precision: Optional[XLAPrecision]) -> None:
+    def precision(self, precision: Optional[Precision]) -> None:
         if precision is not None and not isinstance(precision, XLAPrecision):
             raise TypeError(f"The XLA FSDP strategy can only work with the `XLAPrecision` plugin, found {precision}")
         self._precision = precision
diff --git a/src/lightning/pytorch/core/module.py b/src/lightning/pytorch/core/module.py
index 8108100be6dc4..7df0cb7757f81 100644
--- a/src/lightning/pytorch/core/module.py
+++ b/src/lightning/pytorch/core/module.py
@@ -218,7 +218,7 @@ def trainer(self) -> "pl.Trainer":
     def trainer(self, trainer: Optional["pl.Trainer"]) -> None:
         for v in self.children():
             if isinstance(v, LightningModule):
-                v.trainer = trainer  # type: ignore[assignment]
+                v.trainer = trainer
         self._trainer = trainer
 
     @property
diff --git a/src/lightning/pytorch/serve/servable_module_validator.py b/src/lightning/pytorch/serve/servable_module_validator.py
index dc92625da357d..4c0e6192abdba 100644
--- a/src/lightning/pytorch/serve/servable_module_validator.py
+++ b/src/lightning/pytorch/serve/servable_module_validator.py
@@ -93,7 +93,7 @@ def on_train_start(self, trainer: "pl.Trainer", servable_module: "pl.LightningMo
         # Note: The Trainer needs to be detached from the pl_module before starting the process.
         # This would fail during the deepcopy with DDP.
-        servable_module.trainer = None  # type: ignore[assignment]
+        servable_module.trainer = None
         process = Process(target=self._start_server, args=(servable_module, self.host, self.port, self.optimization))
         process.start()
diff --git a/src/lightning/pytorch/strategies/fsdp.py b/src/lightning/pytorch/strategies/fsdp.py
index bfbf99e82934c..55ea354a5cb60 100644
--- a/src/lightning/pytorch/strategies/fsdp.py
+++ b/src/lightning/pytorch/strategies/fsdp.py
@@ -227,7 +227,7 @@ def precision_plugin(self) -> FSDPPrecision:
 
     @precision_plugin.setter
     @override
-    def precision_plugin(self, precision_plugin: Optional[FSDPPrecision]) -> None:
+    def precision_plugin(self, precision_plugin: Optional[Precision]) -> None:
         if precision_plugin is not None and not isinstance(precision_plugin, FSDPPrecision):
             raise TypeError(
                 f"The FSDP strategy can only work with the `FSDPPrecision` plugin, found {precision_plugin}"
             )
diff --git a/src/lightning/pytorch/strategies/single_xla.py b/src/lightning/pytorch/strategies/single_xla.py
index f46717212809b..2a5e2f3a85b96 100644
--- a/src/lightning/pytorch/strategies/single_xla.py
+++ b/src/lightning/pytorch/strategies/single_xla.py
@@ -19,7 +19,7 @@
 import lightning.pytorch as pl
 from lightning.fabric.accelerators.xla import _XLA_AVAILABLE
-from lightning.fabric.plugins import XLACheckpointIO
+from lightning.fabric.plugins import CheckpointIO, Precision, XLACheckpointIO
 from lightning.fabric.strategies import _StrategyRegistry
 from lightning.fabric.utilities.optimizer import _optimizers_to_device
 from lightning.fabric.utilities.types import _DEVICE
@@ -67,7 +67,7 @@ def checkpoint_io(self) -> Union[XLACheckpointIO, _WrappingCheckpointIO]:
 
     @checkpoint_io.setter
     @override
-    def checkpoint_io(self, io: Optional[Union[XLACheckpointIO, _WrappingCheckpointIO]]) -> None:
+    def checkpoint_io(self, io: Optional[CheckpointIO]) -> None:
         if io is not None and not isinstance(io, (XLACheckpointIO, _WrappingCheckpointIO)):
             raise TypeError(f"The XLA strategy can only work with the `XLACheckpointIO` plugin, found {io}")
         self._checkpoint_io = io
@@ -83,7 +83,7 @@ def precision_plugin(self) -> XLAPrecision:
 
     @precision_plugin.setter
     @override
-    def precision_plugin(self, precision_plugin: Optional[XLAPrecision]) -> None:
+    def precision_plugin(self, precision_plugin: Optional[Precision]) -> None:
         if precision_plugin is not None and not isinstance(precision_plugin, XLAPrecision):
             raise TypeError(f"The XLA strategy can only work with the `XLAPrecision` plugin, found {precision_plugin}")
         self._precision_plugin = precision_plugin
diff --git a/src/lightning/pytorch/strategies/strategy.py b/src/lightning/pytorch/strategies/strategy.py
index 0a0f52e906dd5..16b16a4927513 100644
--- a/src/lightning/pytorch/strategies/strategy.py
+++ b/src/lightning/pytorch/strategies/strategy.py
@@ -57,7 +57,7 @@ def __init__(
         self._checkpoint_io: Optional[CheckpointIO] = checkpoint_io
         self._precision_plugin: Optional[Precision] = None
         # Call the precision setter for input validation
-        self.precision_plugin = precision_plugin  # type: ignore[assignment]
+        self.precision_plugin = precision_plugin
         self._lightning_module: Optional[pl.LightningModule] = None
         self._model: Optional[Module] = None
         self._launcher: Optional[_Launcher] = None
diff --git a/src/lightning/pytorch/strategies/xla.py b/src/lightning/pytorch/strategies/xla.py
index cb70871c83e91..bf141dbd010d7 100644
--- a/src/lightning/pytorch/strategies/xla.py
+++ b/src/lightning/pytorch/strategies/xla.py
@@ -22,7 +22,7 @@
 import lightning.pytorch as pl
 from lightning.fabric.accelerators.xla import _XLA_AVAILABLE, _XLA_GREATER_EQUAL_2_1
-from lightning.fabric.plugins import XLACheckpointIO
+from lightning.fabric.plugins import CheckpointIO, Precision, XLACheckpointIO
 from lightning.fabric.plugins.environments import XLAEnvironment
 from lightning.fabric.strategies import _StrategyRegistry
 from lightning.fabric.utilities.optimizer import _optimizers_to_device
@@ -81,7 +81,7 @@ def checkpoint_io(self) -> Union[XLACheckpointIO, _WrappingCheckpointIO]:
 
     @checkpoint_io.setter
     @override
-    def checkpoint_io(self, io: Optional[Union[XLACheckpointIO, _WrappingCheckpointIO]]) -> None:
+    def checkpoint_io(self, io: Optional[CheckpointIO]) -> None:
         if io is not None and not isinstance(io, (XLACheckpointIO, _WrappingCheckpointIO)):
             raise TypeError(f"The XLA strategy can only work with the `XLACheckpointIO` plugin, found {io}")
         self._checkpoint_io = io
@@ -97,7 +97,7 @@ def precision_plugin(self) -> XLAPrecision:
 
     @precision_plugin.setter
     @override
-    def precision_plugin(self, precision_plugin: Optional[XLAPrecision]) -> None:
+    def precision_plugin(self, precision_plugin: Optional[Precision]) -> None:
         if precision_plugin is not None and not isinstance(precision_plugin, XLAPrecision):
             raise TypeError(f"The XLA strategy can only work with the `XLAPrecision` plugin, found {precision_plugin}")
         self._precision_plugin = precision_plugin
diff --git a/src/lightning/pytorch/trainer/trainer.py b/src/lightning/pytorch/trainer/trainer.py
index 8b976cd2f4f46..8f6d3245a5faf 100644
--- a/src/lightning/pytorch/trainer/trainer.py
+++ b/src/lightning/pytorch/trainer/trainer.py
@@ -901,7 +901,7 @@ def _predict_impl(
         # --------------------
         log.debug(f"{self.__class__.__name__}: trainer predict stage")
 
-        self.predict_loop.return_predictions = return_predictions  # type: ignore[assignment]
+        self.predict_loop.return_predictions = return_predictions
 
         # if a datamodule comes in as the second arg, then fix it for the user
         if isinstance(dataloaders, LightningDataModule):
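The hunks above all apply one pattern: each overriding property setter now declares the plugin base type (`Precision`, `CheckpointIO`) in its signature, so the override stays compatible with the corresponding base-class property, while the existing `isinstance` check still enforces the concrete subclass at runtime. That widening is also what lets the `# type: ignore[assignment]` suppressions at the call sites be deleted. A minimal sketch of the pattern, using simplified stand-ins rather than the real Lightning classes:

    from typing import Optional


    class Precision:
        """Stand-in for the plugin base class used in the new signatures."""


    class XLAPrecision(Precision):
        """Stand-in for the concrete plugin this strategy supports."""


    class XLAStrategy:
        def __init__(self) -> None:
            self._precision: Optional[XLAPrecision] = None

        @property
        def precision(self) -> Optional[XLAPrecision]:
            return self._precision

        @precision.setter
        def precision(self, precision: Optional[Precision]) -> None:
            # Declare the base type so the override matches the parent
            # property's signature; enforce the concrete type at runtime.
            if precision is not None and not isinstance(precision, XLAPrecision):
                raise TypeError(f"expected `XLAPrecision`, found {precision}")
            self._precision = precision


    strategy = XLAStrategy()
    strategy.precision = XLAPrecision()  # accepted, no `# type: ignore` needed
    # strategy.precision = Precision()   # would raise TypeError at runtime

Assigning any `Precision` subclass now type-checks without a suppression comment, and an unsupported plugin still fails fast with a descriptive `TypeError` instead of surfacing later as a confusing attribute error.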
From 57b745d7779ed16b5d9ed83de4655dc111ec0922 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 16 Jun 2025 08:49:16 +0200
Subject: [PATCH 098/112] build(deps): update pandoc requirement from
 <=2.3,>=1.0 to >=1.0,<=2.4 in /requirements (#20908)

build(deps): update pandoc requirement in /requirements

Updates the requirements on [pandoc](https://github.com/boisgera/pandoc) to permit the latest version.
- [Release notes](https://github.com/boisgera/pandoc/releases)
- [Changelog](https://github.com/boisgera/pandoc/blob/master/mkdocs/changelog.md)
- [Commits](https://github.com/boisgera/pandoc/compare/v1.0.0...v2.4)

---
updated-dependencies:
- dependency-name: pandoc
  dependency-version: '2.4'
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(cherry picked from commit 9606c19d2ed083df94d4505afb3304177b243629)
---
 requirements/docs.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/docs.txt b/requirements/docs.txt
index 6d1d609d55ba2..1acd55018df8c 100644
--- a/requirements/docs.txt
+++ b/requirements/docs.txt
@@ -2,7 +2,7 @@ sphinx >5.0, <6.0
 myst-parser >=0.18.1, <4.0.0
 nbsphinx >=0.8.5, <=0.9.7
 nbconvert >7.14, <7.17
-pandoc >=1.0, <=2.3
+pandoc >=1.0, <=2.4
 docutils >=0.16, <0.22
 sphinxcontrib-fulltoc >=1.0, <=1.2.0
 sphinxcontrib-mockautodoc

From 3e9ae034ea9bde1d7bc3c3ab16df1dab80a42a23 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 16 Jun 2025 08:49:26 +0200
Subject: [PATCH 099/112] build(deps): update jsonargparse[signatures]
 requirement from <4.40.0,>=4.39.0 to >=4.39.0,<4.41.0 in /requirements
 (#20907)

build(deps): update jsonargparse[signatures] requirement

Updates the requirements on [jsonargparse[signatures]](https://github.com/omni-us/jsonargparse) to permit the latest version.
- [Changelog](https://github.com/omni-us/jsonargparse/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/omni-us/jsonargparse/compare/v4.39.0...v4.40.0)

---
updated-dependencies:
- dependency-name: jsonargparse[signatures]
  dependency-version: 4.40.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(cherry picked from commit 9992c498255dcd0a170de063764e7040ce1646bf)
---
 requirements/pytorch/extra.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt
index 5694bcfe9b595..f205ba1298a8a 100644
--- a/requirements/pytorch/extra.txt
+++ b/requirements/pytorch/extra.txt
@@ -5,7 +5,7 @@
 matplotlib>3.1, <3.10.0
 omegaconf >=2.2.3, <2.4.0
 hydra-core >=1.2.0, <1.4.0
-jsonargparse[signatures] >=4.39.0, <4.40.0
+jsonargparse[signatures] >=4.39.0, <4.41.0
 rich >=12.3.0, <14.1.0
 tensorboardX >=2.2, <2.7.0  # min version is set by torch.onnx missing attribute
 bitsandbytes >=0.45.2,<0.45.3; platform_system != "Darwin"
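The dependabot commits in this stretch only relax upper bounds (for example, `pandoc >=1.0, <=2.3` becomes `>=1.0, <=2.4`), so each range admits exactly one more tested release series and nothing is pinned tighter. To check locally which versions a requirement line admits, the `packaging` library can evaluate specifier strings directly; a small sketch using the two ranges introduced above (assuming `packaging` is installed, which it is wherever pip is):

    from packaging.specifiers import SpecifierSet

    # Specifier strings as written in the updated requirements files.
    pandoc = SpecifierSet(">=1.0,<=2.4")
    jsonargparse = SpecifierSet(">=4.39.0,<4.41.0")

    print("2.4" in pandoc)           # True: the newly admitted pandoc release
    print("2.5" in pandoc)           # False: still excluded by the upper bound
    print("4.40.0" in jsonargparse)  # True: the release this bump targets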
From c10150d07a8b6969daa7270f3c90e63f2a7441b8 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 16 Jun 2025 08:49:31 +0200
Subject: [PATCH 100/112] build(deps): update lightning-habana requirement
 from <1.3.0,>=1.2.0 to >=1.2.0,<1.7.0 in /requirements (#20903)

build(deps): update lightning-habana requirement in /requirements

Updates the requirements on [lightning-habana](https://github.com/Lightning-AI/lightning-habana) to permit the latest version.
- [Release notes](https://github.com/Lightning-AI/lightning-habana/releases)
- [Changelog](https://github.com/Lightning-AI/lightning-Habana/blob/main/CHANGELOG.md)
- [Commits](https://github.com/Lightning-AI/lightning-habana/compare/1.2.0...1.6.0)

---
updated-dependencies:
- dependency-name: lightning-habana
  dependency-version: 1.6.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(cherry picked from commit b2aaab3dbdb3e69afb7c56a203ce60254520500f)
---
 requirements/_integrations/accelerators.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/_integrations/accelerators.txt b/requirements/_integrations/accelerators.txt
index 90c72bedb2cdc..4f5c1113ce9a2 100644
--- a/requirements/_integrations/accelerators.txt
+++ b/requirements/_integrations/accelerators.txt
@@ -1,2 +1,2 @@
 # validation accelerator connectors
-lightning-habana >=1.2.0, <1.3.0
+lightning-habana >=1.2.0, <1.7.0

From 70acd200d44f8596e22c3933c1396166a4c4c873 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 16 Jun 2025 08:49:50 +0200
Subject: [PATCH 101/112] build(deps): bump coverage from 7.8.2 to 7.9.1 in
 /requirements (#20904)

Bumps [coverage](https://github.com/nedbat/coveragepy) from 7.8.2 to 7.9.1.
- [Release notes](https://github.com/nedbat/coveragepy/releases)
- [Changelog](https://github.com/nedbat/coveragepy/blob/master/CHANGES.rst)
- [Commits](https://github.com/nedbat/coveragepy/compare/7.8.2...7.9.1)

---
updated-dependencies:
- dependency-name: coverage
  dependency-version: 7.9.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
(cherry picked from commit 3693432f03b7813dd46ca0802f29e3832e71bd75)
---
 requirements/fabric/test.txt  | 2 +-
 requirements/pytorch/test.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt
index 4d57af004884d..71d62d508cd6a 100644
--- a/requirements/fabric/test.txt
+++ b/requirements/fabric/test.txt
@@ -1,4 +1,4 @@
-coverage ==7.8.2
+coverage ==7.9.1
 numpy >=1.17.2, <1.27.0
 pytest ==8.4.0
 pytest-cov ==6.1.1
diff --git a/requirements/pytorch/test.txt b/requirements/pytorch/test.txt
index 43aaf999a25ce..64fd22839c81a 100644
--- a/requirements/pytorch/test.txt
+++ b/requirements/pytorch/test.txt
@@ -1,4 +1,4 @@
-coverage ==7.8.2
+coverage ==7.9.1
 pytest ==8.4.0
 pytest-cov ==6.1.1
 pytest-timeout ==2.4.0

From abde8742794e13f98b1151cb3fca3e94c6edb378 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Mon, 16 Jun 2025 14:34:33 +0200
Subject: [PATCH 102/112] fix check for flaky links in readme (#20910)

* fix check for flaky links in readme

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update ci-check-md-links.yml

* force-check-all: "yes"

* fix & skip

* Empty-Commit

* fixing

* fixing

* fixing

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
(cherry picked from commit 26e2c2c132c4a5b4f8f03dbe95cedd89777aa001)
---
 .github/CONTRIBUTING.md                              |  8 ++++----
 .github/PULL_REQUEST_TEMPLATE.md                     |  2 +-
 .github/markdown-links-config.json                   |  6 ++++++
 .github/workflows/ci-check-md-links.yml              |  5 +++--
 README.md                                            |  2 +-
 docs/source-fabric/_templates/theme_variables.jinja  |  2 +-
 docs/source-fabric/advanced/compile.rst              |  2 +-
 docs/source-fabric/advanced/multiple_setup.rst       |  2 +-
 docs/source-fabric/examples/index.rst                | 10 +++++-----
 docs/source-fabric/fundamentals/convert.rst          |  2 +-
 docs/source-pytorch/_templates/theme_variables.jinja |  2 +-
 docs/source-pytorch/advanced/compile.rst             |  2 +-
 docs/source-pytorch/community/governance.rst         |  2 +-
 examples/fabric/meta_learning/README.md              |  2 +-
 src/lightning/data/README.md                         |  4 ++--
 src/pytorch_lightning/README.md                      |  4 ++--
 tests/README.md                                      |  4 ++--
 17 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index f3edd2bae51ab..cfb03d220c99c 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -130,11 +130,11 @@ In case you are adding new dependencies, make sure that they are compatible with
 
 ### Documentation
 
-To learn about development of docs, check out the docs [README.md](https://github.com/Lightning-AI/lightning/blob/master/docs/README.md).
+To learn about development of docs, check out the docs [README.md](https://github.com/Lightning-AI/pytorch-lightning/blob/master/docs/README.md).
 
 ### Testing
 
-To learn about tests, check out the tests [README.md](https://github.com/Lightning-AI/lightning/blob/master/tests/README.md).
+To learn about tests, check out the tests [README.md](https://github.com/Lightning-AI/pytorch-lightning/blob/master/tests/README.md).
 
 ### Pull Request
 
@@ -165,8 +165,8 @@ We welcome any useful contribution! For your convenience here's a recommended wo
 
 1. If any of the existing tests fail in your PR on our CI, refer to the following READMEs to identify what's failing and try to address it.
 
-   - [Test README](https://github.com/Lightning-AI/lightning/blob/master/tests/README.md)
-   - [CI/CD README](https://github.com/Lightning-AI/lightning/blob/master/.github/workflows/README.md)
+   - [Test README](https://github.com/Lightning-AI/pytorch-lightning/blob/master/tests/README.md)
+   - [CI/CD README](https://github.com/Lightning-AI/pytorch-lightning/tree/master/.github/workflows#readme)
 
 1. When you feel ready for integrating your work, mark your PR "Ready for review".
 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index fe89375a04e83..cd784a5037525 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -18,7 +18,7 @@ Fixes #\<issue_number>
 
 Before submitting
 
 - Was this **discussed/agreed** via a GitHub issue? (not for typos and docs)
-- [ ] Did you read the [contributor guideline](https://github.com/Lightning-AI/lightning/blob/master/.github/CONTRIBUTING.md), **Pull Request** section?
+- [ ] Did you read the [contributor guideline](https://github.com/Lightning-AI/pytorch-lightning/blob/master/.github/CONTRIBUTING.md), **Pull Request** section?
 - [ ] Did you make sure your **PR does only one thing**, instead of bundling different changes together?
 - Did you make sure to **update the documentation** with your changes? (if necessary)
 - Did you write any **new necessary tests**? (not for typos and docs)
diff --git a/.github/markdown-links-config.json b/.github/markdown-links-config.json
index 1447e5c8ea832..bc9721da2c587 100644
--- a/.github/markdown-links-config.json
+++ b/.github/markdown-links-config.json
@@ -2,6 +2,12 @@
   "ignorePatterns": [
     {
       "pattern": "^https://github.com/Lightning-AI/pytorch-lightning/pull/"
+    },
+    {
+      "pattern": "^https://dev.azure.com/Lightning-AI/lightning/_apis/build/status"
+    },
+    {
+      "pattern": "^https://codecov.io/gh/Lightning-AI/pytorch-lightning/graph/badge.svg"
     }
   ],
   "httpHeaders": [
diff --git a/.github/workflows/ci-check-md-links.yml b/.github/workflows/ci-check-md-links.yml
index efae72a87f86c..12aca9547e7c7 100644
--- a/.github/workflows/ci-check-md-links.yml
+++ b/.github/workflows/ci-check-md-links.yml
@@ -9,12 +9,13 @@ on:
     types: [opened, reopened, ready_for_review, synchronize]
     paths:
       - ".github/workflows/ci-check-md-links.yml"
-      - ".github/workflows/markdown.links.config.json"
+      - ".github/markdown-links-config.json"
       - "**/*.md"
 
 jobs:
   check-md-links:
-    uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@v0.14.3
+    uses: Lightning-AI/utilities/.github/workflows/check-md-links.yml@main # can be pin with >=0.14.4
     with:
       config-file: ".github/markdown-links-config.json"
       base-branch: "master"
+      force-check-all: "yes"
diff --git a/README.md b/README.md
index aa58c6d8a585a..dd5f0fe43e0c7 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ ______________________________________________________________________
 [![Discord](https://img.shields.io/discord/1077906959069626439?style=plastic)](https://discord.gg/VptPCZkGNa)
 ![GitHub commit activity](https://img.shields.io/github/commit-activity/w/lightning-ai/lightning)
-[![license](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/Lightning-AI/lightning/blob/master/LICENSE)
+[![license](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/Lightning-AI/pytorch-lightning/blob/master/LICENSE)

@@ -28,7 +28,7 @@ ______________________________________________________________________
 [![codecov](https://codecov.io/gh/Lightning-AI/pytorch-lightning/graph/badge.svg?token=SmzX8mnKlA)](https://codecov.io/gh/Lightning-AI/pytorch-lightning)
 [![ReadTheDocs](https://readthedocs.org/projects/pytorch-lightning/badge/?version=stable)](https://lightning.ai/docs/pytorch/stable/)[![Discord](https://img.shields.io/discord/1077906959069626439?style=plastic)](https://discord.gg/VptPCZkGNa)
-[![license](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/Lightning-AI/lightning/blob/master/LICENSE)
+[![license](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/Lightning-AI/pytorch-lightning/blob/master/LICENSE)
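The new `ignorePatterns` entries are anchored regular expressions: the link checker skips any URL that matches one of them, which is how the chronically flaky Azure build-status and Codecov badge endpoints get excluded while every other link is still verified, and `force-check-all: "yes"` makes the job scan all Markdown files rather than only changed ones. A quick local sanity check of the patterns, sketched in Python even though the markdown-link-check action itself applies them as JavaScript regexes:

    import re

    # Patterns copied verbatim from .github/markdown-links-config.json above.
    IGNORE_PATTERNS = [
        r"^https://github.com/Lightning-AI/pytorch-lightning/pull/",
        r"^https://dev.azure.com/Lightning-AI/lightning/_apis/build/status",
        r"^https://codecov.io/gh/Lightning-AI/pytorch-lightning/graph/badge.svg",
    ]

    urls = [
        # First URL is a hypothetical badge link, shown only for illustration.
        "https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/some-pipeline",
        "https://github.com/Lightning-AI/pytorch-lightning/blob/master/LICENSE",
    ]

    for url in urls:
        skipped = any(re.match(pattern, url) for pattern in IGNORE_PATTERNS)
        print(("skip:  " if skipped else "check: ") + url)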