Skip to content

Commit 6a62f66

Browse files
committed
Merge branch 'master' into bug/18727_reset_trainer_should_stop_on_fit
2 parents c85b8fd + 6497e36 commit 6a62f66

File tree

532 files changed

+10675
-4908
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

532 files changed

+10675
-4908
lines changed

.actions/assistant.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ def _download_frontend(pkg_path: str, version: str = "v0.0.0"):
234234
response = urllib.request.urlopen(frontend_release_url)
235235

236236
file = tarfile.open(fileobj=response, mode="r|gz")
237-
file.extractall(path=download_dir)
237+
file.extractall(path=download_dir) # noqa: S202
238238

239239
shutil.move(download_dir, frontend_dir)
240240
print("The Lightning UI has successfully been downloaded!")
@@ -442,9 +442,20 @@ def pull_docs_files(
442442
target_dir: str = "docs/source-pytorch/XXX",
443443
checkout: str = "refs/tags/1.0.0",
444444
source_dir: str = "docs/source",
445+
single_page: Optional[str] = None,
445446
as_orphan: bool = False,
446447
) -> None:
447-
"""Pull docs pages from external source and append to local docs."""
448+
"""Pull docs pages from external source and append to local docs.
449+
450+
Args:
451+
gh_user_repo: standard GitHub user/repo string
452+
target_dir: relative location inside the docs folder
453+
checkout: specific tag or branch to checkout
454+
source_dir: relative location inside the remote / external repo
455+
single_page: copy only a single page from the remote repo and name it after the repo, with any "lightning", "lightning-" or "lightning_" part removed from the name
456+
as_orphan: append orphan statement to the page
457+
458+
"""
448459
import zipfile
449460

450461
zip_url = f"https://github.com/{gh_user_repo}/archive/{checkout}.zip"
@@ -457,13 +468,21 @@ def pull_docs_files(
457468
raise RuntimeError(f"Requesting file '{zip_url}' does not exist or it is just unavailable.")
458469

459470
with zipfile.ZipFile(zip_file, "r") as zip_ref:
460-
zip_ref.extractall(tmp)
471+
zip_ref.extractall(tmp) # noqa: S202
461472

462473
zip_dirs = [d for d in glob.glob(os.path.join(tmp, "*")) if os.path.isdir(d)]
463474
# check that the extracted archive contains exactly one folder (the repo root)
464475
assert len(zip_dirs) == 1
465476
repo_dir = zip_dirs[0]
466477

478+
if single_page: # special case for copying single page
479+
single_page = os.path.join(repo_dir, source_dir, single_page)
480+
assert os.path.isfile(single_page), f"File '{single_page}' does not exist."
481+
name = re.sub(r"lightning[-_]?", "", gh_user_repo.split("/")[-1])
482+
new_rst = os.path.join(_PROJECT_ROOT, target_dir, f"{name}.rst")
483+
AssistantCLI._copy_rst(single_page, new_rst, as_orphan=as_orphan)
484+
return
485+
# continue with copying all pages
467486
ls_pages = glob.glob(os.path.join(repo_dir, source_dir, "*.rst"))
468487
ls_pages += glob.glob(os.path.join(repo_dir, source_dir, "**", "*.rst"))
469488
for rst in ls_pages:

.azure/gpu-benchmarks.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ jobs:
4646
variables:
4747
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
4848
container:
49-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
49+
# TODO: Upgrade to Python 3.11
50+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
5051
options: "--gpus=all --shm-size=32g"
5152
strategy:
5253
matrix:

.azure/gpu-tests-fabric.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,12 @@ jobs:
5656
options: "--gpus=all --shm-size=2gb -v /var/tmp:/var/tmp"
5757
strategy:
5858
matrix:
59+
# TODO: Upgrade to Python 3.11
5960
"Fabric | latest":
60-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
61+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
6162
PACKAGE_NAME: "fabric"
6263
"Lightning | latest":
63-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
64+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
6465
PACKAGE_NAME: "lightning"
6566
workspace:
6667
clean: all
@@ -73,6 +74,10 @@ jobs:
7374
scope=$(python -c 'n = "$(PACKAGE_NAME)" ; print(dict(fabric="lightning_fabric").get(n, n))')
7475
echo "##vso[task.setvariable variable=COVERAGE_SOURCE]$scope"
7576
displayName: "set env. vars"
77+
- bash: |
78+
echo "##vso[task.setvariable variable=TORCH_URL]https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM}/torch_test.html"
79+
condition: endsWith(variables['Agent.JobName'], 'future')
80+
displayName: "set env. vars 4 future"
7681
7782
- bash: |
7883
echo $(DEVICES)
@@ -99,13 +104,14 @@ jobs:
99104
100105
- bash: |
101106
extra=$(python -c "print({'lightning': 'fabric-'}.get('$(PACKAGE_NAME)', ''))")
102-
pip install -e ".[${extra}dev]" pytest-timeout -U --find-links ${TORCH_URL}
107+
pip install -e ".[${extra}dev]" pytest-timeout -U --find-links="${TORCH_URL}"
103108
displayName: "Install package & dependencies"
104109
105110
- bash: |
106111
set -e
107112
python requirements/collect_env_details.py
108113
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'"
114+
python -c "import bitsandbytes"
109115
displayName: "Env details"
110116
111117
- bash: python -m pytest lightning_fabric

.azure/gpu-tests-pytorch.yml

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,12 @@ jobs:
4848
cancelTimeoutInMinutes: "2"
4949
strategy:
5050
matrix:
51+
# TODO: Upgrade to Python 3.11
5152
"PyTorch | latest":
52-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
53+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
5354
PACKAGE_NAME: "pytorch"
5455
"Lightning | latest":
55-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
56+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.2-cuda12.1.0"
5657
PACKAGE_NAME: "lightning"
5758
pool: lit-rtx-3090
5859
variables:
@@ -76,6 +77,10 @@ jobs:
7677
scope=$(python -c 'n = "$(PACKAGE_NAME)" ; print(dict(pytorch="pytorch_lightning").get(n, n))')
7778
echo "##vso[task.setvariable variable=COVERAGE_SOURCE]$scope"
7879
displayName: "set env. vars"
80+
- bash: |
81+
echo "##vso[task.setvariable variable=TORCH_URL]https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM}/torch_test.html"
82+
condition: endsWith(variables['Agent.JobName'], 'future')
83+
displayName: "set env. vars 4 future"
7984
8085
- bash: |
8186
echo $(DEVICES)
@@ -103,13 +108,13 @@ jobs:
103108
- bash: |
104109
pip install -q -r .actions/requirements.txt
105110
python .actions/assistant.py requirements_prune_pkgs \
106-
--packages="[lightning-colossalai,lightning-bagua]" \
111+
--packages="[lightning-colossalai]" \
107112
--req_files="[requirements/_integrations/strategies.txt]"
108113
displayName: "Prune packages" # these have installation issues
109114
110115
- bash: |
111116
extra=$(python -c "print({'lightning': 'pytorch-'}.get('$(PACKAGE_NAME)', ''))")
112-
pip install -e ".[${extra}dev]" -r requirements/_integrations/strategies.txt pytest-timeout -U --find-links ${TORCH_URL}
117+
pip install -e ".[${extra}dev]" -r requirements/_integrations/strategies.txt pytest-timeout -U --find-links="${TORCH_URL}"
113118
displayName: "Install package & dependencies"
114119
115120
- bash: pip uninstall -y lightning
@@ -127,6 +132,7 @@ jobs:
127132
python requirements/collect_env_details.py
128133
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'"
129134
python requirements/pytorch/check-avail-extras.py
135+
python -c "import bitsandbytes"
130136
displayName: "Env details"
131137
132138
- bash: python -m pytest pytorch_lightning

.github/ISSUE_TEMPLATE/1_bug_report.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ body:
3636
- "v1.9"
3737
- "v2.0"
3838
- "v2.1"
39+
- "v2.2"
3940
- "master"
4041
validations:
4142
required: true

.github/checkgroup.yml

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,18 @@ subprojects:
1919
- "!*.md"
2020
- "!**/*.md"
2121
checks:
22-
- "pl-cpu (macOS-11, lightning, 3.8, 1.12, oldest)"
23-
- "pl-cpu (macOS-11, lightning, 3.9, 1.12)"
22+
- "pl-cpu (macOS-11, lightning, 3.8, 1.13, oldest)"
2423
- "pl-cpu (macOS-11, lightning, 3.10, 1.13)"
25-
- "pl-cpu (macOS-11, lightning, 3.10, 2.0)"
2624
- "pl-cpu (macOS-11, lightning, 3.10, 2.1)"
27-
- "pl-cpu (ubuntu-20.04, lightning, 3.8, 1.12, oldest)"
28-
- "pl-cpu (ubuntu-20.04, lightning, 3.9, 1.12)"
25+
- "pl-cpu (macOS-11, lightning, 3.10, 2.2)"
26+
- "pl-cpu (ubuntu-20.04, lightning, 3.8, 1.13, oldest)"
2927
- "pl-cpu (ubuntu-20.04, lightning, 3.10, 1.13)"
30-
- "pl-cpu (ubuntu-20.04, lightning, 3.10, 2.0)"
3128
- "pl-cpu (ubuntu-20.04, lightning, 3.10, 2.1)"
32-
- "pl-cpu (windows-2022, lightning, 3.8, 1.12, oldest)"
33-
- "pl-cpu (windows-2022, lightning, 3.9, 1.12)"
29+
- "pl-cpu (ubuntu-20.04, lightning, 3.10, 2.2)"
30+
- "pl-cpu (windows-2022, lightning, 3.8, 1.13, oldest)"
3431
- "pl-cpu (windows-2022, lightning, 3.10, 1.13)"
35-
- "pl-cpu (windows-2022, lightning, 3.10, 2.0)"
3632
- "pl-cpu (windows-2022, lightning, 3.10, 2.1)"
33+
- "pl-cpu (windows-2022, lightning, 3.10, 2.2)"
3734
- "pl-cpu (macOS-11, pytorch, 3.8, 1.13)"
3835
- "pl-cpu (ubuntu-20.04, pytorch, 3.8, 1.13)"
3936
- "pl-cpu (windows-2022, pytorch, 3.8, 1.13)"
@@ -95,7 +92,6 @@ subprojects:
9592
- ".github/workflows/tpu-tests.yml"
9693
- "tests/tests_pytorch/run_tpu_tests.sh"
9794
checks:
98-
- "test-on-tpus (pytorch, xrt, v4-8)"
9995
- "test-on-tpus (pytorch, pjrt, v4-8)"
10096

10197
- id: "fabric: Docs"
@@ -144,13 +140,11 @@ subprojects:
144140
- "!*.md"
145141
- "!**/*.md"
146142
checks:
147-
- "build-cuda (3.9, 1.12, 11.7.1)"
148143
- "build-cuda (3.9, 1.13, 11.8.0)"
149144
- "build-cuda (3.9, 1.13, 12.0.1)"
150145
- "build-cuda (3.10, 2.0, 11.8.0)"
151146
- "build-cuda (3.10, 2.1, 12.1.0)"
152147
#- "build-NGC"
153-
- "build-pl (3.9, 1.12, 11.7.1)"
154148
- "build-pl (3.9, 1.13, 11.8.0)"
155149
- "build-pl (3.9, 1.13, 12.0.1)"
156150
- "build-pl (3.10, 2.0, 11.8.0)"
@@ -194,21 +188,18 @@ subprojects:
194188
- "!*.md"
195189
- "!**/*.md"
196190
checks:
197-
- "fabric-cpu (macOS-11, lightning, 3.8, 1.12, oldest)"
198-
- "fabric-cpu (macOS-11, lightning, 3.9, 1.12)"
191+
- "fabric-cpu (macOS-11, lightning, 3.8, 1.13, oldest)"
199192
- "fabric-cpu (macOS-11, lightning, 3.10, 1.13)"
200-
- "fabric-cpu (macOS-11, lightning, 3.10, 2.0)"
201193
- "fabric-cpu (macOS-11, lightning, 3.11, 2.1)"
202-
- "fabric-cpu (ubuntu-20.04, lightning, 3.8, 1.12, oldest)"
203-
- "fabric-cpu (ubuntu-20.04, lightning, 3.9, 1.12)"
194+
- "fabric-cpu (macOS-11, lightning, 3.11, 2.2)"
195+
- "fabric-cpu (ubuntu-20.04, lightning, 3.8, 1.13, oldest)"
204196
- "fabric-cpu (ubuntu-20.04, lightning, 3.10, 1.13)"
205-
- "fabric-cpu (ubuntu-20.04, lightning, 3.10, 2.0)"
206197
- "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.1)"
207-
- "fabric-cpu (windows-2022, lightning, 3.8, 1.12, oldest)"
208-
- "fabric-cpu (windows-2022, lightning, 3.9, 1.12)"
198+
- "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.2)"
199+
- "fabric-cpu (windows-2022, lightning, 3.8, 1.13, oldest)"
209200
- "fabric-cpu (windows-2022, lightning, 3.10, 1.13)"
210-
- "fabric-cpu (windows-2022, lightning, 3.10, 2.0)"
211201
- "fabric-cpu (windows-2022, lightning, 3.11, 2.1)"
202+
- "fabric-cpu (windows-2022, lightning, 3.11, 2.2)"
212203
- "fabric-cpu (macOS-11, fabric, 3.8, 1.13)"
213204
- "fabric-cpu (ubuntu-20.04, fabric, 3.8, 1.13)"
214205
- "fabric-cpu (windows-2022, fabric, 3.8, 1.13)"
@@ -248,7 +239,6 @@ subprojects:
248239
- ".github/workflows/tpu-tests.yml"
249240
- "tests/tests_fabric/run_tpu_tests.sh"
250241
checks:
251-
- "test-on-tpus (fabric, xrt, v4-8)"
252242
- "test-on-tpus (pytorch, pjrt, v4-8)"
253243

254244
# SECTION: lightning_app

.github/workflows/README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ Brief description of all our automation tools used for boosting development perf
2727

2828
- GPU: 2 x NVIDIA RTX 3090
2929
- TPU: [Google TPU v4-8](https://cloud.google.com/tpu/docs)
30-
- IPU: [Colossus MK1 IPU](https://www.graphcore.ai/products/ipu)
3130

3231
- To check which versions of Python or PyTorch are used for testing in our CI, see the corresponding workflow files or checkgroup config file at [`.github/checkgroup.yml`](../checkgroup.yml).
3332

.github/workflows/_build-packages.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
with:
4848
name: ${{ inputs.artifact-name }}
4949
path: pypi
50-
- uses: actions/setup-python@v4
50+
- uses: actions/setup-python@v5
5151
with:
5252
python-version: 3.9
5353

.github/workflows/_legacy-checkpoints.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ on:
4242
type: boolean
4343

4444
env:
45-
legacy_dir: "tests/legacy"
45+
LEGACY_FOLDER: "tests/legacy"
4646
TORCH_URL: "https://download.pytorch.org/whl/cpu/torch_stable.html"
4747

4848
defaults:
@@ -57,7 +57,7 @@ jobs:
5757
steps:
5858
- uses: actions/checkout@v4
5959

60-
- uses: actions/setup-python@v4
60+
- uses: actions/setup-python@v5
6161
with:
6262
# Python version here needs to be supported by all PL versions listed in back-compatible-versions.txt.
6363
python-version: 3.8
@@ -91,11 +91,11 @@ jobs:
9191
run: python -c "import pytorch_lightning as pl; print(f'pl-version={pl.__version__}')" >> $GITHUB_OUTPUT || echo pl-version='' >> $GITHUB_OUTPUT
9292

9393
- name: Generate checkpoints
94-
working-directory: ${{ env.legacy_dir }}
94+
working-directory: ${{ env.LEGACY_FOLDER }}
9595
run: bash generate_checkpoints.sh ${{ inputs.pl_version }}
9696
- name: Rename local to actual version
9797
if: inputs.upload_local
98-
working-directory: ${{ env.legacy_dir }}
98+
working-directory: ${{ env.LEGACY_FOLDER }}
9999
run: mv checkpoints/local checkpoints/${{ steps.decide-version.outputs.pl-version }}
100100

101101
- name: "Determine: Keep artifact & DryRun"
@@ -107,12 +107,12 @@ jobs:
107107
uses: actions/upload-artifact@v3
108108
with:
109109
name: checkpoints-${{ github.sha }}
110-
path: ${{ env.legacy_dir }}/checkpoints/
110+
path: ${{ env.LEGACY_FOLDER }}/checkpoints/
111111
retention-days: ${{ env.KEEP_DAYS }}
112112

113113
- run: pip install -r requirements/ci.txt
114114
- name: Upload checkpoints to S3
115-
working-directory: ${{ env.legacy_dir }}
115+
working-directory: ${{ env.LEGACY_FOLDER }}
116116
env:
117117
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_PUB_ACCESS_KEY }}
118118
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_PUB_SECRET_KEY }}
@@ -134,11 +134,11 @@ jobs:
134134
ref: master
135135

136136
- name: Append a new version to legacy checkpoint list
137-
working-directory: ${{ env.legacy_dir }}
137+
working-directory: ${{ env.LEGACY_FOLDER }}
138138
run: echo ${PL_VERSION} >> back-compatible-versions.txt
139139

140140
- name: Create Pull Request
141-
uses: peter-evans/create-pull-request@v5
141+
uses: peter-evans/create-pull-request@v6
142142
with:
143143
title: Adding test for legacy checkpoint created with ${{ env.PL_VERSION }}
144144
committer: GitHub <noreply@github.com>

.github/workflows/call-clear-cache.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@ on:
1212
jobs:
1313
cron-clear:
1414
if: github.event_name == 'schedule'
15-
uses: Lightning-AI/utilities/.github/workflows/[email protected].0
15+
uses: Lightning-AI/utilities/.github/workflows/[email protected].1
1616
with:
1717
pattern: "latest|docs"
1818

1919
direct-clear:
2020
if: github.event_name == 'workflow_dispatch'
21-
uses: Lightning-AI/utilities/.github/workflows/[email protected].0
21+
uses: Lightning-AI/utilities/.github/workflows/[email protected].1
2222
with:
2323
pattern: ${{ inputs.pattern }}

0 commit comments

Comments
 (0)