Skip to content

Commit 50a0af7

Browse files
authored
Merge branch 'master' into deepspeed_mics_init
2 parents 87eefdc + 4c0eb3a commit 50a0af7

File tree

13 files changed

+98
-44
lines changed

13 files changed

+98
-44
lines changed

.github/workflows/_build-packages.yml

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,38 +19,16 @@ defaults:
1919
shell: bash
2020

2121
jobs:
22-
init:
23-
runs-on: ubuntu-22.04
24-
steps:
25-
- uses: actions/checkout@v4
26-
- run: |
27-
mkdir dist && touch dist/.placeholder
28-
- name: Keep artifact
29-
id: keep-artifact
30-
run: python -c "print('DAYS=' + str(5 if '${{ github.event_name }}'.startswith('pull_request') else 0))" >> $GITHUB_OUTPUT
31-
- uses: actions/upload-artifact@v3
32-
with:
33-
name: ${{ inputs.artifact-name }}
34-
path: dist
35-
retention-days: ${{ steps.keep-artifact.outputs.DAYS }}
36-
include-hidden-files: true
37-
3822
build-packages:
39-
needs: init
4023
runs-on: ubuntu-22.04
4124
strategy:
42-
max-parallel: 1 # run sequential to prevent download/upload collisions
4325
matrix:
4426
pkg-name: ${{ fromJSON(inputs.pkg-names) }}
4527
steps:
4628
- uses: actions/checkout@v4
47-
- uses: actions/download-artifact@v3
48-
with:
49-
name: ${{ inputs.artifact-name }}
50-
path: pypi
5129
- uses: actions/setup-python@v5
5230
with:
53-
python-version: 3.9
31+
python-version: "3.x"
5432

5533
- run: python -c "print('NB_DIRS=' + str(2 if '${{ matrix.pkg-name }}' == 'pytorch' else 1))" >> $GITHUB_ENV
5634
- name: Build & check package
@@ -60,11 +38,33 @@ jobs:
6038
nb-dirs: ${{ env.NB_DIRS }}
6139

6240
- run: |
63-
mkdir pypi/${{ matrix.pkg-name }}
41+
mkdir -p pypi/${{ matrix.pkg-name }}
6442
cp dist/* pypi/${{ matrix.pkg-name }}/
6543
66-
- uses: actions/upload-artifact@v3
44+
- uses: actions/upload-artifact@v4
45+
with:
46+
name: ${{ inputs.artifact-name }}-${{ matrix.pkg-name }}
47+
path: pypi
48+
retention-days: 1
49+
50+
merge-artifacts:
51+
needs: build-packages
52+
runs-on: ubuntu-22.04
53+
steps:
54+
- uses: actions/download-artifact@v4
55+
with: # download all build artifacts
56+
pattern: ${{ inputs.artifact-name }}-*
57+
merge-multiple: true
58+
path: pypi
59+
- run: |
60+
sudo apt-get install -y tree
61+
tree pypi
62+
63+
- name: Keep artifact
64+
run: python -c "print('DAYS=' + str(5 if '${{ github.event_name }}'.startswith('pull_request') else 0))" >> $GITHUB_ENV
65+
- uses: actions/upload-artifact@v4
6766
with:
6867
name: ${{ inputs.artifact-name }}
6968
path: pypi
70-
include-hidden-files: true
69+
retention-days: ${{ env.DAYS }}
70+
if-no-files-found: error

.github/workflows/_legacy-checkpoints.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ jobs:
104104
python -c "print('AWS_RUN=' + str('' if '${{inputs.push_to_s3}}' == 'true' else '--dryrun'))" >> $GITHUB_ENV
105105
106106
- name: Upload checkpoints to GitHub Actions artifact
107-
uses: actions/upload-artifact@v3
107+
uses: actions/upload-artifact@v4
108108
with:
109109
name: checkpoints-${{ github.sha }}
110110
path: ${{ env.LEGACY_FOLDER }}/checkpoints/

.github/workflows/ci-pkg-install.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ jobs:
5050
- uses: actions/setup-python@v5
5151
with:
5252
python-version: ${{ matrix.python-version }}
53-
- uses: actions/download-artifact@v3
53+
- uses: actions/download-artifact@v4
5454
with:
5555
name: dist-packages-${{ github.sha }}
5656
path: dist

.github/workflows/docs-build.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ jobs:
129129
run: echo "ARTIFACT_DAYS=7" >> $GITHUB_ENV
130130
- name: Upload built docs
131131
if: ${{ matrix.target == 'html' }}
132-
uses: actions/upload-artifact@v3
132+
uses: actions/upload-artifact@v4
133133
with:
134134
name: docs-${{ matrix.pkg-name }}-${{ github.sha }}
135135
path: docs/build/html/
@@ -158,7 +158,7 @@ jobs:
158158
# use input if dispatch or git tag
159159
VERSION: ${{ inputs.version || github.ref_name }}
160160
steps:
161-
- uses: actions/download-artifact@v3
161+
- uses: actions/download-artifact@v4
162162
with:
163163
name: docs-${{ matrix.pkg-name }}-${{ github.sha }}
164164
path: docs/build/html/

.github/workflows/release-pkg.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
if: github.event_name == 'release'
3939
steps:
4040
- uses: actions/checkout@v4
41-
- uses: actions/download-artifact@v3
41+
- uses: actions/download-artifact@v4
4242
with:
4343
name: dist-packages-${{ github.sha }}
4444
path: dist
@@ -140,7 +140,7 @@ jobs:
140140
name: ["FABRIC", "PYTORCH", "LIGHTNING"]
141141
steps:
142142
- uses: actions/checkout@v4 # needed for local action below
143-
- uses: actions/download-artifact@v3
143+
- uses: actions/download-artifact@v4
144144
with:
145145
name: dist-packages-${{ github.sha }}
146146
path: dist
@@ -165,7 +165,7 @@ jobs:
165165
name: ["FABRIC", "PYTORCH", "LIGHTNING"]
166166
steps:
167167
- uses: actions/checkout@v4 # needed for local action below
168-
- uses: actions/download-artifact@v3
168+
- uses: actions/download-artifact@v4
169169
with:
170170
name: dist-packages-${{ github.sha }}
171171
path: dist

docs/source-pytorch/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,7 @@ def package_list_from_file(file):
643643
r"starter/installation.html$",
644644
r"^../common/trainer.html#trainer-flags$",
645645
"https://deepgenerativemodels.github.io/assets/slides/cs236_lecture11.pdf",
646+
"https://developer.habana.ai", # returns 403 error but redirects to intel.com documentation
646647
"https://www.intel.com/content/www/us/en/products/docs/processors/what-is-a-gpu.html",
647648
"https://www.microsoft.com/en-us/research/blog/zero-infinity-and-deepspeed-unlocking-unprecedented-model-scale-for-deep-learning-training/", # noqa: E501
648649
"https://stackoverflow.com/questions/66640705/how-can-i-install-grpcio-on-an-apple-m1-silicon-laptop",

src/lightning/fabric/loggers/tensorboard.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -220,15 +220,19 @@ def log_metrics(self, metrics: Mapping[str, float], step: Optional[int] = None)
220220
@override
221221
@rank_zero_only
222222
def log_hyperparams(
223-
self, params: Union[dict[str, Any], Namespace], metrics: Optional[dict[str, Any]] = None
223+
self,
224+
params: Union[dict[str, Any], Namespace],
225+
metrics: Optional[dict[str, Any]] = None,
226+
step: Optional[int] = None,
224227
) -> None:
225228
"""Record hyperparameters. TensorBoard logs with and without saved hyperparameters are incompatible, the
226229
hyperparameters are then not displayed in the TensorBoard. Please delete or move the previously saved logs to
227230
display the new ones with hyperparameters.
228231
229232
Args:
230-
params: a dictionary-like container with the hyperparameters
233+
params: A dictionary-like container with the hyperparameters
231234
metrics: Dictionary with metric names as keys and measured quantities as values
235+
step: Optional global step number for the logged metrics
232236
233237
"""
234238
params = _convert_params(params)
@@ -244,7 +248,7 @@ def log_hyperparams(
244248
metrics = {"hp_metric": metrics}
245249

246250
if metrics:
247-
self.log_metrics(metrics, 0)
251+
self.log_metrics(metrics, step)
248252

249253
if _TENSORBOARD_AVAILABLE:
250254
from torch.utils.tensorboard.summary import hparams
@@ -253,9 +257,9 @@ def log_hyperparams(
253257

254258
exp, ssi, sei = hparams(params, metrics)
255259
writer = self.experiment._get_file_writer()
256-
writer.add_summary(exp)
257-
writer.add_summary(ssi)
258-
writer.add_summary(sei)
260+
writer.add_summary(exp, step)
261+
writer.add_summary(ssi, step)
262+
writer.add_summary(sei, step)
259263

260264
@override
261265
@rank_zero_only

src/lightning/pytorch/CHANGELOG.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
66

77

8+
## [unreleased] - YYYY-MM-DD
9+
10+
### Added
11+
12+
### Changed
13+
14+
- Merging of hparams when logging now ignores LightningCLI's internal `_class_path` parameter, so it is neither compared between the module and the data module nor passed to the loggers ([#20221](https://github.com/Lightning-AI/pytorch-lightning/pull/20221))
15+
16+
### Removed
17+
18+
### Fixed
19+
20+
- Fixed `LightningCLI` failing when both the module and the data module save hyperparameters, due to the conflicting internal `_class_path` parameter ([#20221](https://github.com/Lightning-AI/pytorch-lightning/pull/20221))
21+
22+
823
## [2.4.0] - 2024-08-06
924

1025
### Added

src/lightning/pytorch/loggers/tensorboard.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,15 +153,19 @@ def save_dir(self) -> str:
153153
@override
154154
@rank_zero_only
155155
def log_hyperparams(
156-
self, params: Union[dict[str, Any], Namespace], metrics: Optional[dict[str, Any]] = None
156+
self,
157+
params: Union[dict[str, Any], Namespace],
158+
metrics: Optional[dict[str, Any]] = None,
159+
step: Optional[int] = None,
157160
) -> None:
158161
"""Record hyperparameters. TensorBoard logs with and without saved hyperparameters are incompatible, the
159162
hyperparameters are then not displayed in the TensorBoard. Please delete or move the previously saved logs to
160163
display the new ones with hyperparameters.
161164
162165
Args:
163-
params: a dictionary-like container with the hyperparameters
166+
params: A dictionary-like container with the hyperparameters
164167
metrics: Dictionary with metric names as keys and measured quantities as values
168+
step: Optional global step number for the logged metrics
165169
166170
"""
167171
if _OMEGACONF_AVAILABLE:
@@ -175,7 +179,7 @@ def log_hyperparams(
175179
else:
176180
self.hparams.update(params)
177181

178-
return super().log_hyperparams(params=params, metrics=metrics)
182+
return super().log_hyperparams(params=params, metrics=metrics, step=step)
179183

180184
@override
181185
@rank_zero_only

src/lightning/pytorch/loggers/utilities.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ def _log_hyperparams(trainer: "pl.Trainer") -> None:
6969
lightning_hparams = pl_module.hparams_initial
7070
inconsistent_keys = []
7171
for key in lightning_hparams.keys() & datamodule_hparams.keys():
72+
if key == "_class_path":
73+
# Skip LightningCLI's internal hparam
74+
continue
7275
lm_val, dm_val = lightning_hparams[key], datamodule_hparams[key]
7376
if (
7477
type(lm_val) != type(dm_val)
@@ -88,6 +91,10 @@ def _log_hyperparams(trainer: "pl.Trainer") -> None:
8891
elif datamodule_log_hyperparams:
8992
hparams_initial = trainer.datamodule.hparams_initial
9093

94+
# Don't log LightningCLI's internal hparam
95+
if hparams_initial is not None:
96+
hparams_initial = {k: v for k, v in hparams_initial.items() if k != "_class_path"}
97+
9198
for logger in trainer.loggers:
9299
if hparams_initial is not None:
93100
logger.log_hyperparams(hparams_initial)

0 commit comments

Comments
 (0)