Skip to content

Commit 516a26f

Browse files
authored
Merge branch 'master' into wkong/perf-opt
2 parents 1d2eced + e21b172 commit 516a26f

File tree

9 files changed

+67
-22
lines changed

9 files changed

+67
-22
lines changed

.github/checkgroup.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@ subprojects:
4747
- "!*.md"
4848
- "!**/*.md"
4949
checks:
50-
- "pytorch.yml / Lit Job (nvidia/cuda:12.1.1-runtime-ubuntu22.04, pytorch, 3.10, L4_X_2)"
51-
- "pytorch.yml / Lit Job (nvidia/cuda:12.6.3-runtime-ubuntu22.04, lightning, 3.12, L4_X_2)"
52-
- "pytorch.yml / Lit Job (nvidia/cuda:12.6.3-runtime-ubuntu22.04, pytorch, 3.12, L4_X_2)"
50+
- "pytorch.yml / Lit Job (nvidia/cuda:12.1.1-runtime-ubuntu22.04, pytorch, 3.10)"
51+
- "pytorch.yml / Lit Job (lightning, 3.12)"
52+
- "pytorch.yml / Lit Job (pytorch, 3.12)"
5353

5454
- id: "Benchmarks"
5555
paths:
@@ -148,9 +148,9 @@ subprojects:
148148
- "!*.md"
149149
- "!**/*.md"
150150
checks:
151-
- "fabric.yml / Lit Job (nvidia/cuda:12.1.1-runtime-ubuntu22.04, fabric, 3.10, L4_X_2)"
152-
- "fabric.yml / Lit Job (nvidia/cuda:12.6.3-runtime-ubuntu22.04, fabric, 3.12, L4_X_2)"
153-
- "fabric.yml / Lit Job (nvidia/cuda:12.6.3-runtime-ubuntu22.04, lightning, 3.12, L4_X_2)"
151+
- "fabric.yml / Lit Job (nvidia/cuda:12.1.1-runtime-ubuntu22.04, fabric, 3.10)"
152+
- "fabric.yml / Lit Job (fabric, 3.12)"
153+
- "fabric.yml / Lit Job (lightning, 3.12)"
154154

155155
# Temporarily disabled
156156
# - id: "lightning_fabric: TPU workflow"

.lightning/workflows/fabric.yml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,21 @@ trigger:
55
branches: ["master", "release/stable"]
66

77
timeout: "60" # minutes
8+
machine: "L4_X_2"
9+
image: "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
810
parametrize:
911
matrix: {}
1012
include:
1113
# note that this also sets all oldest requirements, which are linked to Python == 3.10
1214
- image: "nvidia/cuda:12.1.1-runtime-ubuntu22.04"
1315
PACKAGE_NAME: "fabric"
1416
python_version: "3.10"
15-
machine: "L4_X_2"
16-
- image: "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
17-
PACKAGE_NAME: "fabric"
17+
- PACKAGE_NAME: "fabric"
1818
python_version: "3.12"
19-
machine: "L4_X_2"
2019
# - image: "nvidia/cuda:12.6-runtime-ubuntu22.04"
2120
# PACKAGE_NAME: "fabric"
22-
- image: "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
23-
PACKAGE_NAME: "lightning"
21+
- PACKAGE_NAME: "lightning"
2422
python_version: "3.12"
25-
machine: "L4_X_2"
2623
exclude: []
2724

2825
env:

.lightning/workflows/pytorch.yml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,21 @@ trigger:
55
branches: ["master", "release/stable"]
66

77
timeout: "60" # minutes
8+
machine: "L4_X_2"
9+
image: "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
810
parametrize:
911
matrix: {}
1012
include:
1113
# note that this also sets oldest requirements which are linked to Python == 3.10
1214
- image: "nvidia/cuda:12.1.1-runtime-ubuntu22.04"
1315
PACKAGE_NAME: "pytorch"
1416
python_version: "3.10"
15-
machine: "L4_X_2"
16-
- image: "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
17-
PACKAGE_NAME: "pytorch"
17+
- PACKAGE_NAME: "pytorch"
1818
python_version: "3.12"
19-
machine: "L4_X_2"
2019
# - image: "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
2120
# PACKAGE_NAME: "pytorch"
22-
- image: "nvidia/cuda:12.6.3-runtime-ubuntu22.04"
23-
PACKAGE_NAME: "lightning"
21+
- PACKAGE_NAME: "lightning"
2422
python_version: "3.12"
25-
machine: "L4_X_2"
2623
exclude: []
2724

2825
env:

docs/source-pytorch/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,7 @@ def package_list_from_file(file):
645645
r"installation.html$",
646646
r"starter/installation.html$",
647647
r"^../common/trainer.html#trainer-flags$",
648+
"https://medium.com/pytorch-lightning/quick-contribution-guide-86d977171b3a",
648649
"https://deepgenerativemodels.github.io/assets/slides/cs236_lecture11.pdf",
649650
"https://developer.habana.ai", # returns 403 error but redirects to intel.com documentation
650651
"https://www.intel.com/content/www/us/en/products/docs/processors/what-is-a-gpu.html",

docs/source-pytorch/data/alternatives.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,12 @@ The webdataset library contains a small wrapper (``WebLoader``) that adds a flui
9999
import lightning as L
100100
import webdataset as wds
101101
102-
dataset = wds.WebDataset(urls)
102+
dataset = wds.WebDataset(
103+
urls,
104+
# needed for multi-gpu or multi-node training
105+
workersplitter=wds.shardlists.split_by_worker,
106+
nodesplitter=wds.shardlists.split_by_node,
107+
)
103108
train_dataloader = wds.WebLoader(dataset)
104109
105110
model = ...

requirements/fabric/test.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,5 @@ pytest-timeout ==2.4.0
66
pytest-rerunfailures ==16.0.1
77
pytest-random-order ==1.2.0
88
click ==8.1.8; python_version < "3.11"
9-
click ==8.2.1; python_version > "3.10"
9+
click ==8.3.0; python_version > "3.10"
1010
tensorboardX >=2.6, <2.7.0 # todo: relax it back to `>=2.2` after fixing tests

src/lightning/pytorch/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
5252
- Fixed preventing recursive symlink creation when `save_last='link'` and `save_top_k=-1` ([#21186](https://github.com/Lightning-AI/pytorch-lightning/pull/21186))
5353

5454

55+
- Fixed `LightningCLI` loading of hyperparameters from `ckpt_path` failing for subclass model mode ([#21246](https://github.com/Lightning-AI/pytorch-lightning/pull/21246))
56+
57+
5558
- Fixed redundant host-device sync in progressbar printing ([#21233](https://github.com/Lightning-AI/pytorch-lightning/pull/21233))
5659

60+
5761
---
5862

5963
## [2.5.5] - 2025-09-05

src/lightning/pytorch/cli.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,11 @@ def _parse_ckpt_path(self) -> None:
564564
hparams.pop("_instantiator", None)
565565
if not hparams:
566566
return
567+
if "_class_path" in hparams:
568+
hparams = {
569+
"class_path": hparams.pop("_class_path"),
570+
"dict_kwargs": hparams,
571+
}
567572
hparams = {self.config.subcommand: {"model": hparams}}
568573
try:
569574
self.config = self.parser.parse_object(hparams, self.config)

tests/tests_pytorch/test_cli.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,7 @@ class BoringCkptPathModel(BoringModel):
491491
def __init__(self, out_dim: int = 2, hidden_dim: int = 2) -> None:
492492
super().__init__()
493493
self.save_hyperparameters()
494+
self.hidden_dim = hidden_dim
494495
self.layer = torch.nn.Linear(32, out_dim)
495496

496497

@@ -526,6 +527,41 @@ def add_arguments_to_parser(self, parser):
526527
assert "Parsing of ckpt_path hyperparameters failed" in err.getvalue()
527528

528529

530+
class BoringCkptPathSubclass(BoringCkptPathModel):
531+
def __init__(self, extra: bool = True, **kwargs) -> None:
532+
super().__init__(**kwargs)
533+
self.extra = extra
534+
535+
536+
def test_lightning_cli_ckpt_path_argument_hparams_subclass_mode(cleandir):
537+
class CkptPathCLI(LightningCLI):
538+
def add_arguments_to_parser(self, parser):
539+
parser.link_arguments("model.init_args.out_dim", "model.init_args.hidden_dim", compute_fn=lambda x: x * 2)
540+
541+
cli_args = ["fit", "--model=BoringCkptPathSubclass", "--model.out_dim=4", "--trainer.max_epochs=1"]
542+
with mock.patch("sys.argv", ["any.py"] + cli_args):
543+
cli = CkptPathCLI(BoringCkptPathModel, subclass_mode_model=True)
544+
545+
assert cli.config.fit.model.class_path.endswith(".BoringCkptPathSubclass")
546+
assert cli.config.fit.model.init_args == Namespace(out_dim=4, hidden_dim=8, extra=True)
547+
hparams_path = Path(cli.trainer.log_dir) / "hparams.yaml"
548+
assert hparams_path.is_file()
549+
hparams = yaml.safe_load(hparams_path.read_text())
550+
assert hparams["out_dim"] == 4
551+
assert hparams["hidden_dim"] == 8
552+
assert hparams["extra"] is True
553+
554+
checkpoint_path = next(Path(cli.trainer.log_dir, "checkpoints").glob("*.ckpt"))
555+
cli_args = ["predict", "--model=BoringCkptPathModel", f"--ckpt_path={checkpoint_path}"]
556+
with mock.patch("sys.argv", ["any.py"] + cli_args):
557+
cli = CkptPathCLI(BoringCkptPathModel, subclass_mode_model=True)
558+
559+
assert isinstance(cli.model, BoringCkptPathSubclass)
560+
assert cli.model.hidden_dim == 8
561+
assert cli.model.extra is True
562+
assert cli.model.layer.out_features == 4
563+
564+
529565
def test_lightning_cli_submodules(cleandir):
530566
class MainModule(BoringModel):
531567
def __init__(self, submodule1: LightningModule, submodule2: LightningModule, main_param: int = 1):

0 commit comments

Comments
 (0)