Commit f7c1d8b

Merge branch 'master' into docs/wandb
2 parents 6270d2c + f1ed6a2


7 files changed: +133 −132 lines


.azure/gpu-tests-pytorch.yml

Lines changed: 4 additions & 9 deletions
@@ -1,8 +1,3 @@
-# Python package
-# Create and test a Python package on multiple Python versions.
-# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more:
-# https://docs.microsoft.com/azure/devops/pipelines/languages/python
-
trigger:
  tags:
    include: ["*"]
@@ -24,18 +19,18 @@ pr:
      - "examples/run_pl_examples.sh"
      - "examples/pytorch/basics/backbone_image_classifier.py"
      - "examples/pytorch/basics/autoencoder.py"
+      - "requirements/fabric/**"
      - "requirements/pytorch/**"
      - "src/lightning/__init__.py"
      - "src/lightning/__setup__.py"
      - "src/lightning/__version__.py"
-      - "src/lightning/pytorch/**"
+      - "src/lightning_fabric/*"
+      - "src/lightning/fabric/**"
      - "src/pytorch_lightning/*"
+      - "src/lightning/pytorch/**"
      - "tests/tests_pytorch/**"
      - "tests/run_standalone_*.sh"
      - "pyproject.toml" # includes pytest config
-      - "requirements/fabric/**"
-      - "src/lightning/fabric/**"
-      - "src/lightning_fabric/*"
    exclude:
      - "requirements/*/docs.txt"
      - "*.md"

docs/source-pytorch/common/trainer.rst

Lines changed: 119 additions & 112 deletions
@@ -413,6 +413,35 @@ Number of devices to train on (``int``), which devices to train on (``list`` or
    # Training with GPU Accelerator using total number of gpus available on the system
    Trainer(accelerator="gpu")

+
+enable_autolog_hparams
+^^^^^^^^^^^^^^^^^^^^^^
+
+Whether to log hyperparameters at the start of a run. Defaults to True.
+
+.. testcode::
+
+    # default used by the Trainer
+    trainer = Trainer(enable_autolog_hparams=True)
+
+    # disable logging hyperparams
+    trainer = Trainer(enable_autolog_hparams=False)
+
+With the parameter set to false, you can add custom code to log hyperparameters.
+
+.. code-block:: python
+
+    model = LitModel()
+    trainer = Trainer(enable_autolog_hparams=False)
+    for logger in trainer.loggers:
+        if isinstance(logger, lightning.pytorch.loggers.CSVLogger):
+            logger.log_hyperparams(hparams_dict_1)
+        else:
+            logger.log_hyperparams(hparams_dict_2)
+
+You can also use `self.logger.log_hyperparams(...)` inside `LightningModule` to log.
+
+
enable_checkpointing
^^^^^^^^^^^^^^^^^^^^

@@ -443,6 +472,40 @@ See :doc:`Saving and Loading Checkpoints <../common/checkpointing>` for how to c
    # Add your callback to the callbacks list
    trainer = Trainer(callbacks=[checkpoint_callback])

+
+enable_model_summary
+^^^^^^^^^^^^^^^^^^^^
+
+Whether to enable or disable the model summarization. Defaults to True.
+
+.. testcode::
+
+    # default used by the Trainer
+    trainer = Trainer(enable_model_summary=True)
+
+    # disable summarization
+    trainer = Trainer(enable_model_summary=False)
+
+    # enable custom summarization
+    from lightning.pytorch.callbacks import ModelSummary
+
+    trainer = Trainer(enable_model_summary=True, callbacks=[ModelSummary(max_depth=-1)])
+
+
+enable_progress_bar
+^^^^^^^^^^^^^^^^^^^
+
+Whether to enable or disable the progress bar. Defaults to True.
+
+.. testcode::
+
+    # default used by the Trainer
+    trainer = Trainer(enable_progress_bar=True)
+
+    # disable progress bar
+    trainer = Trainer(enable_progress_bar=False)
+
+
fast_dev_run
^^^^^^^^^^^^

@@ -500,6 +563,39 @@ Gradient clipping value
    # default used by the Trainer
    trainer = Trainer(gradient_clip_val=None)

+
+inference_mode
+^^^^^^^^^^^^^^
+
+Whether to use :func:`torch.inference_mode` or :func:`torch.no_grad` mode during evaluation
+(``validate``/``test``/``predict``)
+
+.. testcode::
+
+    # default used by the Trainer
+    trainer = Trainer(inference_mode=True)
+
+    # Use `torch.no_grad` instead
+    trainer = Trainer(inference_mode=False)
+
+
+With :func:`torch.inference_mode` disabled, you can enable the grad of your model layers if required.
+
+.. code-block:: python
+
+    class LitModel(LightningModule):
+        def validation_step(self, batch, batch_idx):
+            preds = self.layer1(batch)
+            with torch.enable_grad():
+                grad_preds = preds.requires_grad_()
+                preds2 = self.layer2(grad_preds)
+
+
+    model = LitModel()
+    trainer = Trainer(inference_mode=False)
+    trainer.validate(model)
+
+
limit_train_batches
^^^^^^^^^^^^^^^^^^^

@@ -871,18 +967,6 @@ See the :doc:`profiler documentation <../tuning/profiler>` for more details.
    # advanced profiler for function-level stats, equivalent to `profiler=AdvancedProfiler()`
    trainer = Trainer(profiler="advanced")

-enable_progress_bar
-^^^^^^^^^^^^^^^^^^^
-
-Whether to enable or disable the progress bar. Defaults to True.
-
-.. testcode::
-
-    # default used by the Trainer
-    trainer = Trainer(enable_progress_bar=True)
-
-    # disable progress bar
-    trainer = Trainer(enable_progress_bar=False)

reload_dataloaders_every_n_epochs
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -917,28 +1001,6 @@ The pseudocode applies also to the ``val_dataloader``.

.. _replace-sampler-ddp:

-use_distributed_sampler
-^^^^^^^^^^^^^^^^^^^^^^^
-
-See :paramref:`lightning.pytorch.trainer.Trainer.params.use_distributed_sampler`.
-
-.. testcode::
-
-    # default used by the Trainer
-    trainer = Trainer(use_distributed_sampler=True)
-
-By setting to False, you have to add your own distributed sampler:
-
-.. code-block:: python
-
-    # in your LightningModule or LightningDataModule
-    def train_dataloader(self):
-        dataset = ...
-        # default used by the Trainer
-        sampler = torch.utils.data.DistributedSampler(dataset, shuffle=True)
-        dataloader = DataLoader(dataset, batch_size=32, sampler=sampler)
-        return dataloader
-

strategy
^^^^^^^^

@@ -982,6 +1044,29 @@ Enable synchronization between batchnorm layers across all GPUs.
    trainer = Trainer(sync_batchnorm=True)


+use_distributed_sampler
+^^^^^^^^^^^^^^^^^^^^^^^
+
+See :paramref:`lightning.pytorch.trainer.Trainer.params.use_distributed_sampler`.
+
+.. testcode::
+
+    # default used by the Trainer
+    trainer = Trainer(use_distributed_sampler=True)
+
+By setting to False, you have to add your own distributed sampler:
+
+.. code-block:: python
+
+    # in your LightningModule or LightningDataModule
+    def train_dataloader(self):
+        dataset = ...
+        # default used by the Trainer
+        sampler = torch.utils.data.DistributedSampler(dataset, shuffle=True)
+        dataloader = DataLoader(dataset, batch_size=32, sampler=sampler)
+        return dataloader
+
+
val_check_interval
^^^^^^^^^^^^^^^^^^

@@ -1058,84 +1143,6 @@ Can specify as float, int, or a time-based duration.
    # Total number of batches run
    total_fit_batches = total_train_batches + total_val_batches

-
-enable_model_summary
-^^^^^^^^^^^^^^^^^^^^
-
-Whether to enable or disable the model summarization. Defaults to True.
-
-.. testcode::
-
-    # default used by the Trainer
-    trainer = Trainer(enable_model_summary=True)
-
-    # disable summarization
-    trainer = Trainer(enable_model_summary=False)
-
-    # enable custom summarization
-    from lightning.pytorch.callbacks import ModelSummary
-
-    trainer = Trainer(enable_model_summary=True, callbacks=[ModelSummary(max_depth=-1)])
-
-
-inference_mode
-^^^^^^^^^^^^^^
-
-Whether to use :func:`torch.inference_mode` or :func:`torch.no_grad` mode during evaluation
-(``validate``/``test``/``predict``)
-
-.. testcode::
-
-    # default used by the Trainer
-    trainer = Trainer(inference_mode=True)
-
-    # Use `torch.no_grad` instead
-    trainer = Trainer(inference_mode=False)
-
-
-With :func:`torch.inference_mode` disabled, you can enable the grad of your model layers if required.
-
-.. code-block:: python
-
-    class LitModel(LightningModule):
-        def validation_step(self, batch, batch_idx):
-            preds = self.layer1(batch)
-            with torch.enable_grad():
-                grad_preds = preds.requires_grad_()
-                preds2 = self.layer2(grad_preds)
-
-
-    model = LitModel()
-    trainer = Trainer(inference_mode=False)
-    trainer.validate(model)
-
-enable_autolog_hparams
-^^^^^^^^^^^^^^^^^^^^^^
-
-Whether to log hyperparameters at the start of a run. Defaults to True.
-
-.. testcode::
-
-    # default used by the Trainer
-    trainer = Trainer(enable_autolog_hparams=True)
-
-    # disable logging hyperparams
-    trainer = Trainer(enable_autolog_hparams=False)
-
-With the parameter set to false, you can add custom code to log hyperparameters.
-
-.. code-block:: python
-
-    model = LitModel()
-    trainer = Trainer(enable_autolog_hparams=False)
-    for logger in trainer.loggers:
-        if isinstance(logger, lightning.pytorch.loggers.CSVLogger):
-            logger.log_hyperparams(hparams_dict_1)
-        else:
-            logger.log_hyperparams(hparams_dict_2)
-
-You can also use `self.logger.log_hyperparams(...)` inside `LightningModule` to log.
-
-----

Trainer class API
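
The sections added above document each Trainer flag in isolation. As a quick orientation (not part of the diff), here is a minimal sketch that combines them in a single call; it assumes `lightning` is installed and that `LitModel` is a `LightningModule` defined elsewhere:

    # Illustrative sketch only: combines the flags documented in the diff above.
    # Assumes `LitModel` is a LightningModule defined elsewhere in your project.
    from lightning.pytorch import Trainer

    trainer = Trainer(
        enable_progress_bar=False,     # no progress bar during fit/validate/test
        enable_model_summary=False,    # skip the layer summary printed at fit start
        enable_autolog_hparams=False,  # log hyperparameters manually instead
        inference_mode=True,           # evaluate under torch.inference_mode()
        use_distributed_sampler=True,  # let Lightning inject a DistributedSampler when needed
    )

    model = LitModel()
    trainer.validate(model)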

pyproject.toml

Lines changed: 0 additions & 1 deletion
@@ -120,7 +120,6 @@ ignore = [
    "S607", # todo: Starting a process with a partial executable path
    "RET504", # todo:Unnecessary variable assignment before `return` statement
    "PT004", # todo: Fixture `tmpdir_unittest_fixture` does not return anything, add leading underscore
-    "PT011", # todo: `pytest.raises(ValueError)` is too broad, set the `match` parameter or use a more specific exception
    "PT012", # todo: `pytest.raises()` block should contain a single simple statement
    "PT019", # todo: Fixture `_` without value is injected as parameter, use `@pytest.mark.usefixtures` instead
]
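
For context on the removed ignore: `pytest.raises` treats `match` as a regular expression that is tested against the string form of the raised exception with `re.search`, so a distinctive prefix of the expected message is enough. A minimal, self-contained sketch (the `divide` helper and its error message are invented for illustration):

    import pytest


    def divide(a: float, b: float) -> float:
        # Hypothetical helper used only for this illustration.
        if b == 0:
            raise ValueError("Division by zero is not supported for b=0")
        return a / b


    def test_divide_rejects_zero():
        # `match` is applied with re.search, so a message prefix suffices.
        with pytest.raises(ValueError, match="Division by zero is not supported"):
            divide(1.0, 0.0)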

tests/tests_pytorch/loggers/test_neptune.py

Lines changed: 5 additions & 5 deletions
@@ -121,19 +121,19 @@ def test_online_with_wrong_kwargs(neptune_mock):
    init."""
    run = neptune_mock.init_run()

-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="Run parameter expected to be of type `neptune.Run`*"):
        NeptuneLogger(run="some string")

-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="When an already initialized run object is provided*"):
        NeptuneLogger(run=run, project="redundant project")

-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="When an already initialized run object is provided*"):
        NeptuneLogger(run=run, api_key="redundant api key")

-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="When an already initialized run object is provided*"):
        NeptuneLogger(run=run, name="redundant api name")

-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="When an already initialized run object is provided*"):
        NeptuneLogger(run=run, foo="random **kwarg")

    # this should work

tests/tests_pytorch/models/test_hparams.py

Lines changed: 1 addition & 1 deletion
@@ -527,7 +527,7 @@ def __init__(self, arg1, arg2):
)
def test_single_config_models_fail(tmp_path, cls, config):
    """Test fail on passing unsupported config type."""
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match=r"Primitives \(<class 'bool'>*"):
        _ = cls(**config)
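
Because `match` is a regex, literal metacharacters in the expected message, such as the parentheses in the `Primitives \(<class 'bool'>` pattern above, have to be escaped, either by hand as in the diff or with `re.escape`. A small sketch with an invented function and message:

    import re

    import pytest


    def reject_primitive(value):
        # Hypothetical function used only for this illustration.
        raise ValueError(f"Primitives (<class '{type(value).__name__}'>) are not supported")


    def test_reject_primitive_message():
        # re.escape turns '(' and ')' into literal matches instead of regex groups.
        expected = re.escape("Primitives (<class 'bool'>)")
        with pytest.raises(ValueError, match=expected):
            reject_primitive(True)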

tests/tests_pytorch/profilers/test_profiler.py

Lines changed: 3 additions & 3 deletions
@@ -86,12 +86,12 @@ def test_simple_profiler_overhead(simple_profiler):
def test_simple_profiler_value_errors(simple_profiler):
    """Ensure errors are raised where expected."""
    action = "test"
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="Attempting to stop recording an action*"):
        simple_profiler.stop(action)

    simple_profiler.start(action)

-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="Attempted to start test*"):
        simple_profiler.start(action)

    simple_profiler.stop(action)
@@ -325,7 +325,7 @@ def test_advanced_profiler_dump_states(tmp_path):
def test_advanced_profiler_value_errors(advanced_profiler):
    """Ensure errors are raised where expected."""
    action = "test"
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="Attempting to stop recording*"):
        advanced_profiler.stop(action)

    advanced_profiler.start(action)

tests/tests_pytorch/test_cli.py

Lines changed: 1 addition & 1 deletion
@@ -339,7 +339,7 @@ def test_lightning_cli_save_config_seed_everything(cleandir):


def test_save_to_log_dir_false_error():
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError, match="`save_to_log_dir=False` only makes sense*"):
        SaveConfigCallback(
            LightningArgumentParser(),
            Namespace(),
