
Commit 37f0f30

Merge branch 'master' into docs/lightning_module_enhancements
2 parents dcf8b25 + 1fc077b

File tree: 30 files changed, +582 −103 lines

.github/workflows/ci-tests-fabric.yml

Lines changed: 34 additions & 33 deletions
```diff
@@ -62,49 +62,57 @@ jobs:
       env:
         PACKAGE_NAME: ${{ matrix.config.pkg-name }}
         FREEZE_REQUIREMENTS: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }}
-        PYPI_CACHE_DIR: "_pip-wheels"
         TORCH_URL_STABLE: "https://download.pytorch.org/whl/cpu/"
         TORCH_URL_TEST: "https://download.pytorch.org/whl/test/cpu/"
         # TODO: Remove this - Enable running MPS tests on this platform
         DISABLE_MPS: ${{ matrix.os == 'macOS-14' && '1' || '0' }}
     steps:
       - uses: actions/checkout@v5

-      - name: Set up Python ${{ matrix.config.python-version }}
-        uses: actions/setup-python@v5
+      - name: Install uv and set Python version
+        uses: astral-sh/setup-uv@v6
         with:
           python-version: ${{ matrix.config.python-version || '3.9' }}
+          # TODO: Avoid activating environment like this
+          # see: https://github.com/astral-sh/setup-uv/tree/v6/?tab=readme-ov-file#activate-environment
+          activate-environment: true
           enable-cache: true

-      - name: basic setup
-        run: pip install -q -r .actions/requirements.txt
+      - name: Basic setup
+        run: uv pip install -q -r .actions/requirements.txt
+
+      - name: Append Env. vars for Linux
+        if: ${{ runner.os == 'Linux' }}
+        run: echo "GLOO_SOCKET_IFNAME=eth0" >> $GITHUB_ENV
+
+      - name: Append Env. vars for MacOS
+        if: ${{ runner.os == 'macOS' }}
+        run: echo "GLOO_SOCKET_IFNAME=lo0" >> $GITHUB_ENV
+
+      - name: Append Env. vars for Windows
+        if: ${{ runner.os == 'windows' }}
+        run: |
+          # Avoid issue on Windows with PyTorch 2.4: "RuntimeError: use_libuv was requested but PyTorch was build without libuv support"
+          echo "USE_LIBUV=0" >> $GITHUB_ENV

       - name: Set min. dependencies
         if: ${{ matrix.config.requires == 'oldest' }}
         run: |
           cd requirements/fabric
-          pip install -U "lightning-utilities[cli]"
+          uv pip install -U "lightning-utilities[cli]"
           python -m lightning_utilities.cli requirements set-oldest --req_files "['base.txt', 'strategies.txt', 'test.txt']"
-          pip install "cython<3.0" wheel
-          pip install "pyyaml==5.4" --no-build-isolation
+          uv pip install "cython<3.0" wheel
+          uv pip install "pyyaml==5.4" --no-build-isolation

       - name: Adjust PyTorch versions in requirements files
         if: ${{ matrix.config.requires != 'oldest' }}
         run: |
-          pip install -q -r requirements/ci.txt
+          uv pip install -q -r requirements/ci.txt
           python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
           for fpath in `ls requirements/**/*.txt`; do \
             python ./adjust-torch-versions.py $fpath ${{ matrix.config.pytorch-version }}; \
           done

-      - name: pip wheels cache
-        uses: actions/cache/restore@v4
-        with:
-          path: ${{ env.PYPI_CACHE_DIR }}
-          key: pypi_wheels
-      - run: |
-          mkdir -p $PYPI_CACHE_DIR
-          ls -lh $PYPI_CACHE_DIR
-
       - name: Expand Env. variables
         run: |
           # Switch PyTorch URL between stable and test/future
@@ -113,25 +121,15 @@ jobs:
           python -c "print('COVERAGE_SCOPE=' + str('lightning' if '${{matrix.config.pkg-name}}' == 'lightning' else 'lightning_fabric'))" >> $GITHUB_ENV
           # if you install mono-package set dependency only for this subpackage
           python -c "print('EXTRA_PREFIX=' + str('' if '${{matrix.config.pkg-name}}' != 'lightning' else 'fabric-'))" >> $GITHUB_ENV
-      - name: Append Env. vars for MacOS
-        if: ${{ runner.os == 'macOS' }}
-        run: |
-          # trying to avoid "gloo" issue with SIGABRT
-          echo "GLOO_SOCKET_IFNAME=lo0" >> $GITHUB_ENV
-      - name: Append Env. vars for Windows
-        if: ${{ runner.os == 'windows' }}
-        run: |
-          # Avoid issue on Windows with PyTorch 2.4: "RuntimeError: use_libuv was requested but PyTorch was build without libuv support"
-          echo "USE_LIBUV=0" >> $GITHUB_ENV

       - name: Install package & dependencies
         timeout-minutes: 20
         run: |
-          pip install -e ".[${EXTRA_PREFIX}test,${EXTRA_PREFIX}strategies]" \
-            -U --upgrade-strategy=eager --prefer-binary \
-            --extra-index-url="${TORCH_URL}" \
-            --find-links="${PYPI_CACHE_DIR}"
-          pip list
+          uv pip install ".[${EXTRA_PREFIX}test,${EXTRA_PREFIX}strategies]" \
+            --upgrade \
+            --find-links="${TORCH_URL}"
+          uv pip list
+
       - name: Dump handy wheels
         if: github.event_name == 'push' && github.ref == 'refs/heads/master'
         continue-on-error: true
@@ -179,6 +177,9 @@ jobs:
           name: CPU-coverage
           fail_ci_if_error: false

+      - name: Minimize uv cache
+        run: uv cache prune --ci
+
   fabric-cpu-guardian:
     runs-on: ubuntu-latest
     needs: fabric-cpu
```

.github/workflows/ci-tests-pytorch.yml

Lines changed: 1 addition & 0 deletions
```diff
@@ -89,6 +89,7 @@ jobs:
       - name: Append Env. vars for Linux
         if: ${{ runner.os == 'Linux' }}
         run: echo "GLOO_SOCKET_IFNAME=eth0" >> $GITHUB_ENV
+
       - name: Append Env. vars for MacOS
         if: ${{ runner.os == 'macOS' }}
         run: echo "GLOO_SOCKET_IFNAME=lo0" >> $GITHUB_ENV
```

docs/source-fabric/guide/index.rst

Lines changed: 1 addition & 1 deletion
```diff
@@ -78,7 +78,7 @@ Build your own Trainer
    <div class="row">

 .. displayitem::
-   :header: Organize your model code with with LightningModule
+   :header: Organize your model code with LightningModule
    :description: Organize your code in a LightningModule and use it with Fabric
    :button_link: lightning_module.html
    :col_css: col-md-4
```

docs/source-fabric/levels/intermediate.rst

Lines changed: 1 addition & 1 deletion
```diff
@@ -19,7 +19,7 @@ Intermediate skills
    <div class="row">

 .. displayitem::
-   :header: Organize your model code with with LightningModule
+   :header: Organize your model code with LightningModule
    :description: Organize your code in a LightningModule and use it with Fabric
    :button_link: ../guide/lightning_module.html
    :col_css: col-md-4
```

docs/source-pytorch/advanced/speed.rst

Lines changed: 12 additions & 1 deletion
```diff
@@ -297,7 +297,8 @@ Validation Within Training Epoch

 For large datasets, it's often desirable to check validation multiple times within a training epoch.
 Pass in a float to check that often within one training epoch. Pass in an int ``K`` to check every ``K`` training batch.
-Must use an ``int`` if using an :class:`~torch.utils.data.IterableDataset`.
+Must use an ``int`` if using an :class:`~torch.utils.data.IterableDataset`. Alternatively, pass a string ("DD:HH:MM:SS"),
+a dict of ``datetime.timedelta`` kwargs, or a ``datetime.timedelta`` to check validation after a given amount of wall-clock time.

 .. testcode::

@@ -310,6 +311,16 @@ Must use an ``int`` if using an :class:`~torch.utils.data.IterableDataset`.
    # check every 100 train batches (ie: for IterableDatasets or fixed frequency)
    trainer = Trainer(val_check_interval=100)

+   # check validation every 15 minutes of wall-clock time
+   trainer = Trainer(val_check_interval="00:00:15:00")
+
+   # alternatively, pass a dict of timedelta kwargs
+   trainer = Trainer(val_check_interval={"minutes": 1})
+
+   # or use a timedelta object directly
+   from datetime import timedelta
+   trainer = Trainer(val_check_interval=timedelta(hours=1))
+
 Learn more in our :ref:`trainer_flags` guide.
```
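For reference, the three duration spellings above all describe the same kind of wall-clock interval. A minimal sketch of how such inputs normalize to one `datetime.timedelta` (the `_to_timedelta` helper here is hypothetical, not Lightning's actual parsing code):

```python
from datetime import timedelta
from typing import Union

def _to_timedelta(interval: Union[str, dict, timedelta]) -> timedelta:
    """Normalize a "DD:HH:MM:SS" string, a timedelta-kwargs dict, or a timedelta."""
    if isinstance(interval, timedelta):
        return interval
    if isinstance(interval, dict):
        # e.g. {"minutes": 15} -> timedelta(minutes=15)
        return timedelta(**interval)
    days, hours, minutes, seconds = (int(part) for part in interval.split(":"))
    return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)

# All three spellings denote the same 15-minute interval:
assert _to_timedelta("00:00:15:00") == _to_timedelta({"minutes": 15}) == timedelta(minutes=15)
```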

docs/source-pytorch/common/trainer.rst

Lines changed: 28 additions & 1 deletion
```diff
@@ -991,11 +991,23 @@ val_check_interval
    :muted:

 How often within one training epoch to check the validation set.
-Can specify as float or int.
+Can specify as float, int, or a time-based duration.

 - pass a ``float`` in the range [0.0, 1.0] to check after a fraction of the training epoch.
 - pass an ``int`` to check after a fixed number of training batches. An ``int`` value can only be higher than the number of training
   batches when ``check_val_every_n_epoch=None``, which validates after every ``N`` training batches across epochs or iteration-based training.
+- pass a ``string`` duration in the format "DD:HH:MM:SS", a ``datetime.timedelta`` object, or a ``dictionary`` of keyword arguments that can be passed
+  to ``datetime.timedelta`` for time-based validation. When using a time-based duration, validation will trigger once the elapsed wall-clock time
+  since the last validation exceeds the interval. The validation check occurs after the current batch completes, the validation loop runs, and
+  the timer resets.
+
+**Time-based validation behavior with check_val_every_n_epoch:** When used together with ``val_check_interval`` (time-based) and
+``check_val_every_n_epoch > 1``, validation is aligned to epoch multiples:
+
+- If the time-based interval elapses **before** the next multiple-N epoch, validation runs at the start of that epoch (after the first batch),
+  and the timer resets.
+- If the interval elapses **during** a multiple-N epoch, validation runs after the current batch.
+- For cases where ``check_val_every_n_epoch=None`` or ``1``, the time-based behavior of ``val_check_interval`` applies without additional alignment.

 .. testcode::

@@ -1013,10 +1025,25 @@ Can specify as float or int.
    # (ie: production cases with streaming data)
    trainer = Trainer(val_check_interval=1000, check_val_every_n_epoch=None)

+   # check validation every 15 minutes of wall-clock time using a string-based approach
+   trainer = Trainer(val_check_interval="00:00:15:00")
+
+   # check validation every 15 minutes of wall-clock time using a dictionary-based approach
+   trainer = Trainer(val_check_interval={"minutes": 15})
+
+   # check validation every 1 hour of wall-clock time using a dictionary-based approach
+   trainer = Trainer(val_check_interval={"hours": 1})
+
+   # check validation every 1 hour of wall-clock time using a datetime.timedelta object
+   from datetime import timedelta
+   trainer = Trainer(val_check_interval=timedelta(hours=1))
+
 .. code-block:: python

    # Here is the computation to estimate the total number of batches seen within an epoch.
+   # This logic applies when `val_check_interval` is specified as an integer or a float.

    # Find the total number of train batches
    total_train_batches = total_train_samples // (train_batch_size * world_size)
```
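The documented trigger-and-reset semantics can be modeled in a few lines. This is a simplified illustration of the behavior the docs describe, not the Trainer's internal implementation:

```python
import time
from datetime import timedelta

interval = timedelta(minutes=15)   # e.g. val_check_interval={"minutes": 15}
last_validation = time.monotonic()

def should_validate_after_batch() -> bool:
    """Checked after each training batch completes."""
    global last_validation
    if time.monotonic() - last_validation >= interval.total_seconds():
        last_validation = time.monotonic()  # the timer resets once validation runs
        return True
    return False
```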

src/lightning/fabric/CHANGELOG.md

Lines changed: 5 additions & 1 deletion
```diff
@@ -22,14 +22,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

 ### Changed

--
+- let `_get_default_process_group_backend_for_device` support more hardware platforms
+  ([#21057](https://github.com/Lightning-AI/pytorch-lightning/pull/21057), [#21093](https://github.com/Lightning-AI/pytorch-lightning/pull/21093))


 ### Fixed

 - Fixed with adding a missing device id for pytorch 2.8 ([#21105](https://github.com/Lightning-AI/pytorch-lightning/pull/21105))


+- Respect `verbose=False` in `seed_everything` when no seed is provided
+
 ---

 ## [2.5.4] - 2025-08-29
```

src/lightning/fabric/strategies/ddp.py

Lines changed: 11 additions & 1 deletion
```diff
@@ -160,7 +160,17 @@ def barrier(self, *args: Any, **kwargs: Any) -> None:
         if torch.distributed.get_backend() == "nccl":
             torch.distributed.barrier(device_ids=self._determine_ddp_device_ids())
         else:
-            torch.distributed.barrier()
+            # Handle PyTorch bug where barrier() fails on CPU with "PrivateUse1HooksInterface" error
+            try:
+                torch.distributed.barrier()
+            except RuntimeError as e:
+                if "PrivateUse1HooksInterface" in str(e):
+                    # Fallback: Use all_reduce as barrier - all processes must participate
+                    # This achieves the same synchronization effect as barrier()
+                    dummy_tensor = torch.tensor(0.0, device=self.root_device)
+                    torch.distributed.all_reduce(dummy_tensor)
+                else:
+                    raise

     @override
     def broadcast(self, obj: TBroadcast, src: int = 0) -> TBroadcast:
```
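The fallback works because `all_reduce` is a blocking collective: no rank can return from it until every rank has entered it, which gives the same rendezvous guarantee as `barrier()`. A standalone sketch of the idea, assuming a multi-process launch such as `torchrun --nproc_per_node=2`:

```python
import torch
import torch.distributed as dist

def barrier_via_all_reduce() -> None:
    # Each rank contributes a dummy scalar; all_reduce only completes once
    # every rank participates, so returning implies all ranks have arrived.
    dummy = torch.tensor(0.0)
    dist.all_reduce(dummy)

if __name__ == "__main__":
    dist.init_process_group("gloo")  # rendezvous config comes from torchrun env vars
    barrier_via_all_reduce()
    print(f"rank {dist.get_rank()} passed the barrier")
    dist.destroy_process_group()
```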

src/lightning/fabric/utilities/distributed.py

Lines changed: 5 additions & 1 deletion
```diff
@@ -319,7 +319,11 @@ def _destroy_dist_connection() -> None:


 def _get_default_process_group_backend_for_device(device: torch.device) -> str:
-    return "nccl" if device.type == "cuda" else "gloo"
+    """Return corresponding distributed backend for a given device."""
+    device_backend_map = torch.distributed.Backend.default_device_backend_map
+    if device.type in device_backend_map:
+        return device_backend_map[device.type]
+    return "gloo"


 class _DatasetSamplerWrapper(Dataset):
```
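`torch.distributed.Backend.default_device_backend_map` maps device types to their registered default backends (e.g. `cpu` → `gloo`, `cuda` → `nccl`), so device types registered by out-of-tree accelerator plugins are now resolved automatically instead of falling through the old cuda-only check. A quick look at the lookup the new helper performs (exact map contents depend on the installed PyTorch build):

```python
import torch

backend_map = torch.distributed.Backend.default_device_backend_map
print(backend_map)  # e.g. {'cpu': 'gloo', 'cuda': 'nccl', ...}

# Equivalent lookup with "gloo" as the fallback, as in the updated helper:
device = torch.device("cpu")
print(backend_map.get(device.type, "gloo"))  # -> 'gloo'
```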

src/lightning/fabric/utilities/seed.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -40,7 +40,8 @@ def seed_everything(seed: Optional[int] = None, workers: bool = False, verbose:
     env_seed = os.environ.get("PL_GLOBAL_SEED")
     if env_seed is None:
         seed = 0
-        rank_zero_warn(f"No seed found, seed set to {seed}")
+        if verbose:
+            rank_zero_warn(f"No seed found, seed set to {seed}")
     else:
         try:
             seed = int(env_seed)
```
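With this change, the "No seed found" warning respects the `verbose` flag when neither a `seed` argument nor `PL_GLOBAL_SEED` is set:

```python
from lightning.fabric.utilities.seed import seed_everything

# Assuming PL_GLOBAL_SEED is unset in the environment:
seed_everything(verbose=False)  # falls back to seed 0 silently
seed_everything()               # warns: "No seed found, seed set to 0"
```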
