Skip to content

Commit cc1dcd7

Browse files
committed
Restored the world_size == 1 branch so single-process runs check the filesystem directly, to avoid the timeout in CI
1 parent aa4cef6 commit cc1dcd7

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

src/lightning/pytorch/callbacks/model_checkpoint.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -999,6 +999,10 @@ def to_yaml(self, filepath: Optional[_PATH] = None) -> None:
999999
def file_exists(self, filepath: _PATH, trainer: "pl.Trainer") -> bool:
10001000
"""Checks if a file exists on rank 0 and broadcasts the result to all other ranks, preventing the internal
10011001
state to diverge between ranks."""
1002+
# Single-process or strategies without distributed world size: no need for coordination
1003+
if trainer.world_size == 1:
1004+
return self._fs.exists(filepath)
1005+
10021006
# In distributed setups, only global rank 0 touches the filesystem
10031007
local_decision = self._fs.exists(filepath) if trainer.is_global_zero else False
10041008
# Reduce the decision across ranks using an "any"-style reduction to decide if the file exists anywhere

0 commit comments

Comments
 (0)