Commit 3dfcb7d

JKSenthil authored and facebook-github-bot committed
add local_rank_zero_fn decorator (#982)
Summary:
Pull Request resolved: #982

Add `local_rank_zero_fn` decorator for functions that should be run by one process per host.

Reviewed By: galrotem, anshulverma

Differential Revision: D70935839

fbshipit-source-id: 4fb267966546c08c0786894a667a6dde678a8774
1 parent 055aa15 commit 3dfcb7d
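
For context, a minimal sketch of the usage pattern described in the summary: gating per-host setup work so that only one process on each machine performs it. The helper name `warm_node_cache` and the cache path are illustrative, not part of this commit.

import os

from torchtnt.utils.distributed import local_rank_zero_fn


@local_rank_zero_fn
def warm_node_cache(path: str) -> None:
    # Executes only on local rank 0 of each host; other local ranks get None back.
    os.makedirs(path, exist_ok=True)


warm_node_cache("/tmp/node_cache")  # illustrative path, one mkdir per host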

File tree

2 files changed: 44 additions & 0 deletions

tests/utils/test_distributed.py

Lines changed: 9 additions & 0 deletions
@@ -30,6 +30,7 @@
     get_process_group_backend_from_device,
     get_tcp_init_method,
     get_world_size,
+    local_rank_zero_fn,
     PGWrapper,
     rank_zero_fn,
     rank_zero_read_and_broadcast,

@@ -169,6 +170,14 @@ def foo() -> int:
         x = foo()
         assert x is None

+    def test_local_rank_zero_fn(self) -> None:
+        @local_rank_zero_fn
+        def foo() -> int:
+            return 1
+
+        x = foo()
+        assert x == 1
+
     def test_revert_sync_batchnorm(self) -> None:
         original_batchnorm = torch.nn.modules.batchnorm.BatchNorm1d(4)

torchtnt/utils/distributed.py

Lines changed: 35 additions & 0 deletions
@@ -377,6 +377,41 @@ def wrapped_fn(*args: TParams.args, **kwargs: TParams.kwargs) -> Optional[TReturn]:
     return wrapped_fn


+def local_rank_zero_fn(
+    fn: Callable[TParams, TReturn]
+) -> Callable[TParams, Optional[TReturn]]:
+    """Function that can be used as a decorator to enable a function to be called on local rank 0 only.
+
+    Note:
+        This decorator should be used judiciously. It should never be used on functions that need synchronization,
+        and it should be used very carefully with functions that mutate local state.
+
+    Example:
+
+        >>> from torchtnt.utils.distributed import local_rank_zero_fn
+        >>> @local_rank_zero_fn
+        ... def foo():
+        ...     return 1
+        ...
+        >>> x = foo()  # x is 1 if local rank is 0, else x is None
+
+    Args:
+        fn: the function to be executed on local rank 0 only
+
+    Returns:
+        wrapped_fn: the wrapped function that executes only if the local rank is 0
+
+    """
+
+    @wraps(fn)
+    def wrapped_fn(*args: TParams.args, **kwargs: TParams.kwargs) -> Optional[TReturn]:
+        if get_local_rank() == 0:
+            return fn(*args, **kwargs)
+        return None
+
+    return wrapped_fn
+
+
 class _BatchNormXd(torch.nn.modules.batchnorm._BatchNorm):
     """
     The only difference between :class:`torch.nn.BatchNorm1d`, :class:`torch.nn.BatchNorm2d`,
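
For comparison, a minimal sketch contrasting the new decorator with the existing `rank_zero_fn` from the same module. On a hypothetical job with 2 hosts running 4 processes each (counts are illustrative), `rank_zero_fn` executes its body on global rank 0 only, i.e. one process in the entire job, while `local_rank_zero_fn` executes it on local rank 0 of every host, i.e. one process per machine; every other process receives None.

from torchtnt.utils.distributed import local_rank_zero_fn, rank_zero_fn


@rank_zero_fn
def announce_job() -> str:
    # Runs on global rank 0 only: exactly one process in the whole job.
    return "job started"


@local_rank_zero_fn
def announce_host() -> str:
    # Runs on local rank 0 of each host: one process per machine.
    return "host ready"


job_msg = announce_job()    # str on global rank 0, None elsewhere
host_msg = announce_host()  # str on local rank 0 of each host, None elsewhere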
