Continue local tensor mode enablement for DTensor tests (pytorch#165451)

dzmitry-huba · pytorchmergebot · commit 01738a3feacb · 2025-10-14T21:20:54.000Z
Pull Request resolved: pytorch#165451
Approved by: https://github.com/ezyang, https://github.com/albanD
diff --git a/test/distributed/tensor/test_dtensor.py b/test/distributed/tensor/test_dtensor.py
@@ -1020,6 +1020,19 @@ def test_metadata_consistency_check(self):
             self.fail("Unexpected ValueError raised with run_check=False")
 
 
+DTensorMeshTestWithLocalTensor = create_local_tensor_test_class(
+    DTensorMeshTest,
+    skipped_tests=[
+        # Submeshes are not supported by local tensor mode
+        "test_from_local_sub_mesh",
+        "test_default_value_sub_mesh",
+        "test_redistribute_sub_mesh",
+        # Local tensor mode doesn't support tensors of different types on different ranks
+        "test_metadata_consistency_check",
+    ],
+)
+
+
 class TestDTensorPlacementTypes(DTensorTestBase):
     @property
     def world_size(self):
@@ -1086,6 +1099,11 @@ def test_split_tensor_1D(self) -> None:
                 assert_array_equal(expected_is_tensor_empty, is_tensor_empty)
 
 
+TestDTensorPlacementTypesWithLocalTensor = create_local_tensor_test_class(
+    TestDTensorPlacementTypes,
+)
+
+
 class TestDTensorSpec(DTensorTestBase):
     @property
     def world_size(self):
@@ -1265,5 +1283,9 @@ def test_default_shard_order(self):
         )
 
 
+TestDTensorSpecWithLocalTensor = create_local_tensor_test_class(
+    TestDTensorSpec,
+)
+
 if __name__ == "__main__":
     run_tests()
diff --git a/torch/distributed/_local_tensor/__init__.py b/torch/distributed/_local_tensor/__init__.py
@@ -51,6 +51,8 @@
 from types import TracebackType
 from typing import Any, Callable, Generator, Optional, Union
 
+import numpy as np
+
 import torch
 from torch import Size, SymBool, SymInt, Tensor
 from torch._C import DispatchKey, DispatchKeySet, ScriptObject
@@ -70,11 +72,13 @@
 from . import _c10d
 
 
-def _int_on_rank(i: "LocalIntNode | ConstantIntNode", r: int) -> int:
+def _int_on_rank(i: "int | LocalIntNode | ConstantIntNode", r: int) -> int:
     if isinstance(i, LocalIntNode):
         return i._local_ints[r]
     elif isinstance(i, ConstantIntNode):
         return i.val
+    elif isinstance(i, int):
+        return i
     else:
         raise AssertionError(type(i))
 
@@ -216,7 +220,7 @@ def is_constant(self) -> bool:
         return False
 
     def sym_max(
-        self, other: "LocalIntNode | ConstantIntNode"
+        self, other: "int | LocalIntNode | ConstantIntNode"
     ) -> "LocalIntNode | ConstantIntNode":
         return LocalIntNode(
             {
@@ -226,36 +230,50 @@ def sym_max(
         )
 
     def add(
-        self, other: "LocalIntNode | ConstantIntNode"
+        self, other: "int | LocalIntNode | ConstantIntNode"
     ) -> "LocalIntNode | ConstantIntNode":
         return LocalIntNode(
             {r: self._local_ints[r] + _int_on_rank(other, r) for r in self._local_ints}
         )
 
     def sub(
-        self, other: "LocalIntNode | ConstantIntNode"
+        self, other: "int | LocalIntNode | ConstantIntNode"
     ) -> "LocalIntNode | ConstantIntNode":
         return LocalIntNode(
             {r: self._local_ints[r] - _int_on_rank(other, r) for r in self._local_ints}
         )
 
     def mul(
-        self, other: "LocalIntNode | ConstantIntNode"
+        self, other: "int | LocalIntNode | ConstantIntNode"
     ) -> "LocalIntNode | ConstantIntNode":
         return LocalIntNode(
             {r: self._local_ints[r] * _int_on_rank(other, r) for r in self._local_ints}
         )
 
-    def eq(self, other: "LocalIntNode | ConstantIntNode") -> bool | SymBool:
+    def mod(
+        self, other: "int | LocalIntNode | ConstantIntNode"
+    ) -> "LocalIntNode | ConstantIntNode":
+        return LocalIntNode(
+            {r: self._local_ints[r] % _int_on_rank(other, r) for r in self._local_ints}
+        )
+
+    def int_floordiv(
+        self, other: "int | LocalIntNode | ConstantIntNode"
+    ) -> "LocalIntNode | ConstantIntNode":
+        return LocalIntNode(
+            {r: self._local_ints[r] // _int_on_rank(other, r) for r in self._local_ints}
+        )
+
+    def eq(self, other: "int | LocalIntNode | ConstantIntNode") -> bool | SymBool:
         r = {self._local_ints[r] == _int_on_rank(other, r) for r in self._local_ints}
         return torch._C._get_constant_bool_symnode(len(r) == 1 and next(iter(r)))
 
-    def gt(self, other: "LocalIntNode | ConstantIntNode") -> bool | SymBool:
+    def gt(self, other: "int | LocalIntNode | ConstantIntNode") -> bool | SymBool:
         r = {self._local_ints[r] > _int_on_rank(other, r) for r in self._local_ints}
         assert len(r) == 1, (self, other)
         return torch._C._get_constant_bool_symnode(next(iter(r)))
 
-    def lt(self, other: "LocalIntNode | ConstantIntNode") -> bool | SymBool:
+    def lt(self, other: "int | LocalIntNode | ConstantIntNode") -> bool | SymBool:
         r = {self._local_ints[r] < _int_on_rank(other, r) for r in self._local_ints}
         assert len(r) == 1, (self, other)
         return torch._C._get_constant_bool_symnode(next(iter(r)))
@@ -437,6 +455,27 @@ def __torch_dispatch__(  # type: ignore[override]
         with LocalTensorMode(local_tensor._ranks):
             return func(*args, **kwargs)
 
+    def numpy(self, *, force: bool = False) -> np.ndarray:
+        return self.reconcile().numpy(force=force)
+
+    def __lt__(
+        self, other: torch.Tensor | bool | complex | float | int
+    ) -> torch.Tensor:
+        self_rec = self.reconcile()
+        other_rec = other
+        if isinstance(other, LocalTensor):
+            other_rec = other.reconcile()
+        return self_rec < other_rec
+
+    def __gt__(
+        self, other: torch.Tensor | bool | complex | float | int
+    ) -> torch.Tensor:
+        self_rec = self.reconcile()
+        other_rec = other
+        if isinstance(other, LocalTensor):
+            other_rec = other.reconcile()
+        return self_rec > other_rec
+
     def tolist(self) -> list[Any]:
         """
         Reconcile and convert result to list.
diff --git a/torch/distributed/_local_tensor/_c10d.py b/torch/distributed/_local_tensor/_c10d.py
@@ -320,7 +320,6 @@ def _local_all_gather_(
 
     ranks, group_offsets, _offset = _prepare_collective_groups(process_group_so)
 
-    assert isinstance(input_tensor, LocalTensor), "Input tensor must be a LocalTensor"
     for i in range(len(output_tensors)):
         assert isinstance(output_tensors[i], LocalTensor), (
             "Output tensor must be a LocalTensor"
@@ -333,7 +332,11 @@ def _local_all_gather_(
 
         # For each rank in the group, gather from their input tensor
         for i, rank_i in enumerate(group_ranks):
-            output_tensors[i].copy_(input_tensor._local_tensors[rank_i])
+            # all_gather_object creates a plain (non-Local) tensor, so special-case it
+            source_tensor = input_tensor
+            if isinstance(input_tensor, LocalTensor):
+                source_tensor = input_tensor._local_tensors[rank_i]
+            output_tensors[i].copy_(source_tensor)
 
     work = FakeWork()
     work_so = Work.boxed(work)