
Commit 406cd2f

Handled DTensor placements as a positional argument for redistribute (#2797)
1 parent c6caa17 commit 406cd2f
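
For context: per the PyTorch docs linked in the change, DTensor.redistribute accepts the mesh and placements either positionally or by keyword (redistribute(device_mesh=None, placements=None, *, async_op=False)). Before this commit, thunder's dynamo path only recovered placements from the keyword form. A minimal sketch of the two call forms the commit makes equivalent; mesh and x are illustrative names, and an initialized process group with one GPU per rank is assumed:

import torch
from torch.distributed.device_mesh import DeviceMesh
from torch.distributed.tensor import DTensor, Shard, Replicate

# Illustrative setup; assumes torch.distributed is already initialized.
mesh = DeviceMesh("cuda", list(range(torch.cuda.device_count())))
x = torch.randn(16, 16, device="cuda")
dt = DTensor.from_local(x, mesh, [Shard(0)])

dt.redistribute(mesh, [Replicate()])       # positional form: previously unhandled
dt.redistribute(placements=[Replicate()])  # keyword form: already handled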

File tree: 2 files changed (+44 −2)

  thunder/dynamo/utils.py
  thunder/tests/distributed/test_dtensor.py

thunder/dynamo/utils.py

Lines changed: 12 additions & 2 deletions
@@ -1219,8 +1219,18 @@ def dtensor_from_local_prim_wrapper(x, mesh=mesh, placements=placements):
         dtensor_from_local_prim_wrapper.thunder_supported = True
         node.target = dtensor_from_local_prim_wrapper
     if "redistribute" in node.target.__name__:
-        kwargs = closure_vars.nonlocals["kwargs_as_value"]
-        placements = kwargs["placements"]
+        args = closure_vars.nonlocals.get("args_as_value", ())
+        kwargs = closure_vars.nonlocals.get("kwargs_as_value", {})
+
+        # Handle positional args: redistribute(device_mesh, placements)
+        # and keyword args: redistribute(placements=...).
+        # The PyTorch docs say placements may also be None or omitted entirely, but that makes Dynamo raise an error:
+        # https://docs.pytorch.org/docs/stable/distributed.tensor.html#torch.distributed.tensor.DTensor.redistribute
+        # To stay consistent with the PyTorch docs, we fall back to None when placements is not provided.
+        if len(args) >= 2:
+            placements = args[1]
+        else:
+            placements = kwargs.get("placements", None)

         def dtensor_redistribute_prim_wrapper(x, placements=placements):
             return dtensor_redistribute_prim(x, placements=placements)
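
The heart of the fix is the fallback order when recovering placements from the captured call. A standalone sketch of that logic, assuming args and kwargs hold the captured positional and keyword arguments as in the diff above (extract_placements is a hypothetical helper for illustration, not the actual thunder code):

from torch.distributed.tensor import Shard, Replicate

def extract_placements(args, kwargs):
    # Positional form: redistribute(device_mesh, placements) -> placements is args[1].
    if len(args) >= 2:
        return args[1]
    # Keyword form, or None when omitted, matching the PyTorch docs' default.
    return kwargs.get("placements", None)

# extract_placements((mesh, [Replicate()]), {})       -> [Replicate()]
# extract_placements((), {"placements": [Shard(0)]})  -> [Shard(0)]
# extract_placements((), {})                          -> None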

thunder/tests/distributed/test_dtensor.py

Lines changed: 32 additions & 0 deletions
@@ -324,6 +324,38 @@ def test_dtensor_columnwise_parallel(self, jit_fn):
         assert len(tmodel._backend.subgraph_infos[0].thunder_compiled_fns) == 1
         assert len(tmodel._backend.subgraph_infos[0].split_reasons) == 0

+    def test_dtensor_redistribute_with_positional_args(self):
+        num_devices = self.world_size
+        mesh = DeviceMesh("cuda", list(range(num_devices)))
+        dim_size = 16
+
+        # Test redistribute with positional args: redistribute(mesh, placements, async_op=True)
+        def fn_positional(x):
+            dt = DTensor.from_local(x, mesh, [Shard(0)])
+            return dt.redistribute(mesh, [Replicate()], async_op=True)
+
+        # Test redistribute with keyword args: redistribute(placements=..., async_op=True)
+        def fn_keyword(x):
+            dt = DTensor.from_local(x, mesh, [Shard(0)])
+            return dt.redistribute(placements=[Replicate()], async_op=True)
+
+        local_tensor = torch.randn(dim_size, dim_size, device="cuda")
+
+        # Both should work and produce the same result
+        tmodel_positional = thunderfx(fn_positional)
+        tmodel_keyword = thunderfx(fn_keyword)
+
+        result_positional = tmodel_positional(local_tensor)
+        result_keyword = tmodel_keyword(local_tensor)
+
+        torch.testing.assert_close(result_positional, result_keyword)
+
+        # Verify no graph splits occurred (redistribute is supported)
+        assert len(tmodel_positional._backend.subgraph_infos) == 1
+        assert len(tmodel_positional._backend.subgraph_infos[0].split_reasons) == 0
+        assert len(tmodel_keyword._backend.subgraph_infos) == 1
+        assert len(tmodel_keyword._backend.subgraph_infos[0].split_reasons) == 0
+
     @common_utils.parametrize("executor", tuple(executors_map.keys()))
     @common_utils.parametrize(
         "input_shardings",
