
Commit f8a2ce3

RohitRathore1 authored and pytorchmergebot committed
Fix inplace ops on Partial DTensors to preserve aliasing semantics (pytorch#164729)
Fixes pytorch#163374. Here is the output from the reproducible code:

```
W1006 09:09:26.329000 2457 /home/fedora/github/pytorch/torch/distributed/run.py:811]
W1006 09:09:26.329000 2457 /home/fedora/github/pytorch/torch/distributed/run.py:811] *****************************************
W1006 09:09:26.329000 2457 /home/fedora/github/pytorch/torch/distributed/run.py:811] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
W1006 09:09:26.329000 2457 /home/fedora/github/pytorch/torch/distributed/run.py:811] *****************************************
aten::clamp_(dt: f32[][R], None, 2)
redistribute_input(0, [P] -> [R])
redistribute_input(t: f32[], [P] -> [R])
_c10d_functional::all_reduce(t: f32[], sum, 0)
_c10d_functional::wait_tensor(t: f32[])
aten::clamp_(t: f32[], None, 2)
aten::view(t: f32[], [])
(Replicate(),)
tensor(2., device='cuda:0')
```

The behavior now matches what was expected in issue pytorch#163374.

Expected behavior (from the issue):
1. Placement should change from Partial(sum) to Replicate()
2. Value should be tensor(2.) instead of tensor(144.)

Actual output from this build:
1. (Replicate(),) - placement is correct
2. tensor(2., device='cuda:0') - value is correct

So the in-place operation now properly redistributes the partial DTensor to replicate before performing the clamp and maintains the correct aliasing semantics. It also produces the expected clamped value.

Pull Request resolved: pytorch#164729
Approved by: https://github.com/ezyang
1 parent e2c6834 commit f8a2ce3
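The "aliasing semantics" point can be illustrated without DTensor at all: a true in-place update is visible through existing views of the same storage, while replacing the storage (as a redistribution would) leaves stale views behind. A minimal stdlib-only sketch, with `buf` and `view` as illustrative stand-ins for a tensor and a view into it:

```python
# Illustrative sketch of aliasing semantics: a bytearray plays the role of
# tensor storage, and a memoryview plays the role of a view into it.
buf = bytearray(b"\x01\x02\x03\x04")
view = memoryview(buf)  # aliases buf's storage, like a tensor view

# A true in-place op mutates existing storage: the view observes the change.
buf[0] = 9
assert view[0] == 9

# "Redistribution" would allocate fresh storage and rebind the name; the old
# view still points at the original buffer and silently goes stale.
buf = bytearray(b"\x07\x07\x07\x07")  # rebinding, not in-place mutation
assert view[0] == 9  # the view did not follow the rebinding
```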

File tree

2 files changed: +37 −3 lines changed


test/distributed/tensor/test_pointwise_ops.py

Lines changed: 19 additions & 0 deletions

```diff
@@ -331,6 +331,25 @@ def test_mul_partial(self):
         self.assertEqual(z.placements, (Replicate(),))
         self.assertEqual(z.to_local(), input)
 
+    def test_inplace_op_partial_to_replicate(self):
+        # test that in-place operations that require redistribution raise an error
+        # to preserve aliasing semantics (issue #163374)
+        device_mesh = self.build_device_mesh()
+
+        input_tensor = torch.tensor(64.0, device=self.device_type)
+        partial_dt = DTensor.from_local(
+            input_tensor, device_mesh, placements=(Partial(),)
+        )
+
+        self.assertTrue(partial_dt.placements[0].is_partial())
+
+        # Inplace ops that require placement changes (Partial -> Replicate) should error
+        with self.assertRaisesRegex(
+            RuntimeError,
+            "in-place operations that require placement changes are not supported",
+        ):
+            partial_dt.clamp_(max=10)
+
 
 if __name__ == "__main__":
     run_tests()
```

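The new test leans on `unittest`'s `assertRaisesRegex` to pin down both the exception type and its message. The same pattern can be sketched in a self-contained way; `fake_clamp_` below is a hypothetical stand-in for the DTensor op, not code from the PR:

```python
import unittest

# Hypothetical stand-in for DTensor.clamp_ on a Partial placement; in the real
# code the error is raised inside DTensor's op dispatch, not here.
def fake_clamp_(placement):
    if placement == "Partial":
        raise RuntimeError(
            "in-place operations that require placement changes are not supported"
        )

class DemoTest(unittest.TestCase):
    def test_partial_inplace_raises(self):
        # Same shape as the new test: assert exception type and message together.
        with self.assertRaisesRegex(
            RuntimeError,
            "in-place operations that require placement changes are not supported",
        ):
            fake_clamp_("Partial")

suite = unittest.defaultTestLoader.loadTestsFromTestCase(DemoTest)
result = unittest.TextTestRunner(verbosity=0).run(suite)
```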
torch/distributed/tensor/_dispatch.py

Lines changed: 18 additions & 3 deletions

```diff
@@ -337,19 +337,34 @@ def _dispatch_fast_path_python_tail(
     if is_inplace_op:
         # inplace op should return self instead of re-wrapping
         if output_sharding.output_spec is not None:
+            output_spec = output_sharding.output_spec
+            assert isinstance(output_spec, DTensorSpec)
+            assert isinstance(args[0], dtensor.DTensor)
+
             # NOTE: aten.squeeze_.dim is an inplace op but it also may change
             # the inplace argument's tensor meta. Here we choose to special case
             # this op because as far as I know this is the only inplace op that
             # has such as behavior. We can extend this special case if necessary.
             if op_call == aten.squeeze_.dim:
-                output_spec = output_sharding.output_spec
-                assert isinstance(output_spec, DTensorSpec)
-                assert isinstance(args[0], dtensor.DTensor)
+                # update the spec to handle tensor meta changes
                 args[0]._spec = output_spec
                 # use return_and_correct_aliasing to match the outer and the inner
                 # aliasing. See https://github.com/pytorch/pytorch/pull/158954
                 return return_and_correct_aliasing(op_call, args, kwargs, args[0])
             else:
+                # For all other inplace ops, check if placement changes are required
+                # Inplace operations that change placement are not supported because
+                # they would require redistribution, which breaks aliasing semantics.
+                # If there are views into the tensor, the views would not be updated.
+                if args[0]._spec.placements != output_spec.placements:
+                    raise RuntimeError(
+                        f"{op_call}: in-place operations that require placement changes "
+                        f"are not supported. The operation would change placement from "
+                        f"{args[0]._spec.placements} to {output_spec.placements}, "
+                        f"which requires redistribution and breaks aliasing semantics. "
+                        f"Please use the out-of-place version of this operation instead."
+                    )
+                # Most inplace ops don't change tensor meta, so no spec update needed
                 return args[0]
         else:
             return None
```

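The control flow of the new `else` branch can be sketched with plain Python objects. `Spec`, `inplace_dispatch_tail`, and the placement strings below are simplified stand-ins for `DTensorSpec` and the real dispatch tail, not the actual PyTorch API:

```python
# Simplified mock of the placement-change guard added to the inplace dispatch
# path; the real code compares DTensorSpec.placements tuples of Placement objects.
class Spec:
    def __init__(self, placements):
        self.placements = placements

def inplace_dispatch_tail(op_name, input_spec, output_spec):
    """Return the input spec unchanged, or raise if placements would change."""
    if input_spec.placements != output_spec.placements:
        raise RuntimeError(
            f"{op_name}: in-place operations that require placement changes "
            f"are not supported. The operation would change placement from "
            f"{input_spec.placements} to {output_spec.placements}."
        )
    return input_spec  # most inplace ops keep tensor meta, so no spec update

# Matching placements pass through; Partial -> Replicate is rejected.
ok = inplace_dispatch_tail("aten.add_", Spec(("Replicate",)), Spec(("Replicate",)))
try:
    inplace_dispatch_tail("aten.clamp_", Spec(("Partial",)), Spec(("Replicate",)))
    raised = False
except RuntimeError:
    raised = True
```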