Commit 8d72b9a

Fix visualize_tensor_sharding function for V2 shardings

Parent: ecada8b

4 files changed, +144 -30 lines
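
Note: as a rough sketch of the code path this commit fixes (not part of the diff), the snippet below enables the Shardy/V2 sharding format the same way the new test does, shards a tensor, and visualizes it. The mesh and tensor shapes are arbitrary examples.

# Minimal sketch; mirrors the new test's setup rather than prescribing an API.
import os
os.environ["CONVERT_SHLO_TO_SHARDY"] = "1"  # emit V2 ("<=[...]") shardings

import numpy as np
import torch
import torch_xla
import torch_xla.runtime as xr
import torch_xla.distributed.spmd as xs
from torch_xla.distributed.spmd.debugging import visualize_tensor_sharding

xr.use_spmd()
num_devices = xr.global_runtime_device_count()
mesh = xs.Mesh(np.arange(num_devices), (1, num_devices))

t = torch.randn(1, 128).to(torch_xla.device())
xs.mark_sharding(t, mesh, (0, 1))

# With this commit, V2 specs are converted back to the V1 string format via
# construct_v1_sharding_str before the sharding is rendered.
visualize_tensor_sharding(t)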

test/spmd/test_spmd_debugging.py

Lines changed: 72 additions & 0 deletions

@@ -17,6 +17,7 @@
 import torch_xla.distributed.spmd as xs
 from torch_xla.distributed.spmd import XLAShardedTensor
 from torch_xla.distributed.spmd import Mesh
+from torch_xla.distributed.spmd.debugging import construct_v1_sharding_str

 import test_xla_sharding_base

@@ -822,6 +823,77 @@ def test_multi_host_replicated_cpu(self):
     fake_output = fake_capture.get()
     assert output == fake_output

+
+class ConvertV2ShardingToV1Test(test_xla_sharding_base.XlaShardingTest):
+
+  @classmethod
+  def setUpClass(cls):
+    super().setUpClass()
+    os.environ["CONVERT_SHLO_TO_SHARDY"] = "1"
+
+  def run_test(self):
+    mesh = self._get_mesh(self.device_mesh_shape)
+    t = torch.randn(self.tensor_shape).to(torch_xla.device())
+    xs.mark_sharding(t, mesh, self.partition_spec)
+    actual_str = construct_v1_sharding_str(t)
+    self.assertEqual(self.expected_str, actual_str)
+
+  def test_tiled_sharding(self):
+    self.device_mesh_shape = (1, self.n_devices)
+    self.tensor_shape = (1, 128)
+    self.partition_spec = (0, 1)
+    self.expected_str = '{devices=[1,%d]%s}' % (self.n_devices, ','.join(
+        [str(i) for i in range(self.n_devices)]))
+    self.run_test()
+
+  @unittest.skipIf(xr.global_runtime_device_count() < 2,
+                   f"Requires at least 2 devices.")
+  def test_tupled_tiled_sharding(self):
+    self.device_mesh_shape = (2, self.n_devices // 2)
+    self.tensor_shape = (16,)
+    self.partition_spec = ((0, 1),)
+    self.expected_str = "{devices=[%d]%s}" % (self.n_devices, ','.join(
+        str(x) for x in range(self.n_devices)))
+    self.run_test()
+
+  def test_replicated_sharding(self):
+    self.device_mesh_shape = (1, self.n_devices)
+    self.tensor_shape = (4, 4)
+    self.partition_spec = (None, None)
+    self.expected_str = '{replicated}'
+    self.run_test()
+
+  @unittest.skipIf(xr.global_runtime_device_count() < 4,
+                   f"Requires at least 4 devices.")
+  def test_partial_replication_sharding(self):
+    self.device_mesh_shape = (2, self.n_devices // 2)
+    self.tensor_shape = (4, 4)
+    self.partition_spec = (0, None)
+    self.expected_str = '{devices=[2,1,%d]%s last_tile_dim_replicate}' % (
+        self.n_devices // 2, ','.join(str(x) for x in range(self.n_devices)))
+    self.run_test()
+
+  @unittest.skipIf(xr.global_runtime_device_count() < 4,
+                   f"Requires at least 4 devices.")
+  def test_tupled_partial_replication_sharding(self):
+    self.device_mesh_shape = (1, 2, self.n_devices // 2)
+    self.tensor_shape = (16, 16)
+    self.partition_spec = ((0, 1), None)
+    self.expected_str = "{devices=[2,1,%d]%s last_tile_dim_replicate}" % (
+        self.n_devices // 2, ','.join(str(x) for x in range(self.n_devices)))
+    self.run_test()
+
+  def test_tupled_partial_replication_sharding_with_transpose(self):
+    self.device_mesh_shape = (1, 2, self.n_devices // 2)
+    self.tensor_shape = (16, 16)
+    self.partition_spec = (None, (2, 1))
+    device_order = self.device_ids.reshape(self.device_mesh_shape).transpose(
+        (2, 1, 0)).flatten()
+    self.expected_str = "{devices=[1,%d]%s}" % (self.n_devices, ','.join(
+        str(x) for x in device_order))
+    self.run_test()
+
+
 if __name__ == '__main__':
   test = unittest.main()
   sys.exit(0 if test.result.wasSuccessful() else 1)
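
Note: on four devices the transpose case above works out as follows (illustrative arithmetic only, not additional test code):

# device_mesh_shape (1, 2, 2), partition_spec (None, (2, 1)): dim 1 of the
# tensor is sharded over mesh axes 2 and 1, in that order.
import numpy as np

device_order = np.arange(4).reshape((1, 2, 2)).transpose((2, 1, 0)).flatten()
print(device_order)  # [0 2 1 3]
# expected_str: {devices=[1,4]0,2,1,3}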

test/spmd/test_xla_sharding.py

Lines changed: 14 additions & 20 deletions

@@ -6,7 +6,6 @@
 import unittest
 from unittest.mock import patch
 import sys
-import os

 import torch
 from torch import nn

@@ -27,16 +26,12 @@
 from torch_xla._internal import tpu


-def should_convert_to_shardy():
-  return os.environ.get("CONVERT_SHLO_TO_SHARDY",
-                        "").lower() in ("1", "true", "yes")
-
-
 class BasicXlaShardingTest(test_xla_sharding_base.XlaShardingTest):

   @classmethod
   def setUpClass(cls):
     super().setUpClass()
+    cls.convert_to_shardy = xu.check_env_flag("CONVERT_SHLO_TO_SHARDY")

   def test_xla_sharded_tensor(self):
     partition_spec = (0, 1)

@@ -244,7 +239,7 @@ def test_custom_tile_assignment(self):
     if self.n_devices > 1:
       annotation = '{devices=[1,%d]%s}' % (self.n_devices, ','.join(
           [str(i) for i in reversed(range(self.n_devices))]))
-      if should_convert_to_shardy():
+      if self.convert_to_shardy:
         annotation = '{devices=[1,%d]<=[%d]}' % (self.n_devices, self.n_devices)
       self.assertEqual(annotation, torch_xla._XLAC._get_xla_sharding_spec(xt))

@@ -260,7 +255,7 @@ def test_mark_sharding_2d(self):
     if self.n_devices > 1:
       annotation = '{devices=[1,%d]%s}' % (self.n_devices, ','.join(
           [str(i) for i in range(self.n_devices)]))
-      if should_convert_to_shardy():
+      if self.convert_to_shardy:
         annotation = '{devices=[1,%d]<=[%d]}' % (self.n_devices, self.n_devices)
       self.assertEqual(annotation, torch_xla._XLAC._get_xla_sharding_spec(xt1))

@@ -281,7 +276,7 @@ def test_mark_sharding_4d(self):
       annotation = '{devices=[1,1,%d,%d]%s}' % (
           z_dim, self.n_devices // z_dim, ','.join(
               [str(i) for i in range(self.n_devices)]))
-      if should_convert_to_shardy():
+      if self.convert_to_shardy:
         annotation = '{devices=[1,1,%d,%d]<=[%d]}' % (z_dim, self.n_devices //
                                                       z_dim, self.n_devices)
       self.assertEqual(annotation, torch_xla._XLAC._get_xla_sharding_spec(xt))

@@ -418,7 +413,7 @@ def test_tupled_partition_spec(self):
     xs.mark_sharding(t, mesh, ((0, 1),))
     annotation = "{devices=[%d]%s}" % (self.n_devices, ','.join(
         str(x) for x in range(self.n_devices)))
-    if should_convert_to_shardy():
+    if self.convert_to_shardy:
       annotation = "{devices=[%d]<=[%d]}" % (self.n_devices, self.n_devices)
     self.assertEqual(torch_xla._XLAC._get_xla_sharding_spec(t), annotation)

@@ -432,7 +427,7 @@ def test_named_partial_tupled_partition_spec(self):
     xs.mark_sharding(t, mesh, (('r', 'b'), None))
     annotation = "{devices=[2,1,%d]%s last_tile_dim_replicate}" % (
         self.n_devices // 2, ','.join(str(x) for x in range(self.n_devices)))
-    if should_convert_to_shardy():
+    if self.convert_to_shardy:
       annotation = "{devices=[2,1,%d]<=[%d] last_tile_dim_replicate}" % (
           self.n_devices // 2, self.n_devices)
     self.assertEqual(torch_xla._XLAC._get_xla_sharding_spec(t), annotation)

@@ -442,7 +437,7 @@ def test_named_partial_tupled_partition_spec(self):
     xs.mark_sharding(u, mesh, (None, ('b', 'm')))
     annotation = "{devices=[1,%d]%s}" % (self.n_devices, ','.join(
         str(x) for x in range(self.n_devices)))
-    if should_convert_to_shardy():
+    if self.convert_to_shardy:
       annotation = "{devices=[1,%d]<=[%d]}" % (self.n_devices, self.n_devices)
     self.assertEqual(torch_xla._XLAC._get_xla_sharding_spec(u), annotation)

@@ -452,7 +447,7 @@ def test_named_partial_tupled_partition_spec(self):
     device_order = mesh.get_logical_mesh().transpose((0, 2, 1)).flatten()
     annotation = "{devices=[1,%d,2]%s last_tile_dim_replicate}" % (
         self.n_devices // 2, ','.join(str(x) for x in device_order))
-    if should_convert_to_shardy():
+    if self.convert_to_shardy:
       annotation = "{devices=[1,%d,2]<=[2,%d]T(1,0) last_tile_dim_replicate}" % (
           self.n_devices // 2, self.n_devices // 2)
     self.assertEqual(torch_xla._XLAC._get_xla_sharding_spec(v), annotation)

@@ -463,7 +458,7 @@ def test_named_partial_tupled_partition_spec(self):
     device_order = mesh.get_logical_mesh().transpose((2, 1, 0)).flatten()
     annotation = "{devices=[1,%d]%s}" % (self.n_devices, ','.join(
         str(x) for x in device_order))
-    if should_convert_to_shardy():
+    if self.convert_to_shardy:
       annotation = "{devices=[1,%d]<=[2,%d]T(1,0)}" % (self.n_devices,
                                                        self.n_devices // 2)
     self.assertEqual(torch_xla._XLAC._get_xla_sharding_spec(v), annotation)

@@ -478,7 +473,7 @@ def test_multiple_tuples_in_spec(self):
     xs.mark_sharding(t, mesh, (('a', 'b'), ('c', 'd')))
     annotation = "{devices=[2,%d]%s}" % (self.n_devices // 2, ','.join(
         str(x) for x in range(self.n_devices)))
-    if should_convert_to_shardy():
+    if self.convert_to_shardy:
       annotation = "{devices=[2,%d]<=[%d]}" % (self.n_devices // 2,
                                                self.n_devices)
     self.assertEqual(torch_xla._XLAC._get_xla_sharding_spec(t), annotation)

@@ -491,7 +486,7 @@ def test_3d_tensor_2d_mesh(self):
     xs.mark_sharding(t, mesh, (None, 0, 1))
     annotation = '{devices=[1,2,%d]%s}' % (self.n_devices // 2, ','.join(
         str(x) for x in range(self.n_devices)))
-    if should_convert_to_shardy():
+    if self.convert_to_shardy:
       annotation = '{devices=[1,2,%d]<=[%d]}' % (self.n_devices // 2,
                                                  self.n_devices)
     self.assertEqual(torch_xla._XLAC._get_xla_sharding_spec(t), annotation)

@@ -1013,8 +1008,7 @@ def test_op_sharding_cache(self):

     t = torch.randn(1, self.n_devices).to('xla')
     xs.mark_sharding(t, mesh, (0, 1))
-    counter_name = "CreateIotaOpSharding" if should_convert_to_shardy(
-    ) else "CreateOpSharding"
+    counter_name = "CreateIotaOpSharding" if self.convert_to_shardy else "CreateOpSharding"
     self.assertIn(counter_name, met.counter_names())
     self.assertEqual(met.counter_value(counter_name), 1)

@@ -1435,7 +1429,7 @@ def test_data_loader_with_sharding(self):
     data, _ = iter(train_device_loader).__next__()
     self.assertEqual(data.size(), torch.Size([8, 3, 64, 64]))
     annotation = f"{{devices=[{mesh.size()},1,1,1]{','.join([str(i) for i in range(mesh.size())])}}}"
-    if should_convert_to_shardy():
+    if self.convert_to_shardy:
       annotation = f"{{devices=[{mesh.size()},1,1,1]<=[{mesh.size()}]}}"
     self.assertEqual(torch_xla._XLAC._get_xla_sharding_spec(data), annotation)

@@ -1458,7 +1452,7 @@ def test_data_loader_with_non_batch_size(self):
     data, _ = iter(train_device_loader).__next__()
     self.assertEqual(data.size(), torch.Size([mesh.size() - 1, 3, 64, 64]))
     annotation = f"{{devices=[{mesh.size()},1,1,1]{','.join([str(i) for i in range(mesh.size())])}}}"
-    if should_convert_to_shardy():
+    if self.convert_to_shardy:
       annotation = f"{{devices=[{mesh.size()},1,1,1]<=[{mesh.size()}]}}"
     self.assertEqual(torch_xla._XLAC._get_xla_sharding_spec(data), annotation)

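
Note: for reference, the two spec formats that cls.convert_to_shardy toggles between look like this on four devices (values taken from the expected annotations above; illustrative only):

# V1: the device order is spelled out explicitly.
v1_annotation = '{devices=[1,4]0,1,2,3}'
# V2 (iota/Shardy): the device order is generated from an iota over a reshape,
# optionally with a transpose, e.g. '<=[2,2]T(1,0)' instead of a literal list.
v2_annotation = '{devices=[1,4]<=[4]}'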

torch_xla/csrc/init_python_bindings.cpp

Lines changed: 33 additions & 8 deletions

@@ -83,8 +83,6 @@
 #include "xla/pjrt/distributed/distributed.h"
 #include "xla/python/profiler/internal/traceme_wrapper.h"

-#define PYBIND11_DETAILED_ERROR_MESSAGES
-
 namespace torch_xla {
 namespace {

@@ -762,6 +760,16 @@ std::string GetTensorsHloGraph(const std::vector<at::Tensor>& tensors,
   return XLAGraphExecutor::Get()->DumpHloComputation(xtensors, mode);
 }

+std::optional<xla::OpSharding> GetXLAOpSharding(const at::Tensor& input) {
+  XLATensorPtr xtensor = bridge::GetXlaTensor(input);
+  XLATensor::ShardingSpecPtr sharding_spec =
+      xtensor ? xtensor->sharding_spec() : nullptr;
+  if (sharding_spec != nullptr) {
+    return sharding_spec->sharding;
+  }
+  return std::nullopt;
+}
+
 std::string GetXLAShardingSpec(const XLATensorPtr xtensor) {
   auto sharding_spec = xtensor->sharding_spec();
   if (sharding_spec != nullptr) {

@@ -1528,6 +1536,10 @@ at::Tensor tensor_fromDLPack(PyObject* data) {
 void InitXlaModuleBindings(py::module m) {
   PythonScope<py::module> module(m);

+  using TileAssignmentDims = std::vector<int64_t>;
+  using ReshapeDims = std::vector<int64_t>;
+  using TransposePerm = std::vector<int>;
+
   // Define the _XLAC.XlaShardingSpec class.
   PythonScope<py::class_<XLATensor::ShardingSpec, XLATensor::ShardingSpecPtr>>(
       m, "XlaShardingSpec")

@@ -2699,13 +2711,26 @@ void InitXlaModuleBindings(py::module m) {
            })
       .def("_get_xla_op_sharding",
            [](const at::Tensor& input) -> std::optional<xla::OpSharding> {
-             XLA_ASSIGN_OR_THROW(XLATensorPtr xtensor, bridge::GetXlaTensor(input));
-             XLATensor::ShardingSpecPtr sharding_spec =
-                 xtensor ? xtensor->sharding_spec() : nullptr;
-             if (sharding_spec != nullptr) {
-               return sharding_spec->sharding;
+             return GetXLAOpSharding(input);
+           })
+      .def("_get_xla_op_sharding_v2_params",
+           [](const at::Tensor& input) -> std::optional<std::tuple<TileAssignmentDims, ReshapeDims, TransposePerm, bool>> {
+             std::optional<xla::OpSharding> maybe_sharding =
+                 GetXLAOpSharding(input);
+             if (!maybe_sharding) {
+               return std::nullopt;
              }
-             return std::nullopt;
+             const xla::OpSharding& sharding = maybe_sharding.value();
+             TileAssignmentDims tile_assignment_dims(
+                 sharding.tile_assignment_dimensions().begin(),
+                 sharding.tile_assignment_dimensions().end());
+             ReshapeDims reshape_dims(sharding.iota_reshape_dims().begin(),
+                                      sharding.iota_reshape_dims().end());
+             TransposePerm transpose_perm(sharding.iota_transpose_perm().begin(),
+                                          sharding.iota_transpose_perm().end());
+             return std::make_tuple(tile_assignment_dims, reshape_dims,
+                                    transpose_perm,
+                                    sharding.replicate_on_last_tile_dim());
            })
       .def("_get_xla_sharding_specs",
            [](const std::vector<at::Tensor>& tensors)
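
Note: a minimal sketch of reading the new _get_xla_op_sharding_v2_params binding from Python (not part of the diff; `t` is assumed to be a tensor previously sharded with xs.mark_sharding):

# Returns None for an unsharded tensor, otherwise a 4-tuple matching the
# binding above: (tile_assignment_dims, reshape_dims, transpose_perm,
# replicate_on_last_tile_dim).
import torch_xla

params = torch_xla._XLAC._get_xla_op_sharding_v2_params(t)
if params is not None:
  tile_dims, reshape_dims, transpose_perm, replicate_last = params
  print(tile_dims, reshape_dims, transpose_perm, replicate_last)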

torch_xla/distributed/spmd/debugging.py

Lines changed: 25 additions & 2 deletions

@@ -2,7 +2,7 @@
 import functools
 import string
 import sys
-from typing import Any, Callable, Optional, Union
+from typing import Any, Callable, Optional, Union, Tuple
 import weakref

 import numpy as np

@@ -157,12 +157,35 @@ def visualize_sharding(sharding: str,
   return table


+def construct_v1_sharding_str(t: torch.Tensor) -> str:
+  """
+  Returns the corresponding HLO V1 sharding string from the tensor
+  """
+  sharding = torch_xla._XLAC._get_xla_sharding_spec(t)
+  if "<=" not in sharding:
+    # This is already in the V1 format
+    return sharding
+  sharding_params = torch_xla._XLAC._get_xla_op_sharding_v2_params(t)
+  assert sharding_params is not None
+  tile_assignment_dims, reshape_dims, transpose_perm, replicate_on_last_dim = sharding_params
+  num_devices = np.prod(reshape_dims)
+  device_list = np.arange(num_devices).reshape(reshape_dims).transpose(
+      transpose_perm).reshape(num_devices)
+
+  tile_assignment_str = ",".join(str(dim) for dim in tile_assignment_dims)
+  device_list_str = ",".join(str(i) for i in device_list)
+  replicate_str = " last_tile_dim_replicate" if replicate_on_last_dim else ""
+  return f"{{devices=[{tile_assignment_str}]{device_list_str}{replicate_str}}}"
+
+
 def visualize_tensor_sharding(t, **kwargs):
   """Visualizes an array's sharding."""

   # XLAShardedTensor is-a torch.Tensor
   def maybe_unwrap(t: torch.Tensor) -> torch.Tensor:
     return t.global_tensor if isinstance(t, XLAShardedTensor) else t

-  sharding = torch_xla._XLAC._get_xla_sharding_spec(maybe_unwrap(t))
+  t = maybe_unwrap(t)
+  sharding = construct_v1_sharding_str(t)
+
   return visualize_sharding(sharding, **kwargs)
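
Note: to make the conversion concrete, this is how construct_v1_sharding_str maps one of the V2 specs exercised in the tests, assuming a four-device mesh and assuming the proto reports a plain iota as reshape_dims=[4] with an identity transpose (both are assumptions, marked below):

# V2 input (from the tests): {devices=[2,1,2]<=[4] last_tile_dim_replicate}
import numpy as np

tile_assignment_dims = [2, 1, 2]
reshape_dims = [4]        # assumed proto value for a plain "<=[4]" iota
transpose_perm = [0]      # assumed identity permutation
replicate_on_last_dim = True

num_devices = np.prod(reshape_dims)
device_list = np.arange(num_devices).reshape(reshape_dims).transpose(
    transpose_perm).reshape(num_devices)
print(device_list)  # [0 1 2 3]
# V1 result: {devices=[2,1,2]0,1,2,3 last_tile_dim_replicate}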
