@@ -550,11 +550,14 @@ torch::lazy::BackendDataPtr TensorToXlaData(
     const at::Tensor& tensor, const xla::Shape& shape,
     const torch::lazy::BackendDevice& device) {
   TORCH_LAZY_TIMED("TensorToData");
+
+  XLA_ASSIGN_OR_THROW(runtime::ComputationClient* absl_nonnull const client,
+                      runtime::GetComputationClient());
+
   if (static_cast<XlaDeviceType>(device.type()) == XlaDeviceType::SPMD) {
     // The tensor is bypassing the virtual device, so it should be replicated
     // to all devices.
-    std::vector<std::string> local_devices =
-        runtime::GetComputationClientOrDie()->GetLocalDevices();
+    std::vector<std::string> local_devices = client->GetLocalDevices();
     auto replicated_data =
         std::vector<at::Tensor>(local_devices.size(), tensor);
     return ShardingUtil::CreateShardedData(replicated_data, local_devices,
@@ -565,8 +568,7 @@ torch::lazy::BackendDataPtr TensorToXlaData(
   source_tensors.push_back(
       std::make_shared<runtime::AtenSource>(tensor, shape, device.toString()));
 
-  auto handles =
-      runtime::GetComputationClientOrDie()->TransferToDevice(source_tensors);
+  auto handles = client->TransferToDevice(source_tensors);
   XLA_CHECK_EQ(handles.size(), 1);
   return handles.front();
 }
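The two hunks above show the shape of the whole change: the crashing accessor runtime::GetComputationClientOrDie() is replaced by runtime::GetComputationClient(), which presumably returns an absl::StatusOr, unwrapped once per function with XLA_ASSIGN_OR_THROW and then reused through the local client pointer. A minimal, self-contained sketch of that accessor pattern, using illustrative stand-in names rather than torch_xla APIs:

#include <stdexcept>
#include <string>

#include "absl/status/status.h"
#include "absl/status/statusor.h"

// Illustrative stand-in for the computation client (not a torch_xla type).
struct FakeClient {
  std::string default_device = "CPU:0";
};

// Status-returning accessor: an unavailable client becomes a value the
// caller can handle, instead of a crash inside the accessor itself.
absl::StatusOr<FakeClient*> GetFakeClient(bool initialized) {
  static FakeClient client;
  if (!initialized) {
    return absl::FailedPreconditionError("computation client not initialized");
  }
  return &client;
}

// Roughly what an ASSIGN_OR_THROW-style macro does at each call site:
// unwrap the value on success, turn a non-OK status into an exception.
FakeClient* GetFakeClientOrThrow(bool initialized) {
  absl::StatusOr<FakeClient*> client = GetFakeClient(initialized);
  if (!client.ok()) {
    throw std::runtime_error(std::string(client.status().message()));
  }
  return *client;
}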
@@ -806,15 +808,17 @@ std::vector<torch::lazy::BackendDataPtr> CreateTensorsData(
     return {};
   }
 
+  XLA_ASSIGN_OR_THROW(runtime::ComputationClient* absl_nonnull const client,
+                      runtime::GetComputationClient());
+
   // CreateTensorsData should be implicitly replicated to all devices.
   if (IsVirtualDevice(devices[0])) {
     XLA_CHECK(
         std::all_of(devices.begin(), devices.end(),
                     [&](const std::string& s) { return s == devices[0]; }))
         << "can't mix virtual device and real device.";
 
-    std::vector<std::string> local_devices =
-        runtime::GetComputationClientOrDie()->GetLocalDevices();
+    std::vector<std::string> local_devices = client->GetLocalDevices();
     std::vector<runtime::ComputationClient::DataPtr> handles;
     for (size_t i = 0; i < tensors.size(); ++i) {
       auto device = ParseDeviceString(devices[i]);
@@ -834,8 +838,7 @@ std::vector<torch::lazy::BackendDataPtr> CreateTensorsData(
     source_tensors.push_back(std::make_shared<runtime::AtenSource>(
         tensors[i], std::move(shape), devices[i]));
   }
-  return WrapXlaData(
-      runtime::GetComputationClientOrDie()->TransferToDevice(source_tensors));
+  return WrapXlaData(client->TransferToDevice(source_tensors));
 }
 
 std::vector<torch::lazy::BackendDataPtr> CreateTensorsData(
@@ -846,6 +849,9 @@ std::vector<torch::lazy::BackendDataPtr> CreateTensorsData(
   XLA_CHECK_EQ(tensors.size(), shardings.size());
   XLA_CHECK_EQ(tensors.size(), devices.size());
 
+  XLA_ASSIGN_OR_THROW(runtime::ComputationClient* absl_nonnull const client,
+                      runtime::GetComputationClient());
+
   std::vector<runtime::ComputationClient::DataPtr> handles;
   for (size_t i = 0; i < tensors.size(); ++i) {
     torch::lazy::BackendDevice device = ParseDeviceString(devices[i]);
@@ -858,8 +864,7 @@ std::vector<torch::lazy::BackendDataPtr> CreateTensorsData(
       // GetLocalDevices returns the list of local devices specified by their
       // global ordinals (e.g. ["TPU:4", "TPU:5", "TPU:6", "TPU:7"]).
 
-      std::vector<std::string> local_devices =
-          runtime::GetComputationClientOrDie()->GetLocalDevices();
+      std::vector<std::string> local_devices = client->GetLocalDevices();
       // Shards the input tensors with padding, to split evenly.
       // The execution requires consistent shard sizes, and the zero-padded
       // values should be ignored.
@@ -871,8 +876,7 @@ std::vector<torch::lazy::BackendDataPtr> CreateTensorsData(
     } else {
       source_tensors.push_back(std::make_shared<runtime::AtenSource>(
           tensors[i], std::move(shape), devices[i]));
-      new_handles = runtime::GetComputationClientOrDie()->TransferToDevice(
-          source_tensors);
+      new_handles = client->TransferToDevice(source_tensors);
     }
     handles.insert(handles.end(), new_handles.begin(), new_handles.end());
   }
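The new client bindings carry the absl_nonnull annotation, Abseil's nullability marker, documenting that the unwrapped pointer can never be null (the failing path has already thrown or returned). A small sketch of how that annotation reads in isolation, assuming the absl/base/nullability.h macro form used in this diff:

#include "absl/base/nullability.h"

// The annotation qualifies the pointer it follows, much like `const`, so
// `int* absl_nonnull const p` is a const, never-null pointer to int.
// Under Clang's nullability checks this can be enforced; elsewhere it
// serves as documentation.
int Deref(int* absl_nonnull p) { return *p; }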
@@ -910,7 +914,7 @@ absl::StatusOr<std::vector<xla::Literal>> ReleaseGilAndTransferData(
     save = PyEval_SaveThread();
   }
 
-  XLA_ASSIGN_OR_RETURN(runtime::ComputationClient* client,
+  XLA_ASSIGN_OR_RETURN(runtime::ComputationClient* absl_nonnull const client,
                        runtime::GetComputationClient());
   XLA_ASSIGN_OR_RETURN(std::vector<xla::Literal> literals,
                        client->TransferFromDevice(UnwrapXlaData(xla_data)));
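In ReleaseGilAndTransferData the function itself returns absl::StatusOr, so this last hunk keeps XLA_ASSIGN_OR_RETURN and only tightens the pointer declaration; errors propagate to the caller rather than being converted to exceptions. A short sketch of that propagation pattern written against plain absl::StatusOr (the helper names below are hypothetical, not torch_xla APIs):

#include <string>
#include <vector>

#include "absl/status/status.h"
#include "absl/status/statusor.h"

// Hypothetical parser used only to illustrate error propagation.
absl::StatusOr<int> ParseOrdinal(const std::string& device) {
  const auto colon = device.find(':');
  if (colon == std::string::npos) {
    return absl::InvalidArgumentError("expected <type>:<ordinal>, got " + device);
  }
  return std::stoi(device.substr(colon + 1));
}

// Roughly what an ASSIGN_OR_RETURN-style macro expands to: evaluate the
// StatusOr, early-return its status on failure, otherwise bind the value.
absl::StatusOr<std::vector<int>> ParseOrdinals(
    const std::vector<std::string>& devices) {
  std::vector<int> ordinals;
  for (const std::string& device : devices) {
    absl::StatusOr<int> ordinal = ParseOrdinal(device);
    if (!ordinal.ok()) {
      return ordinal.status();
    }
    ordinals.push_back(*ordinal);
  }
  return ordinals;
}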