Commit 8d20a86
ops: Use new macros for throwing exceptions. (#9592)
Follow-up to #9588 and #9580.

Target:
- `torch_xla/csrc/ops` directory
- Files related to the tracing of tensor operations

In summary, this PR:
- Replaces all calls to `OkOrThrow()` and `GetValueOrThrow()` (which throw exceptions without the source location of the *throw-site*) with the macros `XLA_THROW_IF_ERROR()` and `XLA_ASSIGN_OR_THROW()`.
- Corresponds to one of the fine-grained PRs that came from breaking down PR #9580.
- Focuses on the `torch_xla/csrc/ops` directory and other files related to the tracing of tensor operations, replacing every use of the now-deprecated functions with the newly introduced macros.
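For context, here is a minimal sketch of the semantics these macros are expected to have, assuming they wrap `absl::Status`/`absl::StatusOr<T>` and record the throw-site via `__FILE__`/`__LINE__`. This is not the actual implementation; the real definitions live in `torch_xla/csrc/status.h` and differ in detail:

```cpp
// Hypothetical sketch only, NOT the torch_xla implementation.
#include <sstream>
#include <stdexcept>
#include <utility>

#include "absl/status/status.h"
#include "absl/status/statusor.h"

// Two-level concatenation so __LINE__ expands before token pasting.
#define XLA_SKETCH_CONCAT_(a, b) a##b
#define XLA_SKETCH_CONCAT(a, b) XLA_SKETCH_CONCAT_(a, b)

// Throws if `rexpr` evaluates to a non-OK absl::Status, attaching the
// file and line of the macro invocation (the throw-site) to the message.
#define XLA_THROW_IF_ERROR(rexpr)                              \
  do {                                                         \
    const absl::Status xla_sketch_status = (rexpr);            \
    if (!xla_sketch_status.ok()) {                             \
      std::ostringstream oss;                                  \
      oss << __FILE__ << ":" << __LINE__ << ": "               \
          << xla_sketch_status.ToString();                     \
      throw std::runtime_error(oss.str());                     \
    }                                                          \
  } while (false)

// Evaluates `rexpr` (an absl::StatusOr<T>), throws on error as above,
// and otherwise assigns the unwrapped value to `lhs`. Note that `lhs`
// may be a declaration ("xla::XlaOp op") or an existing variable.
#define XLA_ASSIGN_OR_THROW_IMPL_(statusor, lhs, rexpr)        \
  auto statusor = (rexpr);                                     \
  if (!statusor.ok()) {                                        \
    std::ostringstream oss;                                    \
    oss << __FILE__ << ":" << __LINE__ << ": "                 \
        << statusor.status().ToString();                       \
    throw std::runtime_error(oss.str());                       \
  }                                                            \
  lhs = std::move(statusor).value();

#define XLA_ASSIGN_OR_THROW(lhs, rexpr)                        \
  XLA_ASSIGN_OR_THROW_IMPL_(                                   \
      XLA_SKETCH_CONCAT(xla_sketch_statusor_, __LINE__), lhs, rexpr)
```

Under these assumptions, usage matches the call sites in the diff below: `XLA_ASSIGN_OR_THROW(XLATensorPtr self_tensor, bridge::GetXlaTensor(self));` both declares `self_tensor` and unwraps the `StatusOr`, throwing with the caller's file and line on error.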
1 parent d9a9e44 commit 8d20a86

11 files changed: +61 -38 lines

torch_xla/csrc/convolution.cpp (10 additions, 7 deletions)

```diff
@@ -218,9 +218,11 @@ xla::XlaOp BuildConvBackwardInput(xla::XlaOp grad_output, xla::XlaOp kernel,
       MakeConvOpAttrs(spatial_stride, spatial_padding, spatial_dilation, false);
   xla::XlaOp kernel_transposed = xla::Transpose(
       kernel, FilterTransposePermutation(input_shape.dimensions_size()));
-  return GetValueOrThrow(MakeXlaBackpropInputConvOp(
-      "conv_backward_input", input_shape, kernel_transposed, grad_output,
-      conv_op_attrs));
+  XLA_ASSIGN_OR_THROW(xla::XlaOp conv_backward_input,
+                      MakeXlaBackpropInputConvOp("conv_backward_input",
+                                                 input_shape, kernel_transposed,
+                                                 grad_output, conv_op_attrs));
+  return conv_backward_input;
 }

 // Computes the kernel gradient for a convolution.
@@ -238,14 +240,15 @@ xla::XlaOp BuildConvBackwardWeight(xla::XlaOp grad_output, xla::XlaOp input,
       xla::InversePermutation(transpose_permutation);
   xla::Shape transposed_weight_shape =
       xla::ShapeUtil::PermuteDimensions(transpose_permutation, kernel_shape);
-  xla::XlaOp conv = GetValueOrThrow(MakeXlaBackpropFilterConvOp(
-      "conv_backward_weight", input, transposed_weight_shape, grad_output,
-      conv_op_attrs));
+  XLA_ASSIGN_OR_THROW(xla::XlaOp conv_backward_weight,
+                      MakeXlaBackpropFilterConvOp("conv_backward_weight", input,
+                                                  transposed_weight_shape,
+                                                  grad_output, conv_op_attrs));

   // Reorder the dimensions of the filter gradient to match the NCHW convention
   // of PyTorch. The original result of the convolution has the spatial and
   // feature dimensions swapped and the spatial dimensions reversed.
-  return xla::Transpose(conv, inv_transpose_permutation);
+  return xla::Transpose(conv_backward_weight, inv_transpose_permutation);
 }

 xla::XlaOp BuildGradBias(xla::XlaOp grad_output) {
```
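One pattern worth noting here and in the files below: assuming the usual `ASSIGN_OR_RETURN`-style design sketched above, `XLA_ASSIGN_OR_THROW` is a multi-statement macro that declares its first argument rather than an expression that yields a value, so expression-position uses of `GetValueOrThrow(...)` (inside a `return`, a `std::make_tuple`, or a dereference) have to be split into an assignment followed by a separate use of the new variable.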

torch_xla/csrc/cross_replica_reduces.cpp (5 additions, 5 deletions)

```diff
@@ -116,7 +116,7 @@ std::shared_ptr<torch::lazy::Value> CreateToken(
 at::Tensor all_reduce(const at::Tensor& self, std::string reduceOp,
                       std::string /*group_name*/) {
   TORCH_LAZY_FN_COUNTER_TIMED_TRACING("xla::");
-  auto self_tensor = GetValueOrThrow(bridge::GetXlaTensor(self));
+  XLA_ASSIGN_OR_THROW(XLATensorPtr self_tensor, bridge::GetXlaTensor(self));
   // TODO(alanwaketan): Use group_name to generate groups. Currently we just
   // use {} as a workaround. Scale is always 1.0 here, and we always pin
   // layout.
@@ -270,7 +270,7 @@ AllGatherResult BuildAllGather(xla::XlaOp input, xla::XlaOp token, int64_t dim,
 at::Tensor all_gather_into_tensor(const at::Tensor& self, int64_t group_size,
                                   std::string group_name) {
   TORCH_LAZY_FN_COUNTER("xla::");
-  auto self_tensor = GetValueOrThrow(bridge::GetXlaTensor(self));
+  XLA_ASSIGN_OR_THROW(XLATensorPtr self_tensor, bridge::GetXlaTensor(self));
   std::vector<int64_t> all_groups(group_size);
   std::iota(all_groups.begin(), all_groups.end(), 0);
   auto result = tensor_methods::all_gather(self_tensor, 0, group_size,
@@ -349,9 +349,9 @@ at::Tensor all_to_all_single(const at::Tensor& input,
   }
   XLATensorPtr result_ptr;
   torch::lazy::Value new_token;
+  XLA_ASSIGN_OR_THROW(XLATensorPtr input_tensor, bridge::GetXlaTensor(input));
   std::tie(result_ptr, new_token) = tensor_methods::all_to_all(
-      GetValueOrThrow(bridge::GetXlaTensor(input)), token, 0, 0, split_count,
-      {all_groups}, pin_layout);
+      input_tensor, token, 0, 0, split_count, {all_groups}, pin_layout);
   at::Tensor result = bridge::AtenFromXlaTensor(std::move(result_ptr));

   at::Tensor result_with_grad = torch::autograd::make_variable(
@@ -481,7 +481,7 @@ xla::XlaOp BuildReduceScatter(AllReduceType reduce_type, xla::XlaOp input,
 at::Tensor reduce_scatter_tensor(const at::Tensor& input, std::string reduce_op,
                                  int64_t group_size, std::string group_name) {
   TORCH_LAZY_FN_COUNTER("xla::");
-  auto self = GetValueOrThrow(bridge::GetXlaTensor(input));
+  XLA_ASSIGN_OR_THROW(XLATensorPtr self, bridge::GetXlaTensor(input));
   std::vector<int64_t> all_groups(group_size);
   std::iota(all_groups.begin(), all_groups.end(), 0);
   int64_t shard_count = group_size;
```

torch_xla/csrc/data_ops.cpp (2 additions, 2 deletions)

```diff
@@ -197,8 +197,8 @@ xla::XlaOp BuildMaskedFillScalar(xla::XlaOp input, xla::XlaOp mask,
   const xla::Shape& mask_shape = ShapeHelper::ShapeOfXlaOp(mask);

   if (!xla::ShapeUtil::Compatible(input_shape, mask_shape)) {
-    xla::Shape shape =
-        GetValueOrThrow(XlaHelpers::GetPromotedShape(input_shape, mask_shape));
+    XLA_ASSIGN_OR_THROW(xla::Shape shape,
+                        XlaHelpers::GetPromotedShape(input_shape, mask_shape));
     input = BuildExpand(input, shape.dimensions());
     mask = BuildExpand(mask, shape.dimensions());
   }
```

torch_xla/csrc/ops/index_ops.cpp (4 additions, 1 deletion)

```diff
@@ -18,6 +18,7 @@
 #include "torch_xla/csrc/ops/scalar.h"
 #include "torch_xla/csrc/runtime/debug_macros.h"
 #include "torch_xla/csrc/runtime/util.h"
+#include "torch_xla/csrc/status.h"
 #include "torch_xla/csrc/tensor_methods.h"
 #include "torch_xla/csrc/tensor_util.h"
 #include "torch_xla/csrc/xla_graph_executor.h"
@@ -315,8 +316,10 @@ XLATensorPtr GetZeroElementTensor(const XLATensorPtr& base,
       base_dimensions.begin() + start_dim + indices.size(),
       base_dimensions.end());

-  return GetValueOrThrow(
+  XLA_ASSIGN_OR_THROW(
+      XLATensorPtr output,
       tensor_methods::full(dimensions, 0, base->GetDevice(), base->dtype()));
+  return output;
 }

 XLATensorPtr IndexByTensors(const XLATensorPtr& base,
```

torch_xla/csrc/ops/triangular_solve.cpp (2 additions, 1 deletion)

```diff
@@ -33,7 +33,8 @@ std::pair<xla::Shape, xla::Shape> InferTriangularSolveShape(
     return std::pair<xla::Shape, xla::Shape>(rhs_batch_shape, lhs_batch_shape);
   }
   // Obtain the promoted shapes and add back the trailing dimension.
-  xla::Shape rhs_batch_promoted_shape = GetValueOrThrow(
+  XLA_ASSIGN_OR_THROW(
+      xla::Shape rhs_batch_promoted_shape,
       XlaHelpers::GetPromotedShape(rhs_batch_shape, lhs_batch_shape));
   xla::Shape lhs_batch_promoted_shape(rhs_batch_promoted_shape);
   rhs_batch_promoted_shape.add_dimensions(nrhs);
```

torch_xla/csrc/pooling.cpp (5 additions, 2 deletions)

```diff
@@ -49,7 +49,9 @@ xla::XlaComputation CreateGeComputation(xla::PrimitiveType type) {
   xla::XlaOp y = xla::Parameter(&reduction_builder, 1,
                                 xla::ShapeUtil::MakeShape(type, {}), "y");
   xla::Ge(x, y);
-  return GetValueOrThrow(reduction_builder.Build());
+  XLA_ASSIGN_OR_THROW(xla::XlaComputation ge_computation,
+                      reduction_builder.Build());
+  return ge_computation;
 }

 xla::TensorFormat MakeNCHWFormat(int64_t spatial_dim_count) {
@@ -367,7 +369,8 @@ xla::XlaOp ComputeMaxPoolIndices(
     return results;
   };

-  std::vector<xla::XlaOp> results = GetValueOrThrow(
+  XLA_ASSIGN_OR_THROW(
+      std::vector<xla::XlaOp> results,
       xla::WhileLoopHelper(cond_fn, body_fn, initial_values.values,
                            "ComputeMaxPoolIndices", padded_input.builder()));
```

torch_xla/csrc/reduction.cpp (4 additions, 2 deletions)

```diff
@@ -60,7 +60,8 @@ xla::XlaComputation CreateAllComputation(xla::PrimitiveType type) {
   xla::XlaOp zero = xla::Zero(&builder, type);
   xla::XlaOp one = xla::One(&builder, type);
   xla::Select(xla::And(xla::Ne(x, zero), xla::Ne(y, zero)), one, zero);
-  return GetValueOrThrow(builder.Build());
+  XLA_ASSIGN_OR_THROW(xla::XlaComputation all_computation, builder.Build());
+  return all_computation;
 }

 xla::XlaComputation CreateAnyComputation(xla::PrimitiveType type) {
@@ -72,7 +73,8 @@ xla::XlaComputation CreateAnyComputation(xla::PrimitiveType type) {
   xla::XlaOp zero = xla::Zero(&builder, type);
   xla::XlaOp one = xla::One(&builder, type);
   xla::Select(xla::Or(xla::Ne(x, zero), xla::Ne(y, zero)), one, zero);
-  return GetValueOrThrow(builder.Build());
+  XLA_ASSIGN_OR_THROW(xla::XlaComputation any_computation, builder.Build());
+  return any_computation;
 }

 xla::XlaOp GetScaleValue(xla::XlaOp input, xla::XlaOp count,
```

torch_xla/csrc/shape_helper.cpp (2 additions, 1 deletion)

```diff
@@ -6,7 +6,8 @@
 namespace torch_xla {

 const xla::Shape& ShapeHelper::ShapeOfXlaOp(xla::XlaOp op) {
-  return *GetValueOrThrow(GetShape(op));
+  XLA_ASSIGN_OR_THROW(const xla::Shape* shape, GetShape(op));
+  return *shape;
 }

 absl::StatusOr<const xla::Shape * absl_nonnull> GetShape(xla::XlaOp op) {
```

torch_xla/csrc/tensor_methods.cpp (9 additions, 7 deletions)

```diff
@@ -1479,9 +1479,11 @@ std::tuple<XLATensorPtr, XLATensorPtr> cummax(const XLATensorPtr& input,
     at::Tensor val =
         at::empty(shape_, at::TensorOptions().dtype(input->dtype()));
     at::Tensor idx = at::empty(shape_, at::TensorOptions().dtype(at::kLong));
-    return std::make_tuple(
-        GetValueOrThrow(XLATensor::Create(val, input->GetDevice())),
-        GetValueOrThrow(XLATensor::Create(idx, input->GetDevice())));
+    XLA_ASSIGN_OR_THROW(XLATensorPtr xla_val,
+                        XLATensor::Create(val, input->GetDevice()));
+    XLA_ASSIGN_OR_THROW(XLATensorPtr xla_idx,
+                        XLATensor::Create(idx, input->GetDevice()));
+    return std::make_tuple(xla_val, xla_idx);
   }
   torch::lazy::NodePtr node =
       torch_xla::MakeNode<CumMax>(input->GetIrValue(), canonical_dim);
@@ -2533,10 +2535,10 @@ std::tuple<XLATensorPtr, XLATensorPtr, XLATensorPtr> native_batch_norm(
     }
   } else {
     at::Tensor at_input = bridge::AtenFromXlaTensor(input);
-    mean = GetValueOrThrow(
-        bridge::GetXlaTensor(at::empty({0}, at_input.options())));
-    variance_inverse = GetValueOrThrow(
-        bridge::GetXlaTensor(at::empty({0}, at_input.options())));
+    XLA_ASSIGN_OR_THROW(
+        mean, bridge::GetXlaTensor(at::empty({0}, at_input.options())));
+    XLA_ASSIGN_OR_THROW(variance_inverse, bridge::GetXlaTensor(at::empty(
+                                              {0}, at_input.options())));
   }

   XLAGraphExecutor* graph_executor = XLAGraphExecutor::Get();
```

torch_xla/csrc/tensor_ops.cpp (17 additions, 9 deletions)

```diff
@@ -8,6 +8,7 @@
 #include "torch_xla/csrc/runtime/computation_client.h"
 #include "torch_xla/csrc/runtime/debug_macros.h"
 #include "torch_xla/csrc/runtime/util.h"
+#include "torch_xla/csrc/status.h"
 #include "torch_xla/csrc/tensor_methods.h"

 namespace torch_xla {
@@ -148,7 +149,8 @@ XLATensorPtr SmoothL1LossBackward(const XLATensorPtr& grad_output,
   XLATensorPtr grad_scale = tensor_methods::get_dimensions_size(
       broadcasted_input,
       XlaHelpers::GetAllDimensions(broadcasted_input->shape()));
-  XLATensorPtr div_result = GetValueOrThrow(
+  XLA_ASSIGN_OR_THROW(
+      XLATensorPtr div_result,
       tensor_methods::div(elementwise_loss_backward, grad_scale));
   return tensor_methods::mul(div_result, grad_output);
 }
@@ -174,7 +176,8 @@ XLATensorPtr SoftplusBackward(const XLATensorPtr& grad_output,
   XLATensorPtr z = tensor_methods::exp(scaled_input);
   XLATensorPtr one_vec =
       tensor_methods::full_like(z, 1, z->GetDevice(), z->dtype());
-  XLATensorPtr div = GetValueOrThrow(
+  XLA_ASSIGN_OR_THROW(
+      XLATensorPtr div,
       tensor_methods::div(z, tensor_methods::add(z, one_vec, 1)));

   return tensor_methods::where(tensor_methods::gt(scaled_input, threshold),
@@ -207,24 +210,29 @@ XLATensorPtr EmbeddingDenseBackward(const XLATensorPtr& grad_output,
   int64_t numel = xla::ShapeUtil::ElementsIn(indices_shape_ref.get());
   XLATensorPtr grad =
       tensor_methods::view(grad_output, {numel, grad_output->size(-1)});
-  XLATensorPtr grad_weight = GetValueOrThrow(
+  XLA_ASSIGN_OR_THROW(
+      XLATensorPtr grad_weight,
       tensor_methods::full({num_weights, grad_output->size(-1)}, 0,
                            grad_output->GetDevice(), grad_output->dtype()));
   XLATensorPtr indices_rank1 = tensor_methods::view(indices, {numel});
   if (scale_grad_by_freq) {
     // Compute the histogram of index values.
-    XLATensorPtr counts = GetValueOrThrow(tensor_methods::full(
-        {num_weights}, 0, indices->GetDevice(), indices->dtype()));
-    XLATensorPtr ones = GetValueOrThrow(tensor_methods::full(
-        {numel}, 1, indices->GetDevice(), indices->dtype()));
+    XLA_ASSIGN_OR_THROW(
+        XLATensorPtr counts,
+        tensor_methods::full({num_weights}, 0, indices->GetDevice(),
+                             indices->dtype()));
+    XLA_ASSIGN_OR_THROW(XLATensorPtr ones,
+                        tensor_methods::full({numel}, 1, indices->GetDevice(),
+                                             indices->dtype()));
     tensor_methods::index_put_(counts, counts, {indices_rank1}, /*start_dim=*/0,
                                /*values=*/ones,
                                /*accumulate=*/true, /*result_permutation=*/{0});
     XLATensorPtr grad_weights_scale =
         tensor_methods::index(counts, {indices_rank1}, 0);
     // Scale the value of the gradient by the histogram.
-    grad = GetValueOrThrow(tensor_methods::div(
-        grad, tensor_methods::unsqueeze(grad_weights_scale, 1)));
+    XLA_ASSIGN_OR_THROW(
+        grad, tensor_methods::div(
+                  grad, tensor_methods::unsqueeze(grad_weights_scale, 1)));
   }
   // Don't accumulate gradients for indices which are equal with the given
   // padding_idx.
```
