
Commit f4b57ae

[Op] Add gradient implementations for the following ops:
- SplitV
- ConcatV2
- BroadcastTo
- Tile
- GatherV2
- Cumsum
- Cast
- FusedBatchNormV3
- Conv2DBackpropInput

Most implementations are translated from the Python versions; ConcatV2 copies the implementation from tensorflow/core/ops/array_grad.cc.

Cherry-picked from TensorFlow. Commit ID: bf3d89b
1 parent 9590fdd commit f4b57ae
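For context on how gradient registrations like the ones in this commit are consumed, below is a minimal, illustrative sketch of driving one of the new gradients (Tile) through the C++ symbolic-gradient API in tensorflow/cc/framework/gradients.h. The choice of op, the input values, and the shapes are assumptions for the example, not code from this commit.

// Illustrative sketch only: op choice, values, and shapes are assumptions.
#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/framework/gradients.h"
#include "tensorflow/cc/ops/standard_ops.h"

int main() {
  using namespace tensorflow;       // for brevity in this sketch
  using namespace tensorflow::ops;

  Scope scope = Scope::NewRootScope();
  auto x = Const(scope, {{1.f, 2.f, 3.f}, {4.f, 5.f, 6.f}});  // 2x3
  auto y = Tile(scope, x, Const(scope, {2, 2}));              // 4x6

  // AddSymbolicGradients looks up the gradient function registered via
  // REGISTER_GRADIENT_OP("Tile", ...) and appends the backward ops to the
  // graph; the upstream gradient defaults to ones_like(y).
  std::vector<Output> grads;
  TF_CHECK_OK(AddSymbolicGradients(scope, {y}, {x}, &grads));

  ClientSession session(scope);
  std::vector<Tensor> outputs;
  TF_CHECK_OK(session.Run({grads[0]}, &outputs));
  // Each element of x appears in 4 tiles of y, so outputs[0] is a 2x3
  // tensor filled with 4.
  return 0;
}

No extra wiring is needed beyond the REGISTER_GRADIENT_OP entries: the symbolic-gradient builder resolves them by op name at graph-construction time.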

6 files changed: +722 −0 lines changed

tensorflow/cc/gradients/array_grad.cc

Lines changed: 270 additions & 0 deletions
@@ -120,6 +120,20 @@ Status SplitGrad(const Scope& scope, const Operation& op,
 }
 REGISTER_GRADIENT_OP("Split", SplitGrad);
 
+Status SplitVGrad(const Scope& scope, const Operation& op,
+                  const std::vector<Output>& grad_inputs,
+                  std::vector<Output>* grad_outputs) {
+  if (op.num_inputs() < 3) {
+    return errors::InvalidArgument("SplitV requires 3 arguments");
+  }
+  grad_outputs->push_back(Concat(scope, grad_inputs, op.input(2)));
+  for (int i = 0; i < op.num_inputs() - 1; ++i) {
+    grad_outputs->push_back(NoGradient());
+  }
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("SplitV", SplitVGrad);
+
 Status FillGrad(const Scope& scope, const Operation& op,
                 const std::vector<Output>& grad_inputs,
                 std::vector<Output>* grad_outputs) {
@@ -491,6 +505,262 @@ Status SliceGrad(const Scope& scope, const Operation& op,
 }
 REGISTER_GRADIENT_OP("Slice", SliceGrad);
 
+Status ConcatGradHelper(const Scope& scope, const Operation& op,
+                        const std::vector<Output>& grad_inputs,
+                        std::vector<Output>* grad_outputs,
+                        int start_value_index, int end_value_index,
+                        int dim_index) {
+  if (end_value_index >= op.num_inputs()) {
+    return errors::Internal("Invalid input index");
+  }
+  std::vector<Output> inputs;
+  for (int i = start_value_index; i < end_value_index; ++i) {
+    inputs.push_back(op.input(i));
+  }
+
+  auto shapes = ShapeN(scope, inputs);
+  const auto unique_name = scope.GetUniqueNameForOp("ConcatOffset");
+  auto builder =
+      ::tensorflow::NodeBuilder(unique_name, "ConcatOffset")
+          .Input(::tensorflow::ops::AsNodeOut(scope, op.input(dim_index)))
+          .Input(::tensorflow::ops::AsNodeOutList(scope, shapes.output));
+  scope.UpdateBuilder(&builder);
+  ::tensorflow::Node* concat_offset_node;
+  scope.UpdateStatus(builder.Finalize(scope.graph(), &concat_offset_node));
+  scope.UpdateStatus(scope.DoShapeInference(concat_offset_node));
+  if (concat_offset_node->num_outputs() != inputs.size()) {
+    return errors::Internal("ConcatOffset has invalid output count");
+  }
+  if (grad_inputs.size() != 1) {
+    return errors::InvalidArgument("Concat grad should have 1 input");
+  }
+
+  // For each dx[i], we take a slice of dy. The offset and size of the
+  // slice is given by offset[i] and shape[i].
+  const Output& dy = grad_inputs[0];
+  for (int i = 0; i < inputs.size(); ++i) {
+    grad_outputs->push_back(
+        Slice(scope, dy, Output(concat_offset_node, i), shapes.output[i]));
+  }
+
+  // Insert a NoGradient for the axis.
+  grad_outputs->insert(grad_outputs->begin() + dim_index, NoGradient());
+  return scope.status();
+}
+
+Status ConcatV2Grad(const Scope& scope, const Operation& op,
+                    const std::vector<Output>& grad_inputs,
+                    std::vector<Output>* grad_outputs) {
+  return ConcatGradHelper(scope, op, grad_inputs, grad_outputs,
+                          /*start_value_index=*/0,
+                          /*end_value_index=*/op.num_inputs() - 1,
+                          /*dim_index=*/op.num_inputs() - 1);
+}
+
+REGISTER_GRADIENT_OP("ConcatV2", ConcatV2Grad);
+
+Status BroadcastToGrad(const Scope& scope, const Operation& op,
+                       const std::vector<Output>& grad_inputs,
+                       std::vector<Output>* grad_outputs) {
+  if (grad_inputs.size() != 1) {
+    return errors::InvalidArgument("BroadcastTo grad should have 1 grad input");
+  }
+  if (op.num_inputs() != 2) {
+    return errors::InvalidArgument("BroadcastTo requires 2 inputs");
+  }
+
+  auto x_shape = Shape(scope, op.input(0));
+  auto args = internal::BroadcastGradientArgs(scope, x_shape, op.input(1));
+  auto sum_gx = Sum(scope, grad_inputs[0], args.r0);
+  grad_outputs->push_back(Reshape(scope, sum_gx, x_shape));
+  grad_outputs->push_back(NoGradient());
+  return scope.status();
+}
+
+REGISTER_GRADIENT_OP("BroadcastTo", BroadcastToGrad);
+
+Status TileGrad(const Scope& scope, const Operation& op,
+                const std::vector<Output>& grad_inputs,
+                std::vector<Output>* grad_outputs) {
+  if (op.num_inputs() != 2) {
+    return errors::InvalidArgument("Tile requires 2 inputs");
+  }
+  if (grad_inputs.size() != 1) {
+    return errors::InvalidArgument("Tile grad requires 1 grad input");
+  }
+
+  Shape::Attrs shape_attrs;
+  shape_attrs.out_type_ = op.input_type(1);
+  auto input_shape = Shape(scope, op.input(0), shape_attrs);
+  // We interleave multiples and input_shape to get split_shape,
+  // reshape grad to split_shape, and reduce along all even
+  // dimensions (the tiled dimensions) to get the result
+  // with shape input_shape. For example
+  //   input_shape = [20, 30, 40]
+  //   multiples = [2, 3, 4]
+  //   split_shape = [2, 20, 3, 30, 4, 40]
+  //   axes = [0, 2, 4]
+  auto stack = Stack(scope, {op.input(1), input_shape.output});
+  auto perm = Range(scope, Sub(scope, Rank(scope, stack), 1), -1, -1);
+  auto split_shape = Reshape(scope, Transpose(scope, stack, perm), {-1});
+  auto axes = Range(scope, Const(scope, 0), Size(scope, split_shape.output), 2);
+  auto input_grad = ReduceSum(
+      scope, Reshape(scope, grad_inputs[0], split_shape.output), axes.output);
+  grad_outputs->push_back(input_grad.output);
+  grad_outputs->push_back(NoGradient());
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("Tile", TileGrad);
+
+// Create a constant of the provided d_type.
+Output ConstHelper(const Scope& scope, int value, DataType d_type) {
+  return Cast(scope, Const(scope, value), d_type);
+}
+
+// Adds the batch offsets to the given indices and returns the results.
+Output GetBatchIndices(const Scope& scope, const Output& params_shape,
+                       const Output& indices, int batch_dims) {
+  Output batch_indices = indices;
+  auto indices_ndims = Rank(scope, indices);
+  auto casted_params_shape = Cast(scope, params_shape, indices.type());
+  Output accum_dim_value = ConstHelper(scope, 1, indices.type());
+  for (int dim = batch_dims; dim > 0; dim--) {
+    Output dim_value = Slice(scope, casted_params_shape, {dim - 1}, {1});
+    accum_dim_value = Multiply(scope, accum_dim_value,
+                               Slice(scope, casted_params_shape, {dim}, {1}));
+    auto start = ConstHelper(scope, 0, indices.type());
+    auto step = ConstHelper(scope, 1, indices.type());
+    Output dim_indices = Range(scope, start, Squeeze(scope, dim_value), step);
+    dim_indices = Multiply(scope, dim_indices, accum_dim_value);
+    auto one = Cast(scope, Const(scope, {1}), indices.type());
+    auto dim_shape = Concat(
+        scope,
+        {Output(Tile(scope, one, Const(scope, {dim - 1}))), dim_value,
+         Output(Tile(scope, one,
+                     ExpandDims(scope, Sub(scope, indices_ndims, dim), 0)))},
+        /*axis=*/0);
+    batch_indices =
+        Add(scope, batch_indices, Reshape(scope, dim_indices, dim_shape));
+  }
+
+  return batch_indices;
+}
+
+Output BatchGatherGrad(const Scope& scope, Output params_shape, Output values,
+                       Output indices, int batch_dims, Output gather_dim_size) {
+  // Axis is the first non-batch dimension.
+  auto indices_size = ExpandDims(scope, Size(scope, indices), 0);
+  Output outer_shape, flat_values_shape;
+  if (batch_dims != 0) {
+    auto values_shape = Shape(scope, values);
+    // Add the batch offsets to indices and flatten the batch dimensions.
+    outer_shape = Slice(scope, values_shape, {0}, {batch_dims});
+    auto inner_shape =
+        Slice(scope, Slice(scope, values_shape, {batch_dims}, {-1}), {1}, {-1});
+    auto batch_size = Prod(scope, outer_shape, /*axis=*/0);
+    flat_values_shape = Concat(scope, {{-1}, inner_shape}, /*axis=*/0);
+    gather_dim_size = Multiply(scope, gather_dim_size, batch_size);
+    indices = GetBatchIndices(scope, params_shape, indices, batch_dims);
+    values = Reshape(scope, values, flat_values_shape);
+  }
+
+  indices = Reshape(scope, indices, indices_size);
+  Output params_grad =
+      UnsortedSegmentSum(scope, values, indices, gather_dim_size);
+
+  if (batch_dims != 0) {
+    // Put back the batch dimensions.
+    params_grad = Reshape(scope, params_grad, params_shape);
+  }
+  return params_grad;
+}
+
+Status GatherV2Grad(const Scope& scope, const Operation& op,
+                    const std::vector<Output>& grad_inputs,
+                    std::vector<Output>* grad_outputs) {
+  if (op.num_inputs() != 3) {
+    return errors::InvalidArgument("Gather requires 3 inputs");
+  }
+  if (grad_inputs.size() != 1) {
+    return errors::InvalidArgument("Gather grad requires 1 grad input");
+  }
+
+  // params can be large, so colocate the shape calculation with it.
+  // params can be very large for sparse model, array_ops.shape raises
+  // exception on the Windows platform when any dimension is larger than
+  // int32. params_shape is not used in optimizer apply_sparse gradients,
+  // so it's fine to convert it back to int32 regardless of truncation.
+  auto params = op.input(0);
+  auto colocate_scope = scope.ColocateWith(params);
+  Shape::Attrs shape_attrs;
+  shape_attrs.out_type_ = DT_INT64;
+  auto params_shape64 = Shape(colocate_scope, params, shape_attrs);
+  Output params_shape = Cast(colocate_scope, params_shape64, DT_INT32);
+
+  auto indices = op.input(1);
+  auto indices_size = ExpandDims(scope, Size(scope, indices), 0);
+  auto axis = op.input(2);
+  auto axis_expand = ExpandDims(scope, axis, 0);
+
+  int batch_dims;
+  TF_RETURN_IF_ERROR(
+      GetNodeAttr(op.node()->attrs(), "batch_dims", &batch_dims));
+  if (batch_dims < 0) {
+    // TODO(bdodson): Figure out if we can find the param rank here, like the
+    // python implementation does.
+    return errors::InvalidArgument(
+        "C++ GatherV2 gradient does not support negative batch_dims.");
+  }
+
+  // Handle axis by transposing the axis dimension to be the first non-batch
+  // dimension, compute the gradient and transpose the result back.
+  auto outer_shape = Slice(scope, params_shape, {0}, axis_expand);
+  auto inner_shape =
+      Slice(scope, Slice(scope, params_shape, axis_expand, {-1}), {1}, {-1});
+  auto values_shape = Concat(scope, {outer_shape, {-1}, inner_shape}, 0);
+  auto values_dims = Size(scope, values_shape);
+  auto axis_dims = Size(scope, outer_shape);
+
+  Output outer_batches_indices = Range(scope, 0, batch_dims, /*delta=*/1);
+  Output batch_axis_indices = Range(scope, batch_dims, axis_dims, /*delta=*/1);
+  Output inner_axes_indices =
+      Range(scope, Add(scope, axis_dims, 1), values_dims, /*delta=*/1);
+  Output axis_dims_expand = ExpandDims(scope, axis_dims, 0);
+
+  auto values = Reshape(scope, grad_inputs[0], values_shape);
+
+  // Move values[axis] up to values[batch_dims]
+  Output transpose_dims = Concat(scope,
+                                 {outer_batches_indices, axis_dims_expand,
+                                  batch_axis_indices, inner_axes_indices},
+                                 0);
+  auto values_transpose = Transpose(scope, values, transpose_dims);
+  Output gather_dim_size =
+      Squeeze(scope, Slice(scope, params_shape, axis_expand, {1}));
+  params_shape = Gather(scope, params_shape, transpose_dims);
+
+  auto params_grad = BatchGatherGrad(scope, params_shape, values_transpose,
+                                     indices, batch_dims, gather_dim_size);
+
+  // Inverts the above transpose by moving dimension batch_dims back to its
+  // original position.
+  Output invert_transpose_dims = Concat(scope,
+                                        {outer_batches_indices,
+                                         Add(scope, batch_axis_indices, 1),
+                                         {batch_dims},
+                                         inner_axes_indices},
+                                        0);
+
+  params_grad = Transpose(scope, params_grad, invert_transpose_dims);
+
+  grad_outputs->push_back(params_grad);
+  grad_outputs->push_back(NoGradient());
+  grad_outputs->push_back(NoGradient());
+  return scope.status();
+}
+
+REGISTER_GRADIENT_OP("GatherV2", GatherV2Grad);
+
 }  // anonymous namespace
 }  // namespace ops
 }  // namespace tensorflow
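Gradient functions like these are normally checked against a numeric estimate. Below is a minimal, illustrative sketch of that pattern using TensorFlow's C++ gradient checker; the op under test (BroadcastTo), the shapes, and the tolerance are assumptions for illustration, not test code from this commit.

// Illustrative sketch only: op choice, shapes, and tolerance are assumptions.
#include "tensorflow/cc/framework/gradient_checker.h"
#include "tensorflow/cc/ops/standard_ops.h"

void CheckBroadcastToGradient() {
  using namespace tensorflow;       // for brevity in this sketch
  using namespace tensorflow::ops;

  Scope scope = Scope::NewRootScope();
  TensorShape x_shape({2, 1, 3});
  TensorShape y_shape({2, 4, 3});
  auto x = Placeholder(scope, DT_FLOAT, Placeholder::Shape(x_shape));
  auto y = BroadcastTo(scope, x, Const(scope, {2, 4, 3}));

  // ComputeGradientError builds the registered symbolic gradient
  // (BroadcastToGrad) and compares it against a numerically estimated
  // Jacobian, returning the largest absolute difference.
  float max_error;
  TF_CHECK_OK((ComputeGradientError<float, float, float>(
      scope, {x}, {x_shape}, {y}, {y_shape}, &max_error)));
  CHECK_LT(max_error, 1e-4);  // A gtest-based test would use EXPECT_LT.
}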
