Commit 0e74bf3

[NewIR]support c_allreduce_sum/c_identity/c_embedding/c_embedding_grad (#56836)
* [NewIR]add c_allreduce_sum/c_identity/c_reduce_sum/c_embedding/c_embedding_grad
* rm VLOG
* rm c_identity from LegacyOpList
* rm VLOG
* rm c_reduce_sum
1 parent 10d60b7 commit 0e74bf3

File tree: 6 files changed, +106 -1 lines

paddle/fluid/ir/dialect/paddle_dialect/utils/utils.cc

Lines changed: 3 additions & 1 deletion

@@ -27,7 +27,9 @@ const std::unordered_set<std::string> LegacyOpList = {
     "pd.c_sync_calc_stream_",
     "pd.c_sync_comm_stream_",
     "pd.send_v2",
-    "pd.recv_v2"};
+    "pd.recv_v2",
+    "pd.c_allreduce_sum",
+    "pd.c_allreduce_sum_"};
 
 enum class AttrType {
   UNDEFINED = 0,
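Since LegacyOpList is a plain std::unordered_set<std::string>, deciding whether an op stays on the legacy translation path is a constant-time lookup. A minimal sketch of that check, with the set contents taken from this diff; the IsLegacyOp helper is illustrative, not Paddle's actual call site:

#include <string>
#include <unordered_set>

// Entries mirror LegacyOpList after this commit.
const std::unordered_set<std::string> LegacyOpList = {
    "pd.c_sync_calc_stream_",
    "pd.c_sync_comm_stream_",
    "pd.send_v2",
    "pd.recv_v2",
    "pd.c_allreduce_sum",
    "pd.c_allreduce_sum_"};

// Hypothetical helper: true if op_name must go through the legacy path.
bool IsLegacyOp(const std::string& op_name) {
  return LegacyOpList.count(op_name) > 0;
}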

paddle/phi/api/yaml/legacy_backward.yaml

Lines changed: 11 additions & 0 deletions

@@ -102,6 +102,17 @@
   composite: batch_norm_grad(x, scale, bias, mean_out, variance_out, saved_mean, saved_variance, reserve_space, out_grad, momentum, epsilon, data_layout, is_test, use_global_stats, trainable_statistics)
   backward : batch_norm_double_grad
 
+- backward_op : c_embedding_grad
+  forward : c_embedding (Tensor weight, Tensor x, int64_t start_index=0) -> Tensor(out)
+  args : (Tensor weight, Tensor x, Tensor out_grad, int64_t start_index=0)
+  output : Tensor(weight_grad)
+  infer_meta :
+    func : EmbeddingGradInferMeta
+    param : [x, weight]
+  kernel :
+    func : c_embedding_grad
+  no_need_buffer : weight
+
 - backward_op : cast_grad
   forward : cast (Tensor x, DataType dtype) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
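For context, c_embedding is the vocabulary-sharded embedding used in model parallelism: each rank holds rows [start_index, start_index + height) of the full table, and its gradient scatters out_grad rows back into that local shard, with ids owned by other ranks contributing nothing. A minimal CPU sketch of that accumulation, assuming float32 data, int64 ids, and row-major layout; this is illustrative only, not the registered c_embedding_grad kernel:

#include <cstdint>
#include <vector>

// Illustrative gradient accumulation for c_embedding_grad: rows of
// out_grad whose id falls inside this rank's shard
// [start_index, start_index + height) are added into weight_grad.
void CEmbeddingGradCpu(const std::vector<int64_t>& ids,
                       const std::vector<float>& out_grad,  // [ids.size(), width]
                       int64_t start_index,
                       int64_t height,
                       int64_t width,
                       std::vector<float>* weight_grad) {   // [height, width]
  weight_grad->assign(height * width, 0.0f);
  for (size_t i = 0; i < ids.size(); ++i) {
    const int64_t local = ids[i] - start_index;
    if (local < 0 || local >= height) continue;  // id owned by another rank
    for (int64_t j = 0; j < width; ++j) {
      (*weight_grad)[local * width + j] += out_grad[i * width + j];
    }
  }
}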

paddle/phi/api/yaml/legacy_ops.yaml

Lines changed: 31 additions & 0 deletions

@@ -123,6 +123,16 @@
   backward : batch_norm_grad
   optional : reserve_space
 
+- op : c_allreduce_sum
+  args : (Tensor x, int ring_id, bool use_calc_stream, bool use_model_parallel)
+  output : Tensor(out)
+  infer_meta :
+    func : AllReduceInferMeta
+    param : [x]
+  kernel :
+    func : c_allreduce_sum
+  inplace : (x -> out)
+
 - op : c_broadcast
   args : (Tensor x, int ring_id=0, int root=0, bool use_calc_stream=false)
   output : Tensor(out)

@@ -142,6 +152,27 @@
   kernel :
     func : c_concat
 
+- op : c_embedding
+  args : (Tensor weight, Tensor x, int64_t start_index=0)
+  output : Tensor(out)
+  infer_meta :
+    func : CEmbeddingInferMeta
+    param : [weight, x, start_index]
+  kernel :
+    func : c_embedding
+    param : [weight, x, start_index]
+    data_type : weight
+  backward : c_embedding_grad
+
+- op : c_identity
+  args : (Tensor x, int ring_id, bool use_calc_stream, bool use_model_parallel)
+  output : Tensor(out)
+  infer_meta :
+    func : CIdentityInferMeta
+  kernel :
+    func : c_identity
+  inplace : (x -> out)
+
 - op : c_sync_calc_stream
   args : (Tensor x)
   output : Tensor(out)
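Both c_allreduce_sum and c_identity leave shape and dtype unchanged, which is why their infer_meta entries need nothing beyond x, and the inplace : (x -> out) lines let the generated inplace variants (e.g. c_allreduce_sum_) reuse the input buffer. Such a passthrough meta function presumably reduces to copying the input's metadata; a sketch under that assumption, not the actual bodies of AllReduceInferMeta or CIdentityInferMeta:

#include "paddle/phi/core/meta_tensor.h"

// Illustrative passthrough: the output mirrors the input's metadata.
void PassThroughInferMeta(const phi::MetaTensor& x, phi::MetaTensor* out) {
  out->set_dims(x.dims());
  out->set_dtype(x.dtype());
  out->set_layout(x.layout());
}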

paddle/phi/api/yaml/op_compat.yaml

Lines changed: 19 additions & 0 deletions

@@ -435,6 +435,13 @@
   outputs :
     out : Out
 
+- op : c_embedding
+  backward : c_embedding_grad
+  inputs :
+    {weight : W, x : Ids}
+  outputs :
+    out : Out
+
 - op : cast
   inputs :
     x : X

@@ -3032,12 +3039,24 @@
   yolo_loss : GetYoloLossExpectedKernelType
   yolo_loss_grad : GetYoloLossExpectedKernelType
 
+- op: c_allreduce_sum
+  inputs :
+    x : X
+  outputs :
+    out: Out
+
 - op: c_broadcast
   inputs :
     x : X
   outputs :
     out : Out
 
+- op: c_identity
+  inputs :
+    x : X
+  outputs :
+    out: Out
+
 - op: c_sync_calc_stream
   inputs :
     x : X

paddle/phi/infermeta/binary.cc

Lines changed: 37 additions & 0 deletions

@@ -1274,6 +1274,43 @@ void EmbeddingInferMeta(const MetaTensor& x,
   out->share_lod(x);
 }
 
+void CEmbeddingInferMeta(const MetaTensor& weight,
+                         const MetaTensor& x,
+                         int64_t start_index,
+                         MetaTensor* out) {
+  const auto& table_dims = weight.dims();
+  const auto& ids_dims = x.dims();
+  int ids_rank = ids_dims.size();
+
+  VLOG(5) << "ids rank is " << ids_rank << std::endl;
+  PADDLE_ENFORCE_EQ(
+      table_dims.size(),
+      2,
+      phi::errors::InvalidArgument(
+          "ShapeError: The dimensions of the 'c_embedding' must be 2. "
+          "But received c_embedding's dimensions = %d, "
+          "c_embedding's shape = [%s].",
+          table_dims.size(),
+          table_dims));
+
+  auto output_dims = phi::vectorize(ids_dims);
+  output_dims.push_back(table_dims[1]);
+  out->set_dims(phi::make_ddim(output_dims));
+  out->set_dtype(weight.dtype());
+  out->share_lod(x);
+
+  const auto height = table_dims[0];
+  const auto width = table_dims[1];
+  PADDLE_ENFORCE_EQ(
+      (height > 0 && width > 0 && start_index >= 0),
+      true,
+      phi::errors::InvalidArgument(
+          "height:%ld width:%ld start_index:%ld must not have negative values",
+          height,
+          width,
+          start_index));
+}
+
 void ExpandAsInferMeta(const MetaTensor& x,
                        const MetaTensor& y,
                        const std::vector<int>& target_shape,
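The shape rule in CEmbeddingInferMeta is simply: output dims = ids dims with the table width appended, so ids of shape [8, 128] looked up in a [vocab_shard, 1024] table produce a [8, 128, 1024] output. A standalone sketch of just that computation, with plain std::vector standing in for phi::DDim:

#include <cstdint>
#include <vector>

// Output shape of c_embedding: ids shape + [table width].
std::vector<int64_t> CEmbeddingOutDims(const std::vector<int64_t>& ids_dims,
                                       const std::vector<int64_t>& table_dims) {
  std::vector<int64_t> out = ids_dims;  // e.g. {8, 128}
  out.push_back(table_dims[1]);         // append hidden size, e.g. 1024
  return out;                           // {8, 128, 1024}
}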

paddle/phi/infermeta/binary.h

Lines changed: 5 additions & 0 deletions

@@ -211,6 +211,11 @@ void EmbeddingInferMeta(const MetaTensor& x,
                         int64_t padding_idx,
                         MetaTensor* out);
 
+void CEmbeddingInferMeta(const MetaTensor& weight,
+                         const MetaTensor& x,
+                         int64_t start_index,
+                         MetaTensor* out);
+
 void ExpandAsInferMeta(const MetaTensor& x,
                        const MetaTensor& y,
                        const std::vector<int>& target_shape,
