[NPU] aclnn add_n (#1333)

will-jl944 · web-flow · commit c1be08e60db6 · 2024-07-17T16:39:55.000+08:00
diff --git a/backends/npu/kernels/add_n_kernel.cc b/backends/npu/kernels/add_n_kernel.cc
@@ -18,9 +18,9 @@
 namespace custom_kernel {
 
 template <typename T, typename Context>
-void AddNKernel(const Context& dev_ctx,
-                const std::vector<const phi::DenseTensor*>& x,
-                phi::DenseTensor* out) {
+void AclopAddNKernel(const Context& dev_ctx,
+                     const std::vector<const phi::DenseTensor*>& x,
+                     phi::DenseTensor* out) {
   dev_ctx.template Alloc<T>(out);
   auto stream = dev_ctx.stream();
 
@@ -46,6 +46,29 @@ void AddNKernel(const Context& dev_ctx,
   runner.Run(stream);
 }
 
+// add_n entry: hands the non-null, non-empty tensors in `x` to aclnnSum.
+template <typename T, typename Context>
+void AddNKernel(const Context& dev_ctx,
+                const std::vector<const phi::DenseTensor*>& x,
+                phi::DenseTensor* out) {
+  // Presumably falls back to AclopAddNKernel when aclnnSum is unavailable;
+  // confirm against the DO_COMPATIBILITY definition.
+  DO_COMPATIBILITY(
+      aclnnSum, (custom_kernel::AclopAddNKernel<T, Context>(dev_ctx, x, out)));
+  dev_ctx.template Alloc<T>(out);
+  if (x.size() == 1) {  // a lone input is copied to `out` via TensorCopy
+    TensorCopy(dev_ctx, *x[0], false, out);
+    return;
+  }
+  // Collect only the operands that are non-null and hold elements.
+  std::vector<const phi::DenseTensor*> inputs;
+  for (const phi::DenseTensor* tensor : x) {
+    if (tensor != nullptr && tensor->numel() > 0) inputs.push_back(tensor);
+  }
+  // NOTE(review): `inputs` can be empty here; confirm aclnnSum accepts that.
+  EXEC_NPU_CMD(aclnnSum, dev_ctx, inputs, *out);
+}
+
 }  // namespace custom_kernel
 
 PD_REGISTER_PLUGIN_KERNEL(add_n,