18
18
namespace custom_kernel {
19
19
20
20
template <typename T, typename Context>
21
- void AddNKernel (const Context& dev_ctx,
22
- const std::vector<const phi::DenseTensor*>& x,
23
- phi::DenseTensor* out) {
21
+ void AclopAddNKernel (const Context& dev_ctx,
22
+ const std::vector<const phi::DenseTensor*>& x,
23
+ phi::DenseTensor* out) {
24
24
dev_ctx.template Alloc <T>(out);
25
25
auto stream = dev_ctx.stream ();
26
26
@@ -46,6 +46,29 @@ void AddNKernel(const Context& dev_ctx,
46
46
runner.Run (stream);
47
47
}
48
48
49
+ template <typename T, typename Context>
50
+ void AddNKernel (const Context& dev_ctx,
51
+ const std::vector<const phi::DenseTensor*>& x,
52
+ phi::DenseTensor* out) {
53
+ DO_COMPATIBILITY (
54
+ aclnnSum, (custom_kernel::AclopAddNKernel<T, Context>(dev_ctx, x, out)));
55
+
56
+ dev_ctx.template Alloc <T>(out);
57
+ int n = static_cast <int >(x.size ());
58
+ if (n == 1 ) {
59
+ TensorCopy (dev_ctx, *x[0 ], false , out);
60
+ return ;
61
+ }
62
+
63
+ std::vector<const phi::DenseTensor*> inputs;
64
+ for (int i = 0 ; i < n; ++i) {
65
+ if (x[i] && x[i]->numel () > 0 ) {
66
+ inputs.push_back (x[i]);
67
+ }
68
+ }
69
+ EXEC_NPU_CMD (aclnnSum, dev_ctx, inputs, *out);
70
+ }
71
+
49
72
} // namespace custom_kernel
50
73
51
74
PD_REGISTER_PLUGIN_KERNEL (add_n,
0 commit comments