Skip to content

Commit c1be08e

Browse files
authored
[NPU] aclnn add_n (#1333)
1 parent bcc47be commit c1be08e

File tree

1 file changed

+26
-3
lines changed

1 file changed

+26
-3
lines changed

backends/npu/kernels/add_n_kernel.cc

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
namespace custom_kernel {
1919

2020
template <typename T, typename Context>
21-
void AddNKernel(const Context& dev_ctx,
22-
const std::vector<const phi::DenseTensor*>& x,
23-
phi::DenseTensor* out) {
21+
void AclopAddNKernel(const Context& dev_ctx,
22+
const std::vector<const phi::DenseTensor*>& x,
23+
phi::DenseTensor* out) {
2424
dev_ctx.template Alloc<T>(out);
2525
auto stream = dev_ctx.stream();
2626

@@ -46,6 +46,29 @@ void AddNKernel(const Context& dev_ctx,
4646
runner.Run(stream);
4747
}
4848

49+
template <typename T, typename Context>
50+
void AddNKernel(const Context& dev_ctx,
51+
const std::vector<const phi::DenseTensor*>& x,
52+
phi::DenseTensor* out) {
53+
DO_COMPATIBILITY(
54+
aclnnSum, (custom_kernel::AclopAddNKernel<T, Context>(dev_ctx, x, out)));
55+
56+
dev_ctx.template Alloc<T>(out);
57+
int n = static_cast<int>(x.size());
58+
if (n == 1) {
59+
TensorCopy(dev_ctx, *x[0], false, out);
60+
return;
61+
}
62+
63+
std::vector<const phi::DenseTensor*> inputs;
64+
for (int i = 0; i < n; ++i) {
65+
if (x[i] && x[i]->numel() > 0) {
66+
inputs.push_back(x[i]);
67+
}
68+
}
69+
EXEC_NPU_CMD(aclnnSum, dev_ctx, inputs, *out);
70+
}
71+
4972
} // namespace custom_kernel
5073

5174
PD_REGISTER_PLUGIN_KERNEL(add_n,

0 commit comments

Comments
 (0)