diff --git a/backends/npu/kernels/cum_kernel.cc b/backends/npu/kernels/cum_kernel.cc
index e7000409244..db1a0169b68 100644
--- a/backends/npu/kernels/cum_kernel.cc
+++ b/backends/npu/kernels/cum_kernel.cc
@@ -72,13 +72,16 @@ void AclopCumsumKernel(const Context& dev_ctx,
                        bool flatten,
                        bool exclusive,
                        bool reverse,
+                       phi::DataType dtype,
                        phi::DenseTensor* out) {
   dev_ctx.template Alloc<T>(out);
 
   auto axis = axis_scalar.to<int>();
 
-  NPUAttributeMap attr_input = {
-      {"axis", axis}, {"exclusive", exclusive}, {"reverse", reverse}};
+  NPUAttributeMap attr_input = {{"axis", axis},
+                                {"exclusive", exclusive},
+                                {"reverse", reverse},
+                                {"dtype", static_cast<int>(dtype)}};
 
   if (flatten) {
     PADDLE_ENFORCE_EQ(
@@ -105,11 +108,12 @@ void CumsumKernel(const Context& dev_ctx,
                   bool flatten,
                   bool exclusive,
                   bool reverse,
+                  phi::DataType dtype,
                   phi::DenseTensor* out) {
   DO_COMPATIBILITY(
       aclnnCumsumV2,
       (custom_kernel::AclopCumsumKernel<T, Context>(
-          dev_ctx, x, axis_scalar, flatten, exclusive, reverse, out)));
+          dev_ctx, x, axis_scalar, flatten, exclusive, reverse, dtype, out)));
 
   dev_ctx.template Alloc<T>(out);
   auto axis = axis_scalar.to<int>();
@@ -125,9 +129,11 @@ void CumsumKernel(const Context& dev_ctx,
 
     Tensor new_x(x);
     new_x.Resize(phi::make_ddim({x.numel()}));
 
-    EXEC_NPU_CMD(aclnnCumsumV2, dev_ctx, new_x, axis, exclusive, reverse, *out);
+    EXEC_NPU_CMD(
+        aclnnCumsumV2, dev_ctx, new_x, axis, exclusive, reverse, dtype, *out);
   } else {
-    EXEC_NPU_CMD(aclnnCumsumV2, dev_ctx, x, axis, exclusive, reverse, *out);
+    EXEC_NPU_CMD(
+        aclnnCumsumV2, dev_ctx, x, axis, exclusive, reverse, dtype, *out);
   }
 }