Skip to content

Commit bb9c213

Browse files
author
noemotiovon
committed
[CANN] Support CONV_TRANSPOSE_1D and ELU operators
Signed-off-by: noemotiovon <[email protected]>
1 parent 916c83b commit bb9c213

File tree

3 files changed

+116
-0
lines changed

3 files changed

+116
-0
lines changed

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252
#include <aclnnop/aclnn_upsample_nearest_2d.h>
5353
#include <aclnnop/aclnn_weight_quant_batch_matmul_v2.h>
5454
#include <aclnnop/aclnn_argmax.h>
55+
#include <aclnnop/aclnn_convolution.h>
56+
#include <aclnnop/aclnn_elu.h>
5557
#include <float.h>
5658

5759
#include <cmath>
@@ -3484,3 +3486,77 @@ void ggml_cann_sin(ggml_backend_cann_context& ctx, ggml_tensor* dst){
34843486
ACL_CHECK(aclDestroyTensor(acl_src));
34853487
ACL_CHECK(aclDestroyTensor(acl_dst));
34863488
}
3489+
3490+
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst){
3491+
ggml_tensor * src0 = dst->src[0];
3492+
ggml_tensor * src1 = dst->src[1];
3493+
3494+
// stride
3495+
int64_t s0 = ((const int32_t*)(dst->op_params))[0];
3496+
3497+
aclTensor* acl_input = ggml_cann_create_tensor(src1, src1->ne, src1->nb, 3, ACL_FORMAT_NCL);
3498+
aclTensor* acl_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3, ACL_FORMAT_NCL);
3499+
aclTensor* acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3, ACL_FORMAT_NCL);
3500+
3501+
int64_t strideVal[1];
3502+
strideVal[0] = s0;
3503+
aclIntArray *stride = aclCreateIntArray(strideVal, 1);
3504+
int64_t paddingVal[] = {0};
3505+
aclIntArray *padding = aclCreateIntArray(paddingVal, 1);
3506+
int64_t dilationVal[] = {1};
3507+
aclIntArray *dilation = aclCreateIntArray(dilationVal, 1);
3508+
int64_t outputPaddingVal[] = {0};
3509+
aclIntArray *outputPadding = aclCreateIntArray(outputPaddingVal, 1);
3510+
bool transposed = true;
3511+
int64_t groups = 1;
3512+
int8_t cubeMathType = 0;
3513+
3514+
uint64_t workspaceSize = 0;
3515+
aclOpExecutor* executor;
3516+
void* workspaceAddr = nullptr;
3517+
ACL_CHECK(aclnnConvolutionGetWorkspaceSize(acl_input, acl_weight, nullptr, stride,
3518+
padding,dilation, transposed, outputPadding, groups, acl_dst,cubeMathType,
3519+
&workspaceSize, &executor));
3520+
3521+
if (workspaceSize > 0) {
3522+
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
3523+
workspaceAddr = workspace_allocator.get();
3524+
}
3525+
3526+
ACL_CHECK(aclnnConvolution(workspaceAddr, workspaceSize, executor, ctx.stream()));
3527+
ACL_CHECK(aclDestroyTensor(acl_input));
3528+
ACL_CHECK(aclDestroyTensor(acl_weight));
3529+
ACL_CHECK(aclDestroyTensor(acl_dst));
3530+
}
3531+
3532+
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){
3533+
ggml_tensor * src0 = dst->src[0];
3534+
3535+
aclTensor* acl_input = ggml_cann_create_tensor(src0);
3536+
aclTensor* acl_dst = ggml_cann_create_tensor(dst);
3537+
3538+
aclScalar* alpha = nullptr;
3539+
float alphaValue = 1.0f;
3540+
alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
3541+
aclScalar* scale = nullptr;
3542+
float scaleValue = 1.0f;
3543+
scale = aclCreateScalar(&scaleValue, aclDataType::ACL_FLOAT);
3544+
aclScalar* inputScale = nullptr;
3545+
float inputScaleValue = 1.0f;
3546+
inputScale = aclCreateScalar(&inputScaleValue, aclDataType::ACL_FLOAT);
3547+
3548+
uint64_t workspaceSize = 0;
3549+
aclOpExecutor* executor;
3550+
void* workspaceAddr = nullptr;
3551+
ACL_CHECK(aclnnEluGetWorkspaceSize(acl_input, alpha, scale, inputScale,
3552+
acl_dst, &workspaceSize, &executor));
3553+
3554+
if (workspaceSize > 0) {
3555+
ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
3556+
workspaceAddr = workspace_allocator.get();
3557+
}
3558+
3559+
ACL_CHECK(aclnnElu(workspaceAddr, workspaceSize, executor, ctx.stream()));
3560+
ACL_CHECK(aclDestroyTensor(acl_input));
3561+
ACL_CHECK(aclDestroyTensor(acl_dst));
3562+
}

ggml/src/ggml-cann/aclnn_ops.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,38 @@ void ggml_cann_cos(ggml_backend_cann_context& ctx, ggml_tensor* dst);
525525
*/
526526
void ggml_cann_sin(ggml_backend_cann_context& ctx, ggml_tensor* dst);
527527

528+
/**
529+
* @brief Computes the 1D transposed convolution (deconvolution) of a ggml tensor using the CANN backend.
530+
*
531+
* @details This function performs a 1D transposed convolution (also known as deconvolution) operation
532+
* on the input tensor. The computed result is stored in the destination tensor `dst`.
533+
* The operation is optimized using the CANN backend for improved performance.
534+
*
535+
* @param ctx The CANN context used for operations.
536+
* @param dst The destination tensor where the transposed convolution result will be stored.
537+
* dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
538+
*/
539+
void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
540+
541+
/**
542+
* @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor using the CANN backend.
543+
*
544+
* @details This function performs an element-wise ELU activation on the input tensor.
545+
* The result is written to the destination tensor `dst`.
546+
* The ELU function is defined as:
547+
*
548+
* ELU(x) = x, if x > 0
549+
* = α * (exp(x) - 1), if x <= 0
550+
*
551+
* where α (alpha) is a hyperparameter, typically set to 1.0.
552+
* This operation is optimized using the CANN backend for high-performance inference or training.
553+
*
554+
* @param ctx The CANN context used for operations.
555+
* @param dst The destination tensor where the ELU-activated result will be stored.
556+
* dst->op is expected to be `GGML_OP_ELU`.
557+
*/
558+
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
559+
528560
template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
529561
aclTensor*, uint64_t*, aclOpExecutor**),
530562
aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,6 +1342,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
13421342
ggml_cann_activation<aclnnHardswishGetWorkspaceSize,
13431343
aclnnHardswish>(ctx, dst);
13441344
break;
1345+
case GGML_UNARY_OP_ELU:
1346+
ggml_cann_elu(ctx, dst);
1347+
break;
13451348
default:
13461349
return false;
13471350
}
@@ -1429,6 +1432,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
14291432
case GGML_OP_SIN:
14301433
ggml_cann_sin(ctx, dst);
14311434
break;
1435+
case GGML_OP_CONV_TRANSPOSE_1D:
1436+
ggml_cann_conv_transpose_1d(ctx, dst);
1437+
break;
14321438
default:
14331439
return false;
14341440
}
@@ -1686,6 +1692,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
16861692
case GGML_UNARY_OP_HARDSWISH:
16871693
case GGML_UNARY_OP_GELU_QUICK:
16881694
case GGML_UNARY_OP_TANH:
1695+
case GGML_UNARY_OP_ELU:
16891696
return true;
16901697
default:
16911698
return false;
@@ -1814,6 +1821,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
18141821
case GGML_OP_ARGMAX:
18151822
case GGML_OP_COS:
18161823
case GGML_OP_SIN:
1824+
case GGML_OP_CONV_TRANSPOSE_1D:
18171825
return true;
18181826
default:
18191827
return false;

0 commit comments

Comments
 (0)