[CANN]Support Opt CONV_TRANSPOSE_1D and ELU

noemotiovon · noemotiovon · commit bb9c2136e923 · 2025-04-07T02:05:07.000Z
Signed-off-by: noemotiovon <noemotiovon@gmail.com>
diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp
@@ -52,6 +52,8 @@
 #include <aclnnop/aclnn_upsample_nearest_2d.h>
 #include <aclnnop/aclnn_weight_quant_batch_matmul_v2.h>
 #include <aclnnop/aclnn_argmax.h>
+#include <aclnnop/aclnn_convolution.h>
+#include <aclnnop/aclnn_elu.h>
 #include <float.h>
 
 #include <cmath>
@@ -3484,3 +3486,77 @@ void ggml_cann_sin(ggml_backend_cann_context& ctx, ggml_tensor* dst){
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
+
+void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst){
+    // Runs GGML_OP_CONV_TRANSPOSE_1D through aclnnConvolution in transposed mode.
+    ggml_tensor * src0 = dst->src[0];  // wrapped below as the convolution weight
+    ggml_tensor * src1 = dst->src[1];  // wrapped below as the convolution input
+
+    aclTensor* acl_input = ggml_cann_create_tensor(src1, src1->ne, src1->nb, 3, ACL_FORMAT_NCL);
+    aclTensor* acl_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3, ACL_FORMAT_NCL);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3, ACL_FORMAT_NCL);
+
+    // Stride comes from op_params; padding, dilation and output padding are fixed.
+    int64_t strideVal[] = {((const int32_t*)(dst->op_params))[0]};
+    int64_t paddingVal[] = {0};
+    int64_t dilationVal[] = {1};
+    int64_t outputPaddingVal[] = {0};
+    aclIntArray *stride = aclCreateIntArray(strideVal, 1);
+    aclIntArray *padding = aclCreateIntArray(paddingVal, 1);
+    aclIntArray *dilation = aclCreateIntArray(dilationVal, 1);
+    aclIntArray *outputPadding = aclCreateIntArray(outputPaddingVal, 1);
+
+    uint64_t workspaceSize = 0;
+    aclOpExecutor* executor = nullptr;
+    void* workspaceAddr = nullptr;
+    ACL_CHECK(aclnnConvolutionGetWorkspaceSize(acl_input, acl_weight, /*bias=*/nullptr,
+        stride, padding, dilation, /*transposed=*/true, outputPadding, /*groups=*/1,
+        acl_dst, /*cubeMathType=*/0, &workspaceSize, &executor));
+
+    if (workspaceSize > 0) {
+        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
+        workspaceAddr = workspace_allocator.get();
+    }
+
+    ACL_CHECK(aclnnConvolution(workspaceAddr, workspaceSize, executor, ctx.stream()));
+    // Release every ACL handle created above (int arrays and tensors).
+    ACL_CHECK(aclDestroyIntArray(stride));
+    ACL_CHECK(aclDestroyIntArray(padding));
+    ACL_CHECK(aclDestroyIntArray(dilation));
+    ACL_CHECK(aclDestroyIntArray(outputPadding));
+    ACL_CHECK(aclDestroyTensor(acl_input));
+    ACL_CHECK(aclDestroyTensor(acl_weight));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+}
+
+void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){
+    // Element-wise ELU via aclnnElu; alpha, scale and input scale are all fixed to 1.0.
+    aclTensor* acl_input = ggml_cann_create_tensor(dst->src[0]);
+    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+
+    float alphaValue = 1.0f;
+    float scaleValue = 1.0f;
+    float inputScaleValue = 1.0f;
+    aclScalar* alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);
+    aclScalar* scale = aclCreateScalar(&scaleValue, aclDataType::ACL_FLOAT);
+    aclScalar* inputScale = aclCreateScalar(&inputScaleValue, aclDataType::ACL_FLOAT);
+
+    uint64_t workspaceSize = 0;
+    aclOpExecutor* executor = nullptr;
+    void* workspaceAddr = nullptr;
+    ACL_CHECK(aclnnEluGetWorkspaceSize(acl_input, alpha, scale, inputScale,
+        acl_dst, &workspaceSize, &executor));
+
+    if (workspaceSize > 0) {
+        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
+        workspaceAddr = workspace_allocator.get();
+    }
+
+    ACL_CHECK(aclnnElu(workspaceAddr, workspaceSize, executor, ctx.stream()));
+    // Release every ACL handle created above (scalars and tensors).
+    ACL_CHECK(aclDestroyScalar(alpha));
+    ACL_CHECK(aclDestroyScalar(scale));
+    ACL_CHECK(aclDestroyScalar(inputScale));
+    ACL_CHECK(aclDestroyTensor(acl_input));
+    ACL_CHECK(aclDestroyTensor(acl_dst));
+}
diff --git a/ggml/src/ggml-cann/aclnn_ops.h b/ggml/src/ggml-cann/aclnn_ops.h
@@ -525,6 +525,38 @@ void ggml_cann_cos(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  */
 void ggml_cann_sin(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
+/**
+ * @brief   Computes the 1D transposed convolution (deconvolution) of a ggml tensor using the CANN backend.
+ *
+ * @details Convolves `dst->src[1]` (input) with `dst->src[0]` (weight) via aclnnConvolution in
+ *          transposed mode and stores the result in `dst`. The stride is the first int32 in
+ *          `dst->op_params`; padding, output padding and dilation are fixed to 0, 0 and 1.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the transposed convolution result will be stored.
+ *            dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
+ */
+void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
+/**
+ * @brief   Applies the ELU (Exponential Linear Unit) activation to a ggml tensor using the CANN backend.
+ *
+ * @details This function performs an element-wise ELU activation on `dst->src[0]`
+ *          and writes the result to the destination tensor `dst`.
+ *          The ELU function is defined as:
+ *
+ *              ELU(x) = x,                if x > 0
+ *                     = α * (exp(x) - 1), if x <= 0
+ *
+ *          This implementation calls aclnnElu with α (alpha), scale and input scale
+ *          all fixed to 1.0.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the ELU-activated result will be stored.
+ *            Dispatched when the unary op of `dst` is `GGML_UNARY_OP_ELU`.
+ */
+void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
 template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
                                        aclTensor*, uint64_t*, aclOpExecutor**),
           aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
@@ -1342,6 +1342,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
                     ggml_cann_activation<aclnnHardswishGetWorkspaceSize,
                                          aclnnHardswish>(ctx, dst);
                     break;
+                case GGML_UNARY_OP_ELU:
+                    ggml_cann_elu(ctx, dst);
+                    break;
                 default:
                     return false;
             }
@@ -1429,6 +1432,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
         case GGML_OP_SIN:
             ggml_cann_sin(ctx, dst);
             break;
+        case GGML_OP_CONV_TRANSPOSE_1D:
+            ggml_cann_conv_transpose_1d(ctx, dst);
+            break;
         default:
             return false;
     }
@@ -1686,6 +1692,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
                 case GGML_UNARY_OP_HARDSWISH:
                 case GGML_UNARY_OP_GELU_QUICK:
                 case GGML_UNARY_OP_TANH:
+                case GGML_UNARY_OP_ELU:
                     return true;
                 default:
                     return false;
@@ -1814,6 +1821,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
         case GGML_OP_ARGMAX:
         case GGML_OP_COS:
         case GGML_OP_SIN:
+        case GGML_OP_CONV_TRANSPOSE_1D:
             return true;
         default:
             return false;