-#ifndef CANN_ACLNN_OPS
-#define CANN_ACLNN_OPS
-
 /**
- * @file acl_tensor
- * @brief This file contains related functions of ggml_tensor and acl_tensor.
- * Contains conversion from ggml_tensor to acl_tensor, broadcast and other
- * functions.
- * @author hipudding <[email protected]>
- * @author wangshuai09 <[email protected]>
- * @date July 15, 2024
- *
 * Copyright (c) 2023-2024 The ggml authors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * IN THE SOFTWARE.
 */
 
+#ifndef CANN_ACLNN_OPS
+#define CANN_ACLNN_OPS
+
 #include <aclnnop/aclnn_abs.h>
 #include <aclnnop/aclnn_neg.h>
 #include <aclnnop/aclnn_exp.h>
@@ -483,8 +475,8 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 * operation is executed using the CANN backend for optimized performance.
 *
 * @param ctx The CANN context used for operations.
- * @param dst The destination tensor where the indices of the maximum values will be stored.
- *            dst->op is `GGML_OP_ARGMAX`.
+ * @param dst The destination tensor where the indices of the maximum values will
+ *            be stored. dst->op is `GGML_OP_ARGMAX`.
 */
 void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
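// Illustrative dispatch sketch (not part of the diff): the ggml_cann_* functions
// declared in this header share the (ctx, dst) signature, with dst->op naming the
// operation and dst->src[] holding its inputs, so a backend compute hook can route
// on dst->op. The wrapper name below is hypothetical; only ggml_cann_argmax and
// GGML_OP_ARGMAX come from the surrounding sources.
static void cann_compute_op_sketch(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    switch (dst->op) {
        case GGML_OP_ARGMAX:
            ggml_cann_argmax(ctx, dst);  // dst receives the indices of the maximum values
            break;
        default:
            break;  // other ops omitted from this sketch
    }
}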
@@ -600,40 +592,8 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
                aclTensor* acl_dst);
 
 /**
- * @brief Launches an asynchronous task using the memory allocator.
- *
- * This macro submit an asynchronous task on the specified stream.
- * The task uses memory allocated by the allocator. It is guaranteed
- * that the memory will not be accessed by other tasks until this task
- * completes, due to the sequential execution order within the same stream.
- *
- * @param OP_NAME aclnn operator name.
- * @param args Additional arguments required by the task.
- *
- * @note
- * Memory from the allocator will be "freed" immediately and can be
- * reallocated to other pointers. However, it won't be accessed by any
- * other task before this asynchronous task ends, because all tasks in the
- * same stream are executed in queue order.
- */
-#define GGML_CANN_CALL_ACLNN_OP(OP_NAME, ...) \
-    do { \
-        uint64_t workspaceSize = 0; \
-        aclOpExecutor * executor; \
-        void * workspaceAddr = nullptr; \
- \
-        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
- \
-        if (workspaceSize > 0) { \
-            ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); \
-            workspaceAddr = workspace_allocator.get(); \
-        } \
-        ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream())); \
-    } while (0)
-
-
-/**
- * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one output tensor.
+ * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
+ *        output tensor.
 *
 * This function checks whether broadcasting is needed between `src0` and `src1`.
 * If broadcasting is required, it calculates the proper shapes and creates
@@ -647,14 +607,57 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 * @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
 * @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
 */
-void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclTensor ** acl_src0,
-                 aclTensor ** acl_src1, aclTensor ** acl_dst);
+void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
+                 aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
+
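// Minimal usage sketch for bcast_shape: a binary op implementation typically
// creates broadcast-compatible ACL tensors for dst->src[0], dst->src[1] and dst,
// runs a kernel on them, and then releases the handles. binary_kernel_sketch is a
// placeholder for any such kernel; it is not a function declared in this header.
static void binary_op_sketch(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src0 = dst->src[0];
    ggml_tensor* src1 = dst->src[1];

    aclTensor* acl_src0 = nullptr;
    aclTensor* acl_src1 = nullptr;
    aclTensor* acl_dst  = nullptr;
    // Creates ACL tensors whose shapes are broadcast-compatible with each other.
    bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst);

    binary_kernel_sketch(ctx, acl_src0, acl_src1, acl_dst);  // placeholder kernel call

    // ACL tensor handles are released once the operation has been launched.
    ACL_CHECK(aclDestroyTensor(acl_src0));
    ACL_CHECK(aclDestroyTensor(acl_src1));
    ACL_CHECK(aclDestroyTensor(acl_dst));
}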
+/**
+ * @brief Computes the 1D transposed convolution (deconvolution) of a ggml
+ *        tensor using the CANN backend.
+ *
+ * @details This function performs a 1D transposed convolution (also known as
+ * deconvolution) operation on the input tensor. The computed result is stored
+ * in the destination tensor `dst`. The operation is optimized using the CANN
+ * backend for improved performance.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the transposed convolution result
+ *            will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
+ */
+void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
 /**
- * @brief Applies a element-wise operation to two input tensors using the CANN backend.
+ * @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
+ *        using the CANN backend.
+ *
+ * @details This function performs an element-wise ELU activation on the input
+ * tensor. The result is written to the destination tensor `dst` in-place.
+ * The ELU function is defined as:
+ *
+ *   \text{ELU}(x) =
+ *   \begin{cases}
+ *     x,                                 & \text{if } x > 0 \\
+ *     \alpha \left( \exp(x) - 1 \right), & \text{if } x \leq 0
+ *   \end{cases}
 *
- * This templated function takes a binary operator and applies it to two source tensors
- * associated with the destination tensor. The function handles broadcasting as needed.
+ * where α (alpha) is a hyperparameter, typically set to 1.0.
+ * This operation is optimized using the CANN backend for high-performance
+ * inference or training.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the ELU-activated result will be stored.
+ *            dst->op is expected to be `GGML_OP_ELU`.
+ */
+void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
+/**
+ * @brief Applies an element-wise operation to two input tensors using the CANN
+ *        backend.
+ *
+ * This templated function takes a binary operator and applies it to two source
+ * tensors associated with the destination tensor. The function handles
+ * broadcasting as needed.
 *
 * @tparam binary_op A callable object (e.g., lambda or function pointer) representing
 *         the binary operation to be performed. It must take three arguments:
@@ -681,6 +684,38 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
 
+/**
+ * @brief Launches an asynchronous task using the memory allocator.
+ *
+ * This macro submits an asynchronous task on the specified stream.
+ * The task uses memory allocated by the allocator. It is guaranteed
+ * that the memory will not be accessed by other tasks until this task
+ * completes, due to the sequential execution order within the same stream.
+ *
+ * @param OP_NAME aclnn operator name.
+ * @param args    Additional arguments required by the task; they are forwarded
+ *                to the operator's GetWorkspaceSize call.
+ *
+ * @note
+ * Memory from the allocator will be "freed" immediately and can be
+ * reallocated to other pointers. However, it won't be accessed by any
+ * other task before this asynchronous task ends, because all tasks in the
+ * same stream are executed in queue order.
+ */
+#define GGML_CANN_CALL_ACLNN_OP(OP_NAME, ...) \
+    do { \
+        uint64_t workspaceSize = 0; \
+        aclOpExecutor * executor; \
+        void * workspaceAddr = nullptr; \
+ \
+        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
+ \
+        if (workspaceSize > 0) { \
+            ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); \
+            workspaceAddr = workspace_allocator.get(); \
+        } \
+        ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream())); \
+    } while (0)
+
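// Usage sketch for GGML_CANN_CALL_ACLNN_OP, assuming a `ctx` variable naming the
// current ggml_backend_cann_context is in scope (the macro refers to ctx.pool()
// and ctx.stream() directly). With OP_NAME = Abs the macro expands to
// aclnnAbsGetWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor) followed by
// aclnnAbs(workspaceAddr, workspaceSize, executor, ctx.stream()), with any required
// workspace taken from ctx.pool(). The wrapper function name below is hypothetical.
static void abs_call_sketch(ggml_backend_cann_context& ctx, aclTensor* acl_src, aclTensor* acl_dst) {
    GGML_CANN_CALL_ACLNN_OP(Abs, acl_src, acl_dst);
}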
 /**
 * @brief Applies a unary operation to an input tensor using the CANN backend.
 *
@@ -690,7 +725,6 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 * @tparam unary_op A callable with the signature:
 *         void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
 *         where the first aclTensor is the source and the second is the destination.
- *
 * @param ctx The CANN backend context for managing resources and execution.
 * @param dst The destination tensor. Its src[0] is treated as the input tensor.
 */
@@ -702,10 +736,30 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
     aclTensor* acl_dst = ggml_cann_create_tensor(dst);
 
     unary_op(ctx, acl_src, acl_dst);
+
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
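// Usage sketch: aclnn_sin, declared earlier in this header, has exactly the
// void(ggml_backend_cann_context&, aclTensor*, aclTensor*) shape required by the
// template parameter, so it can be plugged in directly. The wrapper function
// name below is hypothetical.
static void sin_via_template_sketch(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_cann_unary_op<aclnn_sin>(ctx, dst);  // dst->src[0] is the input tensor
}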
 
+/**
+ * @brief Applies a unary operation to a ggml tensor using the CANN backend.
+ *
+ * @details This function performs a unary operation on the input tensor using
+ * a user-provided lambda or callable object `unary_op`, which accepts the CANN
+ * context and two ACL tensors (source and destination). Internally, this function
+ * creates ACL representations of the ggml tensors and invokes the unary operation.
+ * The result is stored in the destination tensor `dst`. This utility abstracts the
+ * common boilerplate of tensor conversion and cleanup when implementing unary ops.
+ *
+ * @param unary_op A callable that performs the unary operation using CANN APIs.
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the result will be stored.
+ *            The source tensor is retrieved from `dst->src[0]`.
+ */
+void ggml_cann_unary_op(
+    std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
+    ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
 /**
 * @brief Helper macro to invoke a unary ACL operation using ggml_cann_unary_op.
 *
@@ -725,11 +779,12 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
 */
 #define GGML_CANN_CALL_UNARY_OP(OP_NAME) \
     do { \
-        auto lambda = [](auto ctx, auto acl_src, auto acl_dst) { \
+        auto lambda = [](ggml_backend_cann_context& ctx, \
+                         aclTensor* acl_src, \
+                         aclTensor* acl_dst) { \
             GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst); \
         }; \
-        ggml_cann_unary_op<lambda>(ctx, dst); \
+        ggml_cann_unary_op(lambda, ctx, dst); \
     } \
     while (0)
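// Call-site sketch for GGML_CANN_CALL_UNARY_OP: the expansion names `ctx` and
// `dst`, so both must be in scope where the macro is used. Passing Neg wires the
// generated lambda to aclnnNeg from the aclnn_neg.h include at the top of the
// file; the wrapper function name below is hypothetical.
static void neg_op_sketch(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    GGML_CANN_CALL_UNARY_OP(Neg);
}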
-
 #endif // CANN_ACLNN_OPS