| 
52 | 52 | #include <aclnnop/aclnn_upsample_nearest_2d.h>  | 
53 | 53 | #include <aclnnop/aclnn_weight_quant_batch_matmul_v2.h>  | 
54 | 54 | #include <aclnnop/aclnn_argmax.h>  | 
 | 55 | +#include <aclnnop/aclnn_convolution.h>  | 
 | 56 | +#include <aclnnop/aclnn_elu.h>  | 
55 | 57 | #include <float.h>  | 
56 | 58 | 
 
  | 
57 | 59 | #include <cmath>  | 
@@ -3484,3 +3486,77 @@ void ggml_cann_sin(ggml_backend_cann_context& ctx, ggml_tensor* dst){  | 
3484 | 3486 |     ACL_CHECK(aclDestroyTensor(acl_src));  | 
3485 | 3487 |     ACL_CHECK(aclDestroyTensor(acl_dst));  | 
3486 | 3488 | }  | 
 | 3489 | + | 
 | 3490 | +void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst){  | 
 | 3491 | +    ggml_tensor * src0 = dst->src[0];  | 
 | 3492 | +    ggml_tensor * src1 = dst->src[1];  | 
 | 3493 | + | 
 | 3494 | +    // stride  | 
 | 3495 | +    int64_t s0 = ((const int32_t*)(dst->op_params))[0];  | 
 | 3496 | + | 
 | 3497 | +    aclTensor* acl_input = ggml_cann_create_tensor(src1, src1->ne, src1->nb, 3, ACL_FORMAT_NCL);  | 
 | 3498 | +    aclTensor* acl_weight = ggml_cann_create_tensor(src0, src0->ne, src0->nb, 3, ACL_FORMAT_NCL);  | 
 | 3499 | +    aclTensor* acl_dst = ggml_cann_create_tensor(dst, dst->ne, dst->nb, 3, ACL_FORMAT_NCL);  | 
 | 3500 | + | 
 | 3501 | +    int64_t strideVal[1];  | 
 | 3502 | +    strideVal[0] = s0;  | 
 | 3503 | +    aclIntArray *stride = aclCreateIntArray(strideVal, 1);  | 
 | 3504 | +    int64_t paddingVal[] = {0};  | 
 | 3505 | +    aclIntArray *padding = aclCreateIntArray(paddingVal, 1);  | 
 | 3506 | +    int64_t dilationVal[] = {1};  | 
 | 3507 | +    aclIntArray *dilation = aclCreateIntArray(dilationVal, 1);  | 
 | 3508 | +    int64_t outputPaddingVal[] = {0};  | 
 | 3509 | +    aclIntArray *outputPadding = aclCreateIntArray(outputPaddingVal, 1);  | 
 | 3510 | +    bool transposed = true;  | 
 | 3511 | +    int64_t groups = 1;  | 
 | 3512 | +    int8_t cubeMathType = 0;  | 
 | 3513 | + | 
 | 3514 | +    uint64_t workspaceSize = 0;  | 
 | 3515 | +    aclOpExecutor* executor;  | 
 | 3516 | +    void* workspaceAddr = nullptr;  | 
 | 3517 | +    ACL_CHECK(aclnnConvolutionGetWorkspaceSize(acl_input, acl_weight, nullptr, stride,  | 
 | 3518 | +        padding,dilation, transposed, outputPadding, groups, acl_dst,cubeMathType,  | 
 | 3519 | +        &workspaceSize, &executor));  | 
 | 3520 | + | 
 | 3521 | +    if (workspaceSize > 0) {  | 
 | 3522 | +        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);  | 
 | 3523 | +        workspaceAddr = workspace_allocator.get();  | 
 | 3524 | +    }  | 
 | 3525 | + | 
 | 3526 | +    ACL_CHECK(aclnnConvolution(workspaceAddr, workspaceSize, executor, ctx.stream()));  | 
 | 3527 | +    ACL_CHECK(aclDestroyTensor(acl_input));  | 
 | 3528 | +    ACL_CHECK(aclDestroyTensor(acl_weight));  | 
 | 3529 | +    ACL_CHECK(aclDestroyTensor(acl_dst));  | 
 | 3530 | +}  | 
 | 3531 | + | 
 | 3532 | +void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst){  | 
 | 3533 | +    ggml_tensor * src0 = dst->src[0];  | 
 | 3534 | + | 
 | 3535 | +    aclTensor* acl_input = ggml_cann_create_tensor(src0);  | 
 | 3536 | +    aclTensor* acl_dst = ggml_cann_create_tensor(dst);  | 
 | 3537 | + | 
 | 3538 | +    aclScalar* alpha = nullptr;  | 
 | 3539 | +    float alphaValue = 1.0f;  | 
 | 3540 | +    alpha = aclCreateScalar(&alphaValue, aclDataType::ACL_FLOAT);  | 
 | 3541 | +    aclScalar* scale = nullptr;  | 
 | 3542 | +    float scaleValue = 1.0f;  | 
 | 3543 | +    scale = aclCreateScalar(&scaleValue, aclDataType::ACL_FLOAT);  | 
 | 3544 | +    aclScalar* inputScale = nullptr;  | 
 | 3545 | +    float inputScaleValue = 1.0f;  | 
 | 3546 | +    inputScale = aclCreateScalar(&inputScaleValue, aclDataType::ACL_FLOAT);  | 
 | 3547 | + | 
 | 3548 | +    uint64_t workspaceSize = 0;  | 
 | 3549 | +    aclOpExecutor* executor;  | 
 | 3550 | +    void* workspaceAddr = nullptr;  | 
 | 3551 | +    ACL_CHECK(aclnnEluGetWorkspaceSize(acl_input, alpha, scale, inputScale,   | 
 | 3552 | +        acl_dst, &workspaceSize, &executor));  | 
 | 3553 | + | 
 | 3554 | +    if (workspaceSize > 0) {  | 
 | 3555 | +        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);  | 
 | 3556 | +        workspaceAddr = workspace_allocator.get();  | 
 | 3557 | +    }  | 
 | 3558 | + | 
 | 3559 | +    ACL_CHECK(aclnnElu(workspaceAddr, workspaceSize, executor, ctx.stream()));  | 
 | 3560 | +    ACL_CHECK(aclDestroyTensor(acl_input));  | 
 | 3561 | +    ACL_CHECK(aclDestroyTensor(acl_dst));  | 
 | 3562 | +}  | 
0 commit comments