 #include <aclnnop/aclnn_reflection_pad1d.h>
 #include <aclnnop/aclnn_eq_tensor.h>
 #include <aclnnop/aclnn_gt_scalar.h>
+#include <aclnnop/aclnn_pow.h>
 #include <float.h>
 
 #include <cmath>
@@ -144,23 +145,6 @@ static void aclnn_cast(ggml_backend_cann_context& ctx, aclTensor* acl_src,
     GGML_CANN_CALL_ACLNN_OP(Cast, acl_src, cast_data_type, acl_dst);
 }
 
-/**
- * @brief Casts the elements of a tensor to a specified data type using the CANN backend.
- *
- * @details This function performs a type conversion on the elements of the input tensor `acl_src`
- *          and stores the results in the destination tensor `acl_dst`. The conversion type is
- *          determined based on the `dst` tensor's data type.
- *
- * @param ctx The context for the CANN backend operations.
- * @param acl_src The source tensor whose elements will be cast.
- * @param acl_dst The destination tensor that will store the casted elements.
- * @param dst The ggml tensor specifying the target data type.
- */
-static void aclnn_cast(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-                       aclTensor* acl_dst, ggml_tensor* dst) {
-    aclnn_cast(ctx, acl_src, acl_dst, ggml_cann_type_mapping(dst->type));
-}
-
 void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
     GGML_ASSERT(ggml_can_repeat(src, dst));
@@ -767,7 +751,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         if (dst->type == src0->type) {
             cann_copy(ctx, acl_src, acl_dst);
         } else {
-            aclnn_cast(ctx, acl_src, acl_dst, dst);
+            aclnn_cast(ctx, acl_src, acl_dst, ggml_cann_type_mapping(dst->type));
         }
     } else {
         if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
@@ -792,7 +776,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                     ggml_type_size(dst->type), src0->ne, src_trans_nb,
                     GGML_MAX_DIMS);
 
-                aclnn_cast(ctx, acl_src, src_trans_tensor, dst);
+                aclnn_cast(ctx, acl_src, src_trans_tensor, ggml_cann_type_mapping(dst->type));
                 size_t cpy_size = ggml_nbytes(dst);
                 ACL_CHECK(aclrtMemcpyAsync(
                     dst->data, cpy_size, src_trans_buffer, cpy_size,
@@ -814,7 +798,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 ggml_type_size(dst->type), src0->ne, src_trans_nb,
                 GGML_MAX_DIMS);
 
-            aclnn_cast(ctx, acl_src, src_trans_tensor, dst);
+            aclnn_cast(ctx, acl_src, src_trans_tensor, ggml_cann_type_mapping(dst->type));
 
             size_t cpy_size = ggml_nbytes(dst);
             ACL_CHECK(aclrtMemcpyAsync(dst->data, cpy_size, src_trans_buffer,
@@ -1158,7 +1142,7 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
             tmp_cast_buffer, ggml_cann_type_mapping(dst->type),
             ggml_type_size(dst->type), tmp_im2col_ne, temp_cast_nb,
             GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
-        aclnn_cast(ctx, tmp_im2col_tensor, tmp_cast_tensor, dst);
+        aclnn_cast(ctx, tmp_im2col_tensor, tmp_cast_tensor, ggml_cann_type_mapping(dst->type));
     }
 
     // post-processing
@@ -1733,7 +1717,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
             aclTensor* src_trans_tensor = ggml_cann_create_tensor(
                 src_trans_buffer, ACL_FLOAT, ggml_type_size(dst->type),
                 src0->ne, src_trans_nb, GGML_MAX_DIMS);
-            aclnn_cast(ctx, acl_src0, src_trans_tensor, dst);
+            aclnn_cast(ctx, acl_src0, src_trans_tensor, ggml_cann_type_mapping(dst->type));
             aclnn_embedding_4d(ctx, src_trans_buffer, src0->ne,
                                src_trans_nb, src1, dst);
             ACL_CHECK(aclDestroyTensor(acl_src0));
@@ -2074,7 +2058,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
             output_buffer, ACL_FLOAT16, output_elem_size, output_cast_ne,
             output_cast_nb, GGML_MAX_DIMS);
         aclTensor* acl_dst_tensor = ggml_cann_create_tensor(dst);
-        aclnn_cast(ctx, acl_output_tensor, acl_dst_tensor, dst);
+        aclnn_cast(ctx, acl_output_tensor, acl_dst_tensor, ggml_cann_type_mapping(dst->type));
 
         ACL_CHECK(aclDestroyTensor(acl_output_tensor));
         ACL_CHECK(aclDestroyTensor(acl_dst_tensor));
@@ -2159,37 +2143,29 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     ggml_tensor* src1 = dst->src[1];  // position
     ggml_tensor* src2 = dst->src[2];  // freq_factors
 
-    // arange, [0,1,...,ne0/2]
-    int64_t arange_length = src0->ne[0] / 2;
-    ggml_cann_pool_alloc arange_allocator(ctx.pool(),
-                                          arange_length * sizeof(float_t));
-    void* arange_buffer = arange_allocator.get();
-    int64_t arange_ne[] = {arange_length, 1, 1, 1};
-    size_t arange_nb[] = {sizeof(float_t), sizeof(float_t), sizeof(float_t),
-                          arange_length * sizeof(float_t)};
-
-    aclTensor* acl_arange_tensor =
-        ggml_cann_create_tensor(arange_buffer, ACL_FLOAT, sizeof(float_t),
-                                arange_ne, arange_nb, GGML_MAX_DIMS);
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    // theta_scale arange, [0,1,...,ne00/2 - 1]
+    int64_t theta_scale_length = ne00 / 2;
+    ggml_cann_pool_alloc theta_scale_allocator(ctx.pool(),
+                                               theta_scale_length * sizeof(float_t));
+    void* theta_scale_buffer = theta_scale_allocator.get();
+    int64_t theta_scale_ne[] = {theta_scale_length, 1, 1, 1};
+    size_t theta_scale_nb[] = {sizeof(float_t), sizeof(float_t), sizeof(float_t),
+                               theta_scale_length * sizeof(float_t)};
+
+    aclTensor* acl_theta_scale_tensor =
+        ggml_cann_create_tensor(theta_scale_buffer, ACL_FLOAT, sizeof(float_t),
+                                theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
     float start = 0;
     float step = 1;
-    float stop = src0->ne[0] / 2;
-    float n_elements = src0->ne[0] / 2;
-    aclnn_arange(ctx, acl_arange_tensor, start, stop, step, n_elements);
+    float stop = ne00 / 2;
+    float n_elements = ne00 / 2;
+    aclnn_arange(ctx, acl_theta_scale_tensor, start, stop, step, n_elements);
 
     // power
-    // aclnnPowScalarTensor(): @param self is tensor which should be scalar, so
-    // use aclnn_pow_tensor_tensor() until fixed. aclScalar* acl_theta_scale =
-    // aclCreateScalar(&theta_scale, aclDataType::ACL_FLOAT);
-    // aclnn_power_scalar_tensor(ctx, acl_theta_scale, acl_arange_tensor,
-    // acl_power_tensor);
-    ggml_cann_pool_alloc theta_scale_allocator(ctx.pool(),
-                                               arange_length * sizeof(float_t));
-    void* theta_scale_buffer = theta_scale_allocator.get();
-    aclTensor* acl_theta_scale_tensor = aclnn_values(
-        ctx, theta_scale_buffer, arange_length * sizeof(float_t), arange_ne,
-        GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t), theta_scale);
-    aclnn_pow_tensor_tensor(ctx, acl_theta_scale_tensor, acl_arange_tensor);
+    aclScalar* acl_theta_scale = aclCreateScalar(&theta_scale, aclDataType::ACL_FLOAT);
+    GGML_CANN_CALL_ACLNN_OP(PowScalarTensor, acl_theta_scale, acl_theta_scale_tensor, acl_theta_scale_tensor);
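+    // PowScalarTensor runs in place: entry i of theta_scale_tensor becomes
+    // theta_scale^i, replacing the old aclnn_values + pow_tensor_tensor workaround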
 
     // freq_scale
     if (freq_scale != 1) {
@@ -2200,28 +2176,27 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     if (src2) {
         aclTensor* acl_freq_factors_tensor = ggml_cann_create_tensor(
             src2->data, ggml_cann_type_mapping(src2->type),
-            ggml_type_size(src2->type), arange_ne, arange_nb, GGML_MAX_DIMS);
+            ggml_type_size(src2->type), theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
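+        // with freq_factors present, rescale each of the ne00/2 frequencies: theta_scale[i] /= freq_factors[i]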
         aclnn_div(ctx, acl_theta_scale_tensor, acl_freq_factors_tensor);
         ACL_CHECK(aclDestroyTensor(acl_freq_factors_tensor));
     }
 
     // position
     GGML_ASSERT(src1->type == GGML_TYPE_I32);
     int64_t position_length = src1->ne[0];
-    int64_t position_ne[] = {1, position_length, 1, 1};
-    size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t),
-                            sizeof(int32_t) * position_length,
+    int64_t position_ne[] = {1, 1, position_length, 1};
+    size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t), sizeof(int32_t),
                             sizeof(int32_t) * position_length};
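+    // positions are shaped [1, 1, position_length, 1] so they broadcast against
+    // theta_scale ([ne00/2, 1, 1, 1]) in the multiply below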
     aclTensor* acl_position_tensor = ggml_cann_create_tensor(
         src1->data, ggml_cann_type_mapping(src1->type),
         ggml_type_size(src1->type), position_ne, position_nb, GGML_MAX_DIMS);
 
     // power * position
-    int64_t theta_length = arange_length * position_length;
+    int64_t theta_length = theta_scale_length * position_length;
     ggml_cann_pool_alloc theta_allocator(ctx.pool(),
                                          theta_length * sizeof(float_t));
     void* theta_buffer = theta_allocator.get();
-    int64_t theta_ne[] = {arange_length, position_length, 1, 1};
+    int64_t theta_ne[] = {theta_scale_length, 1, position_length, 1};
     size_t theta_nb[GGML_MAX_DIMS];
     theta_nb[0] = sizeof(float_t);
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
@@ -2233,40 +2208,22 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     aclnn_mul(ctx, acl_position_tensor, acl_theta_scale_tensor,
               acl_theta_tensor);
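+    // the broadcast product comes out as [ne00/2, 1, position_length, 1], the
+    // layout the sin/cos step consumes directly, so no separate permute is needed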
 
-    // permute: [0,1,2,3]->[0,2,1,3]
-    int64_t permute_ne[] = {arange_length, 1, position_length, 1};
-    size_t permute_nb[GGML_MAX_DIMS];
-    permute_nb[0] = sizeof(float_t);
-    for (int i = 1; i < GGML_MAX_DIMS; i++) {
-        permute_nb[i] = permute_nb[i - 1] * permute_ne[i - 1];
-    }
-    ggml_cann_pool_alloc permute_allocator(ctx.pool(),
-                                           theta_length * sizeof(float_t));
-    void* permute_buffer = permute_allocator.get();
-    aclTensor* acl_permute_tensor = ggml_cann_create_tensor(
-        permute_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
-        GGML_MAX_DIMS, ACL_FORMAT_ND);
-    int64_t permute_dim[] = {0, 2, 1, 3};
-    int64_t num_dims = 4;
-    aclnn_permute(ctx, acl_theta_tensor, acl_permute_tensor, permute_dim,
-                  num_dims);
-
     // sin/cos
     ggml_cann_pool_alloc sin_allocator(ctx.pool(),
                                        theta_length * sizeof(float_t));
     void* sin_buffer = sin_allocator.get();
     aclTensor* acl_sin_tensor = ggml_cann_create_tensor(
-        sin_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
+        sin_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
         GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_sin(ctx, acl_permute_tensor, acl_sin_tensor);
+    aclnn_sin(ctx, acl_theta_tensor, acl_sin_tensor);
 
     ggml_cann_pool_alloc cos_allocator(ctx.pool(),
                                        theta_length * sizeof(float_t));
     void* cos_buffer = cos_allocator.get();
     aclTensor* acl_cos_tensor = ggml_cann_create_tensor(
-        cos_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
+        cos_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
         GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_cos(ctx, acl_permute_tensor, acl_cos_tensor);
+    aclnn_cos(ctx, acl_theta_tensor, acl_cos_tensor);
 
     // attn_factor
     if (attn_factor != 1) {
@@ -2282,21 +2239,20 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
22822239    } else  {
22832240        int64_t  num_repeats = 2 ;
22842241        int64_t  dim = 3 ;
2285-         int64_t  output_size = arange_length  * num_repeats;
2242+         int64_t  output_size = theta_scale_length  * num_repeats;
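+        // repeat each angle twice along the last dim so both elements of a
+        // rotated pair share the same sin/cos value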
         aclnn_repeat_interleave(ctx, acl_sin_tensor, acl_sin_repeat_tensor, dim,
                                 num_repeats, output_size);
         aclnn_repeat_interleave(ctx, acl_cos_tensor, acl_cos_repeat_tensor, dim,
                                 num_repeats, output_size);
     }
 
     // release
-    ACL_CHECK(aclDestroyTensor(acl_arange_tensor));
     ACL_CHECK(aclDestroyTensor(acl_theta_scale_tensor));
     ACL_CHECK(aclDestroyTensor(acl_position_tensor));
     ACL_CHECK(aclDestroyTensor(acl_theta_tensor));
-    ACL_CHECK(aclDestroyTensor(acl_permute_tensor));
     ACL_CHECK(aclDestroyTensor(acl_sin_tensor));
     ACL_CHECK(aclDestroyTensor(acl_cos_tensor));
+    ACL_CHECK(aclDestroyScalar(acl_theta_scale));
 }
 
 #ifdef __cplusplus
@@ -2318,7 +2274,6 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     // TODO: use ascendc
     // Only test with LLAMA model.
     ggml_tensor* src0 = dst->src[0];  // input
-    // ggml_tensor* src2 = dst->src[2];  // freq_factors, not used now.
 
     // param
     float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
@@ -2353,13 +2308,13 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 
     // init cos/sin cache
     ggml_cann_pool_alloc sin_allocator(
-        ctx.pool(), src0->ne[0] * src0->ne[2] * sizeof(float_t));
+        ctx.pool(), ne00 * ne02 * sizeof(float_t));
     ggml_cann_pool_alloc cos_allocator(
-        ctx.pool(), src0->ne[0] * src0->ne[2] * sizeof(float_t));
+        ctx.pool(), ne00 * ne02 * sizeof(float_t));
     void* sin_buffer = sin_allocator.get();
     void* cos_buffer = cos_allocator.get();
 
-    int64_t sin_reshape_ne[4] = {src0->ne[0], 1, src0->ne[2], 1};
+    int64_t sin_reshape_ne[4] = {ne00, 1, ne02, 1};
     size_t sin_reshape_nb[GGML_MAX_DIMS];
     sin_reshape_nb[0] = sizeof(float_t);
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
@@ -2372,7 +2327,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         ggml_cann_create_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t),
                                 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
     aclnn_cache_init(ctx, dst, acl_cos_reshape_tensor, acl_sin_reshape_tensor,
-                      theta_scale, freq_scale, attn_factor, is_neox);
+                     theta_scale, freq_scale, attn_factor, is_neox);
 
     aclTensor* acl_src = ggml_cann_create_tensor(src0);
     aclTensor* acl_dst = ggml_cann_create_tensor(dst);
@@ -2549,46 +2504,51 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     return;
 #endif
 
-    // src0 == GGML_TYPE_F16
-    // TODO: optimization this `if` code
-    if (src0->type == GGML_TYPE_F16) {
-        ggml_cann_pool_alloc sin_final_allocator(
-            ctx.pool(), src0->ne[0] * src0->ne[2] * ggml_type_size(src0->type));
-        ggml_cann_pool_alloc cos_final_allocator(
-            ctx.pool(), src0->ne[0] * src0->ne[2] * ggml_type_size(src0->type));
-        void* sin_final_buffer = sin_final_allocator.get();
-        void* cos_final_buffer = cos_final_allocator.get();
-
-        int64_t sin_final_ne[4] = {src0->ne[0], 1, src0->ne[2], 1};
-        size_t sin_final_nb[GGML_MAX_DIMS];
-        sin_final_nb[0] = ggml_type_size(src0->type);
-        for (int i = 1; i < GGML_MAX_DIMS; i++) {
-            sin_final_nb[i] = sin_final_nb[i - 1] * sin_final_ne[i - 1];
+    // ggml_mode = 0 --> aclnn_mode = 1
+    int64_t acl_mode = mode == 0 ? 1 : mode;
+
+    switch (src0->type) {
+        case GGML_TYPE_F32: {
+            GGML_CANN_CALL_ACLNN_OP(RotaryPositionEmbedding, acl_src, acl_cos_reshape_tensor,
+                acl_sin_reshape_tensor, acl_mode, acl_dst);
+            break;
         }
-        aclTensor* acl_sin_final_tensor = ggml_cann_create_tensor(
-            sin_final_buffer, ggml_cann_type_mapping(src0->type),
-            ggml_type_size(src0->type), sin_final_ne, sin_final_nb,
-            GGML_MAX_DIMS);
-        aclTensor* acl_cos_final_tensor = ggml_cann_create_tensor(
-            cos_final_buffer, ggml_cann_type_mapping(src0->type),
-            ggml_type_size(src0->type), sin_final_ne, sin_final_nb,
-            GGML_MAX_DIMS);
+        case GGML_TYPE_F16: {
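+            // this path runs RotaryPositionEmbedding on FP32 data: cast the F16
+            // src into an FP32 scratch buffer, rotate, then cast back to F16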
+            ggml_cann_pool_alloc src_trans_allocator(
+                ctx.pool(), ggml_nelements(src0) * sizeof(float));
+            void* src_trans_buffer = src_trans_allocator.get();
+            ggml_cann_pool_alloc dst_trans_allocator(
+                ctx.pool(), ggml_nelements(dst) * sizeof(float));
+            void* dst_trans_buffer = dst_trans_allocator.get();
 
-        aclnn_cast(ctx, acl_sin_reshape_tensor, acl_sin_final_tensor, dst);
-        aclnn_cast(ctx, acl_cos_reshape_tensor, acl_cos_final_tensor, dst);
-        ACL_CHECK(aclDestroyTensor(acl_cos_reshape_tensor));
-        ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor));
-        acl_sin_reshape_tensor = acl_sin_final_tensor;
-        acl_cos_reshape_tensor = acl_cos_final_tensor;
-    }
+            size_t src_trans_nb[GGML_MAX_DIMS];
+            src_trans_nb[0] = sizeof(float);
+            for (int i = 1; i < GGML_MAX_DIMS; i++) {
+                src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
+            }
 
-    int acl_mode = mode;
-    if (mode == 0) {
-        acl_mode = 1;
-    }
+            aclTensor* acl_src_trans_tensor = ggml_cann_create_tensor(
+                src_trans_buffer, ACL_FLOAT, sizeof(float), src0->ne, src_trans_nb,
+                GGML_MAX_DIMS);
+            aclTensor* acl_dst_trans_tensor = ggml_cann_create_tensor(
+                dst_trans_buffer, ACL_FLOAT, sizeof(float), dst->ne, src_trans_nb,
+                GGML_MAX_DIMS);
+
+            aclnn_cast(ctx, acl_src, acl_src_trans_tensor, ACL_FLOAT);
+
+            GGML_CANN_CALL_ACLNN_OP(RotaryPositionEmbedding, acl_src_trans_tensor, acl_cos_reshape_tensor,
+                acl_sin_reshape_tensor, acl_mode, acl_dst_trans_tensor);
+
+            aclnn_cast(ctx, acl_dst_trans_tensor, acl_dst, ACL_FLOAT16);
 
-    GGML_CANN_CALL_ACLNN_OP(RotaryPositionEmbedding, acl_src, acl_cos_reshape_tensor,
-                  acl_sin_reshape_tensor, acl_mode, acl_dst);
+            ACL_CHECK(aclDestroyTensor(acl_src_trans_tensor));
+            ACL_CHECK(aclDestroyTensor(acl_dst_trans_tensor));
+            break;
+        }
+        default:
+            GGML_ABORT("Unsupported tensor type for GGML_OP_ROPE");
+            break;
+    }
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_cos_reshape_tensor));
     ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor));