@@ -751,45 +751,56 @@ static void cann_copy(ggml_backend_cann_context& ctx, aclTensor* acl_src,
751751}
752752
753753void ggml_cann_dup (ggml_backend_cann_context& ctx, ggml_tensor* dst) {
754- ggml_tensor* src0 = dst->src [0 ];
755- void * src_trans_buffer = src0->data ;
756- ggml_cann_pool_alloc src_buffer_allocator;
757- if (!ggml_is_contiguous (src0)) {
754+ ggml_tensor* src0 = dst->src [0 ];
755+
756+ if (ggml_are_same_shape (src0, dst)) {
758757 aclTensor* acl_src = ggml_cann_create_tensor (src0);
759- src_buffer_allocator.alloc (ctx.pool (),
760- ggml_nelements (src0) * ggml_type_size (src0->type ));
761- src_trans_buffer = src_buffer_allocator.get ();
762- size_t src_trans_nb[GGML_MAX_DIMS];
763- src_trans_nb[0 ] = ggml_type_size (src0->type );
764- for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
765- src_trans_nb[i] = src_trans_nb[i - 1 ] * src0->ne [i - 1 ];
758+ aclTensor* acl_dst = ggml_cann_create_tensor (dst);
759+ if (dst->type == src0->type ) {
760+ cann_copy (ctx, acl_src, acl_dst);
761+ } else {
762+ aclnn_cast (ctx, acl_src, acl_dst, ggml_cann_type_mapping (dst->type ));
763+ }
764+ ggml_cann_release_resources (ctx, acl_src, acl_dst);
765+ } else {
766+ void * src_trans_buffer = src0->data ;
767+ ggml_cann_pool_alloc src_buffer_allocator;
768+ if (!ggml_is_contiguous (src0)) {
769+ aclTensor* acl_src = ggml_cann_create_tensor (src0);
770+ src_buffer_allocator.alloc (ctx.pool (),
771+ ggml_nelements (src0) * ggml_type_size (src0->type ));
772+ src_trans_buffer = src_buffer_allocator.get ();
773+ size_t src_trans_nb[GGML_MAX_DIMS];
774+ src_trans_nb[0 ] = ggml_type_size (src0->type );
775+ for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
776+ src_trans_nb[i] = src_trans_nb[i - 1 ] * src0->ne [i - 1 ];
777+ }
778+ aclTensor* src_trans_tensor = ggml_cann_create_tensor (
779+ src_trans_buffer, ggml_cann_type_mapping (src0->type ),
780+ ggml_type_size (src0->type ), src0->ne , src_trans_nb,
781+ GGML_MAX_DIMS);
782+ cann_copy (ctx, acl_src, src_trans_tensor);
783+ ggml_cann_release_resources (ctx, acl_src, src_trans_tensor);
766784 }
767- aclTensor* src_trans_tensor = ggml_cann_create_tensor (
768- src_trans_buffer, ggml_cann_type_mapping (src0->type ),
769- ggml_type_size (src0->type ), src0->ne , src_trans_nb,
770- GGML_MAX_DIMS);
771- cann_copy (ctx, acl_src, src_trans_tensor);
772- ggml_cann_release_resources (ctx, acl_src, src_trans_tensor);
773- }
774785
775- size_t src_reshape_nb[GGML_MAX_DIMS];
776- src_reshape_nb[0 ] = ggml_type_size (src0->type );
777- for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
778- src_reshape_nb[i] = src_reshape_nb[i - 1 ] * dst->ne [i - 1 ];
779- }
786+ size_t src_reshape_nb[GGML_MAX_DIMS];
787+ src_reshape_nb[0 ] = ggml_type_size (src0->type );
788+ for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
789+ src_reshape_nb[i] = src_reshape_nb[i - 1 ] * dst->ne [i - 1 ];
790+ }
780791
781- aclTensor* trans_acl_src = ggml_cann_create_tensor (src_trans_buffer,
782- ggml_cann_type_mapping (src0->type ),ggml_type_size (src0->type ),
783- dst->ne , src_reshape_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
784- aclTensor* acl_dst = ggml_cann_create_tensor (dst);
792+ aclTensor* trans_acl_src = ggml_cann_create_tensor (src_trans_buffer,
793+ ggml_cann_type_mapping (src0->type ),ggml_type_size (src0->type ),
794+ dst->ne , src_reshape_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
795+ aclTensor* acl_dst = ggml_cann_create_tensor (dst);
785796
786- if (dst->type == src0->type ) {
787- cann_copy (ctx, trans_acl_src, acl_dst);
788- } else {
789- aclnn_cast (ctx, trans_acl_src, acl_dst, ggml_cann_type_mapping (dst->type ));
797+ if (dst->type == src0->type ) {
798+ cann_copy (ctx, trans_acl_src, acl_dst);
799+ } else {
800+ aclnn_cast (ctx, trans_acl_src, acl_dst, ggml_cann_type_mapping (dst->type ));
801+ }
802+ ggml_cann_release_resources (ctx, trans_acl_src, acl_dst);
790803 }
791-
792- ggml_cann_release_resources (ctx, trans_acl_src, acl_dst);
793804 return ;
794805}
795806