@@ -904,6 +904,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
904904        return ;
905905    }
906906    //  TODO: simplify
907+     const  size_t  CANN_DUP_OP_SUPPORTED_MAX_ROWS = 65535 ;
907908    if  (src->type  == GGML_TYPE_F16) {
908909        if  (dst->type  == GGML_TYPE_Q8_0) {
909910            aclrtlaunch_ascendc_quantize_f16_q8_0 (
@@ -931,7 +932,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
931932                if  (src->nb [0 ] == src_type_size) {
932933                    //  src0 is contiguous on first dimension, copy by rows
933934                    int64_t  rows_num = ggml_nrows (src);
934- 
935+                      GGML_ASSERT (rows_num <= CANN_DUP_OP_SUPPORTED_MAX_ROWS); 
935936                    aclrtlaunch_ascendc_dup_by_rows_fp16 (
936937                        rows_num, ctx.stream (), src->data , dst->data ,
937938                        ((ggml_tensor*)src->extra )->ne ,
@@ -956,6 +957,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
956957                if  (src->nb [0 ] == src_type_size) {
957958                    //  src0 is contiguous on first dimension, copy by rows
958959                    int64_t  rows_num = ggml_nrows (src);
960+                     GGML_ASSERT (rows_num <= CANN_DUP_OP_SUPPORTED_MAX_ROWS);
959961                    aclrtlaunch_ascendc_dup_by_rows_fp16_to_fp32 (
960962                        rows_num, ctx.stream (), src->data , dst->data ,
961963                        ((ggml_tensor*)src->extra )->ne ,
@@ -999,6 +1001,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
9991001                if  (src->nb [0 ] == src_type_size) {
10001002                    //  src0 is contiguous on first dimension, copy by rows
10011003                    int64_t  rows_num = ggml_nrows (src);
1004+                     GGML_ASSERT (rows_num <= CANN_DUP_OP_SUPPORTED_MAX_ROWS);
10021005                    aclrtlaunch_ascendc_dup_by_rows_fp32 (
10031006                        rows_num, ctx.stream (), src->data , dst->data ,
10041007                        ((ggml_tensor*)src->extra )->ne ,
@@ -1025,6 +1028,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
10251028                if  (src->nb [0 ] == src_type_size) {
10261029                    //  src0 is contiguous on first dimension, copy by rows
10271030                    int64_t  rows_num = ggml_nrows (src);
1031+                     GGML_ASSERT (rows_num <= CANN_DUP_OP_SUPPORTED_MAX_ROWS);
10281032                    aclrtlaunch_ascendc_dup_by_rows_fp32_to_fp16 (
10291033                        rows_num, ctx.stream (), src->data , dst->data ,
10301034                        ((ggml_tensor*)src->extra )->ne ,
@@ -2315,8 +2319,6 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
23152319            {
23162320                if  ((src0->ne [0 ] % 8 ) != 0 ) {
23172321                    size_t  dst_len = src1->ne [0 ] * src1->ne [1 ] * src1->ne [2 ] * src0->ne [0 ] * ggml_type_size (GGML_TYPE_F32);
2318- /*                      printf("\n\nggml_cann_get_rows: row elements:%d, src1->ne[0]:%d, src1->ne[1]:%d, src1->ne[2]%d, src0->ne[0]:%d, ggml_type_size(GGML_TYPE_F32):%d, dst_len:%d.\n", src0->ne[0], 
2319-                     src1->ne[0], src1->ne[1], src1->ne[2], src0->ne[0], ggml_type_size(GGML_TYPE_F32), dst_len); */  
23202322                    ACL_CHECK (aclrtMemset ((char *)dst->data , dst_len, 0 , dst_len));
23212323                }
23222324                aclrtlaunch_ascendc_get_row_f32 (
@@ -2332,8 +2334,6 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
23322334        {
23332335            if  ((src0->ne [0 ] % 16 ) != 0 ) {
23342336                size_t  dst_len = src1->ne [0 ] * src1->ne [1 ] * src1->ne [2 ] * src0->ne [0 ] * ggml_type_size (GGML_TYPE_F32); //  out is also f32, even input is f16
2335- /*                       printf("\n\nggml_cann_get_rows: row elements:%d, src1->ne[0]:%d, src1->ne[1]:%d, src1->ne[2]:%d, src0->ne[0]:%d, ggml_type_size(GGML_TYPE_F32):%d, dst_len:%d.\n", src0->ne[0], 
2336-                 src1->ne[0], src1->ne[1], src1->ne[2], src0->ne[0], ggml_type_size(GGML_TYPE_F32), dst_len); */  
23372337                ACL_CHECK (aclrtMemset ((char *)dst->data , dst_len, 0 , dst_len));
23382338            }
23392339            aclrtlaunch_ascendc_get_row_f16 (
0 commit comments