@@ -477,6 +477,94 @@ void ggml_cann_l2_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
     ggml_cann_release_resources(ctx, dims_array, p_scalar, acl_src, acl_dst, acl_div);
 }
 
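+// Cross-entropy loss: dst = -1/nr * sum(src1 * log_softmax(src0)),
+// where src0 holds the logits, src1 the labels,
+// nr is the number of rows (samples) and nc the number of classes per row.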
+void ggml_cann_cross_entropy_loss(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
+    ggml_tensor * src0 = dst->src[0];
+    ggml_tensor * src1 = dst->src[1];
+
+    const int64_t nc = src0->ne[0];
+    const int64_t nr = ggml_nrows(src0);
+
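+    // 2-D ACL view of the logits (src0): nc values per row, nr rows.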
+    int64_t logits_ne[] = {nc, nr};
+    size_t logits_nb[2];
+    logits_nb[0] = ggml_type_size(src0->type);
+    logits_nb[1] = logits_nb[0] * logits_ne[0];
+
+    aclTensor * acl_logits = ggml_cann_create_tensor(src0->data, ACL_FLOAT, sizeof(float), logits_ne, logits_nb, 2);
+
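+    // Temporary buffer holding log_softmax(logits), one distribution per row.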
+    size_t log_softmax_type_size = sizeof(float);
+    int64_t log_softmax_n_bytes = nr * nc * log_softmax_type_size;
+    ggml_cann_pool_alloc log_softmax_allocator(ctx.pool(), log_softmax_n_bytes);
+    void * log_softmax_buffer = log_softmax_allocator.get();
+
+    int64_t log_softmax_ne[] = {nc, nr};
+    size_t log_softmax_nb[2];
+    log_softmax_nb[0] = log_softmax_type_size;
+    log_softmax_nb[1] = log_softmax_nb[0] * log_softmax_ne[0];
+    aclTensor * acl_log_softmax = ggml_cann_create_tensor(log_softmax_buffer, ACL_FLOAT, log_softmax_type_size, log_softmax_ne, log_softmax_nb, 2);
+
+    GGML_CANN_CALL_ACLNN_OP(ctx, LogSoftmax, acl_logits, 1, acl_log_softmax);
+
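+    // Matching 2-D ACL view of the labels (src1).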
+    int64_t labels_ne[] = {nc, nr};
+    size_t labels_nb[2];
+    labels_nb[0] = ggml_type_size(src1->type);
+    labels_nb[1] = labels_nb[0] * labels_ne[0];
+    aclTensor * acl_labels = ggml_cann_create_tensor(src1->data, ACL_FLOAT, sizeof(float), labels_ne, labels_nb, 2);
+
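+    // Element-wise product: labels * log_softmax(logits).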
+    size_t mul_type_size = sizeof(float);
+    int64_t mul_n_bytes = nr * nc * mul_type_size;
+    ggml_cann_pool_alloc mul_allocator(ctx.pool(), mul_n_bytes);
+    void * mul_buffer = mul_allocator.get();
+
+    int64_t mul_ne[] = {nc, nr};
+    size_t mul_nb[2];
+    mul_nb[0] = mul_type_size;
+    mul_nb[1] = mul_nb[0] * mul_ne[0];
+    aclTensor * acl_mul_result = ggml_cann_create_tensor(mul_buffer, ACL_FLOAT, mul_type_size, mul_ne, mul_nb, 2);
+
+    GGML_CANN_CALL_ACLNN_OP(ctx, Mul, acl_log_softmax, acl_labels, acl_mul_result);
+
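+    // Sum the products over the class dimension, giving one partial loss per row (sample).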
+    size_t sum_per_sample_type_size = sizeof(float);
+    int64_t sum_per_sample_n_bytes = nr * sum_per_sample_type_size;
+    ggml_cann_pool_alloc sum_per_sample_allocator(ctx.pool(), sum_per_sample_n_bytes);
+    void * sum_per_sample_buffer = sum_per_sample_allocator.get();
+
+    int64_t sum_per_sample_ne[] = {nr};
+    size_t sum_per_sample_nb[1];
+    sum_per_sample_nb[0] = sum_per_sample_type_size;
+    aclTensor * acl_sum_per_sample = ggml_cann_create_tensor(sum_per_sample_buffer, ACL_FLOAT, sum_per_sample_type_size, sum_per_sample_ne, sum_per_sample_nb, 1);
+
+    std::vector<int64_t> sum_dims = {1};
+    aclIntArray * dims_array = aclCreateIntArray(sum_dims.data(), sum_dims.size());
+    bool keep_dims = false;
+
+    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_mul_result, dims_array, keep_dims, ACL_FLOAT, acl_sum_per_sample);
+
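+    // Sum the per-sample values into a single scalar.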
+    size_t total_sum_type_size = sizeof(float);
+    int64_t total_sum_n_bytes = 1 * total_sum_type_size;
+    ggml_cann_pool_alloc total_sum_allocator(ctx.pool(), total_sum_n_bytes);
+    void * total_sum_buffer = total_sum_allocator.get();
+
+    int64_t total_sum_ne[] = {1};
+    size_t total_sum_nb[1];
+    total_sum_nb[0] = total_sum_type_size;
+
+    aclTensor * acl_total_sum = ggml_cann_create_tensor(total_sum_buffer, ACL_FLOAT, total_sum_type_size, total_sum_ne, total_sum_nb, 1);
+
+    std::vector<int64_t> total_sum_dims = {0};
+    aclIntArray * total_sum_dims_array = aclCreateIntArray(total_sum_dims.data(), total_sum_dims.size());
+
+    GGML_CANN_CALL_ACLNN_OP(ctx, ReduceSum, acl_sum_per_sample, total_sum_dims_array, keep_dims, ACL_FLOAT, acl_total_sum);
+
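+    // Scale by -1/nr and write the final loss to dst.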
+    float value = -1.0f / static_cast<float>(nr);
+    aclScalar * scale_factor = aclCreateScalar(&value, aclDataType::ACL_FLOAT);
+    aclTensor * acl_dst = ggml_cann_create_tensor(dst->data, ACL_FLOAT, sizeof(float), total_sum_ne, total_sum_nb, 1);
+
+    GGML_CANN_CALL_ACLNN_OP(ctx, Muls, acl_total_sum, scale_factor, acl_dst);
+
+    ggml_cann_release_resources(ctx, acl_logits, acl_log_softmax, acl_labels, acl_mul_result, acl_sum_per_sample, acl_total_sum, acl_dst,
+                                scale_factor, dims_array, total_sum_dims_array);
+}
+
 void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
     ggml_tensor * src = dst->src[0];
 