@@ -2380,25 +2380,41 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
23802380 if (src0->ne [3 ] == 1 && src1->ne [3 ] == 1 ) {
23812381 // KQ single-batch
23822382 // mmv p021 was specific for these dimensions
2383+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_vec_p021\n " , __func__);
23832384 ggml_sycl_mul_mat_vec_p021 (ctx, src0, src1, dst);
2385+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_vec_p021 done\n " , __func__);
23842386 } else {
23852387 // The kernel from the if path is faster for that specific case, but does not support all mul mats.
2388+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_batched_sycl\n " , __func__);
23862389 ggml_sycl_mul_mat_batched_sycl (ctx, src0, src1, dst);
2390+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_batched_sycl done\n " , __func__);
23872391 }
23882392 } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous (src0) && !ggml_is_transposed (src1) && src1->ne [1 ] == 1 ) {
23892393 // KQV single-batch
2394+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_vec_nc\n " , __func__);
23902395 ggml_sycl_mul_mat_vec_nc (ctx, src0, src1, dst);
2396+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_vec_nc done\n " , __func__);
23912397 } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_transposed (src0) && !ggml_is_transposed (src1) && src1->ne [2 ]*src1->ne [3 ] > 1 ) {
23922398 // KQ + KQV multi-batch
2399+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_batched_sycl\n " , __func__);
23932400 ggml_sycl_mul_mat_batched_sycl (ctx, src0, src1, dst);
2401+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_mul_mat_batched_sycl done\n " , __func__);
23942402 } else if (use_dequantize_mul_mat_vec) {
2403+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_dequantize_mul_mat_vec\n " , __func__);
23952404 ggml_sycl_op_mul_mat (ctx, src0, src1, dst, ggml_sycl_op_dequantize_mul_mat_vec, false );
2405+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_dequantize_mul_mat_vec done\n " , __func__);
23962406 } else if (use_mul_mat_vec_q) {
2407+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_vec_q\n " , __func__);
23972408 ggml_sycl_op_mul_mat (ctx, src0, src1, dst, ggml_sycl_op_mul_mat_vec_q, true );
2409+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_vec_q done\n " , __func__);
23982410 } else if (use_mul_mat_q) {
2411+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_q\n " , __func__);
23992412 ggml_sycl_op_mul_mat (ctx, src0, src1, dst, ggml_sycl_op_mul_mat_q, true );
2413+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_q done\n " , __func__);
24002414 } else {
2415+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_sycl\n " , __func__);
24012416 ggml_sycl_op_mul_mat (ctx, src0, src1, dst, ggml_sycl_op_mul_mat_sycl, false );
2417+ GGML_SYCL_DEBUG (" %s: call ggml_sycl_op_mul_mat_sycl done\n " , __func__);
24022418 }
24032419}
24042420
0 commit comments