@@ -754,9 +754,7 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy,
754754 const sycl::range<3 > block_dims (1 , GGML_SYCL_MMV_Y, QK_WARP_SIZE);
755755 {
756756 stream->submit ([&](sycl::handler & cgh) {
757- // TODO: What's the purpose of these?
758- // auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
759- // auto ksigns64_ptr_ct1 = &ksigns64[0];
757+
760758
761759 cgh.parallel_for (
762760 sycl::nd_range<3 >(block_nums * block_dims, block_dims),
@@ -780,9 +778,7 @@ static void mul_mat_vec_iq2_s_q8_1_sycl(const void *vx, const void *vy,
780778 {
781779
782780 stream->submit ([&](sycl::handler &cgh) {
783- // TODO: What's the purpose of these?
784- // auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
785- // auto ksigns64_ptr_ct1 = &ksigns64[0];
781+
786782
787783 cgh.parallel_for (
788784 sycl::nd_range<3 >(block_nums * block_dims, block_dims),
@@ -806,9 +802,7 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy,
806802 {
807803
808804 stream->submit ([&](sycl::handler &cgh) {
809- // TODO: What's the purpose of these?
810- // auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
811- // auto ksigns64_ptr_ct1 = &ksigns64[0];
805+
812806
813807 cgh.parallel_for (
814808 sycl::nd_range<3 >(block_nums * block_dims, block_dims),
@@ -832,8 +826,7 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy,
832826 {
833827
834828 stream->submit ([&](sycl::handler &cgh) {
835- // TODO: What's the purpose of this?
836- // auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
829+
837830
838831 cgh.parallel_for (
839832 sycl::nd_range<3 >(block_nums * block_dims, block_dims),
@@ -857,9 +850,7 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
857850 {
858851
859852 stream->submit ([&](sycl::handler &cgh) {
860- // TODO: What's the purpose of these?
861- // auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
862- // auto ksigns64_ptr_ct1 = &ksigns64[0];
853+
863854
864855 cgh.parallel_for (
865856 sycl::nd_range<3 >(block_nums * block_dims, block_dims),
@@ -958,8 +949,7 @@ void ggml_sycl_op_mul_mat_vec_q(
958949 const size_t q8_1_bs = QK8_1;
959950 // the main device has a larger memory buffer to hold the results from all GPUs
960951 // nrows_dst == nrows of the matrix that the kernel writes into
961- // TODO: nrows_dst is unused. Please check.
962- // const int64_t nrows_dst = id == ctx.device ? ne00 : row_diff;
952+
963953 for (int i = 0 ; i < src1_ncols; i++)
964954 {
965955 const size_t src1_ddq_i_offset = i * src1_padded_col_size * q8_1_ts / q8_1_bs;
0 commit comments