@@ -2522,7 +2522,7 @@ template <ggml_type type, int mmq_x, int nwarps, bool need_check, bool fixup>
25222522static __device__ __forceinline__ void mul_mat_q_process_tile (
25232523 const char * __restrict__ x, const int offset_x, const int * __restrict__ y,
25242524 const int * __restrict__ ids_dst, float * __restrict__ dst, float * __restrict__ tmp_fixup,
2525- const int nrows_x, const int stride_row_x, const int ncols_y, const int stride_col_dst,
2525+ const int stride_row_x, const int ncols_y, const int stride_col_dst,
25262526 const int tile_x_max_i, const int tile_y_max_j, const int kb0_start, const int kb0_stop) {
25272527
25282528 constexpr int qk = ggml_cuda_type_traits<type>::qk;
@@ -2689,7 +2689,7 @@ static __global__ void mul_mat_q(
26892689
26902690 constexpr bool fixup = false ;
26912691 mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
2692- (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst,
2692+ (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
26932693 tile_x_max_i, tile_y_max_j, 0 , ncols_x/qk);
26942694 return ;
26952695 }
@@ -2767,7 +2767,7 @@ static __global__ void mul_mat_q(
27672767
27682768 constexpr bool fixup = false ; // All but (potentially) the last iterations write their data to dst rather than the fixup buffer.
27692769 mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
2770- (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst,
2770+ (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
27712771 tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop);
27722772
27732773 kbc += blocks_per_ne00;
@@ -2834,7 +2834,7 @@ static __global__ void mul_mat_q(
28342834
28352835 constexpr bool fixup = true ; // Last index writes its data to fixup buffer to avoid data races with other blocks.
28362836 mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
2837- (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, nrows_x, stride_row_x, ncols_y, stride_col_dst,
2837+ (x, offset_x, y + offset_y, ids_dst_shared, dst + offset_dst, tmp_fixup, stride_row_x, ncols_y, stride_col_dst,
28382838 tile_x_max_i, tile_y_max_j, kb0_start, kb0_stop);
28392839}
28402840
0 commit comments