We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 43d078f · commit dae7aa6 · Copy full SHA for dae7aa6
src/ggml-cuda/mmvq.cu
@@ -117,7 +117,7 @@ static __global__ void mul_mat_vec_q(
117
tmp[j][i] = warp_reduce_sum(tmp[j][i]);
118
}
119
120
- if (threadIdx.x < rows_per_cuda_block) {
+ if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || row0 + threadIdx.x < nrows_dst)) {
121
dst[j*nrows_dst + row0 + threadIdx.x] = tmp[j][threadIdx.x];
122
123
0 commit comments