cuda : fix bounds check for src0 rows in MMVQ kernel (whisper/2231)

ggerganov · JohannesGaessler · ggerganov · commit dae7aa62e4d8 · 2024-06-16T20:30:48.000+03:00
* cuda : fix bounds check for src0 rows in MMVQ kernel

* Update ggml-cuda/mmvq.cu

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
diff --git a/src/ggml-cuda/mmvq.cu b/src/ggml-cuda/mmvq.cu
@@ -117,7 +117,7 @@ static __global__ void mul_mat_vec_q(
             tmp[j][i] = warp_reduce_sum(tmp[j][i]);
         }
 
-        if (threadIdx.x < rows_per_cuda_block) {
+        if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || row0 + threadIdx.x < nrows_dst)) {
             dst[j*nrows_dst + row0 + threadIdx.x] = tmp[j][threadIdx.x];
         }
     }

Original file line number	Diff line number	Diff line change
`@@ -117,7 +117,7 @@ static __global__ void mul_mat_vec_q(`
`117`	`117`	`tmp[j][i] = warp_reduce_sum(tmp[j][i]);`
`118`	`118`	`}`
`119`	`119`
`120`		`- if (threadIdx.x < rows_per_cuda_block) {`
	`120`	`+ if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || row0 + threadIdx.x < nrows_dst)) {`
`121`	`121`	`dst[j*nrows_dst + row0 + threadIdx.x] = tmp[j][threadIdx.x];`
`122`	`122`	`}`
`123`	`123`	`}`