Skip to content

Commit 7c3c454

Browse files
committed
Replace unneeded use of template argument
Using blockDim.x instead of the `width` template argument here will allow us to avoid compiling the kernel multiple times.
1 parent 9447968 commit 7c3c454

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/reduce_rows.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ static __global__ void reduce_rows_f32(const float * __restrict__ x, float * __r
     sum = 0.0f;
     if constexpr (width > WARP_SIZE) {
         static_assert((width <= 1024) && (width % WARP_SIZE) == 0, "unexpected block_size");
-        if (lane_id < (width / WARP_SIZE)) {
+        if (lane_id < (blockDim.x / WARP_SIZE)) {
             sum = s_sum[lane_id];
         }
         sum = warp_reduce_sum(sum);

0 commit comments

Comments
 (0)