We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9447968 commit 7c3c454 Copy full SHA for 7c3c454
ggml/src/ggml-cuda/reduce_rows.cuh
@@ -40,7 +40,7 @@ static __global__ void reduce_rows_f32(const float * __restrict__ x, float * __r
40
sum = 0.0f;
41
if constexpr (width > WARP_SIZE) {
42
static_assert((width <= 1024) && (width % WARP_SIZE) == 0, "unexpected block_size");
43
- if (lane_id < (width / WARP_SIZE)) {
+ if (lane_id < (blockDim.x / WARP_SIZE)) {
44
sum = s_sum[lane_id];
45
}
46
sum = warp_reduce_sum(sum);
0 commit comments