Skip to content

Commit 5016ca5

Browse files
Update k inside the loop as it's not a candidate for unrolling
Co-authored-by: Johannes Gäßler <[email protected]>
1 parent bf08ea5 commit 5016ca5

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

ggml/src/ggml-cuda/mmf.cuh

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -67,9 +67,8 @@ static __global__ void mul_mat_f(
67 67
slot_map[j] = -1;
68 68
}
69 69

70-
for (int k_base = 0; k_base < nchannels_dst; k_base += warp_size) {
71-
int k = k_base + threadIdx.x;
72-
int match = (k < nchannels_dst) && (id_row[k*stride_col_id] == expert_idx);
70+
for (int k = threadIdx.x; k < nchannels_dst; k += warp_size) {
71+
int match = id_row[k*stride_col_id] == expert_idx;
73 72

74 73
if (match) {
75 74
slot_map[j] = k;

0 commit comments

Comments
 (0)