Update k directly in the for-loop header, as the loop is not a candidate for unrolling

am17an · JohannesGaessler · web-flow · commit 5016ca50f750 · 2025-09-15T10:47:46.000+08:00
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
diff --git a/ggml/src/ggml-cuda/mmf.cuh b/ggml/src/ggml-cuda/mmf.cuh
@@ -67,9 +67,8 @@ static __global__ void mul_mat_f(
                 slot_map[j] = -1;
             }
 
-            for (int k_base = 0; k_base < nchannels_dst; k_base += warp_size) {
-                int k = k_base + threadIdx.x;
-                int match = (k < nchannels_dst) && (id_row[k*stride_col_id] == expert_idx);
+            for (int k = threadIdx.x; k < nchannels_dst; k += warp_size) {
+                int match = id_row[k*stride_col_id] == expert_idx;
 
                 if (match) {
                     slot_map[j] = k;