We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent bf08ea5 commit 5016ca5Copy full SHA for 5016ca5
ggml/src/ggml-cuda/mmf.cuh
@@ -67,9 +67,8 @@ static __global__ void mul_mat_f(
67
slot_map[j] = -1;
68
}
69
70
- for (int k_base = 0; k_base < nchannels_dst; k_base += warp_size) {
71
- int k = k_base + threadIdx.x;
72
- int match = (k < nchannels_dst) && (id_row[k*stride_col_id] == expert_idx);
+ for (int k = threadIdx.x; k < nchannels_dst; k += warp_size) {
+ int match = id_row[k*stride_col_id] == expert_idx;
73
74
if (match) {
75
slot_map[j] = k;
0 commit comments