Skip to content

Commit 8c01771

Browse files
committed
metal : minor
1 parent 7841653 commit 8c01771

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

ggml/src/ggml-metal.metal

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1211,20 +1211,22 @@ void mul_vec_q_n_f32_impl(
12111211

12121212
// each thread in a SIMD group deals with half a block.
12131213
for (int ib = ix; ib < nb; ib += nw/2) {
1214-
float sumy = 0;
1214+
float sumy[2] = { 0.f, 0.f };
12151215

1216+
#pragma unroll
12161217
for (int i = 0; i < 8; i += 2) {
1217-
sumy += yb[i + 0] + yb[i + 1];
1218+
sumy[0] += yb[i + 0] + yb[i + 1];
12181219
yl[i + 0] = yb[i + 0];
12191220
yl[i + 1] = yb[i + 1]/256.f;
12201221

1221-
sumy += yb[i + 16] + yb[i + 17];
1222+
sumy[1] += yb[i + 16] + yb[i + 17];
12221223
yl[i + 8] = yb[i + 16]/16.f;
12231224
yl[i + 9] = yb[i + 17]/4096.f;
12241225
}
12251226

1227+
#pragma unroll
12261228
for (int row = 0; row < nr; row++) {
1227-
sumf[row] += block_q_n_dot_y(ax[row] + ib, sumy, yl, il);
1229+
sumf[row] += block_q_n_dot_y(ax[row] + ib, sumy[0] + sumy[1], yl, il);
12281230
}
12291231

12301232
yb += QK4_0 * 16;

0 commit comments

Comments
 (0)