Skip to content

Commit 5266eee

Browse files
author
Iwan Kawrakow
committed
Opt from #880 also for iqk cuda gemv
1 parent 5e7f671 commit 5266eee

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

ggml/src/ggml-cuda/iqk_mmvq_templates.cuh

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ __device__ void iqk_mul_mat_vec_q_kerne(
104 104
}
105 105

106 106
if (threadIdx.x < rows_per_cuda_block && (rows_per_cuda_block == 1 || row0 + threadIdx.x < nrows_dst)) {
107-
dst[j*nrows_dst + row0 + threadIdx.x] = bias ? tmp[j][threadIdx.x] + bias[j*nrows_dst + row0 + threadIdx.x] : tmp[j][threadIdx.x];
107+
dst[j*nrows_dst + row0 + threadIdx.x] = bias ? tmp[j][threadIdx.x] + bias[row0 + threadIdx.x] : tmp[j][threadIdx.x];
108 108
}
109 109
}
110 110
}
@@ -211,8 +211,8 @@ __device__ void iqk_fused_mul_mat_vec_q_kernel(
211 211
default: {
212 212
constexpr float alpha = 1.702f;
213 213
constexpr float limit = 7.0f;
214-
g += bias_g[j*nrows_dst + row0 + threadIdx.x];
215-
u += bias_u[j*nrows_dst + row0 + threadIdx.x];
214+
g += bias_g[row0 + threadIdx.x];
215+
u += bias_u[row0 + threadIdx.x];
216 216
g = fminf(g, limit);
217 217
u = fmaxf(fminf(u, limit), -limit);
218 218
r = g / (1.0f + expf(-g * alpha)) * (1.0f + u);

0 commit comments

Comments
 (0)