Skip to content

Commit 44987f7

Browse files
committed
CUDA: Remove unneeded bias/gate dims in fused mmvq
It was pointed out in a review comment on #16847 that only a single value is needed per target col per thread
1 parent 229bf68 commit 44987f7

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

ggml/src/ggml-cuda/mmvq.cu

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ static __global__ void mul_mat_vec_q(
190190

191191
const uint32_t channel_bias = ids ? channel_x : channel_dst;
192192

193-
float x_biases[ncols_dst][rows_per_cuda_block] = { { 0.0f } };
194-
float gate_biases[ncols_dst][rows_per_cuda_block] = { { 0.0f } };
193+
float x_biases[ncols_dst] = { { 0.0f } };
194+
float gate_biases[ncols_dst] = { { 0.0f } };
195195
if constexpr (has_fusion) {
196196
if (use_bias) {
197197
x_bias = x_bias + sample_dst*stride_sample_dst + channel_bias*stride_channel_dst + row0;
@@ -200,7 +200,7 @@ static __global__ void mul_mat_vec_q(
200200
if (threadIdx.x < rows_per_cuda_block && threadIdx.y == 0 &&
201201
(rows_per_cuda_block == 1 || uint32_t(row0 + threadIdx.x) < stride_col_dst)) {
202202
for (int j = 0; j < ncols_dst; ++j) {
203-
x_biases[j][threadIdx.x] = x_bias[j * stride_col_dst + threadIdx.x];
203+
x_biases[j] = x_bias[j * stride_col_dst + threadIdx.x];
204204
}
205205
}
206206
}
@@ -209,7 +209,7 @@ static __global__ void mul_mat_vec_q(
209209
if (threadIdx.x < rows_per_cuda_block && threadIdx.y == 0 &&
210210
(rows_per_cuda_block == 1 || uint32_t(row0 + threadIdx.x) < stride_col_dst)) {
211211
for (int j = 0; j < ncols_dst; ++j) {
212-
gate_biases[j][threadIdx.x] = gate_bias[j * stride_col_dst + threadIdx.x];
212+
gate_biases[j] = gate_bias[j * stride_col_dst + threadIdx.x];
213213
}
214214
}
215215
}
@@ -299,12 +299,12 @@ static __global__ void mul_mat_vec_q(
299299
float result = tmp[j][threadIdx.x];
300300
if constexpr (has_fusion) {
301301
if (use_bias) {
302-
result += x_biases[j][threadIdx.x];
302+
result += x_biases[j];
303303
}
304304
if (use_gate) {
305305
float gate_value = tmp_gate[j][threadIdx.x];
306306
if (use_gate_bias) {
307-
gate_value += gate_biases[j][threadIdx.x];
307+
gate_value += gate_biases[j];
308308
}
309309
switch (active_glu) {
310310
case GGML_GLU_OP_SWIGLU:

0 commit comments

Comments
 (0)