Skip to content

Commit 2759ccd

Browse files
authored
CUDA: avoid mul + bias fusion when using split buffers (ggml-org#16935)
1 parent c5023da commit 2759ccd

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2115,6 +2115,14 @@ static bool ggml_cuda_should_fuse_mul_mat_vec_f(const ggml_tensor * tensor) {
21152115
const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
21162116
use_mul_mat_vec_f = use_mul_mat_vec_f && ggml_cuda_should_use_mmvf(src0->type, cc, src0->ne, is_mul_mat_id ? src1->ne[2] : src1->ne[1]);
21172117

2118+
const bool split = ggml_backend_buft_is_cuda_split(src0->buffer->buft) ||
2119+
ggml_backend_buft_is_cuda_split(src1->buffer->buft);
2120+
2121+
//TODO: add support for fusion for split buffers
2122+
if (split) {
2123+
return false;
2124+
}
2125+
21182126
//we only support fusion for ncols_dst = 1
21192127
if (tensor->op == GGML_OP_MUL_MAT && dst->ne[1] != 1) {
21202128
return false;
@@ -2154,6 +2162,15 @@ static bool ggml_cuda_should_fuse_mul_mat_vec_q(const ggml_tensor * tensor) {
21542162
return false;
21552163
}
21562164

2165+
2166+
const bool split = ggml_backend_buft_is_cuda_split(src0->buffer->buft) ||
2167+
ggml_backend_buft_is_cuda_split(src1->buffer->buft);
2168+
2169+
//TODO: add support for fusion for split buffers
2170+
if (split) {
2171+
return false;
2172+
}
2173+
21572174
return use_mul_mat_vec_q;
21582175
}
21592176

0 commit comments

Comments
 (0)