Skip to content

Commit 975ef38

Browse files
committed
Don't use MMVQ fusion on Pascal and lower
1 parent 65a098f commit 975ef38

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2131,6 +2131,11 @@ static bool ggml_cuda_should_fuse_mul_mat_vec_q(const ggml_tensor * tensor) {
21312131
bool use_mul_mat_vec_q = ggml_is_quantized(src0->type) && !bad_padding_clear && src1->type == GGML_TYPE_F32 &&
21322132
dst->type == GGML_TYPE_F32 && src1->ne[1] <= MMVQ_MAX_BATCH_SIZE;
21332133

2134+
// fusion is not universally faster on Pascal
2135+
const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
2136+
if (cc <= GGML_CUDA_CC_PASCAL) {
2137+
return false;
2138+
}
21342139
//we only support fusion for ncols_dst = 1
21352140
if (tensor->op == GGML_OP_MUL_MAT && dst->ne[1] != 1) {
21362141
return false;

0 commit comments

Comments
 (0)