Skip to content

Commit c80a441

Browse files
CUDA: fix mul_mat_vec for CC 6.0
1 parent 19d3c82 commit c80a441

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

ggml/src/ggml-cuda/mmv.cu

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -255,7 +255,12 @@ void ggml_cuda_op_mul_mat_vec(
255255
GGML_ASSERT(src1_ncols == 1);
256256

257257
const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
258+
#ifdef GGML_CUDA_F16
258259
const enum ggml_prec prec = fast_fp16_available(cc) ? ggml_prec(dst->op_params[0]) : GGML_PREC_F32;
260+
#else
261+
// FIXME by default there is no code for CC 6.0 so trying to use FP16 intrinsics results in a crash
262+
const enum ggml_prec prec = fast_fp16_available(cc) && cc != 600 ? ggml_prec(dst->op_params[0]) : GGML_PREC_F32;
263+
#endif // GGML_CUDA_F16
259264

260265

261266
// ggml_cuda_op provides single, contiguous matrices

0 commit comments

Comments
 (0)