CUDA: fix mul_mat_vec for CC 6.0

JohannesGaessler · JohannesGaessler · commit c80a4413ad91 · 2025-02-09T19:30:15.000+01:00
diff --git a/ggml/src/ggml-cuda/mmv.cu b/ggml/src/ggml-cuda/mmv.cu
@@ -255,7 +255,12 @@ void ggml_cuda_op_mul_mat_vec(
     GGML_ASSERT(src1_ncols == 1);
 
     const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
+#ifdef GGML_CUDA_F16
     const enum ggml_prec prec = fast_fp16_available(cc) ? ggml_prec(dst->op_params[0]) : GGML_PREC_F32;
+#else
+    // FIXME: by default there is no code compiled for CC 6.0, so trying to use FP16 intrinsics results in a crash; fall back to FP32 there.
+    const enum ggml_prec prec = fast_fp16_available(cc) && cc != 600 ? ggml_prec(dst->op_params[0]) : GGML_PREC_F32;
+#endif // GGML_CUDA_F16
 
 
     // ggml_cuda_op provides single, contiguous matrices