We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 19d3c82 commit c80a441 · Copy full SHA for c80a441
ggml/src/ggml-cuda/mmv.cu
@@ -255,7 +255,12 @@ void ggml_cuda_op_mul_mat_vec(
255
GGML_ASSERT(src1_ncols == 1);
256
257
const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
258
+#ifdef GGML_CUDA_F16
259
const enum ggml_prec prec = fast_fp16_available(cc) ? ggml_prec(dst->op_params[0]) : GGML_PREC_F32;
260
+#else
261
+ // FIXME by default there is no code for CC 6.0 so trying to use FP16 intrinsics results in a crash
262
+ const enum ggml_prec prec = fast_fp16_available(cc) && cc != 600 ? ggml_prec(dst->op_params[0]) : GGML_PREC_F32;
263
+#endif // GGML_CUDA_F16
264
265
266
// ggml_cuda_op provides single, contiguous matrices
0 commit comments