diff --git a/src/ggml-cuda/ggml-cuda.cu b/src/ggml-cuda/ggml-cuda.cu index 060550c572..53a43241d6 100644 --- a/src/ggml-cuda/ggml-cuda.cu +++ b/src/ggml-cuda/ggml-cuda.cu @@ -1195,6 +1195,8 @@ static void ggml_cuda_op_mul_mat_cublas( const bool use_fp16 = (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT; if (src0->type == GGML_TYPE_BF16 && ggml_is_contiguous(src0) && row_diff == src0->ne[1]) { + // MIT licensed. Copyright (C) 2024 Iwan Kawrakow + // https://github.com/ikawrakow/ik_llama.cpp/pull/40 ggml_cuda_pool_alloc src1_as_bf16(ctx.pool(id)); if (src1->type != GGML_TYPE_BF16) { const to_bf16_cuda_t to_bf16_cuda = ggml_get_to_bf16_cuda(src1->type);