Skip to content

Commit b3efcef

Browse files
committed
3
Signed-off-by: Xiaodong Ye <[email protected]>
1 parent de06dd3 commit b3efcef

File tree

1 file changed: 5 additions and 2 deletions.

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1200,7 +1200,8 @@ static void ggml_cuda_op_mul_mat_cublas(
12001200

12011201
const bool use_fp16 = (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT;
12021202

1203-
if (!(GGML_CUDA_CC_IS_MTHREADS(cc) && cc < GGML_CUDA_CC_QY2) &&
1203+
if ((GGML_CUDA_CC_IS_NVIDIA(cc) || GGML_CUDA_CC_IS_AMD(cc) ||
1204+
(GGML_CUDA_CC_IS_MTHREADS(cc) && cc >= GGML_CUDA_CC_QY2)) &&
12041205
src0->type == GGML_TYPE_BF16 && ggml_is_contiguous(src0) && row_diff == src0->ne[1]) {
12051206
ggml_cuda_pool_alloc<nv_bfloat16> src1_as_bf16(ctx.pool(id));
12061207
if (src1->type != GGML_TYPE_BF16) {
@@ -1229,7 +1230,9 @@ static void ggml_cuda_op_mul_mat_cublas(
12291230

12301231
const to_fp32_cuda_t to_fp32_cuda = ggml_get_to_fp32_cuda(GGML_TYPE_BF16);
12311232
to_fp32_cuda(dst_bf16.get(), dst_dd_i, row_diff*src1_ncols, stream);
1232-
} else if (((GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_VOLTA) || GGML_CUDA_CC_IS_AMD(cc)) && use_fp16) {
1233+
} else if (((GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_VOLTA) ||
1234+
(GGML_CUDA_CC_IS_MTHREADS(cc) && cc >= GGML_CUDA_CC_QY2) ||
1235+
GGML_CUDA_CC_IS_AMD(cc)) && use_fp16) {
12331236
// convert src0 and src1 to fp16, multiply as fp16, convert dst to fp32
12341237
ggml_cuda_pool_alloc<half> src0_as_f16(ctx.pool(id));
12351238
if (src0->type != GGML_TYPE_F16) {

0 commit comments

Comments
 (0)