
Commit f06a1ef

Signed-off-by: Xiaodong Ye <[email protected]>
1 parent abbf408 commit f06a1ef

2 files changed: +12 -12 lines

ggml/src/ggml-cuda/common.cuh (3 additions, 3 deletions)

```diff
@@ -198,9 +198,9 @@ typedef float2 dfloat2;
 #define GGML_USE_VMM
 #endif // (!defined(GGML_USE_HIP) && !defined(GGML_CUDA_NO_VMM)) || (defined(GGML_USE_HIP) && !defined(GGML_HIP_NO_VMM))
 
-#if (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
+#if (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_MUSA)
 #define FP16_AVAILABLE
-#endif // (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
+#endif // (defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_MUSA)
 
 #if defined(FP16_AVAILABLE) && __CUDA_ARCH__ != GGML_CUDA_CC_DP4A
 #define FAST_FP16_AVAILABLE
@@ -236,7 +236,7 @@ static bool fp16_available(const int cc) {
 
 static bool fast_fp16_available(const int cc) {
     return (GGML_CUDA_CC_IS_NVIDIA(cc) && fp16_available(cc) && cc != GGML_CUDA_CC_DP4A) ||
-           GGML_CUDA_CC_IS_AMD(cc);
+           GGML_CUDA_CC_IS_AMD(cc) || GGML_CUDA_CC_IS_MTHREADS(cc);
 }
 
 // To be used for feature selection of external libraries, e.g. cuBLAS.
```
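This makes FP16 intrinsics unconditionally available in MUSA builds and lets Moore Threads devices pass the runtime fast-FP16 check. A minimal sketch of how such a compile-time/runtime pair of gates is typically consumed, with `vec_scale_f16` and `scale_dispatch` being hypothetical names rather than ggml code (`NO_DEVICE_CODE` is ggml's marker for device code that should be unreachable):

```cuda
// Sketch only, not ggml source. FP16_AVAILABLE guards device code per
// compilation pass, while fast_fp16_available(cc) from common.cuh is the
// host-side runtime check for the actual device.
#include <cuda_fp16.h>

__global__ void vec_scale_f16(half * x, const half s, const int n) {
#ifdef FP16_AVAILABLE
    // Half intrinsics are only compiled in when the arch-dependent gate holds.
    const int i = blockIdx.x*blockDim.x + threadIdx.x;
    if (i < n) {
        x[i] = __hmul(x[i], s);
    }
#else
    NO_DEVICE_CODE; // this path should never be launched
#endif // FP16_AVAILABLE
}

// Host side: take the FP16 path only when the device's compute capability
// passes the runtime check; otherwise fall back to an F32 kernel.
void scale_dispatch(half * x, const float s, const int n, const int cc, cudaStream_t stream) {
    if (fast_fp16_available(cc)) {
        vec_scale_f16<<<(n + 255)/256, 256, 0, stream>>>(x, __float2half(s), n);
    } else {
        // ... launch an F32 fallback kernel here ...
    }
}
```

The split matters because `FP16_AVAILABLE` is evaluated per compilation pass via `__CUDA_ARCH__` (or, after this patch, via `GGML_USE_MUSA`), while `fast_fp16_available(cc)` decides at runtime which compiled path to launch.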

ggml/src/ggml-cuda/ggml-cuda.cu (9 additions, 9 deletions)

```diff
@@ -3028,10 +3028,16 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
         }
 #ifdef GGML_USE_MUSA
         const int cc = ggml_cuda_info().devices[dev_ctx->device].cc;
-        if (GGML_CUDA_CC_IS_MTHREADS(cc) && GGML_CUDA_CC_IS_QY1(cc) &&
-            b->type == GGML_TYPE_F16 && b->ne[2]*b->ne[3] > 1 &&
+        if (GGML_CUDA_CC_IS_MTHREADS(cc) && b->ne[2]*b->ne[3] > 1 &&
             !ggml_is_transposed(a) && !ggml_is_transposed(b)) {
-            return false;
+            if (GGML_CUDA_CC_IS_QY1(cc) && op->op == GGML_OP_MUL_MAT
+                && b->type == GGML_TYPE_F16) {
+                return false;
+            }
+            if (GGML_CUDA_CC_IS_QY2(cc) && op->op == GGML_OP_MUL_MAT_ID &&
+                a->type == GGML_TYPE_Q2_K && b->type == GGML_TYPE_F32) {
+                return false;
+            }
         }
 #endif // GGML_USE_MUSA
         switch (a->type) {
@@ -3058,12 +3064,6 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
             case GGML_TYPE_IQ4_NL:
             case GGML_TYPE_IQ4_XS:
             case GGML_TYPE_BF16:
-#ifdef GGML_USE_MUSA
-                if (GGML_CUDA_CC_IS_MTHREADS(cc) && GGML_CUDA_CC_IS_QY2(cc) &&
-                    a->type == GGML_TYPE_Q2_K) {
-                    return false;
-                }
-#endif // GGML_USE_MUSA
                 return true;
             default:
                 return false;
```
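The rewritten guard narrows the old blanket rejections to the two combinations the patch singles out: batched F16 `GGML_OP_MUL_MAT` on QY1 devices, and batched Q2_K x F32 `GGML_OP_MUL_MAT_ID` on QY2 devices; the unconditional Q2_K rejection buried in the type switch is dropped. Pulled out as a standalone predicate, the new decision logic amounts to the following sketch (`musa_reject_batched_mm` is a hypothetical helper, not part of the patch; the macros and accessors are the ones used in the diff):

```cuda
// Sketch only: the decision table implemented by the new MUSA branch in
// ggml_backend_cuda_device_supports_op. Returning true means "reject,
// let another backend (e.g. the CPU) handle this op".
static bool musa_reject_batched_mm(const int cc, const ggml_tensor * a,
                                   const ggml_tensor * b, const enum ggml_op op) {
    // Only batched (ne[2]*ne[3] > 1), non-transposed matmuls on Moore
    // Threads devices are affected at all.
    if (!GGML_CUDA_CC_IS_MTHREADS(cc) || b->ne[2]*b->ne[3] <= 1 ||
        ggml_is_transposed(a) || ggml_is_transposed(b)) {
        return false;
    }
    // QY1: reject batched F16 MUL_MAT.
    if (GGML_CUDA_CC_IS_QY1(cc) && op == GGML_OP_MUL_MAT && b->type == GGML_TYPE_F16) {
        return true;
    }
    // QY2: reject batched Q2_K x F32 MUL_MAT_ID.
    if (GGML_CUDA_CC_IS_QY2(cc) && op == GGML_OP_MUL_MAT_ID &&
        a->type == GGML_TYPE_Q2_K && b->type == GGML_TYPE_F32) {
        return true;
    }
    return false;
}
```

Since an op for which `supports_op` returns false is simply scheduled onto another backend, tightening the predicate keeps more work on the GPU: Q2_K is now refused only for the specific MUL_MAT_ID pattern, not for every matmul as before.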
