Skip to content

Commit 4b4e0fd

Browse files
author
ZhouYu
committed
1
1 parent 97e91c2 commit 4b4e0fd

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,6 @@
 #define GGML_CUDA_CC_IS_CDNA(cc) (cc >= GGML_CUDA_CC_CDNA && cc < GGML_CUDA_CC_RDNA1)
 
 // Moore Threads
-#define GGML_CUDA_MUSA_ARCH_IS_QY1 (__MUSA_ARCH__ <= 210)
-
 #define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210) // MTT S80, MTT S3000
 #define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220) // MTT S4000
 #define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310) // TBD
@@ -215,9 +213,9 @@ typedef float2 dfloat2;
 #define FP16_MMA_AVAILABLE
 #endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || defined(RDNA4))
 
-#if defined(GGML_USE_MUSA) && !GGML_CUDA_MUSA_ARCH_IS_QY1
+#if defined(GGML_USE_MUSA) && __MUSA_ARCH__ >= 220
 #define FP16_MMA_AVAILABLE
-#endif // defined(GGML_USE_MUSA) && !GGML_CUDA_MUSA_ARCH_IS_QY1
+#endif // defined(GGML_USE_MUSA) && __MUSA_ARCH__ >= 220
 
 #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING
 #define NEW_MMA_AVAILABLE
@@ -227,9 +225,9 @@ typedef float2 dfloat2;
 #define CP_ASYNC_AVAILABLE
 #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
 
-#if !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && GGML_CUDA_MUSA_ARCH_IS_QY1)
+#if !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && __MUSA_ARCH__ < 220)
 #define FLASH_ATTN_AVAILABLE
-#endif // !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && GGML_CUDA_MUSA_ARCH_IS_QY1)
+#endif // !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && __MUSA_ARCH__ < 220)
 
 static bool fp16_available(const int cc) {
 return ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_PASCAL;

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3021,12 +3021,14 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
 if (b->type == GGML_TYPE_F16 && a->type != GGML_TYPE_F16) {
 return false;
 }
-#if defined(GGML_USE_MUSA) && GGML_CUDA_MUSA_ARCH_IS_QY1
-if (b->type == GGML_TYPE_F16 && b->ne[2]*b->ne[3] > 1 &&
+#if defined(GGML_USE_MUSA)
+const int cc = ggml_cuda_info().devices[dev_ctx->device].cc;
+if (GGML_CUDA_CC_IS_MTHREADS(cc) && GGML_CUDA_CC_IS_QY1(cc) &&
+b->type == GGML_TYPE_F16 && b->ne[2]*b->ne[3] > 1 &&
 !ggml_is_transposed(a) && !ggml_is_transposed(b)) {
 return false;
 }
-#endif // defined(GGML_USE_MUSA) && GGML_CUDA_MUSA_ARCH_IS_QY1
+#endif // defined(GGML_USE_MUSA)
 switch (a->type) {
 case GGML_TYPE_F32:
 case GGML_TYPE_F16:
@@ -3051,11 +3053,12 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
 case GGML_TYPE_IQ4_NL:
 case GGML_TYPE_IQ4_XS:
 case GGML_TYPE_BF16:
-#if defined(GGML_USE_MUSA) && !GGML_CUDA_MUSA_ARCH_IS_QY1
-if (a->type == GGML_TYPE_Q2_K) {
+#if defined(GGML_USE_MUSA)
+if (GGML_CUDA_CC_IS_MTHREADS(cc) && GGML_CUDA_CC_IS_QY2(cc) &&
+a->type == GGML_TYPE_Q2_K) {
 return false;
 }
-#endif // defined(GGML_USE_MUSA) && !GGML_CUDA_MUSA_ARCH_IS_QY1
+#endif // defined(GGML_USE_MUSA)
 return true;
 default:
 return false;

0 commit comments

Comments
 (0)