|
76 | 76 | #define GGML_CUDA_CC_IS_CDNA(cc) (cc >= GGML_CUDA_CC_CDNA && cc < GGML_CUDA_CC_RDNA1) |
77 | 77 |
|
78 | 78 | // Moore Threads |
79 | | -#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 210) // MTT S80, MTT S3000 |
80 | | -#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 220) // MTT S4000 |
81 | | -#define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 310) // TBD |
| 79 | +#define GGML_CUDA_CC_IS_QY1_OR_EARLIER (__MUSA_ARCH__ < 220) |
| 80 | + |
| 81 | +#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210) // MTT S80, MTT S3000 |
| 82 | +#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220) // MTT S4000 |
| 83 | +#define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310) // TBD |
82 | 84 |
|
83 | | -#define GGML_CUDA_CC_TO_MTHREADS(cc) ((cc) - GGML_CUDA_CC_OFFSET_MTHREADS) |
84 | 85 | #define GGML_CUDA_CC_IS_MTHREADS(cc) (cc >= GGML_CUDA_CC_OFFSET_MTHREADS && cc < GGML_CUDA_CC_OFFSET_AMD) |
85 | 86 | #define GGML_CUDA_CC_IS_QY1(cc) (cc >= GGML_CUDA_CC_QY1 && cc < GGML_CUDA_CC_QY2) |
86 | 87 | #define GGML_CUDA_CC_IS_QY2(cc) (cc >= GGML_CUDA_CC_QY2 && cc < GGML_CUDA_CC_NG) |
@@ -226,9 +227,9 @@ typedef float2 dfloat2; |
226 | 227 | #define CP_ASYNC_AVAILABLE |
227 | 228 | #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE |
228 | 229 |
|
229 | | -#if !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && __MUSA_ARCH__ < GGML_CUDA_CC_TO_MTHREADS(GGML_CUDA_CC_QY2)) |
| 230 | +#if !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && GGML_CUDA_CC_IS_QY1_OR_EARLIER) |
230 | 231 | #define FLASH_ATTN_AVAILABLE |
231 | | -#endif // !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && __MUSA_ARCH__ < GGML_CUDA_CC_TO_MTHREADS(GGML_CUDA_CC_QY2)) |
| 232 | +#endif // !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && GGML_CUDA_CC_IS_QY1_OR_EARLIER) |
232 | 233 |
|
233 | 234 | static bool fp16_available(const int cc) { |
234 | 235 | return ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_PASCAL; |
|
0 commit comments