Skip to content

Commit de06dd3

Browse files
committed
2
Signed-off-by: Xiaodong Ye <[email protected]>
1 parent 34f4762 commit de06dd3

File tree

2 files changed

+9
-7
lines changed

2 files changed

+9
-7
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,12 @@
7676
// True when cc lies in the half-open range [GGML_CUDA_CC_CDNA, GGML_CUDA_CC_RDNA1).
// The argument is parenthesized so compound expressions (e.g. a ternary) expand with the intended precedence.
// Note: cc is evaluated twice, so avoid arguments with side effects.
#define GGML_CUDA_CC_IS_CDNA(cc) ((cc) >= GGML_CUDA_CC_CDNA && (cc) < GGML_CUDA_CC_RDNA1)
7777

7878
// Moore Threads
79-
#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 210) // MTT S80, MTT S3000
80-
#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 220) // MTT S4000
81-
#define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 310) // TBD
79+
// Compile-time predicate: true when __MUSA_ARCH__ is numerically below 220.
// The literal 220 is decimal, unlike the hex offsets (0x210/0x220/0x310) used by GGML_CUDA_CC_QY1/QY2/NG,
// so it must not be replaced by an expression built from those constants.
// NOTE(review): presumably 220 denotes QY2 in __MUSA_ARCH__'s own encoding - confirm against the MUSA toolchain.
// NOTE(review): inside #if an undefined __MUSA_ARCH__ evaluates as 0, which makes this true - confirm that is
// the intended result wherever this macro is tested.
#define GGML_CUDA_CC_IS_QY1_OR_EARLIER (__MUSA_ARCH__ < 220)
80+
81+
// Compute-capability id for QY1: the Moore Threads offset plus hex 0x210.
// NOTE(review): presumably 0x210 encodes major 2 / minor 1 as major*0x100 + minor*0x10 - confirm against device init.
#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210) // MTT S80, MTT S3000
82+
// Compute-capability id for QY2: the Moore Threads offset plus hex 0x220.
// NOTE(review): presumably 0x220 encodes major 2 / minor 2 as major*0x100 + minor*0x10 - confirm against device init.
#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220) // MTT S4000
83+
// Compute-capability id for NG: the Moore Threads offset plus hex 0x310.
// It is also used as the exclusive upper bound of the QY2 range check (GGML_CUDA_CC_IS_QY2).
#define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310) // TBD
8284

83-
#define GGML_CUDA_CC_TO_MTHREADS(cc) ((cc) - GGML_CUDA_CC_OFFSET_MTHREADS)
8485
// True when cc lies in the Moore Threads range [GGML_CUDA_CC_OFFSET_MTHREADS, GGML_CUDA_CC_OFFSET_AMD).
// The argument is parenthesized so compound expressions (e.g. a ternary) expand with the intended precedence.
// Note: cc is evaluated twice, so avoid arguments with side effects.
#define GGML_CUDA_CC_IS_MTHREADS(cc) ((cc) >= GGML_CUDA_CC_OFFSET_MTHREADS && (cc) < GGML_CUDA_CC_OFFSET_AMD)
8586
// True when cc lies in the half-open range [GGML_CUDA_CC_QY1, GGML_CUDA_CC_QY2).
// The argument is parenthesized so compound expressions (e.g. a ternary) expand with the intended precedence.
// Note: cc is evaluated twice, so avoid arguments with side effects.
#define GGML_CUDA_CC_IS_QY1(cc) ((cc) >= GGML_CUDA_CC_QY1 && (cc) < GGML_CUDA_CC_QY2)
8687
// True when cc lies in the half-open range [GGML_CUDA_CC_QY2, GGML_CUDA_CC_NG).
// The argument is parenthesized so compound expressions (e.g. a ternary) expand with the intended precedence.
// Note: cc is evaluated twice, so avoid arguments with side effects.
#define GGML_CUDA_CC_IS_QY2(cc) ((cc) >= GGML_CUDA_CC_QY2 && (cc) < GGML_CUDA_CC_NG)
@@ -226,9 +227,9 @@ typedef float2 dfloat2;
226227
#define CP_ASYNC_AVAILABLE
227228
#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
228229

229-
#if !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && __MUSA_ARCH__ < GGML_CUDA_CC_TO_MTHREADS(GGML_CUDA_CC_QY2))
230+
#if !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && GGML_CUDA_CC_IS_QY1_OR_EARLIER)
230231
#define FLASH_ATTN_AVAILABLE
231-
#endif // !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && __MUSA_ARCH__ < GGML_CUDA_CC_TO_MTHREADS(GGML_CUDA_CC_QY2))
232+
#endif // !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && GGML_CUDA_CC_IS_QY1_OR_EARLIER)
232233

233234
static bool fp16_available(const int cc) {
234235
return ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_PASCAL;

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,8 @@ static ggml_cuda_device_info ggml_cuda_init() {
268268
// FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
269269
info.devices[id].warp_size = 32;
270270
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
271-
info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + 100*prop.major + 10*prop.minor;
271+
info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
272+
info.devices[id].cc += prop.minor * 0x10;
272273
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
273274
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
274275
#else

0 commit comments

Comments
 (0)