File tree Expand file tree Collapse file tree 1 file changed +5
-2
lines changed Expand file tree Collapse file tree 1 file changed +5
-2
lines changed Original file line number Diff line number Diff line change 7878#define GGML_CUDA_CC_IS_CDNA3 (cc ) (cc >= GGML_CUDA_CC_CDNA3 && cc < GGML_CUDA_CC_RDNA1)
7979
8080// Moore Threads
81+ #define MUSART_HMASK 40300 // MUSA rc4.3, min. ver. for half2 -> uint mask comparisons
82+
// Compute-capability identifiers for Moore Threads devices. Each value is
// GGML_CUDA_CC_OFFSET_MTHREADS plus a per-generation constant; the trailing
// comment on each line names the products the identifier stands for.
8183#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210 ) // MTT S80, MTT S3000
8284#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220 ) // MTT S4000
8385#define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310 ) // TBD
@@ -490,13 +492,14 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
490492#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
491493}
492494
493- #if CUDART_VERSION < CUDART_HMASK
495+ #if (defined(CUDART_VERSION) && CUDART_VERSION < CUDART_HMASK) || defined(GGML_USE_HIP) || \
496+ (defined (MUSART_VERSION) && MUSART_VERSION < MUSART_HMASK)
// Lane-wise "greater than" comparison of two half2 values, returned as a bit mask.
//
// Each 16-bit lane of a and b is converted to float and compared. The result has
// bits 0-15 all set when the low lane of a is greater than the low lane of b, and
// bits 16-31 all set when the high lane of a is greater than the high lane of b.
// A lane whose comparison is false contributes zero bits.
static __device__ __forceinline__ uint32_t __hgt2_mask(const half2 a, const half2 b) {
    const bool low_is_greater  = float(__low2half(a))  > float(__low2half(b));
    const bool high_is_greater = float(__high2half(a)) > float(__high2half(b));

    uint32_t mask = 0;
    if (low_is_greater) {
        mask |= 0x0000FFFF; // fill the lower 16 bits
    }
    if (high_is_greater) {
        mask |= 0xFFFF0000; // fill the upper 16 bits
    }
    return mask;
}
499- #endif // CUDART_VERSION < CUDART_HMASK
502+ #endif // (defined( CUDART_VERSION) && CUDART_VERSION < CUDART_HMASK) || defined(GGML_USE_HIP) || (defined(MUSART_VERSION) && MUSART_VERSION < MUSART_HMASK)
500503
501504static __device__ __forceinline__ int ggml_cuda_dp4a (const int a, const int b, int c) {
502505#if defined(GGML_USE_HIP)
You can’t perform that action at this time.
0 commit comments