File tree Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Original file line number Diff line number Diff line change 7878#define GGML_CUDA_CC_IS_CDNA3 (cc ) (cc >= GGML_CUDA_CC_CDNA3 && cc < GGML_CUDA_CC_RDNA1)
7979
8080// Moore Threads
81+ #define MUSART_HMASK 40300 // MUSA rc4.3: MUSA runtime versions below this get the fallback __hgt2_mask (half2 -> uint mask comparison) compiled in
82+
8183#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210 ) // MTT S80, MTT S3000
8284#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220 ) // MTT S4000
8385#define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310 ) // TBD
@@ -490,13 +492,13 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
490492#endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
491493}
492494
493- #if CUDART_VERSION < CUDART_HMASK
495+ #if CUDART_VERSION < CUDART_HMASK || (defined(MUSART_VERSION) && MUSART_VERSION < MUSART_HMASK)
// Fallback lane-wise "greater than" comparison for half2 values that yields a bit mask.
// Bits 0-15 of the result are all ones when low(a) > low(b), otherwise all zeros;
// bits 16-31 are all ones when high(a) > high(b), otherwise all zeros.
// Each lane is compared after conversion to float.
static __device__ __forceinline__ uint32_t __hgt2_mask(const half2 a, const half2 b) {
    // Widen both lanes of each operand to float before comparing.
    const float a_lo = float(__low2half(a));
    const float b_lo = float(__low2half(b));
    const float a_hi = float(__high2half(a));
    const float b_hi = float(__high2half(b));

    // Select the per-lane 16-bit pattern, then merge the two halves.
    const uint32_t lo_bits = (a_lo > b_lo) ? 0x0000FFFFu : 0x00000000u;
    const uint32_t hi_bits = (a_hi > b_hi) ? 0xFFFF0000u : 0x00000000u;
    return hi_bits | lo_bits;
}
499- #endif // CUDART_VERSION < CUDART_HMASK
501+ #endif // CUDART_VERSION < CUDART_HMASK || (defined(MUSART_VERSION) && MUSART_VERSION < MUSART_HMASK)
500502
501503static __device__ __forceinline__ int ggml_cuda_dp4a (const int a, const int b, int c) {
502504#if defined(GGML_USE_HIP)
You can’t perform that action at this time.
0 commit comments