@@ -464,25 +464,21 @@ static __device__ __forceinline__ half ggml_cuda_hmax(const half a, const half b
464464}
465465
// Review note: this block is a diff rendering. Lines marked '-' are the removed (old) side and
// lines marked '+' are the added (new) side; the leading digits are fused old/new line numbers.
//
// ggml_cuda_hmax2: returns a half2 holding the maximum of a and b, taken separately for each of
// the two packed half values. Which implementation is compiled is chosen by the preprocessor.
466466static __device__ __forceinline__ half2 ggml_cuda_hmax2 (const half2 a, const half2 b) {
// Change: the HIP branch no longer also requires HIP_VERSION >= 50700000; after the patch every
// build with GGML_USE_HIP defined takes the per-component __hmax path below.
467- #if defined(GGML_USE_HIP) && HIP_VERSION >= 50700000
467+ #if defined(GGML_USE_HIP)
468468 return half2 (__hmax (a.x , b.x ), __hmax (a.y , b.y ));
// Change: the explicit !defined(GGML_USE_HIP) test is dropped; it is redundant once the #if
// above already captures every HIP build. This branch uses the packed __hmax2 intrinsic.
469- #elif !defined(GGML_USE_HIP) && CUDART_VERSION >= CUDART_HMAX
469+ #elif CUDART_VERSION >= CUDART_HMAX
470470 return __hmax2 (a, b);
// Fallback (now a plain #else): compute each maximum in float with fmaxf and convert the
// results back to half.
471- #elif !defined(GGML_USE_HIP)
471+ #else
472472 half2 ret;
// NOTE(review): the reinterpret_cast<half&> presumably exists because ret.x / ret.y are not
// directly assignable from half on this toolchain -- confirm against the half2 definition.
473473 reinterpret_cast <half&>(ret.x ) = __float2half (fmaxf ( __low2float (a), __low2float (b)));
474474 reinterpret_cast <half&>(ret.y ) = __float2half (fmaxf (__high2float (a), __high2float (b)));
475475 return ret;
// Change: the old final branch (GGML_UNUSED on both arguments, then NO_DEVICE_CODE) is removed.
// Before the patch it was reachable only for HIP builds that failed the HIP_VERSION check.
476- #else
477- GGML_UNUSED (a);
478- GGML_UNUSED (b);
479- NO_DEVICE_CODE;
480476#endif
481477}
482478
483479template <int width = WARP_SIZE>
484480static __device__ __forceinline__ half2 warp_reduce_max (half2 x) {
485- #if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || ( defined(GGML_USE_HIP) && HIP_VERSION >= 50700000 )
481+ #if !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
486482#pragma unroll
487483 for (int offset = width/2 ; offset > 0 ; offset >>= 1 ) {
488484 x = ggml_cuda_hmax2 (x, __shfl_xor_sync (0xffffffff , x, offset, width));
@@ -491,7 +487,7 @@ static __device__ __forceinline__ half2 warp_reduce_max(half2 x) {
491487#else
492488 GGML_UNUSED (x);
493489 NO_DEVICE_CODE;
494- #endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || ( defined(GGML_USE_HIP) && HIP_VERSION >= 50700000 )
490+ #endif // !defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL || defined(GGML_USE_HIP)
495491}
496492
497493#if CUDART_VERSION < CUDART_HMASK
0 commit comments