Skip to content

Commit a135b4c

Browse files
committed
ggml: Make fattn kernel use launch bounds w/HIP
1 parent a90f4cb commit a135b4c

File tree

1 file changed: 0 additions and 2 deletions.

ggml/src/ggml-cuda/fattn-wmma-f16.cu

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,7 @@ namespace wmma = rocwmma;
 
 // D == head size, VKQ_stride == num VKQ rows calculated in parallel:
 template<int D, int ncols, int nwarps, int VKQ_stride, int parallel_blocks, typename KQ_acc_t, bool use_logit_softcap>
-#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
 __launch_bounds__(nwarps*ggml_cuda_get_physical_warp_size(), 1)
-#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__))
 static __global__ void flash_attn_ext_f16(
 const char * __restrict__ Q,
 const char * __restrict__ K,

0 commit comments

Comments
 (0)