
Commit 28646bc

HIP: Ignore unsupported unroll transformation in fattn-vec
1 parent 8ad7b3e commit 28646bc

2 files changed: +18 -0 lines changed

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 9 additions & 0 deletions
@@ -1,6 +1,12 @@
 #include "common.cuh"
 #include "fattn-common.cuh"

+// Currently LLVM with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
 template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -341,6 +347,9 @@ static __global__ void flash_attn_vec_ext_f16(
     NO_DEVICE_CODE;
 #endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
 }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__

 template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

ggml/src/ggml-cuda/fattn-vec-f32.cuh

Lines changed: 9 additions & 0 deletions
@@ -1,6 +1,12 @@
 #include "common.cuh"
 #include "fattn-common.cuh"

+// Currently LLVM with the amdgcn target does not support unrolling loops
+// that contain a break that cannot be resolved at compile time.
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wpass-failed"
+#endif // __clang__
 template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -336,6 +342,9 @@ static __global__ void flash_attn_vec_ext_f32(
     NO_DEVICE_CODE;
 #endif // FLASH_ATTN_AVAILABLE
 }
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif // __clang__

 template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
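For context, the pattern added in both files can be illustrated with a standalone sketch. The kernel below is hypothetical and not part of this commit (example_unroll_kernel, its fixed trip count of 64, and the runtime cutoff n are made up for illustration); it shows the kind of loop that can make clang's unroll pass fail and emit -Wpass-failed when targeting amdgcn, and how the diagnostic push/ignored/pop pragmas scope the suppression to just that code:

// Hypothetical example, not from this commit: a "#pragma unroll" request on a
// loop whose break depends on runtime data. On HIP/amdgcn, clang's unroll pass
// can fail here and report -Wpass-failed; the pragmas below silence only that
// warning, and only for the code between push and pop.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed"
#endif // __clang__

__global__ void example_unroll_kernel(const float * x, float * dst, int n) {
    float sum = 0.0f;
#pragma unroll
    for (int i = 0; i < 64; ++i) {
        if (i >= n) {
            break; // depends on the runtime value n -> not resolvable at compile time
        }
        sum += x[i];
    }
    dst[threadIdx.x] = sum;
}

#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__

Scoping the suppression with push/pop rather than passing a global -Wno-pass-failed flag keeps the warning enabled for the rest of the build, so unexpected optimization failures elsewhere still surface.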

Comments (0)