
Commit 91fbc37

Revert "HIP: Ignore unsupported unroll transformation in fattn-vec (ggml-org#14931)"
This reverts commit c7aa136.
1 parent 7581646 commit 91fbc37

2 files changed: +0 -18 lines

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 0 additions & 9 deletions
@@ -1,12 +1,6 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
-// Currenlty llvm with the amdgcn target dose not support unrolling loops
-// that contain a break that can not be resolved at compile time.
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wpass-failed"
-#endif // __clang__
 template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -347,9 +341,6 @@ static __global__ void flash_attn_vec_ext_f16(
     NO_DEVICE_CODE;
 #endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
 }
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif // __clang__
 
 template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
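For context, the reverted change had wrapped the kernel in a clang diagnostic guard so that the optimizer's "loop not unrolled" report (-Wpass-failed) from the amdgcn backend would not surface as a warning. The sketch below is illustrative only, not the actual flash_attn_vec kernel: the kernel name, template parameter, and loop body are hypothetical, and only the pragma wrapping mirrors the hunk above.

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed" // suppress "loop not unrolled" reports from the optimizer
#endif // __clang__

// Hypothetical kernel: an unroll request on a loop whose break depends on a
// runtime value (n) is the pattern the removed comment says llvm for amdgcn
// cannot fully unroll.
template <int D>
__global__ void toy_unroll_kernel(const float * x, float * y, const int n) {
#pragma unroll
    for (int i = 0; i < D; ++i) {
        if (i >= n) {
            break; // runtime-dependent exit -> the unroll pass may bail out and emit -Wpass-failed
        }
        y[i] = 2.0f * x[i];
    }
}

#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__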

ggml/src/ggml-cuda/fattn-vec-f32.cuh

Lines changed: 0 additions & 9 deletions
@@ -1,12 +1,6 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
-// Currenlty llvm with the amdgcn target dose not support unrolling loops
-// that contain a break that can not be resolved at compile time.
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wpass-failed"
-#endif // __clang__
 template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -342,9 +336,6 @@ static __global__ void flash_attn_vec_ext_f32(
     NO_DEVICE_CODE;
 #endif // FLASH_ATTN_AVAILABLE
 }
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif // __clang__
 
 template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
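The f32 hunk is identical in structure to the f16 one. As a contrast to the loop shape described in the removed comment, a loop whose trip count is a compile-time constant and which has no data-dependent break can honor the unroll request without any diagnostic suppression; the snippet below is a hypothetical illustration of that shape, not code from ggml.

// Hypothetical contrast case: trip count D is a template constant and there is
// no early break, so the requested unroll can be performed and no
// -Wpass-failed guard is needed.
template <int D>
__global__ void toy_unroll_kernel_no_break(const float * x, float * y) {
#pragma unroll
    for (int i = 0; i < D; ++i) {
        y[i] = 2.0f * x[i];
    }
}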
