
Commit 91fbc37

Revert "HIP: Ignore unsupported unroll transformation in fattn-vec (ggml-org#14931)"
This reverts commit c7aa136.
1 parent 7581646 commit 91fbc37

2 files changed: +0 -18 lines

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 0 additions & 9 deletions
@@ -1,12 +1,6 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
-// Currenlty llvm with the amdgcn target dose not support unrolling loops
-// that contain a break that can not be resolved at compile time.
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wpass-failed"
-#endif // __clang__
 template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -347,9 +341,6 @@ static __global__ void flash_attn_vec_ext_f16(
     NO_DEVICE_CODE;
 #endif // defined(FLASH_ATTN_AVAILABLE) && defined(FP16_AVAILABLE)
 }
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif // __clang__
 
 template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f16_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
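For context, the reverted change had wrapped the kernel in a clang diagnostic guard so that the optimizer's "loop not unrolled" report (-Wpass-failed) from the amdgcn backend would not surface as a warning. The sketch below is illustrative only, not the actual flash_attn_vec kernel: the kernel name, template parameter, and loop body are hypothetical, and only the pragma wrapping mirrors the hunk above.

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpass-failed" // suppress "loop not unrolled" reports from the optimizer
#endif // __clang__

// Hypothetical kernel: an unroll request on a loop whose break depends on a
// runtime value (n) is the pattern the removed comment says llvm for amdgcn
// cannot fully unroll.
template <int D>
__global__ void toy_unroll_kernel(const float * x, float * y, const int n) {
#pragma unroll
    for (int i = 0; i < D; ++i) {
        if (i >= n) {
            break; // runtime-dependent exit -> the unroll pass may bail out and emit -Wpass-failed
        }
        y[i] = 2.0f * x[i];
    }
}

#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__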

ggml/src/ggml-cuda/fattn-vec-f32.cuh

Lines changed: 0 additions & 9 deletions
@@ -1,12 +1,6 @@
 #include "common.cuh"
 #include "fattn-common.cuh"
 
-// Currenlty llvm with the amdgcn target dose not support unrolling loops
-// that contain a break that can not be resolved at compile time.
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wpass-failed"
-#endif // __clang__
 template<int D, int ncols, ggml_type type_K, ggml_type type_V, bool use_logit_softcap> // D == head size
 #ifndef GGML_USE_HIP
 __launch_bounds__(D, 1)
@@ -342,9 +336,6 @@ static __global__ void flash_attn_vec_ext_f32(
     NO_DEVICE_CODE;
 #endif // FLASH_ATTN_AVAILABLE
 }
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif // __clang__
 
 template <int D, int cols_per_block, ggml_type type_K, ggml_type type_V, bool use_logit_softcap>
 void ggml_cuda_flash_attn_ext_vec_f32_case_impl(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
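The f32 hunk is identical in structure to the f16 one. As a contrast to the loop shape described in the removed comment, a loop whose trip count is a compile-time constant and which has no data-dependent break can honor the unroll request without any diagnostic suppression; the snippet below is a hypothetical illustration of that shape, not code from ggml.

// Hypothetical contrast case: trip count D is a template constant and there is
// no early break, so the requested unroll can be performed and no
// -Wpass-failed guard is needed.
template <int D>
__global__ void toy_unroll_kernel_no_break(const float * x, float * y) {
#pragma unroll
    for (int i = 0; i < D; ++i) {
        y[i] = 2.0f * x[i];
    }
}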
