File tree Expand file tree Collapse file tree 3 files changed +7
-2
lines changed Expand file tree Collapse file tree 3 files changed +7
-2
lines changed Original file line number Diff line number Diff line change @@ -172,6 +172,7 @@ option(GGML_HIP "ggml: use HIP"
172172option (GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF )
173173option (GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON )
174174option (GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF )
175+ option (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF ) # when ON, the HIP build adds the GGML_HIP_ROCWMMA_FATTN_GFX12 compile definition; that definition is also added without this option when hip_VERSION >= 7.0
175176option (GGML_VULKAN "ggml: use Vulkan" OFF )
176177option (GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF )
177178option (GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF )
Original file line number Diff line number Diff line change @@ -207,9 +207,9 @@ typedef float2 dfloat2;
207207#define FP16_MMA_AVAILABLE
208208#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA
209209
210- #if defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || defined(RDNA4))
210+ #if defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || ( defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined( RDNA4) ))
211211#define FP16_MMA_AVAILABLE
212- #endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || defined(RDNA4))
212+ #endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || ( defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined( RDNA4) ))
213213
214214#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING
215215#define NEW_MMA_AVAILABLE
Original file line number Diff line number Diff line change @@ -113,6 +113,10 @@ if (GGML_HIP_ROCWMMA_FATTN)
113113 add_compile_definitions (GGML_HIP_ROCWMMA_FATTN)
114114endif ()
115115
116+ if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 OR "${hip_VERSION}" VERSION_GREATER_EQUAL 7.0) # expansion is quoted so an empty/unset hip_VERSION compares as false instead of producing a malformed if()
117+ add_compile_definitions (GGML_HIP_ROCWMMA_FATTN_GFX12) # required, together with GGML_HIP_ROCWMMA_FATTN, by the RDNA4 branch of the FP16_MMA_AVAILABLE preprocessor check
118+ endif ()
119+
116120if (NOT GGML_CUDA_FA) # GGML_CUDA_FA is not declared in this hunk; presumably an option defined elsewhere — TODO confirm
117121 add_compile_definitions (GGML_CUDA_NO_FA) # directory-scoped definition, added only when GGML_CUDA_FA evaluates false
118122endif ()
You can’t perform that action at this time.
0 commit comments