Skip to content

Commit 828577a

Browse files
committed
Move comments to reduce confusion.
1 parent 419f1ea commit 828577a

File tree

1 file changed: 1 addition, 1 deletion

ggml/src/ggml-cuda/fattn.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -252,7 +252,6 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst
252252
const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
253253
const enum ggml_prec prec = ggml_flash_attn_ext_get_prec(KQV);
254254

255-
// On AMD the tile kernels perform poorly, use the vec kernel instead:
256255
if (cc >= GGML_CUDA_CC_OFFSET_AMD) {
257256
#if defined(GGML_HIP_ROCWMMA_FATTN) && defined(FP16_MMA_AVAILABLE)
258257
if (fp16_mma_available(cc) && dst->src[0]->ne[1] > 8) {
@@ -261,6 +260,7 @@ void ggml_cuda_flash_attn_ext(ggml_backend_cuda_context & ctx, ggml_tensor * dst
261260
}
262261
#endif // defined(GGML_HIP_ROCWMMA_FATTN) && defined(FP16_MMA_AVAILABLE)
263262

263+
// On AMD the tile kernels perform poorly, use the vec kernel instead:
264264
if (prec == GGML_PREC_DEFAULT && fast_fp16_available(cc)) {
265265
ggml_cuda_flash_attn_ext_vec_f16(ctx, dst);
266266
} else {

0 commit comments

Comments (0)