Commit 841b749

Revert "old gpu fattn fix"
This reverts commit d2c9fdd.
1 parent 1bfc9af commit 841b749

File tree

1 file changed: +0 −5 lines changed


ggml/src/ggml-cuda/fattn.cu

Lines changed: 0 additions & 5 deletions
@@ -415,11 +415,6 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
         return BEST_FATTN_KERNEL_WMMA_F16;
     }
 
-    //kcpp: always force WMMA for older gpus, fix issues like "FlashAttention without tensor cores only supports head sizes 64 and 128."
-    if (ggml_cuda_highest_compiled_arch(cc) <= GGML_CUDA_CC_TURING || cc == GGML_CUDA_CC_TURING) {
-        return BEST_FATTN_KERNEL_WMMA_F16;
-    }
-
     // If there is no suitable kernel for tensor cores or small batch sizes, use the generic kernel for large batch sizes:
     if (prec == GGML_PREC_DEFAULT && fast_fp16_available(cc)) {
         return BEST_FATTN_KERNEL_TILE_F16;
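For context on what the revert restores: with the forced-WMMA branch removed, Turing and older GPUs no longer short-circuit to BEST_FATTN_KERNEL_WMMA_F16 at this point and instead fall through to the generic large-batch checks visible in the remaining context lines. The snippet below is a minimal standalone sketch of that fall-through, not the real ggml_cuda_get_best_fattn_kernel(); the enum values and the fast_fp16_available() name come from the diff, while the stub body, the prec_default flag, the pick_generic_kernel name, and the TILE_F32 fallback value are assumptions made for illustration.

// Minimal sketch of the post-revert fall-through (illustrative only, not the ggml code).

enum best_fattn_kernel {
    BEST_FATTN_KERNEL_NONE,
    BEST_FATTN_KERNEL_TILE_F32,   // assumed fallback value, not shown in the diff
    BEST_FATTN_KERNEL_TILE_F16,   // from the diff context
    BEST_FATTN_KERNEL_WMMA_F16,   // from the diff context
};

// Stub standing in for the real helper referenced in the diff.
static bool fast_fp16_available(int /*cc*/) { return true; }

// Without the removed "force WMMA for older gpus" branch, selection for devices
// that did not match an earlier tensor-core case goes straight to the generic
// kernels, keyed only on precision and FP16 support.
static best_fattn_kernel pick_generic_kernel(int cc, bool prec_default) {
    if (prec_default && fast_fp16_available(cc)) {
        return BEST_FATTN_KERNEL_TILE_F16;
    }
    return BEST_FATTN_KERNEL_TILE_F32;
}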
