Skip to content

Commit 1edd6ed

Browse files
committed
ggml-cpu: switch flash attention disable to ggml-cpu
Signed-off-by: Aaron Teo <[email protected]>
1 parent 0cc2017 commit 1edd6ed

File tree

2 files changed

+9
-6
lines changed

2 files changed

+9
-6
lines changed

common/common.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -312,13 +312,7 @@ struct common_params {
312 312   enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
313 313   enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
314 314   enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings
315     - #if defined(GGML_NNPA) || defined(__NNPA__)
316     - // disable Flash Attention on NNPA
317     - // see: https://github.com/ggml-org/llama.cpp/issues/15721
318     - enum llama_flash_attn_type flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED;
319     - #else
320 315   enum llama_flash_attn_type flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO; // whether to use Flash Attention
321     - #endif
322 316
323 317   struct common_params_sampling sampling;
324 318   struct common_params_speculative speculative;

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,15 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
441 441   case GGML_OP_OUT_PROD:
442 442   return (src0->type == GGML_TYPE_F32 || (ggml_is_quantized(src0->type) && src0->ne[2] == src1->ne[2] && src0->ne[3] == src1->ne[3])) &&
443 443   src1->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32;
    444 + case GGML_OP_FLASH_ATTN_EXT:
    445 + case GGML_OP_FLASH_ATTN_BACK:
    446 + #if defined(GGML_NNPA) || defined(__NNPA__)
    447 + // disable Flash Attention on NNPA
    448 + // see: https://github.com/ggml-org/llama.cpp/issues/15721
    449 + return false;
    450 + #else
    451 + return true;
    452 + #endif // GGML_NNPA || __NNPA__
444 453   default:
445 454   return true;
446 455   }

0 commit comments

Comments
 (0)