Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -2222,19 +2222,6 @@ extern "C" {
// Writes into the caller-provided threadpool parameter struct `p`, given a thread count `n_threads`.
// NOTE(review): presumably fills *p with default settings for n_threads threads; the definition
// is not visible in this header excerpt, so confirm which fields are set and to what values.
GGML_API void ggml_threadpool_params_init (struct ggml_threadpool_params * p, int n_threads);
// Compares two threadpool parameter structs; both are taken by const pointer and are not modified.
// NOTE(review): presumably returns true when p0 and p1 describe equivalent settings; which fields
// take part in the comparison is not visible here, so confirm against the definition.
GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1);

// Enhanced flash attention with state tensor for S/M values
//
// Parameters:
//   ctx           - ggml context supplied by the caller
//                   (presumably where the result tensor is created; confirm in the definition)
//   q, k, v       - presumably the query / key / value tensors, as in the non-state
//                   flash attention op; expected shapes are not visible here (TODO confirm)
//   mask          - mask tensor; NOTE(review): whether NULL is accepted is not shown here
//   s_m_state     - [2, n_heads * q_len] tensor containing [M, S] pairs for each head/position
//                   (presumably M = running maximum and S = running sum of the softmax;
//                   verify against the kernel, including whether it is read, written, or both)
//   scale         - float scale factor (presumably applied to the attention scores; confirm)
//   max_bias      - float; NOTE(review): looks like the ALiBi maximum bias; confirm
//   logit_softcap - float; NOTE(review): looks like a soft-capping value for the logits; confirm
//
// Returns a pointer to a ggml_tensor.
// NOTE(review): presumably the attention output node; confirm in the definition.
GGML_API struct ggml_tensor * ggml_flash_attn_ext_with_state(
struct ggml_context * ctx,
struct ggml_tensor * q,
struct ggml_tensor * k,
struct ggml_tensor * v,
struct ggml_tensor * mask,
struct ggml_tensor * s_m_state, // State tensor for S and M values
float scale,
float max_bias,
float logit_softcap);

#ifdef __cplusplus
}
#endif
Loading