Commit 66248d2

Add skeleton for GGML_OP_SPARSEK_ATTN (SparseK Attention) to ggml.c/h: new operator definition and tensor creation; backend implementation pending
Co-authored-by: Yael Shuker <[email protected]>
Co-authored-by: Gitty Burstein <[email protected]>
1 parent: 3479efd

2 files changed: +54 −4 lines

ggml/include/ggml.h (11 additions, 1 deletion)
@@ -529,7 +529,7 @@ extern "C" {
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,
         GGML_OP_LEAKY_RELU,
-
+        GGML_OP_SPARSEK_ATTN,
         GGML_OP_FLASH_ATTN_EXT,
         GGML_OP_FLASH_ATTN_BACK,
         GGML_OP_SSM_CONV,
@@ -2231,6 +2231,16 @@ extern "C" {
     // n_head % ne32 == 0
     // ne3 % ne33 == 0
     //
+
+    GGML_API struct ggml_tensor * ggml_sparsek_attn(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * Q,
+            struct ggml_tensor  * K,
+            struct ggml_tensor  * V,
+            int32_t               k_top,
+            int32_t               win_local,
+            int32_t               stride_global);
+
     GGML_API struct ggml_tensor * ggml_flash_attn_ext(
             struct ggml_context * ctx,
             struct ggml_tensor  * q,
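For orientation, here is a minimal usage sketch of the new API (not part of the commit). It assumes Q/K/V follow the same shape convention as ggml_flash_attn_ext ([d_head, n_tokens, n_head, n_batch]); the k_top, win_local, and stride_global values are arbitrary placeholders.

#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // example shapes: head size 64, 256 key/value tokens, 32 query tokens, 8 heads
    struct ggml_tensor * Q = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64,  32, 8, 1);
    struct ggml_tensor * K = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 256, 8, 1);
    struct ggml_tensor * V = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 256, 8, 1);

    // placeholder values for k_top, win_local, stride_global
    struct ggml_tensor * out = ggml_sparsek_attn(ctx, Q, K, V, 32, 64, 128);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);
    // the graph is only built here; computing it is not expected to work
    // until the pending backend kernel for GGML_OP_SPARSEK_ATTN lands

    ggml_free(ctx);
    return 0;
}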

ggml/src/ggml.c (43 additions, 3 deletions)
@@ -1019,7 +1019,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "GLU",
 };
 
-static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
+static_assert(GGML_OP_COUNT == 91, "GGML_OP_COUNT != 91");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -1094,7 +1094,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
     "leaky_relu(x)",
-
+    "sparsek_attn(Q, K, V, k_top, win_local, stride_global)",
     "flash_attn_ext(x)",
     "flash_attn_back(x)",
     "ssm_conv(x)",
@@ -1123,7 +1123,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "glu(x)",
 };
 
-static_assert(GGML_OP_COUNT == 90, "GGML_OP_COUNT != 90");
+static_assert(GGML_OP_COUNT == 91, "GGML_OP_COUNT != 91");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -5063,6 +5063,46 @@ struct ggml_tensor * ggml_top_k(
     return result;
 }
 
+// ggml_sparsek_attn
+struct ggml_tensor * ggml_sparsek_attn(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * Q,
+        struct ggml_tensor  * K,
+        struct ggml_tensor  * V,
+        int32_t               k_top,
+        int32_t               win_local,
+        int32_t               stride_global) {
+
+    // suppress unused-parameter warnings (the parameters are not used yet)
+    GGML_UNUSED(k_top);
+    GGML_UNUSED(win_local);
+    GGML_UNUSED(stride_global);
+
+    // basic sanity checks
+    GGML_ASSERT(Q != NULL);
+    GGML_ASSERT(K != NULL);
+    GGML_ASSERT(V != NULL);
+    GGML_ASSERT(ggml_can_mul_mat(K, Q));
+
+    // create the output tensor with the appropriate dimensions
+    int64_t ne[GGML_MAX_DIMS] = { V->ne[0], Q->ne[2], Q->ne[1], Q->ne[3] };
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, ne);
+
+    // set the operator type and its sources
+    result->op     = GGML_OP_SPARSEK_ATTN;
+    result->src[0] = Q;
+    result->src[1] = K;
+    result->src[2] = V;
+
+    // store the integer parameters in op_params (the convention used in ggml)
+    result->op_params[0] = k_top;
+    result->op_params[1] = win_local;
+    result->op_params[2] = stride_global;
+
+    return result;
+}
+
+
 // ggml_flash_attn_ext
 
 struct ggml_tensor * ggml_flash_attn_ext(
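The commit message notes that the backend implementation is still pending. As a hedged sketch only (the function name and its wiring into the compute dispatch are hypothetical, not part of this commit), a future kernel would read the parameters back from dst->op_params in the same order they were stored above:

// hypothetical sketch of the pending backend side
static void ggml_compute_forward_sparsek_attn(struct ggml_tensor * dst) {
    const struct ggml_tensor * Q = dst->src[0];
    const struct ggml_tensor * K = dst->src[1];
    const struct ggml_tensor * V = dst->src[2];

    // recover the parameters stored by ggml_sparsek_attn()
    const int32_t k_top         = dst->op_params[0];
    const int32_t win_local     = dst->op_params[1];
    const int32_t stride_global = dst->op_params[2];

    // TODO: actual SparseK attention math goes here
    GGML_UNUSED(Q); GGML_UNUSED(K); GGML_UNUSED(V);
    GGML_UNUSED(k_top); GGML_UNUSED(win_local); GGML_UNUSED(stride_global);
}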
