Commit ad7d7ff
Scaffolding for snake activation fn
SNAC uses the snake activation function. Added scaffolding to include `GGML_OP_SNAKE` as a new op. Should this be a unary op? The SNAC decoder also uses noise blocks to enhance outputs; they are optional, so they are omitted for now until the model is integrated end to end. Next steps: write the `llm_graph_context` for SNAC.
1 parent 9906bd9 · commit ad7d7ff
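
For reference, the snake activation is usually written as snake(x) = x + (1/α)·sin²(α·x), with α a learnable per-channel parameter; SNAC's decoder applies it channel-wise (as in descript-audio-codec's Snake1d). Below is a minimal scalar sketch of that reference form for comparison; note that the kernels scaffolded in this commit compute x + sin²(α·x), without the 1/α factor, which is worth reconciling against the reference implementation before the decoder is wired up end to end.

```c
// Reference (scalar) form of the snake activation -- for comparison only,
// not part of this commit. The scaffolded ggml_vec_snake_* kernels omit the
// 1/alpha scaling.
#include <math.h>

static inline float snake_ref(float x, float alpha) {
    const float s = sinf(alpha * x);
    return x + (s * s) / (alpha + 1e-9f);  // small eps guards against alpha == 0
}
```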

4 files changed, +143 -4 lines changed

convert_hf_to_gguf.py
Lines changed: 1 addition & 2 deletions

@@ -2329,7 +2329,7 @@ def set_gguf_parameters(self):
 
 @Model.register("SNACDec")
 class SNACDecModel(Model):
-    model_arch = gguf.MODEL_ARCH.SNAC_DEC # Assumes this constant is defined in gguf
+    model_arch = gguf.MODEL_ARCH.SNAC_DEC
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[Tuple[str, Tensor]]:
         del bid # unused
@@ -2357,7 +2357,6 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_embedding_length(self.hparams["decoder_dim"])
         self.gguf_writer.add_decoder_upsample_rates(self.hparams["decoder_rates"])
         self.gguf_writer.add_decoder_channel_dims(self.hparams["decoder_channel_dims"])
-        self.gguf_writer.add_convnext_block_count(1)
 
 @Model.register("Qwen2MoeForCausalLM")
 class Qwen2MoeModel(Model):

ggml/include/ggml.h
Lines changed: 11 additions & 0 deletions

@@ -492,6 +492,7 @@ extern "C" {
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,
         GGML_OP_LEAKY_RELU,
+        GGML_OP_SNAKE,
 
         GGML_OP_FLASH_ATTN_EXT,
         GGML_OP_FLASH_ATTN_BACK,
@@ -1062,6 +1063,16 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_snake(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * alpha);
+
+    GGML_API struct ggml_tensor * ggml_snake_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * alpha);
+
     // normalize along rows
     GGML_API struct ggml_tensor * ggml_norm(
             struct ggml_context * ctx,
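
A hedged usage sketch of the new API, showing how the op could be exercised from graph-building code on the CPU backend. The shapes, placeholder data, and the ggml-cpu.h include (for ggml_graph_compute_with_ctx) are assumptions for illustration, not part of this commit:

```c
// Sketch: build and run a tiny graph that applies GGML_OP_SNAKE to a 2-D tensor.
#include "ggml.h"
#include "ggml-cpu.h"  // ggml_graph_compute_with_ctx (header location depends on ggml version)

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // x: 8 samples per row, 4 channels; alpha: one value per channel
    struct ggml_tensor * x     = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
    struct ggml_tensor * alpha = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);

    // placeholder data; real code would load these from the model
    for (int i = 0; i < 8*4; ++i) { ((float *) x->data)[i]     = 0.1f*(float) i; }
    for (int c = 0; c < 4;   ++c) { ((float *) alpha->data)[c] = 1.0f + (float) c; }

    struct ggml_tensor * y = ggml_snake(ctx, x, alpha);  // y = x + sin^2(alpha_c * x)

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, y);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);  // kernel is single-threaded for now

    ggml_free(ctx);
    return 0;
}
```

Because ggml_snake stashes a raw pointer to alpha in op_params rather than setting result->src[1], the graph never sees alpha as a dependency; that is workable for this CPU-only scaffold but will matter once allocators or other backends are involved.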

ggml/src/ggml-cpu/ggml-cpu.c
Lines changed: 99 additions & 0 deletions

@@ -1911,6 +1911,21 @@ inline static void ggml_vec_leaky_relu_f16 (const int n, ggml_fp16_t * y, const
         y[i] = GGML_FP32_TO_FP16(((v > 0.f) ? v : 0.f) + ns * ((v < 0.0f) ? v : 0.f));
     }
 }
+inline static void ggml_vec_snake_f32(const int n, float * y, const float * x, const float a) {
+    for (int i = 0; i < n; ++i) {
+        float x_val = x[i];
+        float sin_val = sinf(a * x_val);
+        y[i] = x_val + sin_val * sin_val;
+    }
+}
+inline static void ggml_vec_snake_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t a) {
+    for (int i = 0; i < n; ++i) {
+        float x_val = GGML_FP16_TO_FP32(x[i]); // TODO: double check this conversion
+        float a_val = GGML_FP16_TO_FP32(a);
+        float sin_val = sinf(a_val * x_val);
+        y[i] = GGML_FP32_TO_FP16(x_val + sin_val * sin_val);
+    }
+}
 inline static void ggml_vec_sigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = 1.f / (1.f + expf(-x[i])); }
 inline static void ggml_vec_sigmoid_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
     for (int i = 0; i < n; ++i) {
@@ -7817,6 +7832,86 @@ static void ggml_compute_forward_leaky_relu(
     }
 }
 
+// ggml_compute_forward_snake
+
+static void ggml_compute_forward_snake_f32(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    // Scaffold code, 1 thread for now
+    // TODO: add multithreading
+    if (params->ith != 0) {
+        return;
+    }
+
+    struct ggml_tensor * alpha = *(struct ggml_tensor **)(dst->op_params);
+    const float * x = (const float *)src0->data;
+    const float * a = (const float *)alpha->data;
+    float * y = (float *)dst->data;
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+    const int channels = src0->ne[1];
+
+    for (int i = 0; i < n; i++) {
+        int c = i % channels;
+        ggml_vec_snake_f32(nc,
+                (float *) ((char *) y + i * dst->nb[1]),
+                (const float *) ((const char *) x + i * src0->nb[1]),
+                a[c]); // alpha value for this channel
+    }
+}
+
+static void ggml_compute_forward_snake_f16(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    if (params->ith != 0) {
+        return;
+    }
+
+    struct ggml_tensor * alpha = *(struct ggml_tensor **)(dst->op_params);
+    const ggml_fp16_t * x = (const ggml_fp16_t *)src0->data;
+    const ggml_fp16_t * a = (const ggml_fp16_t *)alpha->data;
+    ggml_fp16_t * y = (ggml_fp16_t *)dst->data;
+
+    const int n = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+    const int channels = src0->ne[1];
+
+    for (int i = 0; i < n; i++) {
+        int c = i % channels;
+        ggml_vec_snake_f16(nc,
+                (ggml_fp16_t *) ((char *) y + i * dst->nb[1]),
+                (const ggml_fp16_t *) ((const char *) x + i * src0->nb[1]),
+                a[c]);
+    }
+}
+
+static void ggml_compute_forward_snake(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_snake_f32(params, dst);
+            } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_snake_f16(params, dst);
+            } break;
+        default:
+            {
+                GGML_ABORT("fatal error");
+            }
+    }
+}
+
 // ggml_compute_forward_silu_back
 
 static void ggml_compute_forward_silu_back_f32(
@@ -14555,6 +14650,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_leaky_relu(params, tensor);
             } break;
+        case GGML_OP_SNAKE:
+            {
+                ggml_compute_forward_snake(params, tensor);
+            } break;
         case GGML_OP_FLASH_ATTN_EXT:
             {
                 ggml_compute_forward_flash_attn_ext(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], tensor);
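
One assumption baked into these kernels is how a flattened row index maps back to a channel: ggml_nrows(src0) enumerates ne[1]*ne[2]*ne[3] rows with the second dimension (treated here as the channel axis) varying fastest, and the kernel steps by i*nb[1] bytes, so c = i % channels recovers the right alpha only for a contiguous [frame, channel, batch] layout. A small standalone sketch of that mapping (sizes are illustrative):

```c
// Sketch: row index -> (batch, channel) mapping assumed by
// ggml_compute_forward_snake_* for a contiguous ne = {frame_len, channels, batch, 1} tensor.
#include <stdio.h>

int main(void) {
    const int channels = 4;
    const int batch    = 2;
    const int nrows    = channels * batch;  // what ggml_nrows() would return

    for (int i = 0; i < nrows; ++i) {
        const int c = i % channels;  // same expression as in the kernel
        const int b = i / channels;  // which batch element the row belongs to
        printf("row %2d -> batch %d, channel %d\n", i, b, c);
    }
    return 0;
}
```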

ggml/src/ggml.c
Lines changed: 32 additions & 2 deletions

@@ -967,6 +967,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
     "LEAKY_RELU",
+    "SNAKE",
 
     "FLASH_ATTN_EXT",
     "FLASH_ATTN_BACK",
@@ -998,7 +999,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "OPT_STEP_ADAMW",
 };
 
-static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
+static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -1097,7 +1098,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "adamw(x)",
 };
 
-static_assert(GGML_OP_COUNT == 85, "GGML_OP_COUNT != 85");
+static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -2474,6 +2475,35 @@ struct ggml_tensor * ggml_leaky_relu(
     return result;
 }
 
+// ggml snake
+
+struct ggml_tensor * ggml_snake(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * alpha) {
+    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
+
+    // store ptr to alpha tensor
+    ggml_set_op_params(result, &alpha, sizeof(alpha));
+    result->op = GGML_OP_SNAKE;
+    result->src[0] = a;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_snake_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * alpha) {
+    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
+
+    ggml_set_op_params(result, &alpha, sizeof(alpha));
+    result->op = GGML_OP_SNAKE;
+    result->src[0] = a;
+
+    return result;
+}
+
 // ggml_sigmoid
 
 struct ggml_tensor * ggml_sigmoid(
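
The constructors above pass alpha by copying a raw tensor pointer into op_params. One alternative, sketched below under the hypothetical name ggml_snake_src1, would carry alpha as src[1] so that the graph, the allocators, and non-CPU backends all see it as a real input; this is not what the commit does, just a possible follow-up:

```c
// Alternative wiring (sketch, hypothetical): alpha as a second source tensor.
// The CPU kernel would then read dst->src[1] instead of decoding op_params.
struct ggml_tensor * ggml_snake_src1(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        struct ggml_tensor  * alpha) {
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

    result->op     = GGML_OP_SNAKE;
    result->src[0] = a;
    result->src[1] = alpha;  // tracked as a graph dependency

    return result;
}
```

A shape check (e.g. that alpha provides one value per channel of a) would also fit naturally in this constructor.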
