added two winograd ops

bssrdf · bssrdf · commit 5b4e44828890 · 2024-09-26T18:04:04.000-04:00
diff --git a/include/ggml.h b/include/ggml.h
@@ -510,6 +510,8 @@ extern "C" {
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,
         GGML_OP_LEAKY_RELU,
+        GGML_OP_WINOGRAD_STAGE0,
+        GGML_OP_WINOGRAD_STAGE1,
 
         GGML_OP_FLASH_ATTN_EXT,
         GGML_OP_FLASH_ATTN_BACK,
diff --git a/src/ggml.c b/src/ggml.c
@@ -2995,6 +2995,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "TIMESTEP_EMBEDDING",
     "ARGSORT",
     "LEAKY_RELU",
+    "WINOGRAD_STAGE0",
+    "WINOGRAD_STAGE1",
 
     "FLASH_ATTN_EXT",
     "FLASH_ATTN_BACK",
@@ -3089,6 +3091,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
     "leaky_relu(x)",
+    "winograd_stage0(x)",
+    "winograd_stage1(x)",
 
     "flash_attn_ext(x)",
     "flash_attn_back(x)",
@@ -3118,7 +3122,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "adamw(x)",
 };
 
-static_assert(GGML_OP_COUNT == 80, "GGML_OP_COUNT != 80");
+static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -7166,6 +7170,70 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
     return result;
 }
 
+
+// ggml_winograd
+
+// a: [OC, IC, 3, 3]
+// result: [OC, IC, 16]
+// Creates the GGML_OP_WINOGRAD_STAGE0 node for kernel tensor `a`; the node is an
+// F16 tensor with ne = {16, a->ne[2], a->ne[3], 1} and `a` as its only source.
+struct ggml_tensor * ggml_winograd_stage0(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a) {
+    // kernel should be 3x3 (checked on ne[0] and ne[1])
+    GGML_ASSERT(a->ne[0] == 3 && a->ne[1] == 3);
+    const bool needs_grad = a->grad != NULL;
+
+    struct ggml_tensor * out = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 16, a->ne[2], a->ne[3], 1);
+
+    out->op   = GGML_OP_WINOGRAD_STAGE0;
+    // a gradient tensor is attached only when the input carries one
+    out->grad = needs_grad ? ggml_dup_tensor(ctx, out) : NULL;
+    out->src[0] = a;
+    return out;
+}
+
+// ggml_winograd_stage1
+// a: [OC, IC, 16]     (layout produced by ggml_winograd_stage0)
+// b: [1, IC, IH, IW]
+// result: [1, OC, OH, OW] as F32, with OH == IH and OW == IW
+struct ggml_tensor * ggml_winograd_stage1(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b) {
+    // only a->grad is consulted when deciding whether to attach a gradient
+    const bool is_node = a->grad != NULL;
+
+    // keep the extents as int64_t so they are not narrowed through int
+    const int64_t OW = b->ne[0];
+    const int64_t OH = b->ne[1];
+    // stage0 emits ne = {16, IC, OC, 1}: OC is ne[2]; ne[3] is always 1 there
+    const int64_t OC = a->ne[2];
+    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, OW, OH, OC, 1);
+
+    result->op   = GGML_OP_WINOGRAD_STAGE1;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+    result->src[1] = b;
+    return result;
+}
+
+// ggml_conv_2d_3x3
+// Chains the two winograd ops: `a` goes through ggml_winograd_stage0 and its
+// result is passed, together with `b`, to ggml_winograd_stage1.
+// a: kernel (stage0 asserts ne[0] == ne[1] == 3)
+// b: input, limited to a single image (ne[3] == 1)
+struct ggml_tensor * ggml_conv_2d_3x3(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * b) {
+    // only works for 1 input image
+    GGML_ASSERT(b->ne[3] == 1);
+
+    return ggml_winograd_stage1(ctx, ggml_winograd_stage0(ctx, a), b);
+}
+
+
 // ggml_pool_*
 
 static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {