From b39cc2d7f71f23eb9d7d7396673109209697c961 Mon Sep 17 00:00:00 2001
From: yael-works <yaelshuker100@gmail.com>
Date: Mon, 15 Sep 2025 10:50:55 +0300
Subject: [PATCH 1/5] SYCL: Add COUNT_EQUAL operator support (rebased on
 master)

---
 ggml/src/ggml-sycl/binbcast.cpp  |  9 +++++++++
 ggml/src/ggml-sycl/binbcast.hpp  | 21 +++++++++++++++++++++
 ggml/src/ggml-sycl/ggml-sycl.cpp |  4 ++++
 tests/test-backend-ops.cpp       | 30 ++++++++++++++++++++++++++++++
 4 files changed, 64 insertions(+)

diff --git a/ggml/src/ggml-sycl/binbcast.cpp b/ggml/src/ggml-sycl/binbcast.cpp
index 0a3883ae1eda5..e0a1de0f32263 100644
--- a/ggml/src/ggml-sycl/binbcast.cpp
+++ b/ggml/src/ggml-sycl/binbcast.cpp
@@ -303,6 +303,10 @@ inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor *dst)
     ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_sub>>(ctx, dst->src[0], dst->src[1], dst);
 }
 
+inline void ggml_sycl_op_count_equal(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_count_equal>>(ctx, dst->src[0], dst->src[1], dst);
+}
+
 inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
     ggml_sycl_op_bin_bcast<bin_bcast_sycl<op_mul>>(ctx, dst->src[0], dst->src[1], dst);
@@ -328,6 +332,11 @@ void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     ggml_sycl_op_sub(ctx, dst);
 }
 
+void ggml_sycl_count_equal(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
+    ggml_sycl_op_count_equal(ctx, dst);
+}
+
 void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
     ggml_sycl_op_mul(ctx, dst);
diff --git a/ggml/src/ggml-sycl/binbcast.hpp b/ggml/src/ggml-sycl/binbcast.hpp
index 9cce0f053a582..9d59ee58fb659 100644
--- a/ggml/src/ggml-sycl/binbcast.hpp
+++ b/ggml/src/ggml-sycl/binbcast.hpp
@@ -13,9 +13,30 @@ static __dpct_inline__ float op_add(const float a, const float b) {
 }
 
 static __dpct_inline__ float op_sub(const float a, const float b) {
+
+static __dpct_inline__ float op_count_equal(const float a, const float b) {
+    return (a == b) ? 1.0f : 0.0f;
+}
+
+void ggml_sycl_count_equal(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+
     return a - b;
+
+static __dpct_inline__ float op_count_equal(const float a, const float b) {
+    return (a == b) ? 1.0f : 0.0f;
+}
+
+void ggml_sycl_count_equal(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+
+}
+
+static __dpct_inline__ float op_count_equal(const float a, const float b) {
+    return (a == b) ? 1.0f : 0.0f;
 }
 
+void ggml_sycl_count_equal(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+
+
 static __dpct_inline__ float op_mul(const float a, const float b) {
     return a * b;
 }
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp
index e06ec613fc81f..9404e3ff4ad9b 100644
--- a/ggml/src/ggml-sycl/ggml-sycl.cpp
+++ b/ggml/src/ggml-sycl/ggml-sycl.cpp
@@ -3577,6 +3577,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
         case GGML_OP_SUB:
             ggml_sycl_sub(ctx, dst);
             break;
+        case GGML_OP_COUNT_EQUAL:
+            ggml_sycl_count_equal(ctx, dst);
+            break;
         case GGML_OP_ACC:
             ggml_sycl_acc(ctx, dst);
             break;
@@ -4356,6 +4359,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_OP_ADD:
         case GGML_OP_ADD1:
         case GGML_OP_SUB:
+        case GGML_OP_COUNT_EQUAL:
         case GGML_OP_MUL:
         case GGML_OP_DIV:
         case GGML_OP_REPEAT:
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index b54a1a4e823f9..04d19921fd192 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -2236,6 +2236,30 @@ struct test_count_equal : public test_case {
     }
 };
 
+/* COUNT_EQUAL – typed test (no argmax), to cover F32/F16/I32/I16 */
+struct test_count_equal_typed : public test_case {
+    const ggml_type type;
+    const std::array<int64_t, 4> ne;
+
+    test_count_equal_typed(ggml_type type = GGML_TYPE_F32,
+                           std::array<int64_t, 4> ne = {128, 64, 1, 1})
+        : type(type), ne(ne) {}
+
+    std::string vars() override {
+        return VARS_TO_STR2(type, ne);
+    }
+
+    ggml_tensor * build_graph(ggml_context * ctx) override {
+        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(a, "a");
+        ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
+        ggml_set_name(b, "b");
+        ggml_tensor * out = ggml_count_equal(ctx, a, b);
+        ggml_set_name(out, "out");
+        return out;
+    }
+};
+
 // GGML_OP_REPEAT
 struct test_repeat : public test_case {
     const ggml_type type;
@@ -5940,6 +5964,12 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
 
     test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4,  500, 1, 1}));
     test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4, 5000, 1, 1}));
+    // COUNT_EQUAL – typed tests by dtype
+    test_cases.emplace_back(new test_count_equal_typed(GGML_TYPE_F32, {1024,  1, 1, 1}));
+    test_cases.emplace_back(new test_count_equal_typed(GGML_TYPE_F32, {  64, 64, 1, 1}));
+    test_cases.emplace_back(new test_count_equal_typed(GGML_TYPE_F16, { 256, 32, 1, 1}));
+    test_cases.emplace_back(new test_count_equal_typed(GGML_TYPE_I32, { 512, 16, 1, 1}));
+    test_cases.emplace_back(new test_count_equal_typed(GGML_TYPE_I16, { 512, 16, 1, 1}));
 
     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32,    1, 1, 1}));
     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32,  513, 1, 1}));

From f7910f37a6dd68f7792df8ac27d0a3ce557d6e8f Mon Sep 17 00:00:00 2001
From: yael-works <yaelshuker100@gmail.com>
Date: Mon, 15 Sep 2025 11:14:52 +0300
Subject: [PATCH 2/5] SYCL: remove duplicate op_count_equal definition

---
 ggml/src/ggml-sycl/binbcast.hpp | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/ggml/src/ggml-sycl/binbcast.hpp b/ggml/src/ggml-sycl/binbcast.hpp
index 9d59ee58fb659..34c4064f5287f 100644
--- a/ggml/src/ggml-sycl/binbcast.hpp
+++ b/ggml/src/ggml-sycl/binbcast.hpp
@@ -13,21 +13,7 @@ static __dpct_inline__ float op_add(const float a, const float b) {
 }
 
 static __dpct_inline__ float op_sub(const float a, const float b) {
-
-static __dpct_inline__ float op_count_equal(const float a, const float b) {
-    return (a == b) ? 1.0f : 0.0f;
-}
-
-void ggml_sycl_count_equal(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-
     return a - b;
-
-static __dpct_inline__ float op_count_equal(const float a, const float b) {
-    return (a == b) ? 1.0f : 0.0f;
-}
-
-void ggml_sycl_count_equal(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
-
 }
 
 static __dpct_inline__ float op_count_equal(const float a, const float b) {
@@ -36,7 +22,6 @@ static __dpct_inline__ float op_count_equal(const float a, const float b) {
 
 void ggml_sycl_count_equal(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
-
 static __dpct_inline__ float op_mul(const float a, const float b) {
     return a * b;
 }

From 4d6cebb4573a1cb0a5418037087f3adc5be71b84 Mon Sep 17 00:00:00 2001
From: yael-works <yaelshuker100@gmail.com>
Date: Mon, 15 Sep 2025 12:52:55 +0300
Subject: [PATCH 3/5] tests: remove test_count_equal_typed and use
 test_count_equal for all cases

---
 tests/test-backend-ops.cpp | 34 +++++-----------------------------
 1 file changed, 5 insertions(+), 29 deletions(-)

diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 04d19921fd192..f0f1b69318b59 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -2236,29 +2236,6 @@ struct test_count_equal : public test_case {
     }
 };
 
-/* COUNT_EQUAL – typed test (no argmax), to cover F32/F16/I32/I16 */
-struct test_count_equal_typed : public test_case {
-    const ggml_type type;
-    const std::array<int64_t, 4> ne;
-
-    test_count_equal_typed(ggml_type type = GGML_TYPE_F32,
-                           std::array<int64_t, 4> ne = {128, 64, 1, 1})
-        : type(type), ne(ne) {}
-
-    std::string vars() override {
-        return VARS_TO_STR2(type, ne);
-    }
-
-    ggml_tensor * build_graph(ggml_context * ctx) override {
-        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_name(a, "a");
-        ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
-        ggml_set_name(b, "b");
-        ggml_tensor * out = ggml_count_equal(ctx, a, b);
-        ggml_set_name(out, "out");
-        return out;
-    }
-};
 
 // GGML_OP_REPEAT
 struct test_repeat : public test_case {
@@ -5964,12 +5941,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
 
     test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4,  500, 1, 1}));
     test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4, 5000, 1, 1}));
-    // COUNT_EQUAL – typed tests by dtype
-    test_cases.emplace_back(new test_count_equal_typed(GGML_TYPE_F32, {1024,  1, 1, 1}));
-    test_cases.emplace_back(new test_count_equal_typed(GGML_TYPE_F32, {  64, 64, 1, 1}));
-    test_cases.emplace_back(new test_count_equal_typed(GGML_TYPE_F16, { 256, 32, 1, 1}));
-    test_cases.emplace_back(new test_count_equal_typed(GGML_TYPE_I32, { 512, 16, 1, 1}));
-    test_cases.emplace_back(new test_count_equal_typed(GGML_TYPE_I16, { 512, 16, 1, 1}));
+    test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {1024,  1, 1, 1}));
+    test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {  64, 64, 1, 1}));
+    test_cases.emplace_back(new test_count_equal(GGML_TYPE_F16, { 256, 32, 1, 1}));
+    test_cases.emplace_back(new test_count_equal(GGML_TYPE_I32, { 512, 16, 1, 1}));
+    test_cases.emplace_back(new test_count_equal(GGML_TYPE_I16, { 512, 16, 1, 1}));
 
     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32,    1, 1, 1}));
     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32,  513, 1, 1}));

From 163b9490eb8415c7bdf99f355f9047b992a844aa Mon Sep 17 00:00:00 2001
From: yael-works <yaelshuker100@gmail.com>
Date: Mon, 15 Sep 2025 13:04:33 +0300
Subject: [PATCH 4/5] tests: drop the extra F32 COUNT_EQUAL cases as suggested

---
 tests/test-backend-ops.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index f0f1b69318b59..2a14d8354ec19 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -5941,8 +5941,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
 
     test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4,  500, 1, 1}));
     test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4, 5000, 1, 1}));
-    test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {1024,  1, 1, 1}));
-    test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {  64, 64, 1, 1}));
     test_cases.emplace_back(new test_count_equal(GGML_TYPE_F16, { 256, 32, 1, 1}));
     test_cases.emplace_back(new test_count_equal(GGML_TYPE_I32, { 512, 16, 1, 1}));
     test_cases.emplace_back(new test_count_equal(GGML_TYPE_I16, { 512, 16, 1, 1}));

From b427efb7edf9a59bb82a4c5ad1352923a3c24f80 Mon Sep 17 00:00:00 2001
From: yael-works <yaelshuker100@gmail.com>
Date: Mon, 15 Sep 2025 13:14:41 +0300
Subject: [PATCH 5/5] tests: drop remaining F16/I32/I16 COUNT_EQUAL cases as
 requested

---
 tests/test-backend-ops.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 2a14d8354ec19..b54a1a4e823f9 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -2236,7 +2236,6 @@ struct test_count_equal : public test_case {
     }
 };
 
-
 // GGML_OP_REPEAT
 struct test_repeat : public test_case {
     const ggml_type type;
@@ -5941,9 +5940,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
 
     test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4,  500, 1, 1}));
     test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4, 5000, 1, 1}));
-    test_cases.emplace_back(new test_count_equal(GGML_TYPE_F16, { 256, 32, 1, 1}));
-    test_cases.emplace_back(new test_count_equal(GGML_TYPE_I32, { 512, 16, 1, 1}));
-    test_cases.emplace_back(new test_count_equal(GGML_TYPE_I16, { 512, 16, 1, 1}));
 
     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32,    1, 1, 1}));
     test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32,  513, 1, 1}));