Skip to content

Commit 4ec0e68

Browse files
authored
check that number of unary ops matches in debug
ggml-ci
1 parent fd1c028 commit 4ec0e68

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2768,6 +2768,11 @@ static void update_cuda_graph_executable(ggml_backend_cuda_context * cuda_ctx) {
27682768
#endif
27692769

27702770
static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::initializer_list<enum ggml_op> ops, std::initializer_list<enum ggml_unary_op> unary_ops) {
2771+
#ifndef NDEBUG
2772+
const size_t num_unary = std::count(ops.begin(), ops.end(), GGML_OP_UNARY);
2773+
GGML_ASSERT(unary_ops.size() == num_unary);
2774+
#endif
2775+
27712776
if (!ggml_can_fuse(cgraph, node_idx, ops)) {
27722777
return false;
27732778
}
@@ -2802,16 +2807,12 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
28022807
if (ops.size() == 3 && ops.begin()[0] == GGML_OP_SCALE && ops.begin()[1] == GGML_OP_UNARY && ops.begin()[2] == GGML_OP_SCALE
28032808
&& unary_ops.size() == 1 && unary_ops.begin()[0] == GGML_UNARY_OP_TANH) {
28042809
const ggml_tensor *scale = cgraph->nodes[node_idx];
2805-
const ggml_tensor *tanh = cgraph->nodes[node_idx+1];
28062810
const ggml_tensor *scale2 = cgraph->nodes[node_idx+2];
28072811

28082812
GGML_ASSERT(scale->src[0]->type == GGML_TYPE_F32);
28092813
GGML_ASSERT(scale->type == GGML_TYPE_F32);
28102814

2811-
if (tanh->src[0] != scale || scale2->src[0] != tanh) {
2812-
return false;
2813-
}
2814-
2815+
// Check for bias
28152816
if (ggml_get_op_params_f32(scale, 1) != 0.0f || ggml_get_op_params_f32(scale2, 1) != 0.0f) {
28162817
return false;
28172818
}

ggml/src/ggml-cuda/softcap.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ static void softcap_f32_cuda(const float * x, float * dst, const float scale, co
1515
softcap_f32<<<num_blocks, CUDA_SOFTCAP_BLOCK_SIZE, 0, stream>>>(x, dst, scale, softcap, k);
1616
}
1717

18+
// fused GGML_OP_SCALE + GGML_UNARY_OP_TANH + GGML_OP_SCALE
1819
void ggml_cuda_op_softcap(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * src) {
1920
const ggml_tensor * src0 = src->src[0];
2021
const float * src0_d = (const float *)src0->data;

0 commit comments

Comments (0)