ggml: add ggml_can_fuse_subgraph

am17an · am17an · commit b8a3661aad78 · 2025-10-20T01:52:42.000+08:00
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
@@ -647,6 +647,42 @@ static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx
     return ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
 }
 
+GGML_API bool ggml_can_fuse_subgraph_ext(
+        const struct ggml_cgraph * cgraph,   // graph whose nodes[] the indices below refer to
+        const int * node_idxs,               // indices into cgraph->nodes forming the candidate subgraph
+        int count,                           // number of entries in node_idxs and in ops
+        const enum ggml_op * ops,            // ops[i] is the op expected at node_idxs[i]
+        const int * inputs,                  // graph node indices treated as subgraph inputs
+        int num_inputs,                      // number of entries in inputs
+        const int * outputs,                 // graph node indices treated as subgraph outputs
+        int num_outputs);                    // number of entries in outputs
+
+// Returns true if the subgraph formed by the `count` consecutive nodes starting at node_idx
+// can be fused: checks whether all nodes which are not part of inputs/outputs can be elided
+// by checking that their uses are confined to the subgraph
+static inline bool ggml_can_fuse_subgraph(
+        const struct ggml_cgraph * cgraph,
+        int node_idx,
+        int count,
+        const enum ggml_op * ops,
+        const int * inputs,
+        int num_inputs,
+        const int * outputs,
+        int num_outputs) {
+    // idxs has room for 32 entries: reject ranges that would overflow it or leave the graph
+    if (node_idx < 0 || count < 0 || count > 32 || node_idx + count > cgraph->n_nodes) {
+        return false;
+    }
+
+    int idxs[32];
+
+    for (int i = 0; i < count; ++i) {
+        idxs[i] = node_idx + i;
+    }
+
+    return ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, inputs, num_inputs, outputs, num_outputs);
+}
+
 #ifdef __cplusplus
 }
 #endif
@@ -660,6 +696,23 @@ inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::
     return ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
 }
 
+// C++ convenience overload: takes ops/inputs/outputs as initializer lists and forwards to the
+// pointer-based ggml_can_fuse_subgraph, using ops.size() as the node count.
+inline bool ggml_can_fuse_subgraph(
+        const struct ggml_cgraph * cgraph,
+        int start_idx,
+        std::initializer_list<enum ggml_op> ops,
+        std::initializer_list<int> inputs = {},
+        std::initializer_list<int> outputs = {}) {
+    return ggml_can_fuse_subgraph(
+        cgraph,
+        start_idx,
+        (int)ops.size(),
+        ops.begin(),
+        inputs.begin(),  (int)inputs.size(),
+        outputs.begin(), (int)outputs.size());
+}
+
 // expose GGUF internals for test code
 GGML_API size_t gguf_type_size(enum gguf_type type);
 GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
@@ -6964,6 +6964,86 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
     GGML_LOG_INFO("========================================\n");
 }
 
+// Returns the position i in idxs[0..count) for which cgraph->nodes[idxs[i]] is `tensor`, or -1.
+static int ggml_find_tensor_node_list(const struct ggml_cgraph * cgraph, const int * idxs, int count, const struct ggml_tensor * tensor) {
+    if (idxs == NULL || cgraph == NULL) {
+        return -1;
+    }
+    for (int i = 0; i < count; ++i) {
+        const int node_idx = idxs[i];
+        // an entry that does not address a graph node ends the search with "not found"
+        if (node_idx < 0 || node_idx >= cgraph->n_nodes) {
+            return -1;
+        }
+        if (cgraph->nodes[node_idx] == tensor) {
+            return i;
+        }
+    }
+    return -1;
+}
+
+// Returns true if the nodes at node_idxs carry the given ops and every node that is in neither
+// inputs nor outputs has all of its uses inside the subgraph (so it can be elided when fusing).
+bool ggml_can_fuse_subgraph_ext(
+        const struct ggml_cgraph * cgraph,
+        const int * node_idxs,
+        int count,
+        const enum ggml_op * ops,
+        const int * inputs,
+        int num_inputs,
+        const int * outputs,
+        int num_outputs) {
+    // interior_nodes holds at most 32 entries; empty inputs/outputs lists are accepted
+    GGML_ASSERT(count < 32);
+    int interior_nodes_count = 0;
+    int interior_nodes[32];
+
+    for (int i = 0; i < count; ++i) {
+        // each index must address a graph node that carries the expected op
+        if (node_idxs[i] < 0 || node_idxs[i] >= cgraph->n_nodes || cgraph->nodes[node_idxs[i]]->op != ops[i]) {
+            return false;
+        }
+
+        const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
+
+        // a node flagged GGML_TENSOR_FLAG_OUTPUT makes the subgraph non-fusable
+        if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
+            return false;
+        }
+
+        // declared inputs/outputs are exempt from the use-count check below
+        if (ggml_find_tensor_node_list(cgraph, inputs, num_inputs, node) != -1 ||
+            ggml_find_tensor_node_list(cgraph, outputs, num_outputs, node) != -1) {
+            continue;
+        }
+
+        interior_nodes[interior_nodes_count++] = node_idxs[i];
+    }
+
+    // every use of an interior node must come from a node of this subgraph
+    for (int i = 0; i < interior_nodes_count; ++i) {
+        const int num_uses = ggml_node_get_use_count(cgraph, interior_nodes[i]);
+        const struct ggml_tensor * node = cgraph->nodes[interior_nodes[i]];
+
+        // count the src slots of the subgraph's nodes that reference this node
+        int subgraph_uses = 0;
+        for (int j = 0; j < count; ++j) {
+            const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
+            for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
+                if (other_node->src[src_idx] == node) {
+                    subgraph_uses++;
+                }
+            }
+        }
+
+        if (subgraph_uses != num_uses) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 // check if node is part of the graph
 static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
     if (cgraph == NULL) {