Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 26 additions & 9 deletions ggml/src/ggml-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -570,27 +570,27 @@ static inline bool ggml_node_has_n_uses(const struct ggml_cgraph * cgraph, int n
return true;
}

// Returns true if nodes [i, i+ops.size()) are the sequence of ggml_ops in ops[]
// Returns true if nodes with indices { node_idxs } are the sequence of ggml_ops in ops[]
// and are fusable. Nodes are considered fusable according to this function if:
// - all nodes except the last have only one use and are not views/outputs (see ggml_node_has_N_uses).
// - all nodes except the last are a src of the following node.
// - all nodes are the same shape.
// TODO: Consider allowing GGML_OP_NONE nodes in between
static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, const enum ggml_op * ops, int num_ops) {
if (node_idx + num_ops > cgraph->n_nodes) {
return false;
}

static inline bool ggml_can_fuse_ext(const struct ggml_cgraph * cgraph, const int * node_idxs, const enum ggml_op * ops, int num_ops) {
for (int i = 0; i < num_ops; ++i) {
struct ggml_tensor * node = cgraph->nodes[node_idx + i];
if (node_idxs[i] + num_ops > cgraph->n_nodes) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why add num_ops here? Also, should be >=, i think.

return false;
}

struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
if (node->op != ops[i]) {
return false;
}
if (i < num_ops - 1 && !ggml_node_has_n_uses(cgraph, node_idx + i, 1)) {
if (i < num_ops - 1 && !ggml_node_has_n_uses(cgraph, node_idxs[i], 1)) {
return false;
}
if (i > 0) {
struct ggml_tensor * prev = cgraph->nodes[node_idx + i - 1];
struct ggml_tensor * prev = cgraph->nodes[node_idxs[i - 1]];
if (node->src[0] != prev && node->src[1] != prev) {
return false;
}
Expand All @@ -602,6 +602,18 @@ static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx
return true;
}

// same as above, for sequential indices starting at node_idx
static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, const enum ggml_op * ops, int num_ops) {
assert(num_ops < 32);

int idxs[32];
for (int i = 0; i < num_ops; ++i) {
idxs[i] = node_idx + i;
}

return ggml_can_fuse_ext(cgraph, idxs, ops, num_ops);
}

#ifdef __cplusplus
}
#endif
Expand All @@ -615,6 +627,11 @@ inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::
return ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size());
}

inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, std::initializer_list<int> node_idx, std::initializer_list<enum ggml_op> ops) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not totally clear on how you foresee this function being used. I assume the caller needs to look through nodes and find the ops its interested in (skipping empty nodes), so I dont think it'll be able to use an initializer list for the node idxs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is to create a list of indices that excludes the empty nodes in idxs. And then use this like so:

    // non-empty node indices
    std::vector<int> idxs;

    bool can_fuse(int i0, int i1, std::initializer_list<enum ggml_op> ops) const {
        assert(use_fusion);
        assert(i0 >= 0 && i0 < n_nodes());
        assert(i1 >= 0 && i1 < n_nodes());
        assert(ops.size() == 2);

        return ggml_can_fuse(gf, { idxs[i0], idxs[i1] }, ops);
    }

Btw, I really need an API to tell if 2 ops are fusable - don't think there is any benefit to support more than a pair of ops. The assumption is that if we can fuse N ops, then for sure we can fuse N-1 ops. So at step N we only need to check if ops N and N+1 are fusable - no need to iterate again over all previous ops.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you have to rebuild the initializer_list from the vector, why not just pass an int* instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I can do that. Removed the extra overload.

assert(node_idx.size() == ops.size());
return ggml_can_fuse_ext(cgraph, node_idx.begin(), ops.begin(), (int)ops.size());
}

// expose GGUF internals for test code
GGML_API size_t gguf_type_size(enum gguf_type type);
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
Expand Down
Loading