@@ -2826,7 +2826,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
28262826 ggml_cuda_topk_moe_ops (/* with_norm=*/ false , /* delayed_softmax=*/ true );
28272827
28282828 if (ops.size () == topk_moe_ops_with_norm.size () &&
2829- ggml_can_fuse_subgraph (cgraph, node_idx, topk_moe_ops_with_norm , { node_idx + 3 , node_idx + 8 })) {
2829+ ggml_can_fuse_subgraph (cgraph, node_idx, ops , { node_idx + 3 , node_idx + 8 })) {
28302830 ggml_tensor * softmax = cgraph->nodes [node_idx];
28312831 ggml_tensor * weights = cgraph->nodes [node_idx+8 ];
28322832
@@ -2836,7 +2836,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
28362836 }
28372837
28382838 if (ops.size () == topk_moe_ops.size () &&
2839- ggml_can_fuse_subgraph (cgraph, node_idx, topk_moe_ops , { node_idx + 3 , node_idx + 4 })) {
2839+ ggml_can_fuse_subgraph (cgraph, node_idx, ops , { node_idx + 3 , node_idx + 4 })) {
28402840 ggml_tensor * softmax = cgraph->nodes [node_idx];
28412841 ggml_tensor * weights = cgraph->nodes [node_idx+4 ];
28422842 if (ggml_cuda_should_use_topk_moe (softmax, weights)) {
@@ -2845,7 +2845,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
28452845 }
28462846
28472847 if (ops.size () == topk_moe_ops_delayed_softmax.size () &&
2848- ggml_can_fuse_subgraph (cgraph, node_idx, topk_moe_ops_delayed_softmax , { node_idx + 2 , node_idx + 5 })) {
2848+ ggml_can_fuse_subgraph (cgraph, node_idx, ops , { node_idx + 2 , node_idx + 5 })) {
28492849 ggml_tensor * softmax = cgraph->nodes [node_idx + 4 ];
28502850 ggml_tensor * weights = cgraph->nodes [node_idx + 5 ];
28512851
0 commit comments