@@ -2832,7 +2832,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
         ggml_cuda_topk_moe_ops(/*with_norm=*/false, /*delayed_softmax=*/true);
 
     if (ops.size() == topk_moe_ops_with_norm.size() &&
-        ggml_can_fuse_subgraph(cgraph, node_idx, topk_moe_ops_with_norm, { node_idx + 3, node_idx + 8 })) {
+        ggml_can_fuse_subgraph(cgraph, node_idx, ops, { node_idx + 3, node_idx + 8 })) {
         ggml_tensor * softmax = cgraph->nodes[node_idx];
         ggml_tensor * weights = cgraph->nodes[node_idx + 8];
 
@@ -2842,7 +2842,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
     }
 
     if (ops.size() == topk_moe_ops.size() &&
-        ggml_can_fuse_subgraph(cgraph, node_idx, topk_moe_ops, { node_idx + 3, node_idx + 4 })) {
+        ggml_can_fuse_subgraph(cgraph, node_idx, ops, { node_idx + 3, node_idx + 4 })) {
         ggml_tensor * softmax = cgraph->nodes[node_idx];
         ggml_tensor * weights = cgraph->nodes[node_idx + 4];
         if (ggml_cuda_should_use_topk_moe(softmax, weights)) {
@@ -2851,7 +2851,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
     }
 
     if (ops.size() == topk_moe_ops_delayed_softmax.size() &&
-        ggml_can_fuse_subgraph(cgraph, node_idx, topk_moe_ops_delayed_softmax, { node_idx + 2, node_idx + 5 })) {
+        ggml_can_fuse_subgraph(cgraph, node_idx, ops, { node_idx + 2, node_idx + 5 })) {
         ggml_tensor * softmax = cgraph->nodes[node_idx + 4];
         ggml_tensor * weights = cgraph->nodes[node_idx + 5];
 
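
The diff above passes the caller-provided op list `ops` to ggml_can_fuse_subgraph instead of the static template list, so the size comparison and the subgraph match always run against the same sequence. Below is a minimal, self-contained C++ sketch of that pattern; the types and helpers (Op, can_fuse_subgraph, can_fuse) are hypothetical stand-ins, not ggml API, and the op sequence is illustrative only.

// Sketch: check the candidate sequence `ops` against the graph, using the
// template list only to pick which pattern the size corresponds to.
#include <cstdio>
#include <initializer_list>
#include <vector>

enum class Op { SOFT_MAX, ARGSORT, VIEW, GET_ROWS, RESHAPE };

// Hypothetical stand-in for ggml_can_fuse_subgraph(): true if the graph nodes
// starting at node_idx match `ops` exactly.
static bool can_fuse_subgraph(const std::vector<Op> & graph, size_t node_idx,
                              std::initializer_list<Op> ops) {
    if (node_idx + ops.size() > graph.size()) {
        return false;
    }
    size_t i = node_idx;
    for (Op op : ops) {
        if (graph[i++] != op) {
            return false;
        }
    }
    return true;
}

static bool can_fuse(const std::vector<Op> & graph, size_t node_idx,
                     std::initializer_list<Op> ops) {
    // Template for one fusable pattern (illustrative only).
    static const std::initializer_list<Op> topk_moe_ops =
        { Op::SOFT_MAX, Op::ARGSORT, Op::VIEW, Op::GET_ROWS, Op::RESHAPE };

    // As in the diff: the size is compared against the template, but the
    // subgraph check itself runs on the same `ops` the caller supplied.
    if (ops.size() == topk_moe_ops.size() && can_fuse_subgraph(graph, node_idx, ops)) {
        return true;
    }
    return false;
}

int main() {
    std::vector<Op> graph = { Op::SOFT_MAX, Op::ARGSORT, Op::VIEW, Op::GET_ROWS, Op::RESHAPE };
    bool fused = can_fuse(graph, 0, { Op::SOFT_MAX, Op::ARGSORT, Op::VIEW, Op::GET_ROWS, Op::RESHAPE });
    std::printf("fusable: %s\n", fused ? "yes" : "no");
    return 0;
}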