Skip to content

Commit 91cd070

Browse files
committed
fix for qwen3 too
1 parent 3449842 commit 91cd070

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2974,7 +2974,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
2974 2974   ggml_cuda_topk_moe_ops(/*with_norm=*/false, /*delayed_softmax=*/true);
2975 2975
2976 2976   if (ops.size() == topk_moe_ops_with_norm.size() &&
2977      - ggml_can_fuse_subgraph(cgraph, node_idx, ops, { node_idx + 3, node_idx + 8 })) {
     2977 + ggml_can_fuse_subgraph(cgraph, node_idx, ops, { node_idx + 3, node_idx + 9 })) {
2978 2978   ggml_tensor * softmax = cgraph->nodes[node_idx];
2979 2979   ggml_tensor * weights = cgraph->nodes[node_idx + 9];
2980 2980

0 commit comments

Comments
 (0)