Skip to content

Commit c8554b6

Browse files
authored
graph : use fill instead of scale_bias in grouped expert selection (ggml-org#17867)
* use fill instead of scale_bias in grouped expert selection
* do not explicitly use _inplace
1 parent 2fa51c1 commit c8554b6

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

src/llama-graph.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -973,7 +973,7 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
973973

974974
// mask out the other groups
975975
selection_probs = ggml_get_rows(ctx0, selection_groups, expert_groups); // [n_exp_per_group, n_group_used, n_tokens]
976-
selection_probs = ggml_set_rows(ctx0, ggml_scale_bias(ctx0, selection_groups, 0.0f, -INFINITY), selection_probs, expert_groups); // [n_exp_per_group, n_expert_groups, n_tokens]
976+
selection_probs = ggml_set_rows(ctx0, ggml_fill(ctx0, selection_groups, -INFINITY), selection_probs, expert_groups); // [n_exp_per_group, n_expert_groups, n_tokens]
977977
selection_probs = ggml_reshape_2d(ctx0, selection_probs, n_expert, n_tokens); // [n_expert, n_tokens]
978978
cb(selection_probs, "ffn_moe_probs_masked", il);
979979
}

0 commit comments

Comments
 (0)