Skip to content

Commit 32a3f26

Browse files
CISCam17an
authored and committed
use clamping instead, update number and add comment
1 parent 4267d3e commit 32a3f26

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

src/llama-graph.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,8 +1006,9 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
10061006
ggml_tensor * weights_sum = ggml_sum_rows(ctx0, weights); // [1, n_tokens]
10071007
cb(weights_sum, "ffn_moe_weights_sum", il);
10081008

1009-
weights_sum = ggml_scale_bias(ctx0, weights_sum, 1.0, 1e-20);
1010-
cb(weights_sum, "ffn_moe_weights_sum_biased", il);
1009+
// Avoid division by zero: clamp to the smallest positive normal number representable by F16 (2^-14)
1010+
weights_sum = ggml_clamp(ctx0, weights_sum, 6.103515625e-5, INFINITY);
1011+
cb(weights_sum, "ffn_moe_weights_sum_clamped", il);
10111012

10121013
weights = ggml_div(ctx0, weights, weights_sum); // [n_expert_used, n_tokens]
10131014
cb(weights, "ffn_moe_weights_norm", il);

0 commit comments

Comments
 (0)