Skip to content

Commit 4f50543

Browse files
committed
fix: Only do Granite multipliers if set
Branch: GraniteFour

Signed-off-by: Gabe Goodhart <[email protected]>
1 parent 6ffa2d3 commit 4f50543

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

src/llama-model.cpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12965,7 +12965,9 @@ struct llm_build_hybrid_mamba : public llm_graph_context {
1296512965
}
1296612966

1296712967
// For Granite architectures - scale residual
12968-
cur = ggml_scale(ctx0, cur, hparams.f_residual_scale);
12968+
if (hparams.f_residual_scale) {
12969+
cur = ggml_scale(ctx0, cur, hparams.f_residual_scale);
12970+
}
1296912971
ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
1297012972
cb(ffn_inp, "ffn_inp", il);
1297112973

@@ -13023,7 +13025,9 @@ struct llm_build_hybrid_mamba : public llm_graph_context {
1302313025
}
1302413026

1302513027
// For Granite architectures - scale residual
13026-
cur = ggml_scale(ctx0, cur, hparams.f_residual_scale);
13028+
if (hparams.f_residual_scale) {
13029+
cur = ggml_scale(ctx0, cur, hparams.f_residual_scale);
13030+
}
1302713031
cur = ggml_add(ctx0, cur, ffn_inp);
1302813032
cb(cur, "ffn_out", il);
1302913033

@@ -13047,7 +13051,9 @@ struct llm_build_hybrid_mamba : public llm_graph_context {
1304713051
cur = build_lora_mm(model.output, cur);
1304813052

1304913053
// For Granite architectures - scale logits
13050-
cur = ggml_scale(ctx0, cur, 1.0f / hparams.f_logit_scale);
13054+
if (hparams.f_logit_scale) {
13055+
cur = ggml_scale(ctx0, cur, 1.0f / hparams.f_logit_scale);
13056+
}
1305113057
cb(cur, "result_output", -1);
1305213058
res->t_logits = cur;
1305313059

0 commit comments

Comments
 (0)