
Commit e63ee46

cleanup
1 parent da8a338 commit e63ee46

File tree

1 file changed: +5 / -5 lines changed


src/llama-model.cpp

Lines changed: 5 additions & 5 deletions
@@ -4515,8 +4515,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         const int64_t ssm_state_size        = hparams.ssm_d_state; // ssm_state_size
         const int64_t ssm_intermediate_size = hparams.ssm_d_inner; // TODO expand
         const int64_t ssm_num_heads         = hparams.ssm_dt_rank; // ssm_num_heads
-        const int64_t ssm_conv_dim          = ssm_mamba_d_ssm + 2 * ssm_n_groups * ssm_state_size;
-        const int64_t ssm_projection_size   = ssm_mamba_d_ssm + ssm_conv_dim + ssm_num_heads;
+        const int64_t ssm_conv_dim          = ssm_intermediate_size + 2 * ssm_n_groups * ssm_state_size;
+        const int64_t ssm_projection_size   = ssm_intermediate_size + ssm_conv_dim + ssm_num_heads;

         // attn params
         const int64_t attn_num_attention_head = hparams.n_head(0); // rename to: attn_num_attention_head
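Note: after the rename, the derived sizes read directly off the loaded hyperparameters. As a minimal standalone sketch of how the two derived quantities relate (the numeric values below are illustrative placeholders, not taken from any real Falcon-H1 checkpoint):

    #include <cstdint>
    #include <cstdio>

    int main() {
        // illustrative hyperparameters (hypothetical values, for demonstration only)
        const int64_t ssm_state_size        = 256;  // hparams.ssm_d_state
        const int64_t ssm_intermediate_size = 4096; // hparams.ssm_d_inner
        const int64_t ssm_num_heads         = 128;  // hparams.ssm_dt_rank
        const int64_t ssm_n_groups          = 1;    // SSM group count used in the hunk above

        // same formulas as the '+' lines in the hunk above
        const int64_t ssm_conv_dim        = ssm_intermediate_size + 2 * ssm_n_groups * ssm_state_size;
        const int64_t ssm_projection_size = ssm_intermediate_size + ssm_conv_dim + ssm_num_heads;

        printf("ssm_conv_dim        = %lld\n", (long long) ssm_conv_dim);
        printf("ssm_projection_size = %lld\n", (long long) ssm_projection_size);
        return 0;
    }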
@@ -4550,9 +4550,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), {1, ssm_num_heads}, 0);
         layer.ssm_d = create_tensor(tn(LLM_TENSOR_SSM_D, i), {1, ssm_num_heads}, 0);
         // ssm_norm
-        layer.ssm_norm = create_tensor(tn(LLM_TENSOR_SSM_NORM, "weight", i), {ssm_mamba_d_ssm / ssm_n_groups, ssm_n_groups}, 0);
+        layer.ssm_norm = create_tensor(tn(LLM_TENSOR_SSM_NORM, "weight", i), {ssm_intermediate_size / ssm_n_groups, ssm_n_groups}, 0);
         // out_proj
-        layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), {ssm_mamba_d_ssm, hidden_size}, 0);
+        layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), {ssm_intermediate_size, hidden_size}, 0);

         /*ATTENTION LAYERS*/
         // attention layers (with optional bias)
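Note: with the rename, the ssm_norm weight shape becomes {ssm_intermediate_size / ssm_n_groups, ssm_n_groups} and ssm_out becomes {ssm_intermediate_size, hidden_size}. A small sanity-check sketch of those shape relations (values are illustrative; create_tensor/tn are llama.cpp loader helpers and are not reproduced here):

    #include <cassert>
    #include <cstdint>

    int main() {
        // illustrative sizes, not from a real model
        const int64_t hidden_size           = 2048;
        const int64_t ssm_intermediate_size = 4096;
        const int64_t ssm_n_groups          = 4;

        // the grouped norm weight is stored as {d_inner / n_groups, n_groups},
        // so the group count must divide the intermediate size evenly
        assert(ssm_intermediate_size % ssm_n_groups == 0);
        const int64_t ssm_norm_shape[2] = { ssm_intermediate_size / ssm_n_groups, ssm_n_groups };

        // out_proj maps the SSM branch output back to the model width
        const int64_t ssm_out_shape[2]  = { ssm_intermediate_size, hidden_size };

        // the norm weight still covers exactly ssm_intermediate_size elements
        assert(ssm_norm_shape[0] * ssm_norm_shape[1] == ssm_intermediate_size);
        (void) ssm_out_shape;
        return 0;
    }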
@@ -14873,7 +14873,7 @@ struct llm_build_falcon_h1 : public llm_graph_context {

         // grouped RMS norm
         if (model.layers[il].ssm_norm) {
-            y = ggml_reshape_4d(ctx0, y, d_ssm / n_group, n_group, n_seq_tokens, n_seqs);
+            y = ggml_reshape_4d(ctx0, y, d_inner / n_group, n_group, n_seq_tokens, n_seqs);
             y = build_norm(y, model.layers[il].ssm_norm, NULL, LLM_NORM_RMS, il);
         }
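Note: the reshape splits the inner dimension into n_group groups so that the RMS norm, which ggml computes along the first tensor dimension, is applied independently per group. A minimal sketch of the same computation on plain arrays, for a single token with hypothetical sizes (not the ggml implementation itself):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // grouped RMS norm over one token: each of the n_group groups of size
    // d_inner / n_group is normalized by its own root-mean-square, then scaled
    // by a per-element weight (analogous to the ssm_norm tensor above).
    static void grouped_rms_norm(std::vector<float> & y, const std::vector<float> & w,
                                 int d_inner, int n_group, float eps = 1e-6f) {
        const int group_size = d_inner / n_group;
        for (int g = 0; g < n_group; ++g) {
            float ss = 0.0f;
            for (int i = 0; i < group_size; ++i) {
                const float v = y[g*group_size + i];
                ss += v*v;
            }
            const float scale = 1.0f / std::sqrt(ss/group_size + eps);
            for (int i = 0; i < group_size; ++i) {
                y[g*group_size + i] *= scale * w[g*group_size + i];
            }
        }
    }

    int main() {
        const int d_inner = 8, n_group = 2; // illustrative sizes
        std::vector<float> y(d_inner, 1.0f), w(d_inner, 1.0f);
        grouped_rms_norm(y, w, d_inner, n_group);
        printf("y[0] = %f\n", y[0]);
        return 0;
    }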
