
Commit d473d42

more cleanups
1 parent e63ee46 commit d473d42

4 files changed (+8, -11 lines)

convert_hf_to_gguf.py

Lines changed: 6 additions & 7 deletions
@@ -6673,17 +6673,16 @@ def set_gguf_parameters(self):
 
 
         # Add Falcon Mamba2 specific configuration
-        self.gguf_writer.add_uint32("falcon_h1.attention.head_dim", self.hparams["head_dim"])
-        self.gguf_writer.add_uint32("falcon_h1.ssm.mamba_d_inner", self.hparams["mamba_d_ssm"])
+        self.gguf_writer.add_ssm_head_dim(self.hparams["mamba_d_head"])
         self.gguf_writer.add_ssm_inner_size(self.hparams["mamba_d_ssm"])
-        self.gguf_writer.add_uint32("falcon_h1.num_attention_heads", self.find_hparam(["num_attention_heads"]))
-        self.gguf_writer.add_uint32("falcon_h1.num_key_value_heads",
-                                    self.find_hparam(["num_key_value_heads"], optional=True) or
-                                    self.find_hparam(["num_attention_heads"]))
+        self.gguf_writer.add_head_count(self.find_hparam(["num_attention_heads"]))
+        self.gguf_writer.add_key_length(self.hparams["head_dim"])
+        self.gguf_writer.add_value_length(self.hparams["head_dim"])
+        self.gguf_writer.add_head_count_kv(self.find_hparam(["num_key_value_heads"], optional=True) or
+                                           self.find_hparam(["num_attention_heads"]))
 
 
         # Add any other Falcon Mamba2 specific configuration
-        self.gguf_writer.add_bool("falcon_h1.mamba_rms_norm", self.find_hparam(["mamba_rms_norm"], optional=True))
         self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
 
 ###### CONVERSION LOGIC ######
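
Note: the cleanup above replaces the hand-written add_uint32("falcon_h1.…", …) calls with the generic GGUFWriter helpers, so the converter emits the standard per-architecture keys instead of Falcon-H1-only ones. As a rough sketch of what those helpers write (assuming gguf-py's usual "<arch>.*" key mapping; the values below are placeholders, not taken from this commit):

# Illustrative sketch only: placeholder values, assumes gguf-py's GGUFWriter
# and its standard "<arch>.*" key names; not code from this commit.
from gguf import GGUFWriter

writer = GGUFWriter("falcon-h1-example.gguf", arch="falcon_h1")

writer.add_head_count(32)         # falcon_h1.attention.head_count
writer.add_head_count_kv(8)       # falcon_h1.attention.head_count_kv
writer.add_key_length(128)        # falcon_h1.attention.key_length
writer.add_value_length(128)      # falcon_h1.attention.value_length
writer.add_ssm_inner_size(4096)   # falcon_h1.ssm.inner_size
writer.add_rope_freq_base(1e6)    # falcon_h1.rope.freq_base

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()

Using the shared helpers keeps the keys consistent with other architectures, which is presumably why the falcon_h1.attention.head_dim entry can be dropped from llama-arch below.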

src/llama-arch.cpp

Lines changed: 1 addition & 2 deletions
@@ -128,8 +128,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EMBEDDING_SCALE,           "%s.embedding_scale"           },
     { LLM_KV_TOKEN_SHIFT_COUNT,         "%s.token_shift_count"         },
     { LLM_KV_INTERLEAVE_MOE_LAYER_STEP, "%s.interleave_moe_layer_step" },
-    { LLM_KV_ATTN_HEAD_DIM,             "%s.attention.head_dim"        },
-
+
     { LLM_KV_ATTENTION_HEAD_COUNT,      "%s.attention.head_count"      },
     { LLM_KV_ATTENTION_HEAD_COUNT_KV,   "%s.attention.head_count_kv"   },
     { LLM_KV_ATTENTION_MAX_ALIBI_BIAS,  "%s.attention.max_alibi_bias"  },
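
The entries in LLM_KV_NAMES are printf-style patterns: "%s" is replaced by the architecture name when a key is resolved, which is why a Falcon-H1-only "%s.attention.head_dim" entry becomes redundant once the converter writes the standard attention keys. A small illustrative sketch of that substitution (a Python stand-in, not the llama.cpp lookup code):

# Illustrative only: the real table lives in llama-arch.cpp; this mimics the
# "%s" -> architecture-name substitution with two of the entries above.
LLM_KV_NAMES = {
    "LLM_KV_ATTENTION_HEAD_COUNT":    "%s.attention.head_count",
    "LLM_KV_ATTENTION_HEAD_COUNT_KV": "%s.attention.head_count_kv",
}

def kv_name(kv: str, arch: str = "falcon_h1") -> str:
    # e.g. "LLM_KV_ATTENTION_HEAD_COUNT" -> "falcon_h1.attention.head_count"
    return LLM_KV_NAMES[kv] % arch

print(kv_name("LLM_KV_ATTENTION_HEAD_COUNT"))
print(kv_name("LLM_KV_ATTENTION_HEAD_COUNT_KV"))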

src/llama-arch.h

Lines changed: 0 additions & 1 deletion
@@ -158,7 +158,6 @@ enum llm_kv {
     LLM_KV_ATTENTION_LAYER_INDICES,
 
     // Falcon-H1 specific
-    LLM_KV_ATTN_HEAD_DIM,
     LLM_KV_SSM_HEAD_DIM,
     LLM_KV_N_LAYER,
     LLM_KV_FALCON_H1_MAMBA_RMS_NORM,

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
@@ -4550,7 +4550,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), {1, ssm_num_heads}, 0);
             layer.ssm_d = create_tensor(tn(LLM_TENSOR_SSM_D, i), {1, ssm_num_heads}, 0);
             // ssm_norm
-            layer.ssm_norm = create_tensor(tn(LLM_TENSOR_SSM_NORM, "weight", i), {ssm_intermediate_size / ssm_n_groups, ssm_n_groups}, 0);
+            layer.ssm_norm = create_tensor(tn(LLM_TENSOR_SSM_NORM, "weight", i), {ssm_intermediate_size / ssm_n_groups, ssm_n_groups}, 1);
             // out_proj
             layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), {ssm_intermediate_size, hidden_size}, 0);
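
The last argument to create_tensor is a flags value; changing it from 0 to 1 appears to mark ssm_norm with the loader's "not required" flag, so a GGUF without that tensor can still load, which fits the removal of the falcon_h1.mamba_rms_norm bool in the converter above. A toy Python analogue of that optional-tensor behaviour, purely illustrative and not llama.cpp code:

# Toy analogue only: names and flag value are made up for illustration and do
# not come from llama.cpp.
TENSOR_NOT_REQUIRED = 1

def create_tensor(tensors: dict, name: str, flags: int = 0):
    if name in tensors:
        return tensors[name]
    if flags & TENSOR_NOT_REQUIRED:
        return None  # missing but tolerated
    raise KeyError(f"required tensor not found: {name}")

weights = {"blk.0.ssm_out.weight": object()}
print(create_tensor(weights, "blk.0.ssm_norm.weight", TENSOR_NOT_REQUIRED))  # None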
