
Commit 7a25441

fixed multipliers
1 parent 9760c8b commit 7a25441

3 files changed: +21 −20 lines changed


convert_hf_to_gguf.py

Lines changed: 10 additions & 10 deletions
@@ -6645,11 +6645,11 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_key_length(self.hparams["head_dim"])
         self.gguf_writer.add_value_length(self.hparams["head_dim"])
-        self.gguf_writer.add_float32("falcon_h1.key_multiplier", self.hparams["key_multiplier"])
+        self.gguf_writer.add_float64("falcon_h1.key_multiplier", self.hparams["key_multiplier"])
 
         ## Other params
-        self.gguf_writer.add_float32("falcon_h1.lm_head_multiplier", self.hparams["lm_head_multiplier"])
-        self.gguf_writer.add_float32("falcon_h1.embedding_multiplier", self.hparams["embedding_multiplier"])
+        self.gguf_writer.add_float64("falcon_h1.lm_head_multiplier", self.hparams["lm_head_multiplier"])
+        self.gguf_writer.add_float64("falcon_h1.embedding_multiplier", self.hparams["embedding_multiplier"])
 
         ## Validation ##
         assert self.hparams.get("hidden_act") in [None, "silu"], "Only SILU activation supported"
@@ -6666,15 +6666,15 @@ def set_gguf_parameters(self):
                                            self.find_hparam(["num_attention_heads"]))
 
         # Add multipliers as metadata instead of tensors
-        self.gguf_writer.add_float32("falcon_h1.attention_in_multiplier", self.attention_in_multiplier)
-        self.gguf_writer.add_float32("falcon_h1.attention_out_multiplier", self.attention_out_multiplier)
-        self.gguf_writer.add_float32("falcon_h1.ssm_in_multiplier", self.ssm_in_multiplier)
-        self.gguf_writer.add_float32("falcon_h1.ssm_out_multiplier", self.ssm_out_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.attention_in_multiplier", self.attention_in_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.attention_out_multiplier", self.attention_out_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.ssm_in_multiplier", self.ssm_in_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.ssm_out_multiplier", self.ssm_out_multiplier)
 
         # Add MLP multipliers
         if isinstance(self.mlp_multipliers, (list, tuple)) and len(self.mlp_multipliers) == 2:
-            self.gguf_writer.add_float32("falcon_h1.mlp_gate_multiplier", self.mlp_multipliers[0])
-            self.gguf_writer.add_float32("falcon_h1.mlp_down_multiplier", self.mlp_multipliers[1])
+            self.gguf_writer.add_float64("falcon_h1.mlp_gate_multiplier", self.mlp_multipliers[0])
+            self.gguf_writer.add_float64("falcon_h1.mlp_down_multiplier", self.mlp_multipliers[1])
 
         # Add has MuP flag if SSM multipliers are present
         if self.ssm_multipliers is not None:
@@ -6684,7 +6684,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_bool("falcon_h1.mamba_use_mlp", self.find_hparam(["mamba_use_mlp"], optional=True))
         self.gguf_writer.add_bool("falcon_h1.mamba_norm_before_gate", self.find_hparam(["mamba_norm_before_gate"], optional=True))
         self.gguf_writer.add_bool("falcon_h1.mamba_rms_norm", self.find_hparam(["mamba_rms_norm"], optional=True))
-        self.gguf_writer.add_float32("falcon_h1.rope_theta", self.find_hparam(["rope_theta"], optional=True))
+        self.gguf_writer.add_float64("falcon_h1.rope_theta", self.find_hparam(["rope_theta"], optional=True))
 
 ###### CONVERSION LOGIC ######
 
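Note (not part of the commit): with these changes the converter stores the Falcon-H1 multipliers as 64-bit GGUF metadata (FLOAT64) rather than 32-bit. A converted file can be spot-checked through ggml's gguf C API; the sketch below is illustrative only — the file name is hypothetical, and in older source trees the gguf declarations live in ggml.h rather than gguf.h.

// check_multiplier.cpp -- illustrative read-back of one of the new F64 keys
#include <cstdio>
#include "gguf.h"   // ggml's GGUF reader/writer API (ggml.h in older trees)

int main(int argc, char ** argv) {
    const char * path = argc > 1 ? argv[1] : "falcon-h1.gguf";   // hypothetical model file

    struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
    struct gguf_context * ctx = gguf_init_from_file(path, params);
    if (ctx == nullptr) {
        std::fprintf(stderr, "failed to open %s\n", path);
        return 1;
    }

    const auto kid = gguf_find_key(ctx, "falcon_h1.key_multiplier");
    if (kid >= 0 && gguf_get_kv_type(ctx, kid) == GGUF_TYPE_FLOAT64) {
        std::printf("falcon_h1.key_multiplier = %.17g (stored as F64)\n", gguf_get_val_f64(ctx, kid));
    } else {
        std::printf("key missing or not stored as F64\n");
    }

    gguf_free(ctx);
    return 0;
}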
src/llama-hparams.h

Lines changed: 10 additions & 10 deletions
@@ -122,17 +122,17 @@ struct llama_hparams {
     bool mamba_use_mlp = false;
     bool mamba_norm_before_gate = false;
     bool mamba_rms_norm = false;
-    float attention_in_multiplier = 1.0f;
-    float attention_out_multiplier = 1.0f;
-    float ssm_in_multiplier = 1.0f;
-    float ssm_out_multiplier = 1.0f;
-    float mlp_gate_multiplier = 1.0f;
-    float mlp_down_multiplier = 1.0f;
-    float key_multiplier = 1.0f;
-    float lm_head_multiplier = 1.0f;
-    float rope_theta = 10000.0f;
+    double attention_in_multiplier = 1.0;
+    double attention_out_multiplier = 1.0;
+    double ssm_in_multiplier = 1.0;
+    double ssm_out_multiplier = 1.0;
+    double mlp_gate_multiplier = 1.0;
+    double mlp_down_multiplier = 1.0;
+    double key_multiplier = 1.0;
+    double lm_head_multiplier = 1.0;
+    double rope_theta = 10000.0;
+    double embedding_multiplier = 1.0;
     bool ssm_has_mup = false;
-    float embedding_multiplier = 1.0f;
     uint32_t vocab_size = 0;
     uint32_t intermediate_size = 0;
     float mamba_expand = 0.0f;
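Widening these llama_hparams fields from float to double mirrors the F64 metadata above, so the values from the original HF config survive the GGUF round trip without an extra narrowing step. ggml graph ops such as ggml_scale() still take a float, so any rounding happens only at the point where a multiplier is finally applied. The standalone snippet below is not from the commit and uses a made-up multiplier value; it just illustrates the representational difference between the two field types.

// precision_note.cpp -- why a double-typed multiplier field can matter
#include <cstdio>

int main() {
    const double key_multiplier = 0.08838834764831845;            // hypothetical HF config value
    const float  as_f32         = static_cast<float>(key_multiplier);

    std::printf("stored as double: %.17g\n", key_multiplier);
    std::printf("stored as float : %.17g\n", static_cast<double>(as_f32));
    // The double field preserves the metadata exactly; a float field would already
    // have rounded it here, before the value ever reaches the compute graph.
    return 0;
}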

src/llama-model-loader.cpp

Lines changed: 1 addition & 0 deletions
@@ -400,6 +400,7 @@ namespace GGUFMeta {
 
     template bool llama_model_loader::get_key<bool>       (enum llm_kv kid, bool & result, bool required);
     template bool llama_model_loader::get_key<float>      (enum llm_kv kid, float & result, bool required);
+    template bool llama_model_loader::get_key<double>     (enum llm_kv kid, double & result, bool required);
     template bool llama_model_loader::get_key<uint32_t>   (enum llm_kv kid, uint32_t & result, bool required);
     template bool llama_model_loader::get_key<std::string>(enum llm_kv kid, std::string & result, bool required);
 
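llama_model_loader::get_key is a template whose definition lives in this .cpp file, so every result type requested from other translation units needs an explicit instantiation next to the existing bool/float/uint32_t/std::string ones; without the added get_key<double> line, reading the now-double hparams fields would fail at link time with an undefined-symbol error. The self-contained sketch below reproduces the pattern with toy names (loader, kv) — it is not the llama.cpp implementation.

// instantiation_sketch.cpp -- toy version of the explicit-instantiation pattern
#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>

struct loader {
    std::map<std::string, double> kv;   // toy metadata store

    template <typename T>
    bool get_key(const std::string & key, T & result, bool required);
};

// In llama.cpp the template body sits in llama-model-loader.cpp; callers in other
// .cpp files only see the declaration, so each T they use must be instantiated here.
template <typename T>
bool loader::get_key(const std::string & key, T & result, bool required) {
    const auto it = kv.find(key);
    if (it == kv.end()) {
        if (required) {
            throw std::runtime_error("missing key: " + key);
        }
        return false;
    }
    result = static_cast<T>(it->second);
    return true;
}

template bool loader::get_key<float> (const std::string &, float  &, bool);
template bool loader::get_key<double>(const std::string &, double &, bool);   // analogue of the added line

int main() {
    loader ml;
    ml.kv["falcon_h1.key_multiplier"] = 0.5;   // toy value

    double key_multiplier = 1.0;
    ml.get_key("falcon_h1.key_multiplier", key_multiplier, /*required =*/ false);
    std::printf("key_multiplier = %g\n", key_multiplier);
    return 0;
}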