Commit 433782b

Fix to use hidden_size_per_head
1 parent 62c3b64

2 files changed: 6 additions & 4 deletions


convert_hf_to_gguf.py

Lines changed: 2 additions & 1 deletion

@@ -4204,8 +4204,9 @@ def set_gguf_parameters(self):
 
         self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 2048))
         self.gguf_writer.add_embedding_length(hparams.get("hidden_size", 4096))
+        self.gguf_writer.add_features_length(hparams.get("hidden_size_per_head", 128))
         self.gguf_writer.add_block_count(block_count)
-        self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 32))
+        self.gguf_writer.add_wkv_head_size(hparams.get("num_attention_heads", 32))
         self.gguf_writer.add_layer_norm_rms_eps(hparams.get("rms_norm_eps", 1e-06))
         self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 10000))
 
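For context, a minimal Python sketch of the conversion-side change, assuming a Hugging Face style config dict: the per-head size is read directly from hidden_size_per_head when present, with the same fallback of 128 used above, and the attention head count is now recorded via add_wkv_head_size rather than add_head_count. The config values below are hypothetical, not taken from a real checkpoint.

# Minimal sketch, assuming a Hugging Face style config dict.
# The values are hypothetical, not from a real checkpoint.

hparams = {
    "hidden_size": 4096,
    "num_attention_heads": 32,
    "hidden_size_per_head": 128,
}

# Read the per-head size explicitly, falling back to 128 as in the diff above.
features_length = hparams.get("hidden_size_per_head", 128)

# The attention head count still comes from the config, but it is now
# written through add_wkv_head_size instead of the generic head count.
wkv_head_size = hparams.get("num_attention_heads", 32)

print(features_length, wkv_head_size)   # 128 32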

src/llama-model.cpp

Lines changed: 4 additions & 3 deletions

@@ -3371,11 +3371,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 const uint32_t d_state = hparams.ssm_d_state;
                 const uint32_t num_heads = hparams.ssm_dt_rank;
                 const uint32_t intermediate_size = hparams.ssm_d_inner;
-                const uint32_t head_dim = intermediate_size / num_heads;
+                const uint32_t head_dim = hparams.wkv_head_size;
                 const uint32_t qk_dim = head_dim;
                 const uint32_t v_dim = head_dim;
-                const int64_t num_attention_heads = hparams.n_head();
-                const int64_t q_num_heads = num_attention_heads;
                 const int64_t dt_dim = std::max(64, int(hparams.n_embd / 16));
 
                 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
@@ -3392,6 +3390,9 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     auto & layer = layers[i];
                     bool is_mamba_layer = hparams.is_recurrent(i);
 
+                    const int64_t num_attention_heads = hparams.n_head_kv_arr[i];
+                    const int64_t q_num_heads = num_attention_heads;
+
                    layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
 
                     if (is_mamba_layer) {
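On the loader side, the same idea in a short Python sketch, under the assumption of a hybrid model where only some layers carry attention: the head dimension now comes from the stored wkv_head_size metadata instead of being derived as intermediate_size / num_heads, and the attention head count is resolved per layer inside the layer loop. The metadata values and the layer layout below are hypothetical.

# Minimal sketch of the loader-side idea in plain Python. The metadata and
# layer layout are hypothetical, not read from a real GGUF file.

metadata = {
    "wkv_head_size": 128,                 # written by the converter above
    "ssm_d_inner": 8192,                  # hypothetical SSM inner size
    "ssm_dt_rank": 32,                    # hypothetical number of SSM heads
    "n_head_per_layer": [0, 32, 0, 32],   # 0 marks recurrent (Mamba) layers
}

# Old derivation: inner size divided by the SSM head count.
head_dim_old = metadata["ssm_d_inner"] // metadata["ssm_dt_rank"]
# 256 with these hypothetical values -- not necessarily the true per-head size.

# New: take the head dimension straight from the stored metadata.
head_dim = metadata["wkv_head_size"]
qk_dim = head_dim
v_dim = head_dim

# New: resolve the attention head count per layer, inside the layer loop,
# instead of once for the whole model.
for i, n_head in enumerate(metadata["n_head_per_layer"]):
    if n_head == 0:
        continue  # recurrent layer: no attention heads to size
    q_num_heads = n_head
    print(f"layer {i}: {q_num_heads} heads x {head_dim} dims per head")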
