
Commit 05b52fa

update attention temp length metadata
1 parent a8fa83f commit 05b52fa


2 files changed: +3 -3 lines changed

src/llama-hparams.h

Lines changed: 2 additions & 2 deletions
@@ -136,8 +136,8 @@ struct llama_hparams {
     float f_attention_scale = 0.0f;

     // grok-2
-    float f_attn_out_scale = 0.0f;
-    float f_attn_temp_len = 0.0f;
+    float    f_attn_out_scale = 0.0f;
+    uint32_t attn_temp_length = 0;

     bool causal_attn = true;
     bool use_alibi = false;

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
@@ -702,7 +702,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_ROUTER_LOGIT_SOFTCAPPING, hparams.f_router_logit_softcapping, false);
                 ml.get_key(LLM_KV_FINAL_LOGIT_SOFTCAPPING, hparams.f_final_logit_softcapping, false);

-                ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_LENGTH, hparams.f_attn_temp_len, false);
+                ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_LENGTH, hparams.attn_temp_length, false);

                 switch (hparams.n_layer) {
                     case 64: type = LLM_TYPE_314B; break;
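
For context, below is a minimal standalone sketch (not llama.cpp code) of the net effect of this commit: the Grok-2 attention temperature length is now stored as an integer hyperparameter, attn_temp_length, and read from model metadata as an integer rather than a float. The hparams_sketch struct, the read_u32 helper, the metadata key string, and the example value 1024 are hypothetical stand-ins for llama_hparams, llama_model_loader::get_key, and LLM_KV_ATTENTION_TEMPERATURE_LENGTH.

// sketch.cpp -- hypothetical illustration of the hparams change in this commit.
// Build: g++ -std=c++17 sketch.cpp
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

struct hparams_sketch {
    // grok-2 (mirrors the fields touched by this commit)
    float    f_attn_out_scale = 0.0f;
    uint32_t attn_temp_length = 0;
};

// Hypothetical stand-in for llama_model_loader::get_key(key, dst, required):
// copy the value if the key is present; only report failure when it was required.
static bool read_u32(const std::map<std::string, uint32_t> & metadata,
                     const std::string & key, uint32_t & dst, bool required) {
    const auto it = metadata.find(key);
    if (it == metadata.end()) {
        return !required; // an optional key may be absent
    }
    dst = it->second;
    return true;
}

int main() {
    // Hypothetical metadata of a converted GGUF; the key name and the value
    // are illustrative only and do not come from this commit.
    const std::map<std::string, uint32_t> metadata = {
        { "grok.attention.temperature_length", 1024 },
    };

    hparams_sketch hparams;
    read_u32(metadata, "grok.attention.temperature_length", hparams.attn_temp_length, /*required=*/false);

    std::printf("attn_temp_length = %u\n", (unsigned) hparams.attn_temp_length);
    return 0;
}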
