Skip to content

Commit 4d66bdc

Browse files
committed
ntk alpha freq_base
1 parent 3920faa commit 4d66bdc

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

src/llama-model.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,6 +1511,11 @@ void llama_model::load_hparams(llama_model_loader & ml) {
15111511
ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
15121512
ml.get_key(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_shexp);
15131513

1514+
// TODO: read the NTK-aware alpha from the GGUF metadata instead of hard-coding it below
1515+
float n_dim = hparams.n_embd_head_k;
1516+
float alpha = 1000.0f; // NTK-Aware
1517+
hparams.rope_freq_base_train = 10000.0f * std::powf(alpha, n_dim / (n_dim - 2.0f));
1518+
15141519
switch (hparams.n_layer) {
15151520
case 32: type = LLM_TYPE_A13B; break;
15161521
default: type = LLM_TYPE_UNKNOWN;

0 commit comments

Comments
 (0)