
Commit 428f079

llama-hparams : group MoE-specific params together

committed · 1 parent 99c30f7 · commit 428f079

1 file changed: 7 additions, 6 deletions

src/llama-hparams.h

Lines changed: 7 additions & 6 deletions
@@ -43,8 +43,6 @@ struct llama_hparams {
     uint32_t n_rot;
     uint32_t n_embd_head_k; // dimension of keys (d_k). d_q is assumed to be the same, but there are n_head q heads, and only n_head_kv k-v heads
     uint32_t n_embd_head_v; // dimension of values (d_v) aka n_embd_head
-    uint32_t n_expert = 0;
-    uint32_t n_expert_used = 0;
     uint32_t n_rel_attn_bkts = 0;

     // note: deepseek2 using MLA converts into MQA with larger heads, then decompresses to MHA
@@ -61,14 +59,17 @@ struct llama_hparams {
     std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_kv_arr;
     std::array<uint32_t, LLAMA_MAX_LAYERS> n_ff_arr;

-    uint32_t n_layer_dense_lead = 0;
     uint32_t n_lora_q = 0;
     uint32_t n_lora_kv = 0;
-    uint32_t n_ff_exp = 0;
-    uint32_t n_ff_shexp = 0;
-    uint32_t n_expert_shared = 0;
     uint32_t n_norm_groups = 0;

+    // these params are specific to MoE models
+    uint32_t n_expert = 0;
+    uint32_t n_expert_used = 0;
+    uint32_t n_expert_shared = 0;
+    uint32_t n_layer_dense_lead = 0;
+    uint32_t n_ff_exp = 0;
+    uint32_t n_ff_shexp = 0;
     float expert_weights_scale = 0.0;
     bool expert_weights_norm = false;
     uint32_t expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_NONE;
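
For reference, a sketch of how the MoE-related section of struct llama_hparams reads after this change, assembled from the added lines and the surrounding context shown in the diff above (all other members omitted):

    struct llama_hparams {
        // ...

        // these params are specific to MoE models
        uint32_t n_expert = 0;
        uint32_t n_expert_used = 0;
        uint32_t n_expert_shared = 0;
        uint32_t n_layer_dense_lead = 0;
        uint32_t n_ff_exp = 0;
        uint32_t n_ff_shexp = 0;
        float expert_weights_scale = 0.0;
        bool expert_weights_norm = false;
        uint32_t expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_NONE;

        // ...
    };

The change is purely organizational: the expert-count fields previously declared near the attention-head dimensions are moved next to the other expert/FFN parameters, so all MoE-specific hyperparameters sit in one contiguous block.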
