@@ -98,6 +98,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
9898 { LLM_ARCH_LLADA, " llada" },
9999 { LLM_ARCH_LLADA_MOE, " llada-moe" },
100100 { LLM_ARCH_SEED_OSS, " seed_oss" },
101+ { LLM_ARCH_GROVEMOE, " grovemoe" },
101102 { LLM_ARCH_UNKNOWN, " (unknown)" },
102103};
103104
@@ -125,6 +126,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
125126 { LLM_KV_FEED_FORWARD_LENGTH, " %s.feed_forward_length" },
126127 { LLM_KV_EXPERT_FEED_FORWARD_LENGTH, " %s.expert_feed_forward_length" },
127128 { LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, " %s.expert_shared_feed_forward_length" },
129+ { LLM_KV_EXPERT_CHUNK_FEED_FORWARD_LENGTH, " %s.expert_chunk_feed_forward_length" },
128130 { LLM_KV_USE_PARALLEL_RESIDUAL, " %s.use_parallel_residual" },
129131 { LLM_KV_TENSOR_DATA_LAYOUT, " %s.tensor_data_layout" },
130132 { LLM_KV_EXPERT_COUNT, " %s.expert_count" },
@@ -133,6 +135,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
133135 { LLM_KV_EXPERT_WEIGHTS_SCALE, " %s.expert_weights_scale" },
134136 { LLM_KV_EXPERT_WEIGHTS_NORM, " %s.expert_weights_norm" },
135137 { LLM_KV_EXPERT_GATING_FUNC, " %s.expert_gating_func" },
138+ { LLM_KV_EXPERT_GROUP_SCALE, " %s.expert_group_scale" },
139+ { LLM_KV_EXPERTS_PER_GROUP, " %s.experts_per_group" },
136140 { LLM_KV_MOE_EVERY_N_LAYERS, " %s.moe_every_n_layers" },
137141 { LLM_KV_NEXTN_PREDICT_LAYERS, " %s.nextn_predict_layers" },
138142 { LLM_KV_POOLING_TYPE, " %s.pooling_type" },
@@ -2186,6 +2190,29 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
21862190 { LLM_TENSOR_FFN_UP, " blk.%d.ffn_up" },
21872191 },
21882192 },
2193+ {
2194+ LLM_ARCH_GROVEMOE, // key of this entry; the inner map below pairs each llm_tensor id with its name string
2195+ {
2196+ { LLM_TENSOR_TOKEN_EMBD, " token_embd" }, // this and the next two names contain no %d placeholder
2197+ { LLM_TENSOR_OUTPUT_NORM, " output_norm" },
2198+ { LLM_TENSOR_OUTPUT, " output" },
2199+ { LLM_TENSOR_ATTN_NORM, " blk.%d.attn_norm" }, // blk.%d.* names carry a %d placeholder - presumably the block index; TODO confirm where the name is formatted
2200+ { LLM_TENSOR_ATTN_Q, " blk.%d.attn_q" },
2201+ { LLM_TENSOR_ATTN_Q_NORM, " blk.%d.attn_q_norm" },
2202+ { LLM_TENSOR_ATTN_K, " blk.%d.attn_k" },
2203+ { LLM_TENSOR_ATTN_K_NORM, " blk.%d.attn_k_norm" },
2204+ { LLM_TENSOR_ATTN_V, " blk.%d.attn_v" },
2205+ { LLM_TENSOR_ATTN_OUT, " blk.%d.attn_output" },
2206+ { LLM_TENSOR_FFN_NORM, " blk.%d.ffn_norm" },
2207+ { LLM_TENSOR_FFN_GATE_INP, " blk.%d.ffn_gate_inp" },
2208+ { LLM_TENSOR_FFN_GATE_EXPS, " blk.%d.ffn_gate_exps" },
2209+ { LLM_TENSOR_FFN_DOWN_EXPS, " blk.%d.ffn_down_exps" },
2210+ { LLM_TENSOR_FFN_UP_EXPS, " blk.%d.ffn_up_exps" },
2211+ { LLM_TENSOR_FFN_GATE_CHEXPS, " blk.%d.ffn_gate_chexps" }, // the three *_CHEXPS ids are also listed in LLM_TENSOR_INFOS as LLM_TENSOR_LAYER_REPEATING / GGML_OP_MUL_MAT_ID
2212+ { LLM_TENSOR_FFN_DOWN_CHEXPS, " blk.%d.ffn_down_chexps" },
2213+ { LLM_TENSOR_FFN_UP_CHEXPS, " blk.%d.ffn_up_chexps" },
2214+ },
2215+ },
21892216 {
21902217 LLM_ARCH_UNKNOWN,
21912218 {
@@ -2318,6 +2345,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
23182345 {LLM_TENSOR_FFN_DOWN_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
23192346 {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
23202347 {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2348+ {LLM_TENSOR_FFN_DOWN_CHEXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2349+ {LLM_TENSOR_FFN_GATE_CHEXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
2350+ {LLM_TENSOR_FFN_UP_CHEXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
23212351 {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
23222352 // altup / laurel (gemma 3n)
23232353 {LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
0 commit comments