Skip to content

Commit 41169a8

Browse files
committed
more consistent organization
1 parent 5f9e4e1 commit 41169a8

File tree

2 files changed: +29 additions, −27 deletions

gguf-py/gguf/constants.py

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -357,6 +357,7 @@ class MODEL_ARCH(IntEnum):
357357
DEEPSEEK2 = auto()
358358
CHATGLM = auto()
359359
GLM4 = auto()
360+
GLM4_MOE = auto()
360361
BITNET = auto()
361362
T5 = auto()
362363
T5ENCODER = auto()
@@ -382,7 +383,6 @@ class MODEL_ARCH(IntEnum):
382383
DREAM = auto()
383384
SMALLTHINKER = auto()
384385
LLADA = auto()
385-
GLM4_MOE = auto()
386386

387387

388388
class VISION_PROJECTOR_TYPE(IntEnum):
@@ -2126,6 +2126,29 @@ class MODEL_TENSOR(IntEnum):
21262126
MODEL_TENSOR.ATTN_POST_NORM,
21272127
MODEL_TENSOR.FFN_POST_NORM,
21282128
],
2129+
MODEL_ARCH.GLM4_MOE: [
2130+
MODEL_TENSOR.TOKEN_EMBD,
2131+
MODEL_TENSOR.OUTPUT_NORM,
2132+
MODEL_TENSOR.OUTPUT,
2133+
MODEL_TENSOR.ATTN_NORM,
2134+
MODEL_TENSOR.ATTN_K_NORM, # not always present
2135+
MODEL_TENSOR.ATTN_Q_NORM, # not always present
2136+
MODEL_TENSOR.ATTN_Q,
2137+
MODEL_TENSOR.ATTN_K,
2138+
MODEL_TENSOR.ATTN_V,
2139+
MODEL_TENSOR.ATTN_OUT,
2140+
MODEL_TENSOR.FFN_NORM,
2141+
MODEL_TENSOR.FFN_GATE,
2142+
MODEL_TENSOR.FFN_DOWN,
2143+
MODEL_TENSOR.FFN_UP,
2144+
MODEL_TENSOR.FFN_GATE_EXP,
2145+
MODEL_TENSOR.FFN_DOWN_EXP,
2146+
MODEL_TENSOR.FFN_UP_EXP,
2147+
MODEL_TENSOR.FFN_GATE_SHEXP,
2148+
MODEL_TENSOR.FFN_DOWN_SHEXP,
2149+
MODEL_TENSOR.FFN_UP_SHEXP,
2150+
MODEL_TENSOR.FFN_EXP_PROBS_B, # AKA "e_score_correction_bias" in transformers
2151+
],
21292152
MODEL_ARCH.BITNET: [
21302153
MODEL_TENSOR.ATTN_Q,
21312154
MODEL_TENSOR.ATTN_K,
@@ -2543,29 +2566,6 @@ class MODEL_TENSOR(IntEnum):
25432566
MODEL_TENSOR.FFN_DOWN_EXP,
25442567
MODEL_TENSOR.FFN_UP_EXP,
25452568
],
2546-
MODEL_ARCH.GLM4_MOE: [
2547-
MODEL_TENSOR.TOKEN_EMBD,
2548-
MODEL_TENSOR.OUTPUT_NORM,
2549-
MODEL_TENSOR.OUTPUT,
2550-
MODEL_TENSOR.ATTN_NORM,
2551-
MODEL_TENSOR.ATTN_Q,
2552-
MODEL_TENSOR.ATTN_Q_NORM, # not used in the 106B.A12B model
2553-
MODEL_TENSOR.ATTN_K,
2554-
MODEL_TENSOR.ATTN_K_NORM, # not used in the 106B.A12B model
2555-
MODEL_TENSOR.ATTN_V,
2556-
MODEL_TENSOR.ATTN_OUT,
2557-
MODEL_TENSOR.FFN_NORM,
2558-
MODEL_TENSOR.FFN_GATE,
2559-
MODEL_TENSOR.FFN_DOWN,
2560-
MODEL_TENSOR.FFN_UP,
2561-
MODEL_TENSOR.FFN_GATE_EXP,
2562-
MODEL_TENSOR.FFN_DOWN_EXP,
2563-
MODEL_TENSOR.FFN_UP_EXP,
2564-
MODEL_TENSOR.FFN_GATE_SHEXP,
2565-
MODEL_TENSOR.FFN_DOWN_SHEXP,
2566-
MODEL_TENSOR.FFN_UP_SHEXP,
2567-
MODEL_TENSOR.FFN_EXP_PROBS_B, # AKA "e_score_correction_bias" in transformers
2568-
],
25692569
# TODO
25702570
}
25712571

src/llama-arch.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,15 +1395,19 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
13951395
LLM_ARCH_GLM4_MOE,
13961396
{
13971397
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1398+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1399+
{ LLM_TENSOR_OUTPUT, "output" },
13981400
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1401+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1402+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
13991403
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
14001404
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
14011405
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
14021406
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
14031407
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
14041408
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1405-
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
14061409
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1410+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
14071411
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
14081412
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
14091413
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
@@ -1412,8 +1416,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
14121416
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
14131417
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
14141418
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1415-
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1416-
{ LLM_TENSOR_OUTPUT, "output" },
14171419
},
14181420
},
14191421
{

0 commit comments

Comments (0)