@@ -705,6 +705,7 @@ class MODEL_TENSOR(IntEnum):
705705 MODEL_ARCH .DREAM : "dream" ,
706706 MODEL_ARCH .SMALLTHINKER : "smallthinker" ,
707707 MODEL_ARCH .LLADA : "llada" ,
708+ MODEL_ARCH .GLM4_MOE : "glm4_moe" ,
708709}
709710
710711VISION_PROJECTOR_TYPE_NAMES : dict [VISION_PROJECTOR_TYPE , str ] = {
@@ -2542,6 +2543,27 @@ class MODEL_TENSOR(IntEnum):
25422543 MODEL_TENSOR .FFN_DOWN_EXP ,
25432544 MODEL_TENSOR .FFN_UP_EXP ,
25442545 ],
2546+ MODEL_ARCH .GLM4_MOE : [
2547+ MODEL_TENSOR .TOKEN_EMBD ,
2548+ MODEL_TENSOR .OUTPUT_NORM ,
2549+ MODEL_TENSOR .OUTPUT ,
2550+ MODEL_TENSOR .ATTN_NORM ,
2551+ MODEL_TENSOR .ATTN_Q ,
2552+ MODEL_TENSOR .ATTN_K ,
2553+ MODEL_TENSOR .ATTN_V ,
2554+ MODEL_TENSOR .ATTN_OUT ,
2555+ MODEL_TENSOR .FFN_NORM ,
2556+ MODEL_TENSOR .FFN_GATE ,
2557+ MODEL_TENSOR .FFN_DOWN ,
2558+ MODEL_TENSOR .FFN_UP ,
2559+ MODEL_TENSOR .FFN_GATE_EXP ,
2560+ MODEL_TENSOR .FFN_DOWN_EXP ,
2561+ MODEL_TENSOR .FFN_UP_EXP ,
2562+ MODEL_TENSOR .FFN_GATE_SHEXP ,
2563+ MODEL_TENSOR .FFN_DOWN_SHEXP ,
2564+ MODEL_TENSOR .FFN_UP_SHEXP ,
2565+ MODEL_TENSOR .FFN_EXP_PROBS_B , # AKA "e_score_correction_bias" in transformers
2566+ ],
25452567 # TODO
25462568}
25472569
0 commit comments