@@ -717,6 +717,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406":
             # ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
             res = "llama4"
+        if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
+            # ref: https://huggingface.co/THUDM/glm-4-9b-hf
+            res = "glm4"

         if res is None:
             logger.warning("\n")
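Note for reviewers: the `chkhsh` compared in this table is a SHA-256 over the tokenizer's encoding of a fixed check string, so a new entry like this pins the GLM-4 pre-tokenizer behavior to a specific tokenization result. A minimal sketch of the computation (the actual check text used by the script is elided here; `CHK_TXT` is a placeholder):

```python
# Hedged sketch: the script encodes a fixed check string with the model's
# tokenizer and hashes the resulting token IDs.
from hashlib import sha256

from transformers import AutoTokenizer

CHK_TXT = "..."  # placeholder for the script's fixed check string

tokenizer = AutoTokenizer.from_pretrained("THUDM/glm-4-9b-hf")
chkhsh = sha256(str(tokenizer.encode(CHK_TXT)).encode()).hexdigest()
print(chkhsh)  # with the real check string, this should print the hash registered above
```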
@@ -4882,6 +4885,41 @@ def prepare_tensors(self):
         super().prepare_tensors()
         self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)

+@Model.register("Glm4ForCausalLM")
+class Glm4Model(Model):
+    model_arch = gguf.MODEL_ARCH.GLM4
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "yarn":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if "gate_up_proj" in name:
+            match = re.match(r"model\.layers\.(\d+)\.gate_up_proj\.weight", name)
+            if match:
+                bid = int(match.group(1))
+                return [(f"blk.{bid}.ffn_up.weight", data_torch)]
+
+        if "post_self_attn_layernorm" in name:
+            match = re.match(r"model\.layers\.(\d+)\.post_self_attn_layernorm\.weight", name)
+            if match:
+                bid = int(match.group(1))
+                return [(f"blk.{bid}.post_attn_norm.weight", data_torch)]
+
+        if "post_mlp_layernorm" in name:
+            match = re.match(r"model\.layers\.(\d+)\.post_mlp_layernorm\.weight", name)
+            if match:
+                bid = int(match.group(1))
+                return [(f"blk.{bid}.post_mlp_norm.weight", data_torch)]
+
+        return super().modify_tensors(data_torch, name, bid)

 @Model.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
 class ChatGLMModel(Model):
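As a review aid, here is a standalone sketch of the tensor-name mapping the new `modify_tensors` performs; the `rename` helper is hypothetical and only mirrors the three regex branches above:

```python
import re


def rename(name: str) -> str | None:
    # hypothetical helper condensing the three branches in Glm4Model.modify_tensors
    for src, dst in (
        ("gate_up_proj", "ffn_up"),
        ("post_self_attn_layernorm", "post_attn_norm"),
        ("post_mlp_layernorm", "post_mlp_norm"),
    ):
        m = re.match(rf"model\.layers\.(\d+)\.{src}\.weight", name)
        if m:
            return f"blk.{m.group(1)}.{dst}.weight"
    return None  # everything else falls through to the base class mapping


assert rename("model.layers.3.gate_up_proj.weight") == "blk.3.ffn_up.weight"
assert rename("model.layers.0.post_mlp_layernorm.weight") == "blk.0.post_mlp_norm.weight"
```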
@@ -5551,7 +5589,6 @@ def main() -> None:
     with torch.inference_mode():
         output_type = ftype_map[args.outtype]
         model_architecture = hparams["architectures"][0]
-
         try:
             model_class = Model.from_model_architecture(model_architecture)
         except NotImplementedError:
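For context on the `except NotImplementedError` path: `Model.from_model_architecture` resolves the `architectures` string from the HF config through the registry that the `@Model.register` decorators populate. A rough sketch of that pattern, assuming a plain dict registry (names here are illustrative, not the script's exact internals):

```python
from __future__ import annotations


class Model:
    _model_classes: dict[str, type[Model]] = {}

    @classmethod
    def register(cls, *names: str):
        # decorator: map each architecture string to the decorated subclass
        def wrapper(model_cls: type[Model]) -> type[Model]:
            for name in names:
                cls._model_classes[name] = model_cls
            return model_cls
        return wrapper

    @classmethod
    def from_model_architecture(cls, arch: str) -> type[Model]:
        try:
            return cls._model_classes[arch]
        except KeyError:
            raise NotImplementedError(f"Architecture {arch!r} not supported!") from None
```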