@@ -888,6 +888,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "a1e163ecab2e718a4c829d1148b6e86824ec36163bb71941c3dca9cd5ac25756":
             # ref: https://huggingface.co/JetBrains/Mellum-4b-base
             res = "mellum"
+        if chkhsh == "9b1be57e70d20d9501b2b3186e792d81181ae36ada3903c26f9fea418cf87206":
+            # ref: https://huggingface.co/inclusionAI/LLaDA-MoE-7B-A1B-Base
+            res = "llada-moe"

         if res is None:
             logger.warning("\n")
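
The new branch follows the existing pattern: get_vocab_base_pre() fingerprints a tokenizer by hashing the token ids it produces for a fixed probe string defined elsewhere in convert_hf_to_gguf.py, and maps that hash to a pre-tokenizer name. A minimal sketch of the scheme, with a hypothetical helper (tokenizer_fingerprint is not a function in the script):

from hashlib import sha256
from transformers import AutoTokenizer

def tokenizer_fingerprint(model_id: str, probe_text: str) -> str:
    # Hash the token ids produced for the probe string; pre-tokenizers
    # that split the probe differently yield different hashes.
    tok = AutoTokenizer.from_pretrained(model_id)
    return sha256(str(tok.encode(probe_text)).encode()).hexdigest()

# With the script's own fixed probe text, the LLaDA-MoE tokenizer is expected
# to hash to "9b1be57e70d20d9501b2b3186e792d81181ae36ada3903c26f9fea418cf87206".
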
@@ -2390,7 +2393,10 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return []  # skip other tensors


-@ModelBase.register("Llama4ForConditionalGeneration")
+@ModelBase.register(
+    "Llama4ForConditionalGeneration",
+    "Llama4ForCausalLM",
+)
 class Llama4Model(LlamaModel):
     model_arch = gguf.MODEL_ARCH.LLAMA4
     undo_permute = False
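
Registering the second name lets the converter pick up checkpoints whose config.json declares "Llama4ForCausalLM" rather than "Llama4ForConditionalGeneration". As a rough illustration of how such a decorator-based registry can work (names here are hypothetical, not the file's exact ModelBase implementation):

# Illustrative registry sketch, not the real ModelBase internals.
_model_classes: dict[str, type] = {}

def register(*names: str):
    def wrap(modelcls: type) -> type:
        for name in names:
            # map each config.json "architectures" entry to the converter class
            _model_classes[name] = modelcls
        return modelcls
    return wrap
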
@@ -2408,6 +2414,10 @@ def set_gguf_parameters(self):
         super().set_gguf_parameters()
         self.gguf_writer.add_interleave_moe_layer_step(self.hparams["interleave_moe_layer_step"])
         self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"])
+        if "layer_types" in self.hparams:
+            if all(lt == "full_attention" for lt in self.hparams["layer_types"]):
+                # all layers are full attention (for MobileLLM), disable swa
+                self.gguf_writer.add_sliding_window(0)

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
         if name.startswith("language_model."):
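
Writing a sliding window of 0 tells the runtime not to apply SWA at all. A hypothetical hparams fragment that would take this branch (values are illustrative only):

hparams = {
    "layer_types": ["full_attention"] * 30,  # no "sliding_attention" layers
}
disable_swa = all(lt == "full_attention" for lt in hparams["layer_types"])
print(disable_swa)  # True -> gguf_writer.add_sliding_window(0)
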
@@ -6006,9 +6016,34 @@ class SeedOssModel(TextModel):


 @ModelBase.register("Olmo2ForCausalLM")
+@ModelBase.register("Olmo3ForCausalLM")
 class Olmo2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.OLMO2

+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_attn_factors(rope_scaling["attention_factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
+
+        if "sliding_window" in self.hparams:
+            self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
+
+            sliding_window_pattern = []
+            if "layer_types" in self.hparams:
+                sliding_window_pattern = [t == "sliding_attention" for t in self.hparams["layer_types"]]
+            else:
+                # Olmo2 does not use sliding window attention.
+                # Olmo3 defaults to using sliding window for all layers except every 4th.
+                for i in range(self.hparams["num_hidden_layers"]):
+                    sliding_window_pattern.append((i + 1) % 4 != 0)
+
+            self.gguf_writer.add_sliding_window_pattern(sliding_window_pattern)
+

 @ModelBase.register("OlmoeForCausalLM")
 class OlmoeModel(TextModel):
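
The fallback marks every layer as sliding attention except each 4th. A quick check of that arithmetic for a hypothetical 8-layer config:

num_hidden_layers = 8  # illustrative value, not from a real Olmo3 config
pattern = [(i + 1) % 4 != 0 for i in range(num_hidden_layers)]
print(pattern)  # [True, True, True, False, True, True, True, False]
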
@@ -8239,6 +8274,76 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")


+@ModelBase.register("LLaDAMoEModel", "LLaDAMoEModelLM")
+class LLaDAMoEModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.LLADA_MOE
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if (n_experts := self.hparams.get("num_experts")) is not None:
+            self.gguf_writer.add_expert_count(n_experts)
+
+        if (expert_intermediate_size := self.hparams.get("expert_intermediate_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(expert_intermediate_size)
+
+        # number of experts used per token (top-k)
+        if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
+            self.gguf_writer.add_expert_used_count(n_experts_used)
+
+        self.gguf_writer.add_mask_token_id(156895)
+        self.gguf_writer.add_causal_attention(False)
+        self.gguf_writer.add_diffusion_shift_logits(False)
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    # Copied from: Qwen2MoeModel
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # process the experts separately
+        if name.find("experts") != -1:
+            n_experts = self.hparams["num_experts"]
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["down_proj", "gate_proj", "up_proj"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+
+                    merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
+
+                    new_name = self.map_tensor_name(merged_name)
+
+                    tensors.append((new_name, data_torch))
+                return tensors
+            else:
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    # Copied from: Qwen2MoeModel
+    def prepare_tensors(self):
+        super().prepare_tensors()
+
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
+
 @ModelBase.register("HunYuanDenseV1ForCausalLM")
 class HunYuanModel(TextModel):
     model_arch = gguf.MODEL_ARCH.HUNYUAN_DENSE
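
The expert-merging logic (shared with Qwen2MoeModel) buffers per-expert projection weights until all n_experts * 3 tensors for a block have arrived, then stacks each projection across experts so it maps to a single 3D GGUF tensor per projection. A small shape sketch under assumed dimensions:

import torch

# Illustrative dimensions only, not taken from the real checkpoint.
n_experts, n_ff, n_embd = 4, 1024, 512

# one 2D weight per expert, e.g. mlp.experts.{xid}.up_proj.weight
datas = [torch.zeros(n_ff, n_embd) for _ in range(n_experts)]

merged = torch.stack(datas, dim=0)
print(merged.shape)  # torch.Size([4, 1024, 512]) -> one GGUF tensor per projection
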