@@ -1054,6 +1054,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
10541054        if  chkhsh  ==  "53e325976a6e142379c19b09afcae354f2f496f147afa8f9e189a33fe4e3024e" :
10551055            # ref: https://huggingface.co/ibm-granite/granite-docling-258M 
10561056            res  =  "granite-docling" 
1057+         if  chkhsh  ==  "f4f37b6c8eb9ea29b3eac6bb8c8487c5ab7885f8d8022e67edc1c68ce8403e95" :
1058+             # ref: https://huggingface.co/MiniMaxAI/MiniMax-M2 
1059+             res  =  "minimax-m2" 
10571060
10581061        if  res  is  None :
10591062            logger .warning ("\n " )
@@ -7126,6 +7129,64 @@ def prepare_tensors(self):
71267129                raise  ValueError (f"Unprocessed experts: { experts }  " )
71277130
71287131
@ModelBase.register("MiniMaxM2ForCausalLM")
class MiniMaxM2Model(TextModel):
    """Converter for MiniMax-M2 (MoE) checkpoints.

    Stacks the per-expert FFN weights (w1/w2/w3) of each layer into single
    3D tensors, which is the layout the GGUF MoE tensor mapping expects.
    """
    model_arch = gguf.MODEL_ARCH.MINIMAXM2

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance expert cache. NOTE: this must NOT be a class-level
        # attribute — a shared dict would mix tensors between instances and
        # keep them alive across conversions.
        self._experts_cache: dict[int, dict[str, Tensor]] = {}
        # expose the expert count under the generic key used elsewhere
        self.hparams["num_experts"] = self.hparams["num_local_experts"]

    def set_gguf_parameters(self):
        super().set_gguf_parameters()
        # map the HF scoring function onto the GGUF gating-function enum
        scoring_func = self.hparams["scoring_func"]
        if scoring_func == "sigmoid":
            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
        elif scoring_func == "softmax":
            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
        else:
            raise ValueError(f"Unsupported scoring_func value: {scoring_func}")

        self.gguf_writer.add_expert_feed_forward_length(self.find_hparam(["intermediate_size"]))
        self.gguf_writer.add_rope_dimension_count(self.find_hparam(["rotary_dim"]))

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        # rename the gating-bias tensor to the dotted form the mapper knows
        if name.endswith("e_score_correction_bias"):
            name = name.replace("e_score_correction_bias", "e_score_correction.bias")

        # merge expert weights: buffer per-expert tensors until the whole
        # layer (n_experts * 3 weights) has been seen, then stack them
        if 'experts' in name:
            n_experts = self.hparams["num_experts"]
            assert bid is not None

            expert_cache = self._experts_cache.setdefault(bid, {})
            expert_cache[name] = data_torch
            expert_weights = ["w1", "w2", "w3"]

            # not enough expert weights to merge yet — emit nothing
            if len(expert_cache) < n_experts * len(expert_weights):
                return []

            tensors: list[tuple[str, Tensor]] = []
            for w_name in expert_weights:
                datas: list[Tensor] = []

                for xid in range(n_experts):
                    ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{w_name}.weight"
                    datas.append(expert_cache[ename])
                    del expert_cache[ename]

                # (n_experts, rows, cols) stacked tensor for this weight kind
                data_torch = torch.stack(datas, dim=0)
                merged_name = f"model.layers.{bid}.block_sparse_moe.experts.{w_name}.weight"
                new_name = self.map_tensor_name(merged_name)
                tensors.append((new_name, data_torch))

            # layer fully merged — drop its cache entry to free memory
            del self._experts_cache[bid]
            return tensors

        return super().modify_tensors(data_torch, name, bid)
7189+ 
71297190@ModelBase .register ("Dots1ForCausalLM" ) 
71307191class  Dots1Model (Qwen2MoeModel ):
71317192    model_arch  =  gguf .MODEL_ARCH .DOTS1 
0 commit comments