@@ -4591,6 +4591,14 @@ def set_gguf_parameters(self):
 class MambaModel(TextModel):
     model_arch = gguf.MODEL_ARCH.MAMBA
 
+    def __init__(self, dir_model: Path, *args, **kwargs):
+        # Avoid using AutoConfig for hparams
+        hparams = kwargs.pop("hparams", None)
+        if hparams is None:
+            with open(dir_model / "config.json", "r", encoding="utf-8") as f:
+                hparams = json.load(f)
+        super().__init__(dir_model, *args, hparams=hparams, **kwargs)
+
     def set_vocab(self):
         vocab_size = self.hparams["vocab_size"]
         # Round vocab size to next multiple of 8
@@ -4665,6 +4673,100 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(new_name, data_torch)]
 
 
+@ModelBase.register("Mamba2ForCausalLM")
+class Mamba2Model(TextModel):
+    model_arch = gguf.MODEL_ARCH.MAMBA2
+
+    def __init__(self, dir_model: Path, *args, **kwargs):
+        # Avoid using AutoConfig for hparams
+        # It wrongly assumes all Mamba2 models are Mamba-Codestral-7B-v0.1
+        hparams = kwargs.pop("hparams", None)
+        if hparams is None:
+            with open(dir_model / "config.json", "r", encoding="utf-8") as f:
+                hparams = json.load(f)
+        super().__init__(dir_model, *args, hparams=hparams, **kwargs)
+
+    def set_vocab(self):
+        vocab_size = self.hparams["vocab_size"]
+        # Round vocab size to next multiple of 16
+        pad_vocab = self.hparams.get("pad_vocab_size_multiple", 16)
+        # pad using ceiling division
+        # ref: https://stackoverflow.com/a/17511341/22827863
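+        # e.g. a vocab of 50277 with pad_vocab == 16 gives -(50277 // -16) * 16 == 50288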
+        vocab_size = -(vocab_size // -pad_vocab) * pad_vocab
+        self.hparams["vocab_size"] = vocab_size
+
+        if (self.dir_model / "tokenizer.model").is_file():
+            self._set_vocab_sentencepiece()
+        elif (self.dir_model / "tokenizer.model.v3").is_file():
+            # mamba-codestral
+            raise NotImplementedError(f"Please rename {self.dir_model / 'tokenizer.model.v3'} to {self.dir_model / 'tokenizer.model'}")
+        elif (self.dir_model / "tokenizer.json").is_file():
+            self._set_vocab_gpt2()
+        else:
+            # Use the GPT-NeoX tokenizer when no tokenizer files are present
+            self._set_vocab_builtin("gpt-neox", vocab_size)
+
+    def set_gguf_parameters(self):
+        d_model = self.find_hparam(["hidden_size", "d_model", "dim"])
+        d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
+        d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
+        d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 128
+        head_dim = self.find_hparam(["head_dim"], optional=True) or 64
+        n_group = self.find_hparam(["n_groups"], optional=True) or 1
+
+        rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
+
+        # Fail early for models which don't have a block expansion factor of 2
+        # TODO: does this really matter?
+        assert d_inner == 2 * d_model
+        assert d_inner % head_dim == 0
+
+        self.gguf_writer.add_context_length(2**20)  # arbitrary value; for those who use the default
+        self.gguf_writer.add_embedding_length(d_model)
+        self.gguf_writer.add_feed_forward_length(0)  # unused, but seemingly required when loading
+        self.gguf_writer.add_head_count(0)  # unused, but seemingly required when loading
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_ssm_conv_kernel(d_conv)
+        self.gguf_writer.add_ssm_inner_size(d_inner)
+        self.gguf_writer.add_ssm_state_size(d_state)
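+        # d_inner // head_dim is the number of SSM heads in Mamba-2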
+        self.gguf_writer.add_ssm_time_step_rank(d_inner // head_dim)
+        self.gguf_writer.add_ssm_group_count(n_group)
+        self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+
+        if name.startswith("model.backbone") or name.startswith("model.lm_head"):
+            # map Mamba-Codestral-7B-v0.1 tensor names to the names used by Mamba-2
+            name = name.removeprefix("model.")
+
+        if name.endswith(".dt_bias"):
+            name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
+
+        new_name = self.map_tensor_name(name)
+
+        if self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_CONV1D, bid):
+            data_torch = data_torch.squeeze()
+        elif any(self.match_model_tensor_name(new_name, t, bid, suffix="") for t in [
+            gguf.MODEL_TENSOR.SSM_A,
+            gguf.MODEL_TENSOR.SSM_D,
+        ]):
+            # unsqueeze A to use similar shape semantics as Mamba-1
+            # (D is also unsqueezed, but for more straightforward broadcast internally)
+            data_torch = data_torch.reshape((*data_torch.shape, 1))
+        elif self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_NORM, bid):
+            d_model = self.find_hparam(["hidden_size", "d_model", "dim"])
+            d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
+            n_group = self.hparams.get("n_groups", 1)
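+            # split the flat norm weights into one row of weights per group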
+            data_torch = data_torch.reshape((n_group, d_inner // n_group))
+
+        if name.endswith(".A_log"):
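+            # A is stored as A_log in the checkpoint; recover A = -exp(A_log)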
+            logger.debug("A_log --> A ==> " + new_name)
+            data_torch = -torch.exp(data_torch)
+
+        yield (new_name, data_torch)
+
+
 @ModelBase.register("CohereForCausalLM")
 class CommandR2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.COMMAND_R
@@ -6431,12 +6533,20 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st
     # maybe we should fallback to text model's arch in that case, since not many models have both
     text_config = hparams.get("text_config", {})
     vision_config = hparams.get("vision_config", {})
-    arch = hparams["architectures"][0]
+    arch = None
+    if (arches := hparams.get("architectures")) is not None and len(arches) > 0:
+        arch = arches[0]
+    elif "ssm_cfg" in hparams:
+        # For non-hf Mamba and Mamba2 models
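+        # e.g. {"ssm_cfg": {"layer": "Mamba2"}} maps to "Mamba2ForCausalLM"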
+        arch = hparams["ssm_cfg"].get("layer", "Mamba") + "ForCausalLM"
+
     # if "architectures" is found in the sub-config, use that instead
     if model_type == ModelType.TEXT and text_config.get("architectures") is not None:
         arch = text_config["architectures"][0]
     elif model_type == ModelType.MMPROJ and vision_config.get("architectures") is not None:
         arch = vision_config["architectures"][0]
+    if arch is None:
+        raise ValueError("Failed to detect model architecture")
     return arch
 
 