@@ -1900,6 +1900,7 @@ def prepare_tensors(self):
     "MixtralForCausalLM",
     "VLlama3ForCausalLM",
     "LlavaForConditionalGeneration",
+    "VoxtralForConditionalGeneration",
     "LlamaModel")
 class LlamaModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LLAMA
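Note: `ModelBase.register` maps the `architectures` strings from a checkpoint's config.json to a converter class, so adding "VoxtralForConditionalGeneration" here routes Voxtral's text weights through the existing `LlamaModel` path. A toy sketch of the registry pattern (names and internals here are illustrative assumptions, not the converter's actual code):

    _registry: dict[str, type] = {}

    def register(*names: str):
        def wrap(cls: type) -> type:
            for n in names:
                _registry[n] = cls  # one class may claim several HF architecture names
            return cls
        return wrap

    @register("LlamaForCausalLM", "VoxtralForConditionalGeneration")
    class LlamaConverter:
        pass

    assert _registry["VoxtralForConditionalGeneration"] is LlamaConverter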
@@ -1912,6 +1913,11 @@ def __init__(self, *args, **kwargs):
         self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
 
     def set_vocab(self):
+        path_tekken_json = self.dir_model / "tekken.json"
+        path_tokenizer_json = self.dir_model / "tokenizer.json"
+        if path_tekken_json.is_file() and not path_tokenizer_json.is_file():
+            return self.set_vocab_tekken()
+
         try:
             self._set_vocab_sentencepiece()
         except FileNotFoundError:
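Note: the new early-exit prefers the Mistral-native tokenizer only when `tekken.json` is present and no HF `tokenizer.json` exists, so repos that ship both keep using the HF path. A standalone restatement of that rule (the directory name is a hypothetical example):

    from pathlib import Path

    def uses_tekken(dir_model: Path) -> bool:
        # tekken.json without tokenizer.json => take the set_vocab_tekken() path
        return (dir_model / "tekken.json").is_file() and not (dir_model / "tokenizer.json").is_file()

    print(uses_tekken(Path("./Voxtral-Mini-3B-2507")))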
@@ -1944,6 +1950,52 @@ def set_vocab(self):
         if self.hparams.get("vocab_size", 32000) == 49152:
             self.gguf_writer.add_add_bos_token(False)
 
+    def set_vocab_tekken(self):
+        vocab = gguf.vocab.MistralVocab(self.dir_model)
+        self.gguf_writer.add_tokenizer_model(vocab.gguf_tokenizer_model)
+
+        tokens = []
+        scores = []
+        toktypes = []
+
+        for text, score, toktype in vocab.all_tokens():
+            tokens.append(text)
+            scores.append(score)
+            toktypes.append(toktype)
+
+        assert len(tokens) == vocab.vocab_size, (
+            f"token count ({len(tokens)}) != vocab size ({vocab.vocab_size})"
+        )
+
+        if vocab.tokenizer_type == gguf.vocab.MistralTokenizerType.tekken:
+            self.gguf_writer.add_tokenizer_pre("tekken")
+            self.gguf_writer.add_token_merges(
+                vocab.extract_vocab_merges_from_model()
+            )
+
+        logger.info(
+            f"Setting bos, eos, unk and pad token IDs to {vocab.bos_id}, {vocab.eos_id}, {vocab.unk_id}, {vocab.pad_id}."
+        )
+
+        self.gguf_writer.add_bos_token_id(vocab.bos_id)
+        self.gguf_writer.add_eos_token_id(vocab.eos_id)
+        self.gguf_writer.add_unk_token_id(vocab.unk_id)
+        self.gguf_writer.add_pad_token_id(vocab.pad_id)
+
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+        self.gguf_writer.add_vocab_size(vocab.vocab_size)
+
+        self.gguf_writer.add_add_bos_token(True)
+        self.gguf_writer.add_add_eos_token(False)
+
+        script_dir = Path(__file__).parent
+        template_path = script_dir / "models/templates/unsloth-mistral-Devstral-Small-2507.jinja"
+        with open(template_path, "r", encoding="utf-8") as f:
+            template = f.read()
+        self.gguf_writer.add_chat_template(template)
+
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
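Note: `set_vocab_tekken` writes the full tokenizer state (model, pre-tokenizer, merges, special token IDs, token list/scores/types, and a chat template) into the GGUF metadata. One way to sanity-check the result, assuming the `gguf` Python package's `GGUFReader` (the output file name is hypothetical):

    from gguf import GGUFReader

    reader = GGUFReader("Voxtral-Mini-3B-2507-F16.gguf")
    for key in reader.fields:
        if key.startswith("tokenizer."):
            print(key)  # e.g. tokenizer.ggml.model, tokenizer.ggml.pre, ...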
@@ -1971,12 +2023,13 @@ def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         n_head = self.hparams["num_attention_heads"]
         n_kv_head = self.hparams.get("num_key_value_heads")
-        is_vision_tensor = "vision_tower" in name \
+        is_multimodal_tensor = "vision_tower" in name \
             or "vision_model" in name \
+            or "audio_tower" in name \
             or "model.connector" in name \
             or "multi_modal_projector" in name
 
-        if is_vision_tensor:
+        if is_multimodal_tensor:
             return []  # skip vision and audio tensors
         elif self.hf_arch == "LlamaModel":
             name = "model." + name
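Note: the rename from `is_vision_tensor` reflects that the text converter must now also drop Voxtral's `audio_tower` weights; multimodal tensors are exported by the matching mmproj converter instead. The filter, restated standalone:

    MULTIMODAL_MARKERS = ("vision_tower", "vision_model", "audio_tower",
                          "model.connector", "multi_modal_projector")

    def is_multimodal(name: str) -> bool:
        return any(marker in name for marker in MULTIMODAL_MARKERS)

    assert is_multimodal("audio_tower.layers.0.self_attn.q_proj.weight")
    assert not is_multimodal("model.layers.0.self_attn.q_proj.weight")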
@@ -3791,7 +3844,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 32))
         self.gguf_writer.add_layer_norm_rms_eps(hparams.get("rms_norm_eps", 1e-06))
-        self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1000000.0))
+        self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 10000))
 
         # Mamba parameters
         self.gguf_writer.add_ssm_state_size(hparams.get("mamba_d_state", 64))
@@ -3802,7 +3855,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_ssm_group_count(0)
 
         # MLP feed forward parameters (for attention layers)
-        self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 16384))
+        self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 13312))
         self.gguf_writer.add_file_type(self.ftype)
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
@@ -7231,9 +7284,10 @@ class WhisperEncoderModel(MmprojModel):
 
     def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
-        self.hparams["hidden_size"] = self.hparams["d_model"]
-        self.hparams["intermediate_size"] = self.hparams["encoder_ffn_dim"]
-        self.hparams["num_attention_heads"] = self.hparams["encoder_attention_heads"]
+        if "hidden_size" not in self.hparams and "intermediate_size" not in self.hparams:
+            self.hparams["hidden_size"] = self.hparams["d_model"]
+            self.hparams["intermediate_size"] = self.hparams["encoder_ffn_dim"]
+            self.hparams["num_attention_heads"] = self.hparams["encoder_attention_heads"]
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
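Note: the guard keeps the Whisper-style key translation (`d_model`, `encoder_ffn_dim`, `encoder_attention_heads`) from clobbering configs that already carry the generic keys. A minimal sketch of the same logic, using Whisper large-v3 encoder sizes as example inputs:

    hparams = {"d_model": 1280, "encoder_ffn_dim": 5120, "encoder_attention_heads": 20}
    if "hidden_size" not in hparams and "intermediate_size" not in hparams:
        hparams["hidden_size"] = hparams["d_model"]
        hparams["intermediate_size"] = hparams["encoder_ffn_dim"]
        hparams["num_attention_heads"] = hparams["encoder_attention_heads"]
    print(hparams["hidden_size"])  # 1280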
@@ -7272,9 +7326,21 @@ class UltravoxWhisperEncoderModel(WhisperEncoderModel):
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.ULTRAVOX)
         self.gguf_writer.add_audio_stack_factor(self.global_config["stack_factor"])
 
 
+@ModelBase.register("VoxtralForConditionalGeneration")
+class VoxtralWhisperEncoderModel(WhisperEncoderModel):
+    has_vision_encoder = False  # no vision encoder
+    has_audio_encoder = True
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.VOXTRAL)
+        self.gguf_writer.add_audio_stack_factor(4)  # == intermediate_size // hidden_size
+
+
 @ModelBase.register("FalconH1ForCausalLM")
 class FalconH1Model(Mamba2Model):
     model_arch = gguf.MODEL_ARCH.FALCON_H1
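Note: a stack factor of 4 means every 4 consecutive encoder frames are concatenated along the feature axis before the projector, which is why it equals `intermediate_size // hidden_size` here. An illustrative reshape (shapes are example values, not Voxtral's exact dimensions):

    import torch

    stack_factor = 4
    frames = torch.randn(1, 100, 1280)  # (batch, time, hidden)
    b, t, c = frames.shape
    stacked = frames.reshape(b, t // stack_factor, c * stack_factor)
    print(stacked.shape)  # torch.Size([1, 25, 5120])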
@@ -7589,6 +7655,88 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@ModelBase.register("SmallThinkerForCausalLM")
+class SmallThinkerModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.SMALLTHINKER
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if (n_experts := self.hparams.get("num_experts", self.hparams.get("moe_num_primary_experts"))) is not None:
+            self.gguf_writer.add_expert_count(n_experts)
+        if (n_experts_used := self.hparams.get("num_experts_per_tok", self.hparams.get("moe_num_active_primary_experts"))) is not None:
+            self.gguf_writer.add_expert_used_count(n_experts_used)
+        if (moe_intermediate_size := self.hparams.get("moe_ffn_hidden_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
+            self.gguf_writer.add_feed_forward_length(moe_intermediate_size)
+            logger.info(f"gguf: expert feed forward length = {moe_intermediate_size}")
+        if self.hparams.get("moe_primary_router_apply_softmax"):
+            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
+        else:
+            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+        # YaRN is not enabled by default
+        # To enable it, please refer to this guide: https://huggingface.co/Qwen/Qwen3-30B-A3B#processing-long-texts
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
+
+        sliding_window_layout = self.hparams.get("sliding_window_layout")
+        if sliding_window_layout:
+            for i in sliding_window_layout:
+                if i != 0:
+                    sliding_window = self.hparams.get("sliding_window_size")
+                    if sliding_window:
+                        self.gguf_writer.add_sliding_window(sliding_window)
+                    break
+
+    _experts: list[dict[str, Tensor]] | None = None
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # process the experts separately
+        if name.find("experts") != -1:
+            n_experts = self.hparams.get("num_experts", self.hparams.get("moe_num_primary_experts"))
+            assert bid is not None
+
+            if self._experts is None:
+                self._experts = [{} for _ in range(self.block_count)]
+
+            self._experts[bid][name] = data_torch
+
+            if len(self._experts[bid]) >= n_experts * 3:
+                tensors: list[tuple[str, Tensor]] = []
+
+                # merge the experts into a single 3d tensor
+                for w_name in ["down", "gate", "up"]:
+                    datas: list[Tensor] = []
+
+                    for xid in range(n_experts):
+                        ename = f"model.layers.{bid}.block_sparse_moe.experts.{xid}.{w_name}.weight"
+                        datas.append(self._experts[bid][ename])
+                        del self._experts[bid][ename]
+
+                    data_torch = torch.stack(datas, dim=0)
+
+                    merged_name = f"model.layers.{bid}.block_sparse_moe.experts.{w_name}.weight"
+
+                    new_name = self.map_tensor_name(merged_name)
+
+                    tensors.append((new_name, data_torch))
+                return tensors
+            else:
+                return []
+
+        return [(self.map_tensor_name(name), data_torch)]
+
+    def prepare_tensors(self):
+        super().prepare_tensors()
+
+        if self._experts is not None:
+            # flatten `list[dict[str, Tensor]]` into `list[str]`
+            experts = [k for d in self._experts for k in d.keys()]
+            if len(experts) > 0:
+                raise ValueError(f"Unprocessed experts: {experts}")
+
 ###### CONVERSION LOGIC ######
 
 
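Note: `SmallThinkerModel.modify_tensors` buffers per-expert 2D weights per layer and emits them only once all `n_experts * 3` projections have arrived, stacking each projection into one 3D tensor with the expert index as the leading dimension. A minimal demo of that merge step (dimensions are toy values):

    import torch

    n_experts, n_ff, n_embd = 4, 8, 6
    gate_weights = [torch.randn(n_ff, n_embd) for _ in range(n_experts)]  # one 2D weight per expert
    merged = torch.stack(gate_weights, dim=0)
    print(merged.shape)  # torch.Size([4, 8, 6]) -> a single "gate" tensor for the layer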