@@ -2260,6 +2260,63 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
22602260 return super ().modify_tensors (data_torch , name , bid )
22612261
22622262
@ModelBase.register("VoxtralForConditionalGeneration")
class VoxtralModel(LlamaModel):
    """Text-model converter for Voxtral.

    The language backbone is Llama-architecture; the tokenizer is a Mistral
    (tekken) tokenizer read via ``gguf.vocab.MistralVocab``. Audio-tower and
    projector tensors are skipped here — they are converted separately by the
    mmproj model registered under the same HF architecture name.
    """
    model_arch = gguf.MODEL_ARCH.LLAMA

    def set_vocab(self):
        """Write the Mistral tokenizer and chat template into GGUF metadata.

        Emits the token list, scores, token types, special-token IDs, merges
        (tekken only), and the bundled Devstral chat template.

        Raises:
            ValueError: if the number of extracted tokens does not match the
                vocabulary size reported by the tokenizer.
        """
        vocab = gguf.vocab.MistralVocab(self.dir_model)
        self.gguf_writer.add_tokenizer_model(vocab.gguf_tokenizer_model)

        tokens = []
        scores = []
        toktypes = []

        for text, score, toktype in vocab.all_tokens():
            tokens.append(text)
            scores.append(score)
            toktypes.append(toktype)

        # Explicit raise instead of `assert`: assertions are stripped under
        # `python -O`, and a token/vocab-size mismatch must always abort the
        # conversion rather than silently produce a broken GGUF.
        if len(tokens) != vocab.vocab_size:
            raise ValueError(
                f"token count ({len(tokens)}) != vocab size ({vocab.vocab_size})"
            )

        if vocab.tokenizer_type == gguf.vocab.MistralTokenizerType.tekken:
            self.gguf_writer.add_tokenizer_pre("tekken")
            self.gguf_writer.add_token_merges(
                vocab.extract_vocab_merges_from_model()
            )

        logger.info(
            f"Setting bos, eos, unk and pad token IDs to {vocab.bos_id}, {vocab.eos_id}, {vocab.unk_id}, {vocab.pad_id}."
        )

        self.gguf_writer.add_bos_token_id(vocab.bos_id)
        self.gguf_writer.add_eos_token_id(vocab.eos_id)
        self.gguf_writer.add_unk_token_id(vocab.unk_id)
        self.gguf_writer.add_pad_token_id(vocab.pad_id)

        self.gguf_writer.add_token_list(tokens)
        self.gguf_writer.add_token_scores(scores)
        self.gguf_writer.add_token_types(toktypes)
        self.gguf_writer.add_vocab_size(vocab.vocab_size)

        self.gguf_writer.add_add_bos_token(True)
        self.gguf_writer.add_add_eos_token(False)

        # The template ships next to this script; read it as UTF-8.
        # Missing file raises FileNotFoundError, which is the desired failure.
        script_dir = Path(__file__).parent
        template_path = script_dir / "models/templates/unsloth-mistral-Devstral-Small-2507.jinja"
        template = template_path.read_text(encoding="utf-8")
        self.gguf_writer.add_chat_template(template)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        """Map HF tensor names to the Llama layout, dropping multimodal parts.

        Strips the ``language_model.`` wrapper prefix so the standard Llama
        mapping applies; projector and audio-tower tensors are returned as an
        empty list (handled by the mmproj converter instead).
        """
        name = name.replace("language_model.", "")
        if "multi_modal_projector" in name or "audio_tower" in name:
            return []
        return super().modify_tensors(data_torch, name, bid)
2319+
22632320@ModelBase .register ("DeciLMForCausalLM" )
22642321class DeciModel (TextModel ):
22652322 model_arch = gguf .MODEL_ARCH .DECI
@@ -7231,9 +7288,10 @@ class WhisperEncoderModel(MmprojModel):
72317288
72327289 def __init__ (self , * args , ** kwargs ):
72337290 super ().__init__ (* args , ** kwargs )
7234- self .hparams ["hidden_size" ] = self .hparams ["d_model" ]
7235- self .hparams ["intermediate_size" ] = self .hparams ["encoder_ffn_dim" ]
7236- self .hparams ["num_attention_heads" ] = self .hparams ["encoder_attention_heads" ]
7291+ if "hidden_size" not in self .hparams and "intermediate_size" not in self .hparams :
7292+ self .hparams ["hidden_size" ] = self .hparams ["d_model" ]
7293+ self .hparams ["intermediate_size" ] = self .hparams ["encoder_ffn_dim" ]
7294+ self .hparams ["num_attention_heads" ] = self .hparams ["encoder_attention_heads" ]
72377295
72387296 def set_gguf_parameters (self ):
72397297 super ().set_gguf_parameters ()
@@ -7272,9 +7330,21 @@ class UltravoxWhisperEncoderModel(WhisperEncoderModel):
72727330
72737331 def set_gguf_parameters (self ):
72747332 super ().set_gguf_parameters ()
7333+ self .gguf_writer .add_clip_projector_type (gguf .VisionProjectorType .ULTRAVOX )
72757334 self .gguf_writer .add_audio_stack_factor (self .global_config ["stack_factor" ])
72767335
72777336
@ModelBase.register("VoxtralForConditionalGeneration")
class VoxtralWhisperEncoderModel(WhisperEncoderModel):
    """Audio (mmproj) converter for Voxtral's Whisper-style encoder."""

    # Voxtral is audio-only: no vision tower is present in the checkpoint.
    has_vision_encoder = False
    has_audio_encoder = True

    def set_gguf_parameters(self):
        """Emit Voxtral mmproj metadata on top of the shared Whisper params."""
        super().set_gguf_parameters()
        writer = self.gguf_writer
        writer.add_clip_projector_type(gguf.VisionProjectorType.VOXTRAL)
        # Fixed stack factor of 4 — stated to equal
        # intermediate_size // hidden_size for the shipped checkpoints
        # (not derivable from this block; confirm against the model config).
        writer.add_audio_stack_factor(4)
7347+
72787348@ModelBase .register ("FalconH1ForCausalLM" )
72797349class FalconH1Model (Mamba2Model ):
72807350 model_arch = gguf .MODEL_ARCH .FALCON_H1
0 commit comments