@@ -1534,6 +1534,79 @@ def _set_vocab_interns1(self):
         special_vocab._set_special_token("bos", 151643)
         special_vocab.add_to_gguf(self.gguf_writer)
 
+    def _set_vocab_mistral(self):
+        if not _mistral_common_installed:
+            raise ImportError(_mistral_import_error_msg)
+
+        vocab = MistralVocab(self.dir_model)
+        logger.info(
+            f"Converting tokenizer {vocab.tokenizer_type} of size {vocab.vocab_size}."
+        )
+
+        self.gguf_writer.add_tokenizer_model(vocab.gguf_tokenizer_model)
+
+        tokens = []
+        scores = []
+        toktypes = []
+
+        for text, score, toktype in vocab.all_tokens():
+            tokens.append(text)
+            scores.append(score)
+            toktypes.append(toktype)
+
+        assert len(tokens) == vocab.vocab_size, (
+            f"token count ({len(tokens)}) != vocab size ({vocab.vocab_size})"
+        )
+
+        if vocab.tokenizer_type == MistralTokenizerType.tekken:
+            self.gguf_writer.add_tokenizer_pre("tekken")
+            self.gguf_writer.add_token_merges(
+                vocab.extract_vocab_merges_from_model()
+            )
+
+        logger.info(
+            f"Setting bos, eos, unk and pad token IDs to {vocab.bos_id}, {vocab.eos_id}, {vocab.unk_id}, {vocab.pad_id}."
+        )
+
+        self.gguf_writer.add_bos_token_id(vocab.bos_id)
+        self.gguf_writer.add_eos_token_id(vocab.eos_id)
+        self.gguf_writer.add_unk_token_id(vocab.unk_id)
+        self.gguf_writer.add_pad_token_id(vocab.pad_id)
+
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+        self.gguf_writer.add_vocab_size(vocab.vocab_size)
+
+        self.gguf_writer.add_add_bos_token(True)
+        self.gguf_writer.add_add_eos_token(False)
+
+        local_template_file_path = self.dir_model / "chat_template.jinja"
+
+        if self.is_mistral_format and local_template_file_path.is_file():
+            # Ministral-3 and other new Mistral models come with chat templates.
+            # ref: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512/tree/main
+            logger.info("Using an existing Mistral local chat template.")
+
+            with open(local_template_file_path, "r", encoding="utf-8") as f:
+                template = f.read()
+        elif not self.is_mistral_format or not self.disable_mistral_community_chat_template:
+            template_dir = Path(__file__).parent / "models/templates/"
+
+            # Log only for Mistral format that the official tokenization and detokenization is via `mistral-common`.
+            if self.is_mistral_format:
+                logger.info(
+                    "Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. "
+                    "Mistral recommends to use `mistral-common` to perform tokenization and detokenization."
+                )
+            template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format)
+        else:
+            logger.info("Not using a Mistral local or community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
+            template = None
+
+        if template is not None:
+            self.gguf_writer.add_chat_template(template)
+
 
 class MmprojModel(ModelBase):
     model_type = ModelType.MMPROJ
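The chat-template selection at the end of `_set_vocab_mistral` boils down to three cases. The sketch below restates that branch order with standalone names; it is an illustration, not code from this commit.

```python
# Illustration only: the template-selection branches of _set_vocab_mistral,
# restated on plain booleans (names are local to this sketch).
def pick_template_source(is_mistral_format: bool,
                         local_template_exists: bool,
                         disable_community_template: bool) -> str | None:
    if is_mistral_format and local_template_exists:
        return "chat_template.jinja shipped next to the model weights"
    if not is_mistral_format or not disable_community_template:
        return "community template looked up in models/templates/"
    # no template is embedded; tokenize/detokenize via mistral-common instead
    return None
```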
@@ -2304,79 +2377,6 @@ def __init__(self, *args, **kwargs):
         if self.hf_arch == "VLlama3ForCausalLM":
             self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
 
-    def _set_vocab_mistral(self):
-        if not _mistral_common_installed:
-            raise ImportError(_mistral_import_error_msg)
-
-        vocab = MistralVocab(self.dir_model)
-        logger.info(
-            f"Converting tokenizer {vocab.tokenizer_type} of size {vocab.vocab_size}."
-        )
-
-        self.gguf_writer.add_tokenizer_model(vocab.gguf_tokenizer_model)
-
-        tokens = []
-        scores = []
-        toktypes = []
-
-        for text, score, toktype in vocab.all_tokens():
-            tokens.append(text)
-            scores.append(score)
-            toktypes.append(toktype)
-
-        assert len(tokens) == vocab.vocab_size, (
-            f"token count ({len(tokens)}) != vocab size ({vocab.vocab_size})"
-        )
-
-        if vocab.tokenizer_type == MistralTokenizerType.tekken:
-            self.gguf_writer.add_tokenizer_pre("tekken")
-            self.gguf_writer.add_token_merges(
-                vocab.extract_vocab_merges_from_model()
-            )
-
-        logger.info(
-            f"Setting bos, eos, unk and pad token IDs to {vocab.bos_id}, {vocab.eos_id}, {vocab.unk_id}, {vocab.pad_id}."
-        )
-
-        self.gguf_writer.add_bos_token_id(vocab.bos_id)
-        self.gguf_writer.add_eos_token_id(vocab.eos_id)
-        self.gguf_writer.add_unk_token_id(vocab.unk_id)
-        self.gguf_writer.add_pad_token_id(vocab.pad_id)
-
-        self.gguf_writer.add_token_list(tokens)
-        self.gguf_writer.add_token_scores(scores)
-        self.gguf_writer.add_token_types(toktypes)
-        self.gguf_writer.add_vocab_size(vocab.vocab_size)
-
-        self.gguf_writer.add_add_bos_token(True)
-        self.gguf_writer.add_add_eos_token(False)
-
-        local_template_file_path = self.dir_model / "chat_template.jinja"
-
-        if self.is_mistral_format and local_template_file_path.is_file():
-            # Ministral-3 and other new Mistral models come with chat templates.
-            # ref: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512/tree/main
-            logger.info("Using an existing Mistral local chat template.")
-
-            with open(local_template_file_path, "r", encoding="utf-8") as f:
-                template = f.read()
-        elif not self.is_mistral_format or not self.disable_mistral_community_chat_template:
-            template_dir = Path(__file__).parent / "models/templates/"
-
-            # Log only for Mistral format that the official tokenization and detokenization is via `mistral-common`.
-            if self.is_mistral_format:
-                logger.info(
-                    "Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. "
-                    "Mistral recommends to use `mistral-common` to perform tokenization and detokenization."
-                )
-            template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format)
-        else:
-            logger.info("Not using a Mistral local or community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
-            template = None
-
-        if template is not None:
-            self.gguf_writer.add_chat_template(template)
-
     def set_vocab(self):
         if self.is_mistral_format:
             return self._set_vocab_mistral()
@@ -9934,17 +9934,109 @@ def get_community_chat_template(vocab: MistralVocab, templates_dir: Path, is_mis
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        if "yarn" in self.hparams:
-            yarn_params = self.hparams["yarn"]
-            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-            self.gguf_writer.add_rope_scaling_factor(yarn_params["factor"])
-            self.gguf_writer.add_rope_scaling_yarn_beta_fast(yarn_params["beta"])
-            self.gguf_writer.add_rope_scaling_yarn_beta_slow(yarn_params["alpha"])
-            self.gguf_writer.add_rope_scaling_yarn_log_mul(1.0)  # mscale_all_dim
-            self.gguf_writer.add_rope_scaling_orig_ctx_len(yarn_params["original_max_position_embeddings"])
+        MistralModel.set_mistral_config(self.gguf_writer, self.hparams)
 
-        if "llama_4_scaling" in self.hparams:
-            self.gguf_writer.add_attn_temperature_scale(self.hparams["llama_4_scaling"]["beta"])
+
+    @staticmethod
+    def set_mistral_config(gguf_writer: gguf.GGUFWriter, hparams: dict):
+        if "yarn" in hparams:
+            yarn_params = hparams["yarn"]
+            gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            gguf_writer.add_rope_scaling_factor(yarn_params["factor"])
+            gguf_writer.add_rope_scaling_yarn_beta_fast(yarn_params["beta"])
+            gguf_writer.add_rope_scaling_yarn_beta_slow(yarn_params["alpha"])
+            gguf_writer.add_rope_scaling_yarn_log_mul(1.0)  # mscale_all_dim
+            gguf_writer.add_rope_scaling_orig_ctx_len(yarn_params["original_max_position_embeddings"])
+
+        if "llama_4_scaling" in hparams:
+            gguf_writer.add_attn_temperature_scale(hparams["llama_4_scaling"]["beta"])
+
+
+class MistralMoeModel(DeepseekV2Model):
+    model_arch = gguf.MODEL_ARCH.DEEPSEEK2
+    model_name = "Mistral"
+    hf_arch = ""
+    is_mistral_format = True
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        logger.info("Using MistralMoeModel")
+        # remap hparams from Mistral MoE format to DeepseekV2 format
+        # we do this way to be able to reuse DeepseekV2Model set_gguf_parameters logic
+        # ref: https://github.com/vllm-project/vllm/blob/b294e28db2c5dee61bc25157664edcada8b90b31/vllm/transformers_utils/configs/mistral.py
+        config = self.hparams
+        # Mistral key -> HF key
+        config_mapping = {
+            "dim": "hidden_size",
+            "norm_eps": "rms_norm_eps",
+            "n_kv_heads": "num_key_value_heads",
+            "n_layers": "num_hidden_layers",
+            "n_heads": "num_attention_heads",
+            "hidden_dim": "intermediate_size",
+        }
+        # HF key -> (Mistral key, default value)
+        top_level_mapping_with_default = {
+            "model_type": ("model_type", "transformer"),
+            "hidden_act": ("activation", "silu"),
+            "tie_word_embeddings": ("tied_embeddings", False),
+            "max_seq_len": ("max_seq_len", config.get("max_position_embeddings", 128_000)),
+            "max_position_embeddings": ("max_position_embeddings", 128_000),
+        }
+        # mapping top-level keys
+        for key, new_key in config_mapping.items():
+            if key in config:
+                config[new_key] = config[key]
+        for new_key, (key, default_value) in top_level_mapping_with_default.items():
+            config[new_key] = config.get(key, default_value)
+        # mapping MoE-specific keys
+        moe_config_map = {
+            "route_every_n": "moe_layer_freq",
+            "first_k_dense_replace": "first_k_dense_replace",
+            "num_experts_per_tok": "num_experts_per_tok",
+            "num_experts": "n_routed_experts",
+            "expert_hidden_dim": "moe_intermediate_size",
+            "routed_scale": "routed_scaling_factor",
+            "num_shared_experts": "n_shared_experts",
+            "num_expert_groups": "n_group",
+            "num_expert_groups_per_tok": "topk_group",
+        }
+        moe = config["moe"]
+        for key, new_key in moe_config_map.items():
+            if key in moe:
+                config[new_key] = moe[key]
+        # provide missing values
+        config["topk_method"] = None
+        config["norm_topk_prob"] = True
+        config["scoring_func"] = "softmax"
+
+    def set_vocab(self):
+        self._set_vocab_mistral()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        MistralModel.set_mistral_config(self.gguf_writer, self.hparams)
+        yarn_params = self.hparams["yarn"]
+        self.gguf_writer.add_attn_temperature_length(yarn_params["original_max_position_embeddings"])
+        self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1)  # mscale_all_dim * 0.1
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        if name.startswith("vision_") or name.startswith("patch_merger.") or "mm_projector" in name:
+            return []
+
+        # rename certain tensors so that we can reuse DeepseekV2Model modify_tensors logic
+        if name.endswith(".qscale_act"):
+            name = name.replace(".qscale_act", ".input_scale")
+        if name.endswith(".qscale_weight"):
+            name = name.replace(".qscale_weight", ".weight_scale")
+        if ".wkv_b." in name:
+            name = name.replace(".wkv_b.", ".kv_b_proj.")
+        if ".experts." in name:
+            name = name.replace(".experts.", ".mlp.experts.")
+            name = name.replace(".w1.", ".gate_proj.")
+            name = name.replace(".w2.", ".down_proj.")
+            name = name.replace(".w3.", ".up_proj.")
+            name = "model." + name
+
+        return super().modify_tensors(data_torch, name, bid)
 
 
 class PixtralModel(LlavaVisionModel):
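The hparams remapping in `MistralMoeModel.__init__` is easiest to follow on a concrete config. The sketch below is only an illustration: the values are invented and only a subset of the mapping tables is shown; the real keys and defaults are the ones in the diff above.

```python
# Illustration only: Mistral MoE params.json keys get HF/DeepseekV2-style aliases
# added next to them so DeepseekV2Model.set_gguf_parameters() can be reused.
# All numbers are invented.
config = {
    "dim": 5120,
    "n_layers": 40,
    "n_heads": 32,
    "n_kv_heads": 8,
    "moe": {"num_experts": 16, "num_experts_per_tok": 2, "expert_hidden_dim": 4096},
}

for key, new_key in {"dim": "hidden_size", "n_layers": "num_hidden_layers",
                     "n_heads": "num_attention_heads", "n_kv_heads": "num_key_value_heads"}.items():
    if key in config:
        config[new_key] = config[key]          # HF key is added, Mistral key stays

for key, new_key in {"num_experts": "n_routed_experts",
                     "expert_hidden_dim": "moe_intermediate_size"}.items():
    if key in config["moe"]:
        config[new_key] = config["moe"][key]   # MoE block is flattened to the top level

print(config["hidden_size"], config["n_routed_experts"], config["moe_intermediate_size"])
# -> 5120 16 4096
```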
@@ -10501,6 +10593,8 @@ def main() -> None:
         elif args.mmproj:
             assert hparams.get("vision_encoder") is not None, "This model does not support multimodal"
             model_class = PixtralModel
+        elif "moe" in hparams:
+            model_class = MistralMoeModel
         else:
             model_class = MistralModel
 
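For reference, the tensor renaming in `MistralMoeModel.modify_tensors` can be traced on a single expert weight. The input name below is an assumption about the Mistral checkpoint layout, not something stated in the diff; the replace chain itself is the one from the code above, and the result matches the HF DeepseekV2 expert-tensor naming that the reused mapping expects.

```python
# Illustration only: tracing the rename chain on an assumed Mistral-format
# expert tensor name (the input name is hypothetical).
name = "layers.0.experts.3.w1.weight"

if ".experts." in name:
    name = name.replace(".experts.", ".mlp.experts.")
    name = name.replace(".w1.", ".gate_proj.")
    name = name.replace(".w2.", ".down_proj.")
    name = name.replace(".w3.", ".up_proj.")
    name = "model." + name

print(name)  # model.layers.0.mlp.experts.3.gate_proj.weight
```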