@@ -3328,7 +3328,13 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
 @ModelBase.register("InternVisionModel")
 class InternVisionModel(MmprojModel):
     def set_gguf_parameters(self):
+        assert self.hparams_vision is not None
+        if isinstance(self.hparams_vision['image_size'], list):
+            self.hparams_vision['image_size'] = self.hparams_vision['image_size'][0]
+        if isinstance(self.hparams_vision['patch_size'], list):
+            self.hparams_vision['patch_size'] = self.hparams_vision['patch_size'][0]
         super().set_gguf_parameters()
+
         hparams = self.hparams
         self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.INTERNVL)
         self.gguf_writer.add_vision_attention_layernorm_eps(hparams["layer_norm_eps"])
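The guard above handles vision configs where image_size and patch_size arrive as lists rather than scalars, reducing them to a single value before the base class reads them. A minimal sketch of the effect (the 448/14 values are illustrative, not taken from this diff):

    hparams_vision = {"image_size": [448, 448], "patch_size": [14, 14]}
    for key in ("image_size", "patch_size"):
        if isinstance(hparams_vision[key], list):
            hparams_vision[key] = hparams_vision[key][0]
    # hparams_vision == {"image_size": 448, "patch_size": 14}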
@@ -3352,14 +3358,30 @@ def tensor_force_quant(self, name, new_name, bid, n_dims):
             return gguf.GGMLQuantizationType.F32
         return False
 
+    def _mapping_interns1_name(self, name):
+        names_map = {
+            "model.multi_modal_projector.layer_norm.bias": "mlp1.0.bias",
+            "model.multi_modal_projector.layer_norm.weight": "mlp1.0.weight",
+            "model.multi_modal_projector.linear_1.bias": "mlp1.1.bias",
+            "model.multi_modal_projector.linear_1.weight": "mlp1.1.weight",
+            "model.multi_modal_projector.linear_2.bias": "mlp1.3.bias",
+            "model.multi_modal_projector.linear_2.weight": "mlp1.3.weight",
+        }
+        if name in names_map:
+            name = names_map[name]
+        return name
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
-        if name.startswith("vision_model") or name.startswith("mlp"):
+        vision_prefix = ['vision_model', 'mlp', 'model.vision_tower', 'model.multi_modal_projector']
+        # deal with intern-s1 special case
+        name = self._mapping_interns1_name(name)
+        if any([name.startswith(prefix) for prefix in vision_prefix]):
             # process visual tensors
             # correct name
             if name.startswith("vision_model"):
                 name = "vision_tower." + name
-            if (".ls" in name or "position_embedding" in name) and not name.endswith(".weight"):
+            if (".ls" in name or ".lambda_" in name or "position_embedding" in name) and not name.endswith(".weight"):
                 name += ".weight"
             # split QKV tensors if needed
             if ".qkv." in name:
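For reference, a minimal sketch of how the Intern-S1 rename plus prefix check above routes a projector tensor; the input name is just an example matching a key in names_map:

    name = "model.multi_modal_projector.linear_1.weight"
    name = self._mapping_interns1_name(name)   # -> "mlp1.1.weight"
    # "mlp1.1.weight" starts with "mlp", so the vision_prefix check is True
    # and the tensor is processed as a visual tensor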
@@ -3445,6 +3467,10 @@ def set_gguf_parameters(self):
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # process the experts separately
+        name = name.replace("language_model.", "")  # InternVL
+        if name.startswith("mlp") or name.startswith("vision_model") or name.startswith("model.vision_tower") or name.startswith("model.multi_modal_projector"):
+            # skip visual tensors
+            return []
         if name.find("experts") != -1:
             n_experts = self.hparams["num_experts"]
             assert bid is not None
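The added lines above are the text-model counterpart of the vision handling: the InternVL "language_model." prefix is stripped so language tensors map as usual, and anything belonging to the vision tower or projector is dropped here (the mmproj classes above take care of those). A small sketch with hypothetical tensor names:

    "language_model.model.layers.0.self_attn.q_proj.weight".replace("language_model.", "")
    # -> "model.layers.0.self_attn.q_proj.weight", converted normally
    # "model.vision_tower.encoder.layers.0.attn.qkv.weight" matches a skipped prefix -> return []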
@@ -3498,6 +3524,85 @@ class Qwen3Model(Qwen2Model):
 class Qwen3MoeModel(Qwen2MoeModel):
     model_arch = gguf.MODEL_ARCH.QWEN3MOE
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        hparams = ModelBase.load_hparams(self.dir_model)
+        self.origin_hf_arch = hparams.get('architectures', [None])[0]
+
+    def set_vocab(self):
+        # deal with intern-s1
+        if self.origin_hf_arch == 'InternS1ForConditionalGeneration':
+            self._set_vocab_interns1()
+            return
+
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            self._set_vocab_gpt2()
+
+    def _set_vocab_interns1(self):
+        tokens: list[str] = []
+        toktypes: list[int] = []
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+        vocab = getattr(tokenizer, 'vocab', tokenizer.get_vocab())
+        vocab_size = self.hparams.get("vocab_size", len(vocab))
+        assert max(vocab.values()) < vocab_size
+
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab.items()}
+        added_vocab = tokenizer.get_added_vocab()
+
+        added_tokens_decoder = tokenizer.added_tokens_decoder
+
+        for i in range(vocab_size):
+            if i not in reverse_vocab:
+                tokens.append(f"[PAD{i}]")
+                toktypes.append(gguf.TokenType.UNUSED)
+            else:
+                token: str = reverse_vocab[i]
+                if token in added_vocab:
+                    # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
+                    # To avoid unexpected issues - we make sure to normalize non-normalized tokens
+                    if not added_tokens_decoder[i].normalized:
+                        previous_token = token
+                        token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
+                        if previous_token != token:
+                            logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
+
+                    if added_tokens_decoder[i].special or self.does_token_look_special(token):
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.USER_DEFINED)
+                else:
+                    toktypes.append(gguf.TokenType.NORMAL)
+                tokens.append(token)
+
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
+        special_tokens_map_file = self.dir_model / 'special_tokens_map.json'
+        additional_special_tokens = []
+        if special_tokens_map_file.is_file():
+            with open(special_tokens_map_file, encoding='utf-8') as f:
+                additional_special_tokens = json.load(f).get('additional_special_tokens', [])
+        tokenizer_cfg_file = self.dir_model / 'tokenizer_config.json'
+        if tokenizer_cfg_file.is_file():
+            with open(tokenizer_cfg_file, encoding='utf-8') as f:
+                added_tokens_decoder = json.load(f).get('added_tokens_decoder', {})
+                token2ids_map = {data['content']: int(token) for token, data in added_tokens_decoder.items() if data['special']}
+                for token in additional_special_tokens:
+                    if token in token2ids_map:
+                        special_vocab._set_special_token(token, token2ids_map[token])
+        special_vocab._set_special_token('eos', 151645)
+        special_vocab._set_special_token("bos", 151643)
+        special_vocab.add_to_gguf(self.gguf_writer)
+
 
 @ModelBase.register("GPT2LMHeadModel")
 class GPT2Model(TextModel):
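A minimal sketch of how the special-token wiring above resolves ids; the added_tokens_decoder entry below is hypothetical, and the hard-coded 151645/151643 fallbacks are the usual Qwen chat ids (<|im_end|> and <|endoftext|>):

    added_tokens_decoder = {"151667": {"content": "<think>", "special": True}}
    token2ids_map = {d["content"]: int(t) for t, d in added_tokens_decoder.items() if d["special"]}
    # token2ids_map == {"<think>": 151667}; if "<think>" is listed in additional_special_tokens,
    # it gets registered via special_vocab._set_special_token("<think>", 151667)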
@@ -7997,15 +8102,13 @@ def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
     def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         blocks0: Tensor = torch.zeros(1)
         blocks1: Tensor = torch.zeros(1)
-        found_mxfp4_tensors = False
         # we assume that tensors are loaded in the correct order
         for name, data_torch in self.get_tensors():
             if "mlp.experts.down_proj_blocks" in name:
                 blocks0 = data_torch
             elif "mlp.experts.down_proj_scales" in name:
                 new_name = self.map_tensor_name(name.replace("_scales", ".weight"))
                 self.repack_mxfp4(new_name, blocks0, data_torch)
-                found_mxfp4_tensors = True
             elif "mlp.experts.gate_up_proj_blocks" in name:
                 blocks0, blocks1 = data_torch[:, ::2, :, :], data_torch[:, 1::2, :, :]
             elif "mlp.experts.gate_up_proj_scales" in name:
@@ -8014,9 +8117,6 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
                 new_name_up = self.map_tensor_name(name.replace("gate_up_proj_scales", "up_proj.weight"))
                 self.repack_mxfp4(new_name_gate, blocks0, scales0)
                 self.repack_mxfp4(new_name_up, blocks1, scales1)
-                found_mxfp4_tensors = True
-        if not found_mxfp4_tensors:
-            raise ValueError("No MXFP4 tensors found in the model. Please make sure you are using MXFP4 model.")
         return []
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
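The gate_up_proj_blocks / gate_up_proj_scales tensors interleave gate and up rows, so the even/odd slicing above separates gate from up before each half is repacked. A shape-only sketch with made-up dimensions (2 experts, 8 interleaved rows, 90 MXFP4 blocks of 16 bytes):

    import torch
    blocks = torch.zeros(2, 8, 90, 16)
    gate, up = blocks[:, ::2, :, :], blocks[:, 1::2, :, :]
    # gate.shape == up.shape == (2, 4, 90, 16); each half is passed to repack_mxfp4() separately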
@@ -8029,7 +8129,12 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if "down_proj" in name:
             if name.endswith("_bias"):
                 name = name.replace("down_proj_bias", "down_proj.bias")
+            elif "_blocks" not in name and "_scales" not in name:
+                logger.warning(f"{name} is not in MXFP4, performance may be degraded")
+                name = name.replace("down_proj", "down_proj.weight")
+                data_torch = data_torch.transpose(-1, -2)
             else:
+                # otherwise, it should already be repacked to ggml MXFP4 format
                 return []
 
         # split the gate_up into gate and up
@@ -8042,7 +8147,18 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
                     (self.map_tensor_name(name_gate), gate_proj_bias),
                     (self.map_tensor_name(name_up), up_proj_bias)
                 ]
+            elif "_blocks" not in name and "_scales" not in name:
+                logger.warning(f"{name} is not in MXFP4, performance may be degraded")
+                name_up = name.replace("gate_up_proj", "up_proj.weight")
+                name_gate = name.replace("gate_up_proj", "gate_proj.weight")
+                data_torch = data_torch.transpose(-1, -2)
+                gate_proj_weight, up_proj_weight = data_torch[:, ::2, :], data_torch[:, 1::2, :]
+                return [
+                    (self.map_tensor_name(name_gate), gate_proj_weight),
+                    (self.map_tensor_name(name_up), up_proj_weight)
+                ]
             else:
+                # otherwise, it should already be repacked to ggml MXFP4 format
                 return []
 
         return [(self.map_tensor_name(name), data_torch)]
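The new elif branches cover checkpoints whose expert weights were exported as plain tensors instead of MXFP4 blocks/scales: the transpose swaps the last two dimensions (presumably to match the orientation the existing tensor mapping expects), and gate/up are then split with the same even/odd pattern. A shape-only sketch with made-up dimensions:

    import torch
    w = torch.zeros(2, 2880, 5760)          # (experts, n_embd, 2*n_ff), illustrative only
    w = w.transpose(-1, -2)                 # -> (2, 5760, 2880)
    gate, up = w[:, ::2, :], w[:, 1::2, :]  # each (2, 2880, 2880)
    # gate maps to gate_proj.weight, up maps to up_proj.weight via map_tensor_name()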