@@ -1226,6 +1226,55 @@ def _try_set_pooling_type(self) -> None:
             raise NotImplementedError("Only MEAN, CLS, and LAST pooling types supported")
         self.gguf_writer.add_pooling_type(pooling_type)
 
+    def _set_vocab_interns1(self):
+        tokens: list[str] = []
+        toktypes: list[int] = []
+
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+        vocab = getattr(tokenizer, 'vocab', tokenizer.get_vocab())
+        vocab_size = self.hparams.get("vocab_size", len(vocab))
+        assert max(vocab.values()) < vocab_size
+
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab.items()}
+        added_vocab = tokenizer.get_added_vocab()
+
+        added_tokens_decoder = tokenizer.added_tokens_decoder
+
+        for i in range(vocab_size):
+            if i not in reverse_vocab:
+                tokens.append(f"[PAD{i}]")
+                toktypes.append(gguf.TokenType.UNUSED)
+            else:
+                token: str = reverse_vocab[i]
+                if token in added_vocab:
+                    # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
+                    # To avoid unexpected issues - we make sure to normalize non-normalized tokens
+                    if not added_tokens_decoder[i].normalized:
+                        previous_token = token
+                        token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
+                        if previous_token != token:
+                            logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
+
+                    if added_tokens_decoder[i].special or self.does_token_look_special(token):
+                        toktypes.append(gguf.TokenType.CONTROL)
+                    else:
+                        toktypes.append(gguf.TokenType.USER_DEFINED)
+                else:
+                    toktypes.append(gguf.TokenType.NORMAL)
+                tokens.append(token)
+
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
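+        # assumption: Qwen-style vocabulary, where id 151643 is "<|endoftext|>", used here as BOS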
+        special_vocab._set_special_token("bos", 151643)
+        special_vocab.add_to_gguf(self.gguf_writer)
+
 
 class MmprojModel(ModelBase):
     model_type = ModelType.MMPROJ
@@ -2942,7 +2991,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
29422991 if "language_model." in name :
29432992 name = name .replace ("language_model." , "" ) # for InternVL
29442993 if name .startswith ("mlp" ) or name .startswith ("multi_modal_projector" ) \
2945- or name .startswith ("vision_model" ) or name .startswith ("audio_tower" ):
2994+ or name .startswith ("vision_model" ) or name .startswith ("audio_tower" ) \
2995+ or name .startswith ("model.vision_tower" ) or name .startswith ("model.multi_modal_projector" ):
29462996 # skip vision and audio tensors
29472997 return []
29482998 yield from super ().modify_tensors (data_torch , name , bid )
@@ -3119,7 +3169,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         yield from super().modify_tensors(data_torch, name, bid)
 
 
-@ModelBase.register("Ernie4_5_ForCausalLM")
+@ModelBase.register("Ernie4_5_ForCausalLM", "Ernie4_5ForCausalLM")
 class Ernie4_5Model(TextModel):
     model_arch = gguf.MODEL_ARCH.ERNIE4_5
 
@@ -3614,6 +3664,19 @@ def prepare_tensors(self):
 class Qwen3Model(Qwen2Model):
     model_arch = gguf.MODEL_ARCH.QWEN3
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
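+        # keep the checkpoint's original HF architecture name so set_vocab() can detect Intern-S1-mini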
+        hparams = ModelBase.load_hparams(self.dir_model, is_mistral_format=False)
+        self.origin_hf_arch = hparams.get('architectures', [None])[0]
+
+    def set_vocab(self):
+        # deal with intern-s1-mini
+        if self.origin_hf_arch == 'InternS1ForConditionalGeneration':
+            self._set_vocab_interns1()
+            return
+
+        super().set_vocab()
+
 
 @ModelBase.register("Qwen3MoeForCausalLM")
 class Qwen3MoeModel(Qwen2MoeModel):
@@ -3630,73 +3693,7 @@ def set_vocab(self):
             self._set_vocab_interns1()
             return
 
-        try:
-            self._set_vocab_sentencepiece()
-        except FileNotFoundError:
-            self._set_vocab_gpt2()
-
-    def _set_vocab_interns1(self):
-        tokens: list[str] = []
-        toktypes: list[int] = []
-
-        from transformers import AutoTokenizer
-        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
-        vocab = getattr(tokenizer, 'vocab', tokenizer.get_vocab())
-        vocab_size = self.hparams.get("vocab_size", len(vocab))
-        assert max(vocab.values()) < vocab_size
-
-        tokpre = self.get_vocab_base_pre(tokenizer)
-
-        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab.items()}
-        added_vocab = tokenizer.get_added_vocab()
-
-        added_tokens_decoder = tokenizer.added_tokens_decoder
-
-        for i in range(vocab_size):
-            if i not in reverse_vocab:
-                tokens.append(f"[PAD{i}]")
-                toktypes.append(gguf.TokenType.UNUSED)
-            else:
-                token: str = reverse_vocab[i]
-                if token in added_vocab:
-                    # The tokenizer in llama.cpp assumes the CONTROL and USER_DEFINED tokens are pre-normalized.
-                    # To avoid unexpected issues - we make sure to normalize non-normalized tokens
-                    if not added_tokens_decoder[i].normalized:
-                        previous_token = token
-                        token = tokenizer.decode(tokenizer.encode(token, add_special_tokens=False))
-                        if previous_token != token:
-                            logger.info(f"{repr(previous_token)} is encoded and decoded back to {repr(token)} using AutoTokenizer")
-
-                    if added_tokens_decoder[i].special or self.does_token_look_special(token):
-                        toktypes.append(gguf.TokenType.CONTROL)
-                    else:
-                        toktypes.append(gguf.TokenType.USER_DEFINED)
-                else:
-                    toktypes.append(gguf.TokenType.NORMAL)
-                tokens.append(token)
-
-        self.gguf_writer.add_tokenizer_model("gpt2")
-        self.gguf_writer.add_tokenizer_pre(tokpre)
-        self.gguf_writer.add_token_list(tokens)
-        self.gguf_writer.add_token_types(toktypes)
-
-        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
-        special_tokens_map_file = self.dir_model / 'special_tokens_map.json'
-        additional_special_tokens = []
-        if special_tokens_map_file.is_file():
-            with open(special_tokens_map_file, encoding='utf-8') as f:
-                additional_special_tokens = json.load(f).get('additional_special_tokens', [])
-        tokenizer_cfg_file = self.dir_model / 'special_tokens_map.json'
-        if tokenizer_cfg_file.is_file():
-            with open(tokenizer_cfg_file, encoding='utf-8') as f:
-                added_tokens_decoder = json.load(f).get('added_tokens_decoder', {})
-                token2ids_map = {data['content']: int(token) for token, data in added_tokens_decoder.items() if data['special']}
-                for token in additional_special_tokens:
-                    if token in token2ids_map:
-                        special_vocab._set_special_token(token, token2ids_map[token])
-        special_vocab._set_special_token('eos', 151645)
-        special_vocab._set_special_token("bos", 151643)
-        special_vocab.add_to_gguf(self.gguf_writer)
+        super().set_vocab()
 
 
 @ModelBase.register("GPT2LMHeadModel")
@@ -6267,9 +6264,11 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
-@ModelBase.register("DeepseekV2ForCausalLM")
-@ModelBase.register("DeepseekV3ForCausalLM")
-@ModelBase.register("KimiVLForConditionalGeneration")
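+# note: Kimi-VL's language model follows the DeepSeek architecture, so its text weights reuse this converter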
+@ModelBase.register(
+    "DeepseekV2ForCausalLM",
+    "DeepseekV3ForCausalLM",
+    "KimiVLForConditionalGeneration",
+)
 class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2
 
@@ -8520,6 +8519,43 @@ def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", "
85208519 return "mm.2.weight"
85218520 return super ().map_tensor_name (name , try_suffixes )
85228521
8522+
8523+ @ModelBase .register ("KimiVLForConditionalGeneration" )
8524+ class KimiVLModel (MmprojModel ):
8525+ def __init__ (self , * args , ** kwargs ):
8526+ super ().__init__ (* args , ** kwargs )
8527+ assert self .hparams_vision is not None
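+        # assumption: the checkpoint has no fixed input resolution, so record a nominal image_size (presumably 64 patches of size 14)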
+        self.hparams_vision["image_size"] = 64 * 14  # for compatibility
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.KIMIVL)
+        self.gguf_writer.add_vision_use_gelu(True)
+        self.gguf_writer.add_vision_projector_scale_factor(2)
+        # eps is the same as pytorch's default value
+        assert self.hparams_vision is not None
+        self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams_vision.get("layer_norm_eps", 1e-5))
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+        is_vision_tensor = "vision_tower" in name or "multi_modal_projector" in name
+
+        if is_vision_tensor:
+            if "pos_emb.weight" in name:
+                data_torch = data_torch.view(data_torch.shape[0] * data_torch.shape[1], data_torch.shape[2])
+            elif "wqkv" in name:
+                split_dim = 0 if "weight" in name else -1
+                wq, wk, wv = data_torch.chunk(3, dim=split_dim)
+                return [
+                    (self.map_tensor_name(name.replace("wqkv", "wq")), wq),
+                    (self.map_tensor_name(name.replace("wqkv", "wk")), wk),
+                    (self.map_tensor_name(name.replace("wqkv", "wv")), wv)
+                ]
+
+            return [(self.map_tensor_name(name), data_torch)]
+
+        return []  # skip other tensors
+
 ###### CONVERSION LOGIC ######
 
 