@@ -607,7 +607,7 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         toktypes: list[int] = []

         from transformers import AutoTokenizer
-        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
         vocab = getattr(tokenizer, 'vocab', tokenizer.get_vocab())
         vocab_size = self.hparams.get("vocab_size", len(vocab))
         assert max(vocab.values()) < vocab_size
@@ -1219,12 +1219,8 @@ def __init__(self, *args, **kwargs):
         self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.MMPROJ, self.block_count)

         # load preprocessor config
-        preprocess_config_file = self.dir_model / "preprocessor_config.json"
-        if preprocess_config_file.exists():
-            with open(preprocess_config_file, "r", encoding="utf-8") as f:
-                self.preprocessor_config = json.load(f)
-        else:
-            self.preprocessor_config = dict(image_mean=[0.485, 0.456, 0.406], image_std=[0.229, 0.224, 0.225])
+        with open(self.dir_model / "preprocessor_config.json", "r", encoding="utf-8") as f:
+            self.preprocessor_config = json.load(f)

     def get_vision_config(self) -> dict[str, Any] | None:
         return self.global_config.get("vision_config")
@@ -3160,7 +3156,7 @@ def set_gguf_parameters(self):

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         # process the experts separately
-        name = name.replace(r"language_model.", r"")  # InternVL
+        name = name.replace("language_model.", "")  # InternVL
         if name.startswith("mlp") or name.startswith("vision_model") or name.startswith("model.vision_tower") or name.startswith("model.multi_modal_projector"):
             # skip visual tensors
             return []
@@ -3217,9 +3213,14 @@ class Qwen3Model(Qwen2Model):
 class Qwen3MoeModel(Qwen2MoeModel):
     model_arch = gguf.MODEL_ARCH.QWEN3MOE

+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        hparams = ModelBase.load_hparams(self.dir_model)
+        self.origin_hf_arch = hparams.get('architectures', [None])[0]
+
     def set_vocab(self):
-        # deal with interns1
-        if 'interns1' in f'{self.dir_model}'.lower():
+        # deal with intern-s1
+        if self.origin_hf_arch == 'InternS1ForConditionalGeneration':
             self._set_vocab_interns1()
             return

@@ -3240,19 +3241,20 @@ def _set_vocab_interns1(self):
         additional_special_tokens = []
         if special_tokens_map_file.is_file():
             with open(special_tokens_map_file, encoding='utf-8') as f:
-                 additional_special_tokens = json.load(f).get('additional_special_tokens', [])
+                additional_special_tokens = json.load(f).get('additional_special_tokens', [])
         tokenizer_cfg_file = self.dir_model / 'special_tokens_map.json'
         if tokenizer_cfg_file.is_file():
             with open(tokenizer_cfg_file, encoding='utf-8') as f:
-                 added_tokens_decoder = json.load(f).get('added_tokens_decoder', {})
-                 token2ids_map = {data['content']: int(token) for token, data in added_tokens_decoder.items() if data['special']}
-                 for token in additional_special_tokens:
-                     if token in token2ids_map:
-                         special_vocab._set_special_token(token, token2ids_map[token])
+                added_tokens_decoder = json.load(f).get('added_tokens_decoder', {})
+                token2ids_map = {data['content']: int(token) for token, data in added_tokens_decoder.items() if data['special']}
+                for token in additional_special_tokens:
+                    if token in token2ids_map:
+                        special_vocab._set_special_token(token, token2ids_map[token])
         special_vocab._set_special_token('eos', 151645)
         special_vocab._set_special_token("bos", 151643)
         special_vocab.add_to_gguf(self.gguf_writer)

+
 @ModelBase.register("GPT2LMHeadModel")
 class GPT2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT2