@@ -423,19 +423,19 @@ def load_hparams(dir_model: Path):
         try:
             # for security reason, we don't allow loading remote code by default
             # if a model need remote code, we will fallback to config.json
-            return AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
+            config = AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
         except Exception as e:
             logger.warning(f"Failed to load model config from {dir_model}: {e}")
             logger.warning("Trying to load config.json instead")
             with open(dir_model / "config.json", "r", encoding="utf-8") as f:
                 config = json.load(f)
-            if "llm_config" in config:
-                # rename for InternVL
-                config["text_config"] = config["llm_config"]
-            if "thinker_config" in config:
-                # rename for Qwen2.5-Omni
-                config["text_config"] = config["thinker_config"]["text_config"]
-            return config
+        if "llm_config" in config:
+            # rename for InternVL
+            config["text_config"] = config["llm_config"]
+        if "thinker_config" in config:
+            # rename for Qwen2.5-Omni
+            config["text_config"] = config["thinker_config"]["text_config"]
+        return config

     @classmethod
     def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
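This hunk de-indents the InternVL/Qwen2.5-Omni renames out of the `except` fallback, so they now run whether the config came from `AutoConfig` or from a raw `config.json`. A minimal sketch of the resulting control flow (standalone function with local names; the real method is a static method on `ModelBase` and logs through the module logger):

```python
import json
from pathlib import Path

def load_hparams_sketch(dir_model: Path) -> dict:
    try:
        # preferred path: let transformers resolve the config
        from transformers import AutoConfig
        config = AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
    except Exception:
        # fallback path: read config.json directly
        with open(dir_model / "config.json", "r", encoding="utf-8") as f:
            config = json.load(f)
    # these renames now apply on BOTH paths, not only the fallback
    if "llm_config" in config:
        # rename for InternVL
        config["text_config"] = config["llm_config"]
    if "thinker_config" in config:
        # rename for Qwen2.5-Omni
        config["text_config"] = config["thinker_config"]["text_config"]
    return config
```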
@@ -1207,7 +1207,7 @@ def set_gguf_parameters(self):
             self.gguf_writer.add_audio_block_count(self.find_aparam(self.n_block_keys))
             self.gguf_writer.add_audio_head_count(self.find_aparam(["num_attention_heads"]))

-        else:
+        if not self.has_vision_encoder and not self.has_audio_encoder:
             raise ValueError("MmprojModel must have either vision or audio encoder")

     def write_vocab(self):
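Replacing the trailing `else:` with an explicit check decouples the error from the audio branch: a model may now configure a vision encoder, an audio encoder, or both, and conversion only aborts when neither is present. A toy illustration of the pattern (hypothetical stand-in class; the real check lives in `MmprojModel.set_gguf_parameters`):

```python
class EncoderFlags:
    """Hypothetical stand-in for the relevant MmprojModel attributes."""
    def __init__(self, has_vision: bool, has_audio: bool):
        self.has_vision_encoder = has_vision
        self.has_audio_encoder = has_audio

def validate(m: EncoderFlags) -> None:
    if m.has_vision_encoder:
        pass  # write vision hparams here
    if m.has_audio_encoder:
        pass  # write audio hparams here
    # only the "neither" case is an error; vision+audio together is fine
    if not m.has_vision_encoder and not m.has_audio_encoder:
        raise ValueError("MmprojModel must have either vision or audio encoder")

validate(EncoderFlags(True, True))   # ok: both encoders (omni-style models)
try:
    validate(EncoderFlags(False, False))
except ValueError as e:
    print(e)  # MmprojModel must have either vision or audio encoder
```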
@@ -1841,7 +1841,8 @@ def prepare_tensors(self):
18411841 "MistralForCausalLM" ,
18421842 "MixtralForCausalLM" ,
18431843 "VLlama3ForCausalLM" ,
1844- "LlavaForConditionalGeneration" )
1844+ "LlavaForConditionalGeneration" ,
1845+ "LlamaModel" )
18451846class LlamaModel (TextModel ):
18461847 model_arch = gguf .MODEL_ARCH .LLAMA
18471848 undo_permute = True
@@ -1921,6 +1922,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter

         if is_vision_tensor:
             return []  # skip vision tensors
+        elif self.hf_arch == "LlamaModel":
+            name = "model." + name
         elif name.startswith("model.text_model"):
             name = name.replace("text_model.", "")  # for SmolVLM
         elif name.startswith("language_model."):
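The new `elif` handles checkpoints whose top-level architecture is the bare `LlamaModel` (the headless transformers class registered in the previous hunk): their tensor names lack the `model.` prefix that `LlamaForCausalLM` checkpoints carry, so prepending it lets the existing tensor map apply unchanged. A hedged sketch (the example tensor name is illustrative):

```python
def normalize_llama_name(hf_arch: str, name: str) -> str:
    # bare LlamaModel checkpoints store e.g. "layers.0...." while
    # LlamaForCausalLM stores "model.layers.0...."; prefixing lets one
    # tensor mapping serve both layouts
    if hf_arch == "LlamaModel":
        return "model." + name
    return name

assert normalize_llama_name("LlamaModel", "layers.0.self_attn.q_proj.weight") \
    == "model.layers.0.self_attn.q_proj.weight"
```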
@@ -2169,6 +2172,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             # process vision tensors
             if "positional_embedding_vlm" in name and ".weight" not in name:
                 name += ".weight"
+            if "multi_modal_projector.linear_1" in name:
+                # despite the name with number postfix, this is a single fully connected layer
+                return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC], data_torch)]
             return [(self.map_tensor_name(name), data_torch)]
         return []

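`multi_modal_projector.linear_1` is mapped straight to the `V_MMPROJ_FC` tensor name instead of going through `map_tensor_name`: as the added comment notes, the `_1` suffix looks like the first layer of a stacked projector, but this projector is a single fully connected layer. A reduced sketch of the special case (assumes the `gguf` Python package and `torch` are installed, as the converter requires; the fall-through behavior is simplified here):

```python
import gguf
import torch

def map_projector(name: str, data: torch.Tensor):
    if "multi_modal_projector.linear_1" in name:
        # single FC projector: resolve the gguf name directly, no block index
        return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC], data)]
    return None  # the real converter falls through to the generic mapping

out = map_projector("multi_modal_projector.linear_1.weight", torch.zeros(1))
print(out[0][0])  # the flat mmproj FC tensor name from gguf's constants
```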
@@ -3676,7 +3682,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]


-@ModelBase.register("BertModel", "BertForMaskedLM", "CamembertModel")
+@ModelBase.register("BertModel", "BertForMaskedLM", "CamembertModel", "BertForSequenceClassification")
 class BertModel(TextModel):
     model_arch = gguf.MODEL_ARCH.BERT

@@ -3739,6 +3745,13 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         if name.startswith("cls.seq_relationship"):
             return []

+        # For BertForSequenceClassification (direct projection layer)
+        if name == "classifier.weight":
+            name = "classifier.out_proj.weight"
+
+        if name == "classifier.bias":
+            name = "classifier.out_proj.bias"
+
         return [(self.map_tensor_name(name), data_torch)]

     def _xlmroberta_tokenizer_init(self) -> None:
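`BertForSequenceClassification` stores its head as a bare `Linear` named `classifier.{weight,bias}`; the rename moves it to the `classifier.out_proj.*` form the converter's existing tensor mapping already understands, so no new mapping entry is needed. A small sketch of the rename (hypothetical helper; the real code sits inline in `BertModel.modify_tensors`):

```python
def remap_bert_classifier(name: str) -> str:
    # BertForSequenceClassification exposes a single projection layer as
    # "classifier.*"; rename it to the "classifier.out_proj.*" form that
    # the existing tensor mapping resolves
    if name == "classifier.weight":
        return "classifier.out_proj.weight"
    if name == "classifier.bias":
        return "classifier.out_proj.bias"
    return name

assert remap_bert_classifier("classifier.weight") == "classifier.out_proj.weight"
```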