@@ -426,7 +426,11 @@ def load_hparams(dir_model: Path):
             logger.warning(f"Failed to load model config from {dir_model}: {e}")
             logger.warning("Trying to load config.json instead")
             with open(dir_model / "config.json", "r", encoding="utf-8") as f:
-                return json.load(f)
+                config = json.load(f)
+                if "llm_config" in config:
+                    # rename for InternVL
+                    config["text_config"] = config["llm_config"]
+                return config
 
     @classmethod
     def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
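Note on the fallback above: InternVL checkpoints keep the language-model settings under an `llm_config` key instead of the `text_config` key the rest of the converter reads, so the fallback mirrors the nested dict across. A minimal standalone sketch of that remapping (the sample `config.json` payload is made up for illustration):

```python
import json

# Hypothetical InternVL-style config.json payload, for illustration only.
raw = json.loads('{"llm_config": {"hidden_size": 4096, "num_hidden_layers": 32}}')

# Same remapping as the fallback path above: expose the nested LLM settings
# under the key the converter expects everywhere else.
if "llm_config" in raw:
    raw["text_config"] = raw["llm_config"]

assert raw["text_config"]["hidden_size"] == 4096
```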
@@ -2606,6 +2610,11 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if self.hf_arch == "Qwen2Model":
             name = f"model.{name}"  # map to Qwen2ForCausalLM tensors
+        if "language_model." in name:
+            name = name.replace("language_model.", "")  # for InternVL
+        if name.startswith("mlp") or name.startswith("vision_model"):
+            # skip visual tensors
+            return []
         yield from super().modify_tensors(data_torch, name, bid)
 
@@ -2709,6 +2718,57 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         return []  # skip other tensors
 
 
+@ModelBase.register("InternVisionModel")
+class InternVisionModel(VisionModel):
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.INTERNVL)
+        # hidden_act
+        if hparams["hidden_act"] == "silu":
+            self.gguf_writer.add_vision_use_silu(True)
+        elif hparams["hidden_act"] == "gelu":
+            self.gguf_writer.add_vision_use_gelu(True)
+        else:
+            raise ValueError(f"Unsupported hidden_act: {hparams['hidden_act']}")
+
+    def tensor_force_quant(self, name, new_name, bid, n_dims):
+        del bid, name, n_dims  # unused
+        if ".patch_embd." in new_name:
+            return gguf.GGMLQuantizationType.F16
+        if ".position_embd." in new_name:
+            return gguf.GGMLQuantizationType.F32
+        return False
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+        if name.startswith("vision_model") or name.startswith("mlp"):
+            # process visual tensors
+            # correct name
+            if name.startswith("vision_model"):
+                name = "vision_tower." + name
+            if ".ls" in name and not name.endswith(".weight"):
+                name += ".weight"
+            # split QKV tensors if needed
+            if ".qkv." in name:
+                if data_torch.ndim == 2:  # weight
+                    c3, _ = data_torch.shape
+                else:  # bias
+                    c3 = data_torch.shape[0]
+                assert c3 % 3 == 0
+                c = c3 // 3
+                wq = data_torch[:c]
+                wk = data_torch[c: c * 2]
+                wv = data_torch[c * 2:]
+                return [
+                    (self.map_tensor_name(name.replace("attn.qkv", "self_attn.q_proj")), wq),
+                    (self.map_tensor_name(name.replace("attn.qkv", "self_attn.k_proj")), wk),
+                    (self.map_tensor_name(name.replace("attn.qkv", "self_attn.v_proj")), wv),
+                ]
+            return [(self.map_tensor_name(name), data_torch)]
+        return []  # skip other tensors
+
+
 @ModelBase.register("WavTokenizerDec")
 class WavTokenizerDecModel(TextModel):
     model_arch = gguf.MODEL_ARCH.WAVTOKENIZER_DEC
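The QKV handling in `InternVisionModel.modify_tensors` assumes the fused projection stacks Q, K and V along the first dimension, so a weight of shape `(3 * C, C)` (or a bias of shape `(3 * C,)`) splits into three equal slices. A self-contained sketch with made-up sizes:

```python
import torch

# Made-up hidden size for illustration; real InternVL vision towers are larger.
hidden = 8
qkv_weight = torch.randn(3 * hidden, hidden)  # fused QKV projection weight
qkv_bias = torch.randn(3 * hidden)            # fused QKV projection bias

def split_qkv(t: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    # Same slicing as the converter: the first third is Q, then K, then V.
    c3 = t.shape[0]
    assert c3 % 3 == 0
    c = c3 // 3
    return t[:c], t[c:c * 2], t[c * 2:]

wq, wk, wv = split_qkv(qkv_weight)
assert wq.shape == wk.shape == wv.shape == (hidden, hidden)
bq, bk, bv = split_qkv(qkv_bias)
assert bq.shape == bk.shape == bv.shape == (hidden,)
```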