@@ -426,7 +426,11 @@ def load_hparams(dir_model: Path):
             logger.warning(f"Failed to load model config from {dir_model}: {e}")
             logger.warning("Trying to load config.json instead")
             with open(dir_model / "config.json", "r", encoding="utf-8") as f:
-                return json.load(f)
+                config = json.load(f)
+            if "llm_config" in config:
+                # rename for InternVL
+                config["text_config"] = config["llm_config"]
+            return config

     @classmethod
     def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
@@ -794,6 +798,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "0e9433cbbb161f89e264eb32e8e64bfe69e834973ffca5d41d3948a604a3e2a3":
             # ref: https://huggingface.co/mistral-community/pixtral-12b
             res = "pixtral"
+        if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec":
+            # ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base
+            res = "seed-coder"

         if res is None:
             logger.warning("\n")
@@ -2606,6 +2613,11 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if self.hf_arch == "Qwen2Model":
             name = f"model.{name}"  # map to Qwen2ForCausalLM tensors
+        if "language_model." in name:
+            name = name.replace("language_model.", "")  # for InternVL
+        if name.startswith("mlp") or name.startswith("vision_model"):
+            # skip visual tensors
+            return []
         yield from super().modify_tensors(data_torch, name, bid)


@@ -2709,6 +2721,62 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return []  # skip other tensors


+@ModelBase.register("InternVisionModel")
+class InternVisionModel(VisionModel):
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.INTERNVL)
+        self.gguf_writer.add_vision_attention_layernorm_eps(hparams["layer_norm_eps"])
+        # hidden_act
+        if hparams["hidden_act"] == "silu":
+            self.gguf_writer.add_vision_use_silu(True)
+        elif hparams["hidden_act"] == "gelu":
+            self.gguf_writer.add_vision_use_gelu(True)
+        else:
+            raise ValueError(f"Unsupported hidden_act: {hparams['hidden_act']}")
+        # downsample_ratio
+        downsample_ratio = self.global_config.get("downsample_ratio")
+        assert downsample_ratio is not None
+        self.gguf_writer.add_vision_projector_scale_factor(int(1.0 / downsample_ratio))
+
+    def tensor_force_quant(self, name, new_name, bid, n_dims):
+        del bid, name, n_dims  # unused
+        if ".patch_embd." in new_name:
+            return gguf.GGMLQuantizationType.F16
+        if ".position_embd." in new_name:
+            return gguf.GGMLQuantizationType.F32
+        return False
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+        if name.startswith("vision_model") or name.startswith("mlp"):
+            # process visual tensors
+            # correct name
+            if name.startswith("vision_model"):
+                name = "vision_tower." + name
+            if (".ls" in name or "position_embedding" in name) and not name.endswith(".weight"):
+                name += ".weight"
+            # split QKV tensors if needed
+            if ".qkv." in name:
+                if data_torch.ndim == 2:  # weight
+                    c3, _ = data_torch.shape
+                else:  # bias
+                    c3 = data_torch.shape[0]
+                assert c3 % 3 == 0
+                c = c3 // 3
+                wq = data_torch[:c]
+                wk = data_torch[c: c * 2]
+                wv = data_torch[c * 2:]
+                return [
+                    (self.map_tensor_name(name.replace("attn.qkv", "self_attn.q_proj")), wq),
+                    (self.map_tensor_name(name.replace("attn.qkv", "self_attn.k_proj")), wk),
+                    (self.map_tensor_name(name.replace("attn.qkv", "self_attn.v_proj")), wv),
+                ]
+            return [(self.map_tensor_name(name), data_torch)]
+        return []  # skip other tensors
+
+
 @ModelBase.register("WavTokenizerDec")
 class WavTokenizerDecModel(TextModel):
     model_arch = gguf.MODEL_ARCH.WAVTOKENIZER_DEC
@@ -3360,6 +3428,11 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         head_dim = n_embd // num_heads
         num_groups = num_heads // q_per_kv

+        name = name.replace("language_model.", "")  # InternVL
+        if name.startswith("mlp") or name.startswith("vision_model"):
+            # skip visual tensors
+            return []
+
         if bid is not None and f"model.layers.{bid}.attention.wqkv" in name:
             qkv = data_torch

@@ -3433,6 +3506,10 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         n_head = self.hparams["num_attention_heads"]
         n_kv_head = self.hparams.get("num_key_value_heads")
+        name = name.replace("language_model.", "")  # InternVL
+        if name.startswith("mlp") or name.startswith("vision_model"):
+            # skip visual tensors
+            return []
         if name.endswith(("q_proj.weight", "q_proj.bias")):
             data_torch = LlamaModel.permute(data_torch, n_head, n_head)
         if name.endswith(("k_proj.weight", "k_proj.bias")):
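For context, a minimal sketch of the fused-QKV split that the new InternVisionModel.modify_tensors performs above. This is plain PyTorch with a toy, hypothetical hidden size (not taken from any real config): InternVL stores the attention projections fused along dim 0, and the converter slices that tensor into three equal parts to emit separate q/k/v GGUF tensors.

# standalone illustration only; `hidden` is a made-up size for the example
import torch

hidden = 4
qkv_weight = torch.randn(3 * hidden, hidden)  # fused [3 * hidden, hidden] weight

c3 = qkv_weight.shape[0]
assert c3 % 3 == 0
c = c3 // 3
# slice the fused tensor into Q, K, V along dim 0, as in modify_tensors
wq, wk, wv = qkv_weight[:c], qkv_weight[c:c * 2], qkv_weight[c * 2:]

# each slice now has the shape of a single projection
assert wq.shape == wk.shape == wv.shape == (hidden, hidden)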