1111import os
1212import re
1313import sys
14+ from functools import reduce
1415from enum import IntEnum
1516from pathlib import Path
1617from hashlib import sha256
@@ -1246,7 +1247,7 @@ def set_gguf_parameters(self):
12461247 self .gguf_writer .add_vision_embedding_length (self .find_vparam (["hidden_size" ]))
12471248 self .gguf_writer .add_vision_feed_forward_length (self .find_vparam (["intermediate_size" ]))
12481249 self .gguf_writer .add_vision_block_count (self .find_vparam (self .n_block_keys ))
1249- self .gguf_writer .add_vision_head_count (self .find_vparam (["num_attention_heads" ]))
1250+ self .gguf_writer .add_vision_head_count (self .find_vparam (["num_attention_heads" , "num_heads" ]))
12501251
12511252 # preprocessor config
12521253 self .gguf_writer .add_vision_image_mean (self .preprocessor_config ["image_mean" ])
@@ -2895,14 +2896,15 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
28952896 return [(self .map_tensor_name (name ), data_torch )]
28962897
28972898
2898- @ModelBase .register ("Ernie4_5_MoeForCausalLM" )
2899+ @ModelBase .register ("Ernie4_5_MoeForCausalLM" , "Ernie4_5_VLMoeForConditionalGeneration" )
28992900class Ernie4_5MoeModel (Ernie4_5Model ):
29002901 model_arch = gguf .MODEL_ARCH .ERNIE4_5_MOE
29012902 _experts : list [dict [str , Tensor ]] | None = None
29022903
    def __init__(self, *args, **kwargs):
        """Forward construction to the base model, then set up per-layer caches."""
        super().__init__(*args, **kwargs)
        # One dict of expert tensors per transformer block; entries accumulate in
        # modify_tensors() until a full expert set is present and can be merged.
        self._experts: list[dict[str, Tensor]] = [{} for _ in range(self.block_count)]
        # Buffers "*_1" tensor halves until the matching base tensor arrives, so
        # checkpoint tensors stored in two parts can be re-concatenated.
        self.split_cache: dict[str, Tensor] = {}
29062908
29072909 def set_gguf_parameters (self ):
29082910 super ().set_gguf_parameters ()
@@ -2918,6 +2920,18 @@ def set_gguf_parameters(self):
29182920 self .gguf_writer .add_expert_shared_feed_forward_length (shared_expert_intermediate_size // num_key_value_heads )
29192921
29202922 def modify_tensors (self , data_torch : Tensor , name : str , bid : int | None ) -> Iterable [tuple [str , Tensor ]]:
2923+ if name .endswith ((".weight_1" , ".bias_1" )):
2924+ self .split_cache [name ] = data_torch
2925+ return []
2926+
2927+ part1_name = name + "_1"
2928+ if part1_name in self .split_cache :
2929+ part1_tensor = self .split_cache .pop (part1_name )
2930+ dim = 0
2931+ if 'down' in name or 'proj' in name and 'up' not in name and 'gate' not in name :
2932+ dim = 1
2933+ data_torch = torch .cat ((data_torch , part1_tensor ), dim = dim )
2934+
29212935 # Modify correction bias name as in DeepseekV2
29222936 if name .endswith ("e_score_correction_bias" ):
29232937 name = name .replace ("e_score_correction_bias" , "e_score_correction.bias" )
@@ -2949,7 +2963,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
29492963 self ._experts = [{} for _ in range (self .block_count )]
29502964
29512965 self ._experts [bid ][name ] = data_torch
2952-
2966+ n_experts_val = self .hparams ["moe_num_experts" ]
2967+ n_experts = reduce (lambda x , y : x + y , n_experts_val , 0 ) if isinstance (n_experts_val , list ) else n_experts_val
29532968 if len (self ._experts [bid ]) >= n_experts * 3 :
29542969 tensors : list [tuple [str , Tensor ]] = []
29552970
@@ -3012,6 +3027,67 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
30123027 # skip multimodal tensors
30133028 return []
30143029 return [(self .map_tensor_name (name ), data_torch )]
@ModelBase.register("Ernie4_5_VLMoeForConditionalGeneration")
class Ernie45VLModel(MmprojModel):
    """Vision (mmproj) converter for Ernie 4.5 VL MoE checkpoints.

    Emits only the vision-tower / projector tensors. The text-model side of
    the same architecture string is handled by Ernie4_5MoeModel, which
    registers "Ernie4_5_VLMoeForConditionalGeneration" as well.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # NOTE(review): sibling converters declare model_arch at class level;
        # assigning it here assumes the base constructor does not read it —
        # confirm against ModelBase.__init__ before relying on this.
        self.model_arch = gguf.MODEL_ARCH.ERNIE4_5_VL_MOE

        vcfg = self.hparams_vision
        if vcfg is not None:
            # Fall back to the preprocessor config for image_size when the
            # vision config omits it ("size" takes precedence over "crop_size").
            if "image_size" not in vcfg:
                if "size" in self.preprocessor_config and "height" in self.preprocessor_config["size"]:
                    vcfg["image_size"] = self.preprocessor_config["size"]["height"]
                elif "crop_size" in self.preprocessor_config and "height" in self.preprocessor_config["crop_size"]:
                    vcfg["image_size"] = self.preprocessor_config["crop_size"]["height"]
            if "intermediate_size" not in vcfg:
                # mlp_ratio is frequently a float in HF configs; the GGUF
                # writer needs an integer length, so coerce explicitly.
                vcfg["intermediate_size"] = int(vcfg["hidden_size"] * vcfg["mlp_ratio"])
            if "num_hidden_layers" not in vcfg and "num_layers" not in vcfg:
                # FIXME: This is a placeholder calculation.
                # The actual value may need to be derived differently.
                vcfg["num_hidden_layers"] = 32

    def set_gguf_parameters(self):
        """Write vision metadata to the GGUF file.

        Deliberately does NOT call super().set_gguf_parameters(): the parent
        implementation would look up keys this checkpoint's vision config does
        not provide.

        Raises:
            KeyError: if neither of the known layer-count or head-count keys
                is present in the vision config.
        """
        vision_config = self.hparams_vision
        assert vision_config is not None
        self.gguf_writer.add_vision_embedding_length(vision_config["hidden_size"])
        self.gguf_writer.add_vision_feed_forward_length(vision_config["intermediate_size"])
        # The two key spellings below cover the config variants seen in the wild.
        if (block_count := vision_config.get("num_hidden_layers", vision_config.get("num_layers"))) is None:
            raise KeyError("Could not find num_hidden_layers or num_layers in vision config")
        self.gguf_writer.add_vision_block_count(block_count)
        if (head_count := vision_config.get("num_attention_heads", vision_config.get("num_heads"))) is None:
            raise KeyError("Could not find num_attention_heads or num_heads in vision config")
        self.gguf_writer.add_vision_head_count(head_count)
        self.gguf_writer.add_vision_image_size(vision_config["image_size"])
        self.gguf_writer.add_vision_patch_size(vision_config["patch_size"])
        # The projector maps vision features into the text model's hidden size.
        self.gguf_writer.add_vision_projection_dim(self.hparams["hidden_size"])
        self.gguf_writer.add_clip_projector_type("mlp")
        if "spatial_conv_size" in self.hparams:
            self.gguf_writer.add_vision_spatial_merge_size(self.hparams["spatial_conv_size"])

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Map/split a source tensor into GGUF-named vision tensors.

        Fused attention QKV tensors are split into separate Q/K/V parts;
        text-model tensors are dropped (this converter emits the projector
        file only).
        """
        del bid  # unused
        if not name.startswith("vision_model."):
            # This is a projector model, so we skip the text model tensors.
            return
        if ".attn.qkv." in name:
            # Fused tensor is laid out as [Q; K; V] along dim 0, in three
            # equal parts — this indexing holds for both weight and bias.
            c3 = data_torch.shape[0]
            assert c3 % 3 == 0
            c = c3 // 3
            for part, chunk in (
                ("q", data_torch[:c]),
                ("k", data_torch[c:c * 2]),
                ("v", data_torch[c * 2:]),
            ):
                yield self.map_tensor_name(name.replace("qkv", part)), chunk
            return
        if "mm_resampler" in name:
            name = name.replace("mm_resampler", "resampler")
        yield self.map_tensor_name(name), data_torch
30153091
30163092
30173093@ModelBase .register ("Qwen2VLModel" , "Qwen2VLForConditionalGeneration" , "Qwen2_5_VLForConditionalGeneration" )
0 commit comments