@@ -5856,6 +5856,27 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
58565856 if ".attn.rel_pos_h" in name or ".attn.rel_pos_w" in name :
58575857 return [(self .map_tensor_name (name , try_suffixes = ("" ,)), data_torch )]
58585858
5859+ if name .startswith ("model.vision_model.transformer.layers." ):
5860+ # process visual tensors
5861+ # split QKV tensors if needed
5862+ if ".qkv_proj." in name :
5863+ if data_torch .ndim == 2 : # weight
5864+ c3 , _ = data_torch .shape
5865+ else : # bias
5866+ c3 = data_torch .shape [0 ]
5867+ assert c3 % 3 == 0
5868+ c = c3 // 3
5869+ wq = data_torch [:c ]
5870+ wk = data_torch [c : c * 2 ]
5871+ wv = data_torch [c * 2 :]
5872+ return [
5873+ (self .map_tensor_name (name .replace ("qkv" , "q" )), wq ),
5874+ (self .map_tensor_name (name .replace ("qkv" , "k" )), wk ),
5875+ (self .map_tensor_name (name .replace ("qkv" , "v" )), wv ),
5876+ ]
5877+ else :
5878+ return [(self .map_tensor_name (name ), data_torch )]
5879+
58595880 return [(self .map_tensor_name (name ), data_torch )]
58605881
58615882
@@ -7100,7 +7121,7 @@ def set_gguf_parameters(self):
71007121 else :
71017122 # note: deepseek2 using MLA converts into MQA (ie: GQA with 1 group)
71027123 self .hparams ["num_key_value_heads" ] = 1
7103-
7124+
71047125 super ().set_gguf_parameters ()
71057126 hparams = self .hparams
71067127 kv_lora_rank = hparams ["q_lora_rank" ] if hparams ["q_lora_rank" ] is not None else 512
0 commit comments