@@ -7860,14 +7860,6 @@ def set_gguf_parameters(self):
78607860
78617861 _experts : list [dict [str , Tensor ]] | None = None
78627862
7863- @staticmethod
7864- def permute (weights : Tensor , n_head : int , n_head_kv : int | None ):
7865- if n_head_kv is not None and n_head != n_head_kv :
7866- n_head = n_head_kv
7867- return (weights .reshape (n_head , 2 , weights .shape [0 ] // n_head // 2 , * weights .shape [1 :])
7868- .swapaxes (1 , 2 )
7869- .reshape (weights .shape ))
7870-
78717863 def modify_tensors (self , data_torch : Tensor , name : str , bid : int | None ) -> Iterable [tuple [str , Tensor ]]:
78727864 if name .endswith ("query_key_value.weight" ):
78737865 n_head = self .hparams ["num_attention_heads" ]
@@ -7878,8 +7870,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
78787870 q , k , v = data_torch .split ([n_head * head_dim , n_kv_head * head_dim , n_kv_head * head_dim ], dim = - 2 )
78797871
78807872 return [
7881- (self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_Q , bid ), self . permute ( q , n_head , n_head ) ),
7882- (self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_K , bid ), self . permute ( k , n_head , n_kv_head ) ),
7873+ (self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_Q , bid ), q ),
7874+ (self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_K , bid ), k ),
78837875 (self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_V , bid ), v )
78847876 ]
78857877 elif name .find ("mlp.experts" ) != - 1 :
0 commit comments