@@ -556,11 +556,8 @@ def set_gguf_parameters(self):
             logger.info(f"gguf: experts used count = {n_experts_used}")
 
         if (head_dim := self.hparams.get("head_dim")) is not None:
-            # Workaround for incorrect AutoConfig value for DeepSeekV3 (is set correctly in DeepSeekV2Model class)
-            # https://github.com/huggingface/transformers/blob/19224c3642705c5b6988c9f5f4251f83323d05ae/src/transformers/models/deepseek_v3/configuration_deepseek_v3.py#L210
-            if self.hparams.get("model_type") != "deepseek_v3":
-                self.gguf_writer.add_key_length(head_dim)
-                self.gguf_writer.add_value_length(head_dim)
+            self.gguf_writer.add_key_length(head_dim)
+            self.gguf_writer.add_value_length(head_dim)
 
         self.gguf_writer.add_file_type(self.ftype)
         logger.info(f"gguf: file type = {self.ftype}")
@@ -1901,9 +1898,7 @@ def set_gguf_parameters(self):
         hparams = self.hparams
         self.gguf_writer.add_vocab_size(hparams["vocab_size"])
 
-        if "head_dim" in hparams:
-            rope_dim = hparams["head_dim"]
-        else:
+        if (rope_dim := hparams.get("head_dim")) is None:
             rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(rope_dim)
 
@@ -1985,7 +1980,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
             if rope_scaling.get("rope_type", '').lower() == "llama3":
                 base = self.hparams.get("rope_theta", 10000.0)
-                dim = self.hparams.get("head_dim", self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
+                if (dim := self.hparams.get("head_dim")) is None:
+                    dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
                 freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
 
                 factor = rope_scaling.get("factor", 8.0)
@@ -2321,9 +2317,7 @@ def set_gguf_parameters(self):
         hparams = self.hparams
         self.gguf_writer.add_vocab_size(hparams["vocab_size"])
 
-        if "head_dim" in hparams:
-            rope_dim = hparams["head_dim"]
-        else:
+        if (rope_dim := hparams.get("head_dim")) is None:
             rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(rope_dim)
 
@@ -2363,7 +2357,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
             if rope_scaling.get("rope_type", '').lower() == "llama3":
                 base = self.hparams.get("rope_theta", 10000.0)
-                dim = self.hparams.get("head_dim", self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
+                if (dim := self.hparams.get("head_dim")) is None:
+                    dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
                 freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
 
                 factor = rope_scaling.get("factor", 8.0)
@@ -3681,9 +3676,7 @@ def set_gguf_parameters(self):
         hparams = self.hparams
         self.gguf_writer.add_vocab_size(hparams["vocab_size"])
 
-        if "head_dim" in hparams:
-            rope_dim = hparams["head_dim"]
-        else:
+        if (rope_dim := hparams.get("head_dim")) is None:
             rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(rope_dim)
 
@@ -5098,9 +5091,7 @@ def set_vocab(self):
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
-        if "head_dim" in hparams:
-            rope_dim = hparams["head_dim"]
-        else:
+        if (rope_dim := hparams.get("head_dim")) is None:
             rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
 
         self.gguf_writer.add_rope_dimension_count(rope_dim)
@@ -5990,7 +5981,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
             if rope_scaling.get("rope_type", '').lower() == "llama3":
                 base = self.hparams.get("rope_theta", 10000.0)
-                dim = self.hparams.get("head_dim", self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
+                if (dim := self.hparams.get("head_dim")) is None:
+                    dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
                 freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
 
                 factor = rope_scaling.get("factor", 8.0)
@@ -6102,7 +6094,8 @@ def set_vocab(self):
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
-        rope_dim = hparams.get("head_dim") or hparams["hidden_size"] // hparams["num_attention_heads"]
+        if (rope_dim := hparams.get("head_dim")) is None:
+            rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
 
         self.gguf_writer.add_rope_dimension_count(rope_dim)
         rope_scaling = self.hparams.get("rope_scaling") or {}
@@ -6134,7 +6127,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         n_head = self.hparams["num_attention_heads"]
         n_kv_head = self.hparams.get("num_key_value_heads")
         n_embd = self.hparams["hidden_size"]
-        head_dim = self.hparams.get("head_dim") or n_embd // n_head
+        if (head_dim := self.hparams.get("head_dim")) is None:
+            head_dim = n_embd // n_head
 
         output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
 
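For reference, a minimal self-contained sketch of the fallback pattern these hunks converge on; the `hparams` values below are illustrative, not taken from any real config. A relevant Python detail: the walrus form falls back only when `head_dim` is absent (or explicitly `null` in the config), whereas a `get(...) or ...` form also discards any falsy value such as `0`.

```python
# Hypothetical hparams dict with no "head_dim" key.
hparams = {"hidden_size": 4096, "num_attention_heads": 32}

# Walrus-operator pattern used throughout the diff: compute the fallback
# only when the key is missing or explicitly None.
if (rope_dim := hparams.get("head_dim")) is None:
    rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
print(rope_dim)  # 128

# The `or` form treats a falsy value the same as a missing key:
hparams["head_dim"] = 0
rope_dim_or = hparams.get("head_dim") or hparams["hidden_size"] // hparams["num_attention_heads"]
print(rope_dim_or)  # 128 -- the explicit 0 is silently replaced
```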