@@ -1388,10 +1388,10 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_file_type(self.ftype)
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         head_count = self.hparams["num_attention_heads"]
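The pattern introduced in this hunk repeats throughout the file: newer Hugging Face configs spell the scaling variant as `rope_type`, older ones as `type`, and `rope_scaling` itself may be absent or `null`. A minimal standalone sketch of that lookup, using hypothetical example configs rather than any particular model:

```python
# Sketch of the rope_scaling lookup used above; the example configs are hypothetical.
def rope_scaling_kind(hparams: dict) -> str | None:
    rope_scaling = hparams.get("rope_scaling") or {}  # missing key and explicit None both become {}
    # Prefer the newer "rope_type" key, fall back to the legacy "type" key.
    return rope_scaling.get("rope_type", rope_scaling.get("type"))

old_style = {"rope_scaling": {"type": "linear", "factor": 2.0}}
new_style = {"rope_scaling": {"rope_type": "linear", "factor": 2.0}}
unscaled  = {"rope_scaling": None}

assert rope_scaling_kind(old_style) == "linear"
assert rope_scaling_kind(new_style) == "linear"
assert rope_scaling_kind(unscaled) is None
```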
@@ -1512,10 +1512,10 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_file_type(self.ftype)
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
@@ -1828,10 +1828,10 @@ def set_gguf_parameters(self):
         rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(rope_dim)
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     @staticmethod
     def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
@@ -2206,10 +2206,10 @@ def set_gguf_parameters(self):
         rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(rope_dim)
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     @staticmethod
     def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
@@ -2449,10 +2449,10 @@ def set_gguf_parameters(self):
         logit_scale = self.hparams["hidden_size"] / self.hparams["dim_model_base"]
         self.gguf_writer.add_logit_scale(logit_scale)
         logger.info(f"gguf: (minicpm) logit_scale = {logit_scale}")
-        if self.hparams.get("rope_scaling") is not None:
-            if self.hparams["rope_scaling"].get("type") == "longrope":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LONGROPE)
-                logger.info(f"gguf: (minicpm) rope_scaling_type = {gguf.RopeScalingType.LONGROPE}")
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "longrope":
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LONGROPE)
+            logger.info(f"gguf: (minicpm) rope_scaling_type = {gguf.RopeScalingType.LONGROPE}")
 
     def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         rope_dims = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
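Unlike the linear and YaRN branches elsewhere in this diff, the LONGROPE branch above keys only on the scaling type and does not require a `factor` entry. A hedged sketch of how the branch conditions differ (hypothetical configs; the real code writes GGUF metadata rather than returning labels):

```python
# Sketch of the branch conditions with hypothetical configs; this is not the converter itself.
def classify(rope_scaling: dict | None) -> str | None:
    rope_scaling = rope_scaling or {}
    kind = rope_scaling.get("rope_type", rope_scaling.get("type"))
    if kind == "longrope":
        return "LONGROPE"                    # no "factor" needed for this branch
    if kind in ("linear", "yarn") and "factor" in rope_scaling:
        return kind.upper()                  # factor (and more, for YaRN) is then read
    return None

assert classify({"rope_type": "longrope"}) == "LONGROPE"
assert classify({"type": "yarn", "factor": 4.0}) == "YARN"
assert classify({"type": "linear"}) is None  # missing factor -> no scaling metadata written
```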
@@ -2597,11 +2597,11 @@ def set_vocab(self):
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         self._try_set_pooling_type()
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "yarn":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if self.hf_arch == "Qwen2Model":
@@ -2763,11 +2763,11 @@ def set_gguf_parameters(self):
             logger.info(f"gguf: expert shared feed forward length = {shared_expert_intermediate_size}")
         # YaRN is not enabled by default
         # To enable it, please refer to this guide: https://huggingface.co/Qwen/Qwen3-30B-A3B#processing-long-texts
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "yarn":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
 
     _experts: list[dict[str, Tensor]] | None = None
 
@@ -3035,7 +3035,7 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
 
         scale = max_pos_embds / orig_max_pos_embds
 
-        rope_scaling_type = rope_scaling.get('type', '').lower()
+        rope_scaling_type = rope_scaling.get('rope_type', rope_scaling.get('type', '')).lower()
         if len(rope_scaling_type) == 0:
             raise KeyError('Missing the required key rope_scaling.type')
 
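This variant is stricter than the others: the fallback chain bottoms out at an empty string instead of `None`, and a missing type raises. A small standalone reproduction, assuming a plain dict for `rope_scaling`:

```python
# Hypothetical standalone reproduction of the strict lookup above.
def strict_rope_scaling_type(rope_scaling: dict) -> str:
    # "rope_type" wins over the legacy "type"; missing both yields "".
    rope_scaling_type = rope_scaling.get('rope_type', rope_scaling.get('type', '')).lower()
    if len(rope_scaling_type) == 0:
        raise KeyError('Missing the required key rope_scaling.type')
    return rope_scaling_type

print(strict_rope_scaling_type({"rope_type": "LongRoPE"}))  # -> "longrope"
try:
    strict_rope_scaling_type({"factor": 4.0})
except KeyError as err:
    print(err)  # 'Missing the required key rope_scaling.type'
```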
@@ -3347,10 +3347,10 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
         self.gguf_writer.add_file_type(self.ftype)
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         num_heads = self.hparams["num_attention_heads"]
@@ -3425,10 +3425,10 @@ def set_gguf_parameters(self):
         rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
         self.gguf_writer.add_rope_dimension_count(rope_dim)
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "linear" or self.hparams["rope_scaling"].get("rope_type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         n_head = self.hparams["num_attention_heads"]
@@ -4866,12 +4866,12 @@ def set_gguf_parameters(self):
 
         self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
 
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "yarn":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
-                self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1 * hparams["rope_scaling"]["mscale_all_dim"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
+            self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1 * rope_scaling["mscale_all_dim"])
 
     _experts: list[dict[str, Tensor]] | None = None
 
@@ -5363,11 +5363,11 @@ def set_gguf_parameters(self):
         super().set_gguf_parameters()
         rope_dim = self.hparams["head_dim"]
         self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5)))
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "yarn":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
 
 
 @ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
@@ -5600,10 +5600,10 @@ def set_gguf_parameters(self):
         rotary_factor = self.find_hparam(["partial_rotary_factor", "rope_pct"], optional=True)
         rotary_factor = rotary_factor if rotary_factor is not None else 1.0
         self.gguf_writer.add_rope_dimension_count(int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
-        if hparams.get("rope_scaling") is not None and "factor" in hparams["rope_scaling"]:
-            if hparams["rope_scaling"].get("type") == "linear":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
-                self.gguf_writer.add_rope_scaling_factor(hparams["rope_scaling"]["factor"])
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "linear" and "factor" in rope_scaling:
+            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
 
     def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
@@ -5706,10 +5706,11 @@ def set_gguf_parameters(self):
         rope_dim = hparams.get("head_dim") or hparams["hidden_size"] // hparams["num_attention_heads"]
 
         self.gguf_writer.add_rope_dimension_count(rope_dim)
-        if (self.hparams.get("rope_scaling") or {}).get("type") == "yarn" and "factor" in self.hparams["rope_scaling"]:
+        rope_scaling = self.hparams.get("rope_scaling") or {}
+        if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling:
             self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-            self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-            self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
+            self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
         else:
             self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
         self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])