@@ -1841,6 +1841,60 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [(self.map_tensor_name(name), data_torch)]


+@Model.register("MiniCPM3ForCausalLM")
+class MiniCPM3Model(Model):
+    model_arch = gguf.MODEL_ARCH.MINICPM3
+
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+
+        rope_dims = hparams["qk_rope_head_dim"]
+
+        self.gguf_writer.add_file_type(self.ftype)
+        self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
+        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+        self.gguf_writer.add_block_count(self.block_count)
+        self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+        self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
+        self.gguf_writer.add_layer_norm_rms_eps(hparams["rms_norm_eps"])
+        self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
+            self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
+        self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"])
+        self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
+        self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
+
+        # export the long/short rope scaling frequency factors as dedicated tensors, if present
+        rope_scaling = self.find_hparam(['rope_scaling'], True)
+        if rope_scaling is None:
+            return
+
+        long_factors = rope_scaling.get('long_factor', None)
+        short_factors = rope_scaling.get('short_factor', None)
+
+        if long_factors is None or short_factors is None:
+            raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling.short_factor')
+
+        if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
+            raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
+
+    def set_vocab(self):
+        self._set_vocab_llama_hf()
+
+    def _reverse_hf_permute(self, weights: Tensor, n_head: int, n_kv_head: int | None = None) -> Tensor:
+        if n_kv_head is not None and n_head != n_kv_head:
+            n_head //= n_kv_head
+
+        # reverse the Q/K weight permutation used by the HF checkpoint layout
+        return (
+            weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
+        )
+
 @Model.register("QWenLMHeadModel")
 class QwenModel(Model):
     model_arch = gguf.MODEL_ARCH.QWEN