@@ -139,6 +139,7 @@ class ModelType(Enum):
     MiniCPM2    = 0x1101   # updated chat template, no tie_word_embeddings=False
     MiniCPM_MoE = 0x1102
     MiniCPM3    = 0x1110
+    MiniCPM4    = 0x1111
 
     Persimmon   = 0x1200
     Fuyu        = 0x1201
@@ -2076,6 +2077,73 @@ def get_weight_names(config):
             r.remove('lm_head.weight')
         return r
 
+class MiniCPM4Converter(BaseConverter):
+    MODEL_TYPE = ModelType.MiniCPM4
+
+    @classmethod
+    def pp(cls, config, name: str, tensor):
+        return MiniCPMConverter.pp(config, name, tensor)
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        MAX_FACTOR_LEN = 128
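+        # Each rope factor list is padded to this fixed length so the serialized header has a constant size.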
+
+        assert config.hidden_act == 'silu', "hidden_act must be silu"
+        if config.tie_word_embeddings is None:
+            config.tie_word_embeddings = True
+        if config.rope_scaling is not None:
+            assert config.rope_scaling['rope_type'] == 'longrope'
+            factor_len = len(config.rope_scaling['long_factor'])
+            assert factor_len <= MAX_FACTOR_LEN, "len(config.rope_scaling['long_factor']) must be <= MAX_FACTOR_LEN"
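+            # Short factors first, then long factors, each padded to MAX_FACTOR_LEN slots.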
+            factors = pad_to(config.rope_scaling['short_factor'], MAX_FACTOR_LEN) + pad_to(config.rope_scaling['long_factor'], MAX_FACTOR_LEN)
+
+            if config.max_position_embeddings == 32768:
+                print("`longrope` is configured, extending context to 32k * 4.")
+                config.max_position_embeddings = 32768 * 4
+        else:
+            factor_len = 0
+            factors = pad_to([0.0], MAX_FACTOR_LEN * 2)
+
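+        # Integer header; fields are packed in this order and must be read back in the same order by the loader.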
+        config_values = [
+            ggml_type.value,
+            config.vocab_size,
+            config.hidden_size,
+            config.num_attention_heads,
+            config.num_hidden_layers,
+            config.intermediate_size,
+            config.max_position_embeddings,
+            config.bos_token_id,
+            config.eos_token_id[0],
+            config.pad_token_id if config.pad_token_id is not None else -1,
+            config.sep_token_id if config.sep_token_id is not None else -1,
+            config.num_key_value_heads,
+            config.max_position_embeddings,
+            config.rope_scaling['original_max_position_embeddings'] if config.rope_scaling is not None else config.max_position_embeddings,
+            1 if config.tie_word_embeddings else 0,
+            factor_len,
+        ]
+        f.write(struct.pack("i" * len(config_values), *config_values))
+
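+        # Float header: mup denominator (0 when unset), logit scale (dim_model_base / hidden_size),
+        # rope theta, depth-scaled residual factor, then the padded rope factors.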
+        float_values = [
+            config.mup_denominator if config.mup_denominator is not None else 0.0,
+            config.dim_model_base / config.hidden_size,
+            config.rope_theta if config.rope_theta is not None else 10000.0,
+            config.scale_depth / math.sqrt(config.num_hidden_layers),
+        ] + factors
+        f.write(struct.pack("<" + "f" * len(float_values), *float_values))
+
+    @staticmethod
+    def get_weight_names(config):
+        r = LlamaConverter.get_weight_names(config)
+        if config.tie_word_embeddings:
+            r.remove('lm_head.weight')
+        return r
+
 class MiniCPMEmbConverter(BaseConverter):
     MODEL_TYPE = ModelType.MiniCPM_Embedding_Light
 
@@ -7061,9 +7124,13 @@ def main():
         OrionConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'MiniCPMForCausalLM':
         if config.num_experts is None:
-            if (config.tie_word_embeddings is not None) and (not config.tie_word_embeddings):
-                MiniCPMConverter.MODEL_TYPE = ModelType.MiniCPM2
-            MiniCPMConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
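+            # MiniCPM4 checkpoints are identified by their `longrope` rope_scaling.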
+            if (config.rope_scaling is not None) and ('rope_type' in config.rope_scaling) and (config.rope_scaling['rope_type'] == 'longrope'):
+                MiniCPM4Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
+            else:
+                if (config.tie_word_embeddings is not None) and (not config.tie_word_embeddings):
+                    MiniCPMConverter.MODEL_TYPE = ModelType.MiniCPM2
+                MiniCPMConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
         else:
             MiniCPMMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'MiniCPM3ForCausalLM':