Commit d5966d1: support MiniCPM4
Parent: 2403543

5 files changed, +131 -4 lines

README.md (1 addition, 0 deletions)

```diff
@@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
 
 **What's New:**
 
+* 2025-06-07: MiniCPM4
 * 2025-06-06: Qwen-3 Embedding & Reranker
 * 2025-06-03: Kimi-VL
 * 2025-05-28: Gemma3 fully supported
```

convert.py (69 additions, 3 deletions)

```diff
@@ -139,6 +139,7 @@ class ModelType(Enum):
     MiniCPM2 = 0x1101 # updated chat template, no tie_word_embeddings=False
     MiniCPM_MoE = 0x1102
     MiniCPM3 = 0x1110
+    MiniCPM4 = 0x1111
 
     Persimmon = 0x1200
     Fuyu = 0x1201
```

```diff
@@ -2076,6 +2077,68 @@ def get_weight_names(config):
             r.remove('lm_head.weight')
         return r
 
+class MiniCPM4Converter(BaseConverter):
+    MODEL_TYPE = ModelType.MiniCPM4
+
+    @classmethod
+    def pp(cls, config, name: str, tensor):
+        return MiniCPMConverter.pp(config, name, tensor)
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        MAX_FACTOR_LEN = 128
+
+        assert config.hidden_act == 'silu', "hidden_act must be silu"
+        if config.tie_word_embeddings is None:
+            config.tie_word_embeddings = True
+        if config.rope_scaling is not None:
+            assert config.rope_scaling['rope_type'] == 'longrope'
+            factor_len = len(config.rope_scaling['long_factor'])
+            assert factor_len <= MAX_FACTOR_LEN, "len(config.rope_scaling['long_factor']) must be <= MAX_FACTOR_LEN"
+            factors = pad_to(config.rope_scaling['short_factor'], MAX_FACTOR_LEN) + pad_to(config.rope_scaling['long_factor'], MAX_FACTOR_LEN)
+
+            if config.max_position_embeddings == 32768:
+                print("`longrope` is configured, extend to 32k * 4.")
+                config.max_position_embeddings = 32768 * 4
+        else:
+            factor_len = 0
+            factors = pad_to([0.0], MAX_FACTOR_LEN * 2)
+
+        config_values = [
+            ggml_type.value,
+            config.vocab_size,
+            config.hidden_size,
+            config.num_attention_heads,
+            config.num_hidden_layers,
+            config.intermediate_size,
+            config.max_position_embeddings,
+            config.bos_token_id,
+            config.eos_token_id[0],
+            config.pad_token_id if config.pad_token_id is not None else -1,
+            config.sep_token_id if config.sep_token_id is not None else -1,
+            config.num_key_value_heads,
+            config.max_position_embeddings,
+            config.rope_scaling['original_max_position_embeddings'],
+            1 if config.tie_word_embeddings else 0,
+            factor_len,
+        ]
+        f.write(struct.pack("i" * len(config_values), *config_values))
+
+        float_values = [
+            config.mup_denominator if config.mup_denominator is not None else 0.0,
+            config.dim_model_base / config.hidden_size,
+            config.rope_theta if config.mup_denominator is not None else 10000.0,
+            config.scale_depth / math.sqrt(config.num_hidden_layers),
+        ] + factors
+        f.write(struct.pack("<" + "f" * len(float_values), *float_values))
+
+    @staticmethod
+    def get_weight_names(config):
+        r = LlamaConverter.get_weight_names(config)
+        if config.tie_word_embeddings:
+            r.remove('lm_head.weight')
+        return r
+
 class MiniCPMEmbConverter(BaseConverter):
     MODEL_TYPE = ModelType.MiniCPM_Embedding_Light
 
```

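For orientation: `dump_config` writes a flat binary header, first 16 `int32` fields packed with `struct.pack("i" * ...)`, then a little-endian `float32` block that ends with the zero-padded `short_factor` and `long_factor` arrays. The field order matches the new `v4::Config` struct in `models/minicpm.cpp` below. A minimal sketch of the float block, assuming `pad_to(values, n)` is the repo's helper that right-pads a list with zeros to length `n` (its definition is not part of this diff), and using placeholder numbers rather than real MiniCPM4 values:

```python
import struct

MAX_FACTOR_LEN = 128  # must match v4::MAX_FACTOR_LEN on the C++ side

def pad_to(values, n):
    # Assumed behaviour of the repo's pad_to helper: right-pad with zeros to length n.
    return list(values) + [0.0] * (n - len(values))

# Placeholder longrope factors; the real ones come from config.rope_scaling.
short_factor = [1.0, 1.0, 1.1]
long_factor  = [1.0, 2.5, 4.0]
factors = pad_to(short_factor, MAX_FACTOR_LEN) + pad_to(long_factor, MAX_FACTOR_LEN)

float_values = [
    0.0,       # mup_denominator (0.0 when the config has none)
    0.0625,    # dim_model_base / hidden_size (placeholder ratio)
    10000.0,   # rope_theta
    0.2214,    # scale_depth / sqrt(num_hidden_layers) (placeholder)
] + factors
blob = struct.pack("<" + "f" * len(float_values), *float_values)
assert len(blob) == 4 * (4 + 2 * MAX_FACTOR_LEN)  # 1040 bytes of float payload
```

The fixed sizes are what let the C++ loader read the whole record into `v4::Config` in one shot.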

```diff
@@ -7061,9 +7124,12 @@ def main():
         OrionConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'MiniCPMForCausalLM':
         if config.num_experts is None:
-            if (config.tie_word_embeddings is not None) and (not config.tie_word_embeddings):
-                MiniCPMConverter.MODEL_TYPE = ModelType.MiniCPM2
-            MiniCPMConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
+            if (config.rope_scaling is not None) and ('rope_type' in config.rope_scaling) and (config.rope_scaling['rope_type'] == 'longrope'):
+                MiniCPM4Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
+            else:
+                if (config.tie_word_embeddings is not None) and (not config.tie_word_embeddings):
+                    MiniCPMConverter.MODEL_TYPE = ModelType.MiniCPM2
+                MiniCPMConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
         else:
             MiniCPMMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'MiniCPM3ForCausalLM':
```

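The `main()` dispatch now distinguishes MiniCPM4 purely by the `rope_scaling` block in the checkpoint's `config.json`: `longrope` routes to `MiniCPM4Converter`, untied embeddings still select `ModelType.MiniCPM2`, and everything else stays on the original `MiniCPMConverter` path. A rough stand-alone restatement of that decision (the helper name and file path are illustrative, not part of convert.py):

```python
import json

def pick_minicpm_converter(config_path: str) -> str:
    # Mirrors the dispatch above for arch == 'MiniCPMForCausalLM'.
    with open(config_path, encoding="utf-8") as f:
        cfg = json.load(f)

    if cfg.get("num_experts") is not None:
        return "MiniCPMMoEConverter"
    rope_scaling = cfg.get("rope_scaling") or {}
    if rope_scaling.get("rope_type") == "longrope":
        return "MiniCPM4Converter"
    if cfg.get("tie_word_embeddings") is False:
        return "MiniCPMConverter (as ModelType.MiniCPM2)"
    return "MiniCPMConverter"

# e.g. pick_minicpm_converter("MiniCPM4-8B/config.json")  # hypothetical local path
```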

docs/models.md (2 additions, 1 deletion)

```diff
@@ -138,7 +138,8 @@
   [SFT-1B](https://huggingface.co/openbmb/MiniCPM-1B-sft-bf16)🔥
 * [x] [2B-128k](https://huggingface.co/openbmb/MiniCPM-2B-128k) (Note: `--temp 0` is recommended.)
 * [x] [MoE-8x2B](https://huggingface.co/openbmb/MiniCPM-MoE-8x2B)
-* [x] [4B](https://huggingface.co/openbmb/MiniCPM3-4B)
+* [x] v3: [4B](https://huggingface.co/openbmb/MiniCPM3-4B)
+* [x] v4: [0.5B](https://huggingface.co/openbmb/BitCPM4-0.5B/tree/fcad2c603edb0663a36e56999016cbf2d7644ea1), [8B](https://huggingface.co/openbmb/MiniCPM4-8B/tree/cd838a273dde346b7c319d443f41ecd31a71f1b6), [8B-Survey](https://huggingface.co/openbmb/MiniCPM4-Survey/tree/f3e7ca37096dbedbdd48f6bacb29513b64e78667), [8B-MCP](https://huggingface.co/openbmb/MiniCPM4-MCP/commit/4a6cefeea3115ca8fc6b03e1879e912718ba6487)
 
 * Mistral (`MistralForCausalLM`, `MixtralForCausalLM`)
   * [x] Mistral: [Instruct-7B-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2), [Instruct-7B-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3)
```

models/minicpm.cpp (57 additions, 0 deletions)

```diff
@@ -748,4 +748,61 @@ namespace ranker_light
         {
         }
     };
+}
+
+namespace v4
+{
+    const int MAX_FACTOR_LEN = 128;
+    struct Config : public BaseConfig
+    {
+        int num_key_value_heads;
+        int max_position_embeddings;
+        int original_max_position_embeddings;
+        int tie_word_embeddings;
+        int factor_len;
+
+        float mup_denominator;
+        float lm_head_pre_scale;
+        float rope_theta;
+        float scale_depth;
+        float short_factor[MAX_FACTOR_LEN];
+        float long_factor[MAX_FACTOR_LEN];
+    };
+
+    typedef v3::Tokenizer Tokenizer;
+
+    class ConditionalGeneration : public llama::v2::GenericConditionalGeneration<Phi3SUBlock>
+    {
+    public:
+        ConditionalGeneration() = default;
+        ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type = ModelType::MODEL_TYPE_MINICPM4)
+            : ConditionalGeneration(config, runtime_config, type, config.num_key_value_heads, config.max_length)
+        {}
+
+        ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type,
+                              int num_key_value_heads, int max_length)
+            : llama::v2::GenericConditionalGeneration<Phi3SUBlock>(config, runtime_config, type, num_key_value_heads, max_length, 13, config.tie_word_embeddings != 0)
+        {
+            float scaling_factor = (float)config.max_length / config.original_max_position_embeddings;
+            if (scaling_factor <= 1.0f)
+                scaling_factor = 1.0f;
+            else
+                scaling_factor = sqrtf(1.0f + logf(scaling_factor) / logf((float)config.original_max_position_embeddings));
+
+            for (int i = 0; i < config.num_hidden_layers; i++)
+            {
+                auto &attention = get_typed_transformer<ModelClass>()->layers[i].attention;
+                if (config.factor_len > 0)
+                {
+                    attention.config(&w_ctx_, config.original_max_position_embeddings, config.rope_theta,
+                                     scaling_factor,
+                                     scaling_factor,
+                                     config.factor_len,
+                                     config.short_factor,
+                                     config.long_factor);
+                }
+                get_typed_transformer<ModelClass>()->layers[i].scale_depth = config.scale_depth;
+            }
+        }
+    };
 }
```

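The second constructor derives the attention scaling factor from how far `max_length` extends past `original_max_position_embeddings`: 1.0 when there is no extension, otherwise `sqrt(1 + ln(L / L0) / ln(L0))`, the magnitude correction used by the Phi-3-style SU-RoPE block (`Phi3SUBlock`) this class builds on. A quick numeric check in Python for the 32k to 128k extension that convert.py configures:

```python
import math

L0 = 32768        # original_max_position_embeddings
L  = 32768 * 4    # max_length after convert.py's "extend to 32k * 4" step

ratio = L / L0
scaling_factor = 1.0 if ratio <= 1.0 else math.sqrt(1.0 + math.log(ratio) / math.log(L0))
print(round(scaling_factor, 4))  # ~1.0646, passed twice to attention.config(...)
```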

src/models.cpp (2 additions, 0 deletions)

```diff
@@ -296,6 +296,7 @@ namespace chatllm
         MODEL_TYPE_MINICPM2 = 0x1101,
         MODEL_TYPE_MINICPM_MoE = 0x1102,
         MODEL_TYPE_MINICPM3 = 0x1110,
+        MODEL_TYPE_MINICPM4 = 0x1111,
 
         MODEL_TYPE_PERSIMMON= 0x1200,
         MODEL_TYPE_FUYU = 0x1201,
@@ -2434,6 +2435,7 @@ namespace chatllm
         CASE(MINICPM2, minicpm::v2, 1) \
         CASE(MINICPM_MoE, minicpm::moe, 1) \
         CASE(MINICPM3, minicpm::v3, 1) \
+        CASE(MINICPM4, minicpm::v4, 1) \
         \
         CASE(PERSIMMON, adept::persimmon, 1) \
         CASE(FUYU, adept::fuyu, 1) \
```
