Commit d5966d1: support MiniCPM4
Parent: 2403543

5 files changed, +131 -4 lines

README.md (1 addition, 0 deletions)

```diff
@@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
 
 **What's New:**
 
+* 2025-06-07: MiniCPM4
 * 2025-06-06: Qwen-3 Embedding & Reranker
 * 2025-06-03: Kimi-VL
 * 2025-05-28: Gemma3 fully supported
```

convert.py (69 additions, 3 deletions)

```diff
@@ -139,6 +139,7 @@ class ModelType(Enum):
     MiniCPM2 = 0x1101 # updated chat template, no tie_word_embeddings=False
     MiniCPM_MoE = 0x1102
     MiniCPM3 = 0x1110
+    MiniCPM4 = 0x1111
 
     Persimmon = 0x1200
     Fuyu = 0x1201
```

```diff
@@ -2076,6 +2077,68 @@ def get_weight_names(config):
             r.remove('lm_head.weight')
         return r
 
+class MiniCPM4Converter(BaseConverter):
+    MODEL_TYPE = ModelType.MiniCPM4
+
+    @classmethod
+    def pp(cls, config, name: str, tensor):
+        return MiniCPMConverter.pp(config, name, tensor)
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        MAX_FACTOR_LEN = 128
+
+        assert config.hidden_act == 'silu', "hidden_act must be silu"
+        if config.tie_word_embeddings is None:
+            config.tie_word_embeddings = True
+        if config.rope_scaling is not None:
+            assert config.rope_scaling['rope_type'] == 'longrope'
+            factor_len = len(config.rope_scaling['long_factor'])
+            assert factor_len <= MAX_FACTOR_LEN, "len(config.rope_scaling['long_factor']) must be <= MAX_FACTOR_LEN"
+            factors = pad_to(config.rope_scaling['short_factor'], MAX_FACTOR_LEN) + pad_to(config.rope_scaling['long_factor'], MAX_FACTOR_LEN)
+
+            if config.max_position_embeddings == 32768:
+                print("`longrope` is configured, extend to 32k * 4.")
+                config.max_position_embeddings = 32768 * 4
+        else:
+            factor_len = 0
+            factors = pad_to([0.0], MAX_FACTOR_LEN * 2)
+
+        config_values = [
+            ggml_type.value,
+            config.vocab_size,
+            config.hidden_size,
+            config.num_attention_heads,
+            config.num_hidden_layers,
+            config.intermediate_size,
+            config.max_position_embeddings,
+            config.bos_token_id,
+            config.eos_token_id[0],
+            config.pad_token_id if config.pad_token_id is not None else -1,
+            config.sep_token_id if config.sep_token_id is not None else -1,
+            config.num_key_value_heads,
+            config.max_position_embeddings,
+            config.rope_scaling['original_max_position_embeddings'],
+            1 if config.tie_word_embeddings else 0,
+            factor_len,
+        ]
+        f.write(struct.pack("i" * len(config_values), *config_values))
+
+        float_values = [
+            config.mup_denominator if config.mup_denominator is not None else 0.0,
+            config.dim_model_base / config.hidden_size,
+            config.rope_theta if config.mup_denominator is not None else 10000.0,
+            config.scale_depth / math.sqrt(config.num_hidden_layers),
+        ] + factors
+        f.write(struct.pack("<" + "f" * len(float_values), *float_values))
+
+    @staticmethod
+    def get_weight_names(config):
+        r = LlamaConverter.get_weight_names(config)
+        if config.tie_word_embeddings:
+            r.remove('lm_head.weight')
+        return r
+
 class MiniCPMEmbConverter(BaseConverter):
     MODEL_TYPE = ModelType.MiniCPM_Embedding_Light
 
```

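For orientation: `dump_config` writes a flat binary header, first 16 `int32` fields packed with `struct.pack("i" * ...)`, then a little-endian `float32` block that ends with the zero-padded `short_factor` and `long_factor` arrays. The field order matches the new `v4::Config` struct in `models/minicpm.cpp` below. A minimal sketch of the float block, assuming `pad_to(values, n)` is the repo's helper that right-pads a list with zeros to length `n` (its definition is not part of this diff), and using placeholder numbers rather than real MiniCPM4 values:

```python
import struct

MAX_FACTOR_LEN = 128  # must match v4::MAX_FACTOR_LEN on the C++ side

def pad_to(values, n):
    # Assumed behaviour of the repo's pad_to helper: right-pad with zeros to length n.
    return list(values) + [0.0] * (n - len(values))

# Placeholder longrope factors; the real ones come from config.rope_scaling.
short_factor = [1.0, 1.0, 1.1]
long_factor  = [1.0, 2.5, 4.0]
factors = pad_to(short_factor, MAX_FACTOR_LEN) + pad_to(long_factor, MAX_FACTOR_LEN)

float_values = [
    0.0,       # mup_denominator (0.0 when the config has none)
    0.0625,    # dim_model_base / hidden_size (placeholder ratio)
    10000.0,   # rope_theta
    0.2214,    # scale_depth / sqrt(num_hidden_layers) (placeholder)
] + factors
blob = struct.pack("<" + "f" * len(float_values), *float_values)
assert len(blob) == 4 * (4 + 2 * MAX_FACTOR_LEN)  # 1040 bytes of float payload
```

The fixed sizes are what let the C++ loader read the whole record into `v4::Config` in one shot.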

```diff
@@ -7061,9 +7124,12 @@ def main():
         OrionConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'MiniCPMForCausalLM':
         if config.num_experts is None:
-            if (config.tie_word_embeddings is not None) and (not config.tie_word_embeddings):
-                MiniCPMConverter.MODEL_TYPE = ModelType.MiniCPM2
-            MiniCPMConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
+            if (config.rope_scaling is not None) and ('rope_type' in config.rope_scaling) and (config.rope_scaling['rope_type'] == 'longrope'):
+                MiniCPM4Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
+            else:
+                if (config.tie_word_embeddings is not None) and (not config.tie_word_embeddings):
+                    MiniCPMConverter.MODEL_TYPE = ModelType.MiniCPM2
+                MiniCPMConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
         else:
             MiniCPMMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'MiniCPM3ForCausalLM':
```

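The `main()` dispatch now distinguishes MiniCPM4 purely by the `rope_scaling` block in the checkpoint's `config.json`: `longrope` routes to `MiniCPM4Converter`, untied embeddings still select `ModelType.MiniCPM2`, and everything else stays on the original `MiniCPMConverter` path. A rough stand-alone restatement of that decision (the helper name and file path are illustrative, not part of convert.py):

```python
import json

def pick_minicpm_converter(config_path: str) -> str:
    # Mirrors the dispatch above for arch == 'MiniCPMForCausalLM'.
    with open(config_path, encoding="utf-8") as f:
        cfg = json.load(f)

    if cfg.get("num_experts") is not None:
        return "MiniCPMMoEConverter"
    rope_scaling = cfg.get("rope_scaling") or {}
    if rope_scaling.get("rope_type") == "longrope":
        return "MiniCPM4Converter"
    if cfg.get("tie_word_embeddings") is False:
        return "MiniCPMConverter (as ModelType.MiniCPM2)"
    return "MiniCPMConverter"

# e.g. pick_minicpm_converter("MiniCPM4-8B/config.json")  # hypothetical local path
```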

docs/models.md (2 additions, 1 deletion)

```diff
@@ -138,7 +138,8 @@
   [SFT-1B](https://huggingface.co/openbmb/MiniCPM-1B-sft-bf16)🔥
 * [x] [2B-128k](https://huggingface.co/openbmb/MiniCPM-2B-128k) (Note: `--temp 0` is recommended.)
 * [x] [MoE-8x2B](https://huggingface.co/openbmb/MiniCPM-MoE-8x2B)
-* [x] [4B](https://huggingface.co/openbmb/MiniCPM3-4B)
+* [x] v3: [4B](https://huggingface.co/openbmb/MiniCPM3-4B)
+* [x] v4: [0.5B](https://huggingface.co/openbmb/BitCPM4-0.5B/tree/fcad2c603edb0663a36e56999016cbf2d7644ea1), [8B](https://huggingface.co/openbmb/MiniCPM4-8B/tree/cd838a273dde346b7c319d443f41ecd31a71f1b6), [8B-Survey](https://huggingface.co/openbmb/MiniCPM4-Survey/tree/f3e7ca37096dbedbdd48f6bacb29513b64e78667), [8B-MCP](https://huggingface.co/openbmb/MiniCPM4-MCP/commit/4a6cefeea3115ca8fc6b03e1879e912718ba6487)
 
 * Mistral (`MistralForCausalLM`, `MixtralForCausalLM`)
   * [x] Mistral: [Instruct-7B-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2), [Instruct-7B-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3)
```

models/minicpm.cpp (57 additions, 0 deletions)

```diff
@@ -748,4 +748,61 @@ namespace ranker_light
         {
         }
     };
+}
+
+namespace v4
+{
+    const int MAX_FACTOR_LEN = 128;
+    struct Config : public BaseConfig
+    {
+        int num_key_value_heads;
+        int max_position_embeddings;
+        int original_max_position_embeddings;
+        int tie_word_embeddings;
+        int factor_len;
+
+        float mup_denominator;
+        float lm_head_pre_scale;
+        float rope_theta;
+        float scale_depth;
+        float short_factor[MAX_FACTOR_LEN];
+        float long_factor[MAX_FACTOR_LEN];
+    };
+
+    typedef v3::Tokenizer Tokenizer;
+
+    class ConditionalGeneration : public llama::v2::GenericConditionalGeneration<Phi3SUBlock>
+    {
+    public:
+        ConditionalGeneration() = default;
+        ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type = ModelType::MODEL_TYPE_MINICPM4)
+            : ConditionalGeneration(config, runtime_config, type, config.num_key_value_heads, config.max_length)
+        {}
+
+        ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type,
+                              int num_key_value_heads, int max_length)
+            : llama::v2::GenericConditionalGeneration<Phi3SUBlock>(config, runtime_config, type, num_key_value_heads, max_length, 13, config.tie_word_embeddings != 0)
+        {
+            float scaling_factor = (float)config.max_length / config.original_max_position_embeddings;
+            if (scaling_factor <= 1.0f)
+                scaling_factor = 1.0f;
+            else
+                scaling_factor = sqrtf(1.0f + logf(scaling_factor) / logf((float)config.original_max_position_embeddings));
+
+            for (int i = 0; i < config.num_hidden_layers; i++)
+            {
+                auto &attention = get_typed_transformer<ModelClass>()->layers[i].attention;
+                if (config.factor_len > 0)
+                {
+                    attention.config(&w_ctx_, config.original_max_position_embeddings, config.rope_theta,
+                                     scaling_factor,
+                                     scaling_factor,
+                                     config.factor_len,
+                                     config.short_factor,
+                                     config.long_factor);
+                }
+                get_typed_transformer<ModelClass>()->layers[i].scale_depth = config.scale_depth;
+            }
+        }
+    };
 }
```

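The second constructor derives the attention scaling factor from how far `max_length` extends past `original_max_position_embeddings`: 1.0 when there is no extension, otherwise `sqrt(1 + ln(L / L0) / ln(L0))`, the magnitude correction used by the Phi-3-style SU-RoPE block (`Phi3SUBlock`) this class builds on. A quick numeric check in Python for the 32k to 128k extension that convert.py configures:

```python
import math

L0 = 32768        # original_max_position_embeddings
L  = 32768 * 4    # max_length after convert.py's "extend to 32k * 4" step

ratio = L / L0
scaling_factor = 1.0 if ratio <= 1.0 else math.sqrt(1.0 + math.log(ratio) / math.log(L0))
print(round(scaling_factor, 4))  # ~1.0646, passed twice to attention.config(...)
```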

src/models.cpp (2 additions, 0 deletions)

```diff
@@ -296,6 +296,7 @@ namespace chatllm
         MODEL_TYPE_MINICPM2 = 0x1101,
         MODEL_TYPE_MINICPM_MoE = 0x1102,
         MODEL_TYPE_MINICPM3 = 0x1110,
+        MODEL_TYPE_MINICPM4 = 0x1111,
 
         MODEL_TYPE_PERSIMMON= 0x1200,
         MODEL_TYPE_FUYU = 0x1201,
@@ -2434,6 +2435,7 @@ namespace chatllm
         CASE(MINICPM2, minicpm::v2, 1) \
         CASE(MINICPM_MoE, minicpm::moe, 1) \
         CASE(MINICPM3, minicpm::v3, 1) \
+        CASE(MINICPM4, minicpm::v4, 1) \
         \
         CASE(PERSIMMON, adept::persimmon, 1) \
         CASE(FUYU, adept::fuyu, 1) \
```
