Skip to content

Commit c4e9239

Browse files
authored
model : support MiniCPM-V 4.5 (ggml-org#15575)
1 parent 39842a7 commit c4e9239

File tree

5 files changed

+61
-2
lines changed

5 files changed

+61
-2
lines changed

docs/multimodal/minicpmv4.0.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Download [MiniCPM-V-4](https://huggingface.co/openbmb/MiniCPM-V-4) PyTorch model
66

77

88
### Build llama.cpp
9-
Readme modification time: 20250206
9+
Readme modification time: 20250731
1010

1111
If there are differences in usage, please refer to the official build [documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md)
1212

docs/multimodal/minicpmv4.5.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
## MiniCPM-V 4.5
2+
3+
### Prepare models and code
4+
5+
Download the [MiniCPM-V-4_5](https://huggingface.co/openbmb/MiniCPM-V-4_5) PyTorch model from Hugging Face into the "MiniCPM-V-4_5" folder.
6+
7+
8+
### Build llama.cpp
9+
Readme modification time: 20250826
10+
11+
If there are differences in usage, please refer to the official build [documentation](https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md)
12+
13+
Clone llama.cpp:
14+
```bash
15+
git clone https://github.com/ggerganov/llama.cpp
16+
cd llama.cpp
17+
```
18+
19+
Build llama.cpp using `CMake`:
20+
```bash
21+
cmake -B build
22+
cmake --build build --config Release
23+
```
24+
25+
26+
### Usage of MiniCPM-V 4.5
27+
28+
Convert the PyTorch model to GGUF files (you can also download the [GGUF files](https://huggingface.co/openbmb/MiniCPM-V-4_5-gguf) that we have already converted)
29+
30+
```bash
31+
python ./tools/mtmd/legacy-models/minicpmv-surgery.py -m ../MiniCPM-V-4_5
32+
python ./tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-V-4_5 --minicpmv-projector ../MiniCPM-V-4_5/minicpmv.projector --output-dir ../MiniCPM-V-4_5/ --minicpmv_version 6
33+
python ./convert_hf_to_gguf.py ../MiniCPM-V-4_5/model
34+
35+
# quantize int4 version
36+
./build/bin/llama-quantize ../MiniCPM-V-4_5/model/ggml-model-f16.gguf ../MiniCPM-V-4_5/model/ggml-model-Q4_K_M.gguf Q4_K_M
37+
```
38+
39+
40+
Inference on Linux or Mac
41+
```bash
42+
# run in single-turn mode
43+
./build/bin/llama-mtmd-cli -m ../MiniCPM-V-4_5/model/ggml-model-f16.gguf --mmproj ../MiniCPM-V-4_5/mmproj-model-f16.gguf -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image xx.jpg -p "What is in the image?"
44+
45+
# run in conversation mode
46+
./build/bin/llama-mtmd-cli -m ../MiniCPM-V-4_5/model/ggml-model-Q4_K_M.gguf --mmproj ../MiniCPM-V-4_5/mmproj-model-f16.gguf
47+
```

tools/mtmd/clip.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2202,6 +2202,8 @@ struct clip_model_loader {
22022202
hparams.minicpmv_query_num = 64;
22032203
} else if (hparams.minicpmv_version == 5) {
22042204
hparams.minicpmv_query_num = 64;
2205+
} else if (hparams.minicpmv_version == 6) {
2206+
hparams.minicpmv_query_num = 64;
22052207
} else {
22062208
hparams.minicpmv_query_num = 96;
22072209
}
@@ -3685,6 +3687,9 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
36853687
} else if (params.minicpmv_version == 5) {
36863688
// MiniCPM-V 4.0
36873689
n_patches = 64;
3690+
} else if (params.minicpmv_version == 6) {
3691+
// MiniCPM-V 4.5
3692+
n_patches = 64;
36883693
} else {
36893694
GGML_ABORT("Unknown minicpmv version");
36903695
}

tools/mtmd/legacy-models/minicpmv-convert-image-encoder-to-gguf.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,9 @@ def bytes_to_unicode():
607607
elif minicpmv_version == 5:
608608
emb_dim = 2560
609609
block_count = 27
610+
elif minicpmv_version == 6:
611+
emb_dim = 4096
612+
block_count = 27
610613

611614
default_vision_config = {
612615
"hidden_size": 1152,
@@ -630,6 +633,10 @@ def bytes_to_unicode():
630633
default_vision_config["model_type"] = "siglip_vision_model"
631634
vision_config = SiglipVisionConfig(**default_vision_config)
632635
model = SiglipVisionTransformer(vision_config)
636+
elif minicpmv_version == 6:
637+
default_vision_config["model_type"] = "siglip_vision_model"
638+
vision_config = SiglipVisionConfig(**default_vision_config)
639+
model = SiglipVisionTransformer(vision_config)
633640

634641
processor = None
635642
# if model.attn_pool is not None:

tools/mtmd/mtmd.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ struct mtmd_context {
207207
tok_row_end_trail = false; // no trailing end-of-row token
208208
ov_img_first = true;
209209

210-
} else if (minicpmv_version == 3 || minicpmv_version == 4 || minicpmv_version == 5) {
210+
} else if (minicpmv_version == 3 || minicpmv_version == 4 || minicpmv_version == 5 || minicpmv_version == 6) {
211211
// minicpmv 2.6 format:
212212
// <image> (overview) </image><slice> (slice) </slice><slice> (slice) </slice>\n ...
213213
slice_tmpl = MTMD_SLICE_TMPL_MINICPMV_2_6;

0 commit comments

Comments
 (0)