Skip to content

Commit c0d93dd

Browse files
committed
minicpmv works but missing uhd slices
1 parent ba489b4 commit c0d93dd

File tree

11 files changed

+432
-290
lines changed

11 files changed

+432
-290
lines changed

convert_hf_to_gguf.py

Lines changed: 156 additions & 153 deletions
Large diffs are not rendered by default.

examples/vision/vision.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,9 @@ int main(int argc, char ** argv) {
9898
common_params params;
9999

100100
// default prompt for llava 1.5
101-
params.prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\n"
102-
"USER:<img_placement>\nwhat did you see?\nASSISTANT:";
101+
//params.prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:<img_placement>\nwhat did you see?\nASSISTANT:";
102+
// default prompt for minicpmv 2.6
103+
params.prompt = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nwhat did you see?\n<image><img_placement></image><|im_end|>\n<|im_start|>assistant\n";
103104
params.n_predict = 64;
104105
params.n_batch = 2048;
105106
params.n_ubatch = 1024;

gguf-py/gguf/constants.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -457,12 +457,14 @@ class MODEL_TENSOR(IntEnum):
457457
V_PRE_NORM = auto()
458458
V_POST_NORM = auto()
459459
V_RESMPL_POS_EMBD_K = auto() # minicpmv
460-
V_RESMPL_ATTN_IN = auto() # minicpmv
460+
V_RESMPL_ATTN_Q = auto() # minicpmv
461+
V_RESMPL_ATTN_K = auto() # minicpmv
462+
V_RESMPL_ATTN_V = auto() # minicpmv
461463
V_RESMPL_ATTN_OUT = auto() # minicpmv
462-
V_RESMPL_KV_PROJ = auto() # minicpmv
463-
V_RESMPL_NORM_POST = auto() # minicpmv
464-
V_RESMPL_NORM_KV = auto() # minicpmv
465-
V_RESMPL_NORM_Q = auto() # minicpmv
464+
V_RESMPL_KV = auto() # minicpmv
465+
V_RESMPL_KV_NORM = auto() # minicpmv
466+
V_RESMPL_POST_NORM = auto() # minicpmv
467+
V_RESMPL_Q_NORM = auto() # minicpmv
466468
V_RESMPL_PROJ = auto() # minicpmv
467469
V_RESMPL_QUERY = auto() # minicpmv
468470

@@ -674,12 +676,14 @@ class MODEL_TENSOR(IntEnum):
674676
MODEL_TENSOR.V_PRE_NORM: "v.pre_norm",
675677
MODEL_TENSOR.V_POST_NORM: "v.post_norm",
676678
MODEL_TENSOR.V_RESMPL_POS_EMBD_K: "v.resmpl.pos_embd_k",
677-
MODEL_TENSOR.V_RESMPL_ATTN_IN: "v.resmpl.attn_in",
679+
MODEL_TENSOR.V_RESMPL_ATTN_Q: "v.resmpl.attn_q",
680+
MODEL_TENSOR.V_RESMPL_ATTN_K: "v.resmpl.attn_k",
681+
MODEL_TENSOR.V_RESMPL_ATTN_V: "v.resmpl.attn_v",
678682
MODEL_TENSOR.V_RESMPL_ATTN_OUT: "v.resmpl.attn_out",
679-
MODEL_TENSOR.V_RESMPL_KV_PROJ: "v.resmpl.kv_proj",
680-
MODEL_TENSOR.V_RESMPL_NORM_POST: "v.resmpl.norm_post",
681-
MODEL_TENSOR.V_RESMPL_NORM_KV: "v.resmpl.norm_kv",
682-
MODEL_TENSOR.V_RESMPL_NORM_Q: "v.resmpl.norm_q",
683+
MODEL_TENSOR.V_RESMPL_KV: "v.resmpl.kv",
684+
MODEL_TENSOR.V_RESMPL_KV_NORM: "v.resmpl.kv_norm",
685+
MODEL_TENSOR.V_RESMPL_POST_NORM: "v.resmpl.post_norm",
686+
MODEL_TENSOR.V_RESMPL_Q_NORM: "v.resmpl.q_norm",
683687
MODEL_TENSOR.V_RESMPL_PROJ: "v.resmpl.proj",
684688
MODEL_TENSOR.V_RESMPL_QUERY: "v.resmpl.query",
685689
}
@@ -1667,12 +1671,15 @@ class MODEL_TENSOR(IntEnum):
16671671
MODEL_TENSOR.V_ENC_OUTPUT_NORM,
16681672
MODEL_TENSOR.V_ENC_FFN_UP,
16691673
MODEL_TENSOR.V_ENC_FFN_DOWN,
1670-
MODEL_TENSOR.V_RESMPL_ATTN_IN,
1674+
MODEL_TENSOR.V_RESMPL_POS_EMBD_K,
1675+
MODEL_TENSOR.V_RESMPL_ATTN_Q,
1676+
MODEL_TENSOR.V_RESMPL_ATTN_K,
1677+
MODEL_TENSOR.V_RESMPL_ATTN_V,
16711678
MODEL_TENSOR.V_RESMPL_ATTN_OUT,
1672-
MODEL_TENSOR.V_RESMPL_KV_PROJ,
1673-
MODEL_TENSOR.V_RESMPL_NORM_POST,
1674-
MODEL_TENSOR.V_RESMPL_NORM_KV,
1675-
MODEL_TENSOR.V_RESMPL_NORM_Q,
1679+
MODEL_TENSOR.V_RESMPL_KV,
1680+
MODEL_TENSOR.V_RESMPL_KV_NORM,
1681+
MODEL_TENSOR.V_RESMPL_POST_NORM,
1682+
MODEL_TENSOR.V_RESMPL_Q_NORM,
16761683
MODEL_TENSOR.V_RESMPL_PROJ,
16771684
MODEL_TENSOR.V_RESMPL_QUERY,
16781685
],

gguf-py/gguf/tensor_mapping.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -868,27 +868,35 @@ class TensorNameMap:
868868
"resampler.pos_embed_k",
869869
),
870870

871-
MODEL_TENSOR.V_RESMPL_ATTN_IN: (
872-
"resampler.attn.in_proj",
871+
MODEL_TENSOR.V_RESMPL_ATTN_Q: (
872+
"resampler.attn.in_proj_q", # tensor generated from resampler.attn.in_proj
873+
),
874+
875+
MODEL_TENSOR.V_RESMPL_ATTN_K: (
876+
"resampler.attn.in_proj_k", # tensor generated from resampler.attn.in_proj
877+
),
878+
879+
MODEL_TENSOR.V_RESMPL_ATTN_V: (
880+
"resampler.attn.in_proj_v", # tensor generated from resampler.attn.in_proj
873881
),
874882

875883
MODEL_TENSOR.V_RESMPL_ATTN_OUT: (
876884
"resampler.attn.out_proj",
877885
),
878886

879-
MODEL_TENSOR.V_RESMPL_KV_PROJ: (
887+
MODEL_TENSOR.V_RESMPL_KV: (
880888
"resampler.kv_proj",
881889
),
882890

883-
MODEL_TENSOR.V_RESMPL_NORM_POST: (
891+
MODEL_TENSOR.V_RESMPL_POST_NORM: (
884892
"resampler.ln_post",
885893
),
886894

887-
MODEL_TENSOR.V_RESMPL_NORM_KV: (
895+
MODEL_TENSOR.V_RESMPL_KV_NORM: (
888896
"resampler.ln_kv",
889897
),
890898

891-
MODEL_TENSOR.V_RESMPL_NORM_Q: (
899+
MODEL_TENSOR.V_RESMPL_Q_NORM: (
892900
"resampler.ln_q",
893901
),
894902

src/llama-arch.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1372,12 +1372,14 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
13721372
{ LLM_TENSOR_V_ENC_FFN_UP, "v.enc.blk.%d.ffn_up" },
13731373
{ LLM_TENSOR_V_ENC_FFN_DOWN, "v.enc.blk.%d.ffn_down" },
13741374
{ LLM_TENSOR_V_RESMPL_POS_EMBD_K, "v.resmpl.pos_embd_k" },
1375-
{ LLM_TENSOR_V_RESMPL_ATTN_IN, "v.resmpl.attn_in" },
1375+
{ LLM_TENSOR_V_RESMPL_ATTN_Q, "v.resmpl.attn_q" },
1376+
{ LLM_TENSOR_V_RESMPL_ATTN_K, "v.resmpl.attn_k" },
1377+
{ LLM_TENSOR_V_RESMPL_ATTN_V, "v.resmpl.attn_v" },
13761378
{ LLM_TENSOR_V_RESMPL_ATTN_OUT, "v.resmpl.attn_out" },
1377-
{ LLM_TENSOR_V_RESMPL_KV_PROJ, "v.resmpl.kv_proj" },
1378-
{ LLM_TENSOR_V_RESMPL_NORM_POST, "v.resmpl.norm_post" },
1379-
{ LLM_TENSOR_V_RESMPL_NORM_KV, "v.resmpl.norm_kv" },
1380-
{ LLM_TENSOR_V_RESMPL_NORM_Q, "v.resmpl.norm_q" },
1379+
{ LLM_TENSOR_V_RESMPL_KV, "v.resmpl.kv" },
1380+
{ LLM_TENSOR_V_RESMPL_KV_NORM, "v.resmpl.kv_norm" },
1381+
{ LLM_TENSOR_V_RESMPL_POST_NORM, "v.resmpl.post_norm" },
1382+
{ LLM_TENSOR_V_RESMPL_Q_NORM, "v.resmpl.q_norm" },
13811383
{ LLM_TENSOR_V_RESMPL_PROJ, "v.resmpl.proj" },
13821384
{ LLM_TENSOR_V_RESMPL_QUERY, "v.resmpl.query" },
13831385
}
@@ -1531,6 +1533,24 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
15311533
{LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
15321534
{LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
15331535
{LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1536+
// vision
1537+
{LLM_TENSOR_V_MMPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1538+
{LLM_TENSOR_V_MMPROJ_MLP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1539+
{LLM_TENSOR_V_MMPROJ_PEG, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1540+
{LLM_TENSOR_V_ENC_EMBD_CLS, {LLM_TENSOR_LAYER_INPUT, GGML_OP_ADD}},
1541+
{LLM_TENSOR_V_ENC_EMBD_PATCH, {LLM_TENSOR_LAYER_INPUT, GGML_OP_ADD}},
1542+
{LLM_TENSOR_V_ENC_EMBD_POS, {LLM_TENSOR_LAYER_INPUT, GGML_OP_ADD}},
1543+
{LLM_TENSOR_V_ENC_ATTN_Q, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1544+
{LLM_TENSOR_V_ENC_ATTN_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1545+
{LLM_TENSOR_V_ENC_ATTN_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1546+
{LLM_TENSOR_V_ENC_INPUT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1547+
{LLM_TENSOR_V_ENC_OUTPUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1548+
{LLM_TENSOR_V_ENC_OUTPUT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1549+
{LLM_TENSOR_V_ENC_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1550+
{LLM_TENSOR_V_ENC_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
1551+
{LLM_TENSOR_V_PRE_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1552+
{LLM_TENSOR_V_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
1553+
// TODO: add minicpmv resampler tensors
15341554
};
15351555

15361556
LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}

src/llama-arch.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -371,12 +371,14 @@ enum llm_tensor {
371371
LLM_TENSOR_V_POST_NORM,
372372
// vision - minicpmv
373373
LLM_TENSOR_V_RESMPL_POS_EMBD_K,
374-
LLM_TENSOR_V_RESMPL_ATTN_IN,
374+
LLM_TENSOR_V_RESMPL_ATTN_Q,
375+
LLM_TENSOR_V_RESMPL_ATTN_K,
376+
LLM_TENSOR_V_RESMPL_ATTN_V,
375377
LLM_TENSOR_V_RESMPL_ATTN_OUT,
376-
LLM_TENSOR_V_RESMPL_KV_PROJ,
377-
LLM_TENSOR_V_RESMPL_NORM_POST,
378-
LLM_TENSOR_V_RESMPL_NORM_KV,
379-
LLM_TENSOR_V_RESMPL_NORM_Q,
378+
LLM_TENSOR_V_RESMPL_KV,
379+
LLM_TENSOR_V_RESMPL_KV_NORM,
380+
LLM_TENSOR_V_RESMPL_POST_NORM,
381+
LLM_TENSOR_V_RESMPL_Q_NORM,
380382
LLM_TENSOR_V_RESMPL_PROJ,
381383
LLM_TENSOR_V_RESMPL_QUERY,
382384
};

0 commit comments

Comments (0)