Skip to content

Commit 0fec64b

Browse files
author
lzhang
committed
Code update for pr/14750
1 parent a8935c9 commit 0fec64b

File tree

2 files changed

14 additions & 32 deletions

2 files changed

14 additions & 32 deletions

tools/mtmd/clip-impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
#define KEY_ATTN_WINDOW_SIZE "clip.vision.window_size"
4646
#define KEY_MINICPMV_VERSION "clip.minicpmv_version"
4747
#define KEY_MINICPMV_QUERY_NUM "clip.minicpmv_query_num"
48-
#define KEY_MINICPMV_PROJECTION_DIM "clip.minicpmv_projection_dim"
48+
#define KEY_MINICPMV_PROJ_DIM "clip.minicpmv_projection_dim"
4949

5050
// audio-specific
5151
#define KEY_A_NUM_MEL_BINS "clip.audio.num_mel_bins"

tools/mtmd/clip.cpp

Lines changed: 13 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -850,20 +850,7 @@ struct clip_graph {
850850
const int d_head = 128;
851851
int n_head = n_embd/d_head;
852852
// Use actual config value if available, otherwise fall back to hardcoded values
853-
int num_query = 96;
854-
if (ctx->model.hparams.minicpmv_query_num > 0) {
855-
num_query = ctx->model.hparams.minicpmv_query_num;
856-
} else {
857-
// Fallback to hardcoded values for legacy models
858-
if (ctx->model.hparams.minicpmv_version == 2) {
859-
num_query = 96;
860-
} else if (ctx->model.hparams.minicpmv_version == 3) {
861-
num_query = 64;
862-
} else if (ctx->model.hparams.minicpmv_version == 4) {
863-
num_query = 64;
864-
}
865-
}
866-
853+
int num_query = ctx->model.hparams.minicpmv_query_num;
867854
ggml_tensor * Q = ggml_add(ctx0,
868855
ggml_mul_mat(ctx0, model.mm_model_attn_q_w, q),
869856
model.mm_model_attn_q_b);
@@ -2119,8 +2106,17 @@ struct clip_model_loader {
21192106
get_u32(KEY_IMAGE_CROP_RESOLUTION, hparams.image_crop_resolution, false);
21202107
get_i32(KEY_MINICPMV_VERSION, hparams.minicpmv_version, false); // legacy
21212108
get_u32(KEY_MINICPMV_QUERY_NUM, hparams.minicpmv_query_num, false);
2122-
get_u32(KEY_MINICPMV_PROJECTION_DIM, hparams.minicpmv_projection_dim, false);
2123-
2109+
get_u32(KEY_MINICPMV_PROJ_DIM, hparams.minicpmv_projection_dim, false);
2110+
if (hparams.minicpmv_query_num == 0) {
2111+
// Fallback to hardcoded values for legacy models
2112+
if (hparams.minicpmv_version == 3) {
2113+
hparams.minicpmv_query_num = 64;
2114+
} else if (hparams.minicpmv_version == 4) {
2115+
hparams.minicpmv_query_num = 64;
2116+
} else {
2117+
hparams.minicpmv_query_num = 96;
2118+
}
2119+
}
21242120
} else if (is_audio) {
21252121
get_u32(KEY_A_NUM_MEL_BINS, hparams.n_mel_bins);
21262122

@@ -4063,7 +4059,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
40634059
}
40644060

40654061
int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
4066-
const auto & hparams = ctx->model.hparams;
40674062
switch (ctx->model.proj_type) {
40684063
case PROJECTOR_TYPE_LDP:
40694064
return ctx->model.mm_model_block_1_block_2_1_b->ne[0];
@@ -4075,20 +4070,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
40754070
case PROJECTOR_TYPE_MLP_NORM:
40764071
return ctx->model.mm_3_b->ne[0];
40774072
case PROJECTOR_TYPE_MINICPMV:
4078-
// Use actual config value if available, otherwise fall back to hardcoded values
4079-
if (hparams.minicpmv_projection_dim > 0) {
4080-
return hparams.minicpmv_projection_dim;
4081-
} else {
4082-
// Fallback to hardcoded values for legacy models
4083-
if (hparams.minicpmv_version == 2) {
4084-
return 4096;
4085-
} else if (hparams.minicpmv_version == 3) {
4086-
return 3584;
4087-
} else if (hparams.minicpmv_version == 4) {
4088-
return 3584;
4089-
}
4090-
GGML_ABORT("Unknown minicpmv version");
4091-
}
4073+
return ctx->model.mm_model_proj->ne[0];
40924074
case PROJECTOR_TYPE_GLM_EDGE:
40934075
return ctx->model.mm_model_mlp_3_w->ne[1];
40944076
case PROJECTOR_TYPE_QWEN2VL:

0 commit comments

Comments
 (0)