@@ -850,20 +850,7 @@ struct clip_graph {
850850 const int d_head = 128 ;
851851 int n_head = n_embd/d_head;
852852 // Use actual config value if available, otherwise fall back to hardcoded values
853- int num_query = 96 ;
854- if (ctx->model .hparams .minicpmv_query_num > 0 ) {
855- num_query = ctx->model .hparams .minicpmv_query_num ;
856- } else {
857- // Fallback to hardcoded values for legacy models
858- if (ctx->model .hparams .minicpmv_version == 2 ) {
859- num_query = 96 ;
860- } else if (ctx->model .hparams .minicpmv_version == 3 ) {
861- num_query = 64 ;
862- } else if (ctx->model .hparams .minicpmv_version == 4 ) {
863- num_query = 64 ;
864- }
865- }
866-
853+ int num_query = ctx->model .hparams .minicpmv_query_num ;
867854 ggml_tensor * Q = ggml_add (ctx0,
868855 ggml_mul_mat (ctx0, model.mm_model_attn_q_w , q),
869856 model.mm_model_attn_q_b );
@@ -2119,8 +2106,17 @@ struct clip_model_loader {
21192106 get_u32 (KEY_IMAGE_CROP_RESOLUTION, hparams.image_crop_resolution , false );
21202107 get_i32 (KEY_MINICPMV_VERSION, hparams.minicpmv_version , false ); // legacy
21212108 get_u32 (KEY_MINICPMV_QUERY_NUM, hparams.minicpmv_query_num , false );
2122- get_u32 (KEY_MINICPMV_PROJECTION_DIM, hparams.minicpmv_projection_dim , false );
2123-
2109+ get_u32 (KEY_MINICPMV_PROJ_DIM, hparams.minicpmv_projection_dim , false );
2110+ if (hparams.minicpmv_query_num == 0 ) {
2111+ // Fallback to hardcoded values for legacy models
2112+ if (hparams.minicpmv_version == 3 ) {
2113+ hparams.minicpmv_query_num = 64 ;
2114+ } else if (hparams.minicpmv_version == 4 ) {
2115+ hparams.minicpmv_query_num = 64 ;
2116+ } else {
2117+ hparams.minicpmv_query_num = 96 ;
2118+ }
2119+ }
21242120 } else if (is_audio) {
21252121 get_u32 (KEY_A_NUM_MEL_BINS, hparams.n_mel_bins );
21262122
@@ -4063,7 +4059,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
40634059}
40644060
40654061int clip_n_mmproj_embd (const struct clip_ctx * ctx) {
4066- const auto & hparams = ctx->model .hparams ;
40674062 switch (ctx->model .proj_type ) {
40684063 case PROJECTOR_TYPE_LDP:
40694064 return ctx->model .mm_model_block_1_block_2_1_b ->ne [0 ];
@@ -4075,20 +4070,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
40754070 case PROJECTOR_TYPE_MLP_NORM:
40764071 return ctx->model .mm_3_b ->ne [0 ];
40774072 case PROJECTOR_TYPE_MINICPMV:
4078- // Use actual config value if available, otherwise fall back to hardcoded values
4079- if (hparams.minicpmv_projection_dim > 0 ) {
4080- return hparams.minicpmv_projection_dim ;
4081- } else {
4082- // Fallback to hardcoded values for legacy models
4083- if (hparams.minicpmv_version == 2 ) {
4084- return 4096 ;
4085- } else if (hparams.minicpmv_version == 3 ) {
4086- return 3584 ;
4087- } else if (hparams.minicpmv_version == 4 ) {
4088- return 3584 ;
4089- }
4090- GGML_ABORT (" Unknown minicpmv version" );
4091- }
4073+ return ctx->model .mm_model_proj ->ne [0 ];
40924074 case PROJECTOR_TYPE_GLM_EDGE:
40934075 return ctx->model .mm_model_mlp_3_w ->ne [1 ];
40944076 case PROJECTOR_TYPE_QWEN2VL:
0 commit comments