@@ -153,20 +153,20 @@ static std::string format(const char * fmt, ...) {
153153#define  TN_MINICPMV_LN  " resampler.ln_%s.%s" 
154154
155155
// Type of the multimodal projector that maps vision-encoder embeddings into
// the language model's embedding space.
enum class ProjectorType {
    MLP,        // two linear layers with GELU in between (see the MLP branch of the graph builder)
    MLP_NORM,   // MLP variant; selected when an extra projection tensor (TN_LLAVA_PROJ, 3) is present in the model file
    LDP,        // MobileVLM projector
    LDPV2,      // MobileVLM v2 projector
    RESAMPLER,  // MiniCPM-V resampler projector
    UNKNOWN,    // projector name in the model file did not match any known type
};
164164
165- static  std::map<projector_type , std::string> PROJECTOR_TYPE_NAMES  = {
166-     { PROJECTOR_TYPE_MLP , " mlp"   },
167-     { PROJECTOR_TYPE_LDP , " ldp"   },
168-     { PROJECTOR_TYPE_LDPV2 , " ldpv2"  },
169-     { PROJECTOR_TYPE_RESAMPLER , " resampler"  },
165+ static  std::map<ProjectorType , std::string> ProjectorTypeNames  = {
166+     { ProjectorType::MLP , " mlp"   },
167+     { ProjectorType::LDP , " ldp"   },
168+     { ProjectorType::LDPV2 , " ldpv2"  },
169+     { ProjectorType::RESAMPLER , " resampler"  },
170170};
171171
172172
@@ -287,13 +287,13 @@ static void print_tensor_info(const ggml_tensor * tensor, const char * prefix =
287287            tensor->ne [0 ], tensor->ne [1 ], tensor->ne [2 ], tensor->ne [3 ], ggml_type_name (tensor->type ));
288288}
289289
290- static  projector_type  clip_projector_type_from_string (const  std::string & name) {
291-     for  (const  auto  & kv : PROJECTOR_TYPE_NAMES ) { //  NOLINT
290+ static  ProjectorType  clip_projector_type_from_string (const  std::string & name) {
291+     for  (const  auto  & kv : ProjectorTypeNames ) { //  NOLINT
292292        if  (kv.second  == name) {
293293            return  kv.first ;
294294        }
295295    }
296-     return  PROJECTOR_TYPE_UNKNOWN ;
296+     return  ProjectorType::UNKNOWN ;
297297}
298298
299299#ifdef  CLIP_DEBUG_FUNCTIONS
@@ -552,7 +552,7 @@ struct clip_ctx {
552552    int  minicpmv_version = 2 ;
553553
554554    struct  clip_vision_model  vision_model;
555-     projector_type  proj_type = PROJECTOR_TYPE_MLP ;
555+     ProjectorType  proj_type = ProjectorType::MLP ;
556556
557557    float  image_mean[3 ];
558558    float  image_std[3 ];
@@ -790,15 +790,15 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
790790        //  print_tensor_info(embeddings, "embeddings");
791791
792792        //  llava projector
793-         if  (ctx->proj_type  == PROJECTOR_TYPE_MLP ) {
793+         if  (ctx->proj_type  == ProjectorType::MLP ) {
794794            embeddings = ggml_mul_mat (ctx0, model.mm_0_w , embeddings);
795795            embeddings = ggml_add (ctx0, embeddings, model.mm_0_b );
796796
797797            embeddings = ggml_gelu (ctx0, embeddings);
798798            embeddings = ggml_mul_mat (ctx0, model.mm_2_w , embeddings);
799799            embeddings = ggml_add (ctx0, embeddings, model.mm_2_b );
800800        }
801-         else  if  (ctx->proj_type  == PROJECTOR_TYPE_MLP_NORM ) {
801+         else  if  (ctx->proj_type  == ProjectorType::MLP_NORM ) {
802802            embeddings = ggml_mul_mat (ctx0, model.mm_0_w , embeddings);
803803            embeddings = ggml_add (ctx0, embeddings, model.mm_0_b );
804804            //  ggml_tensor_printf(embeddings, "mm_0_w",0,true,false);
@@ -819,7 +819,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
819819            embeddings = ggml_add (ctx0, ggml_mul (ctx0, embeddings, model.mm_4_w ),
820820                                model.mm_4_b );
821821        }
822-         else  if  (ctx->proj_type  == PROJECTOR_TYPE_LDP ) {
822+         else  if  (ctx->proj_type  == ProjectorType::LDP ) {
823823            //  MobileVLM projector
824824            int  n_patch = 24 ;
825825            struct  ggml_tensor  * mlp_1 = ggml_mul_mat (ctx0, model.mm_model_mlp_1_w , embeddings);
@@ -929,7 +929,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
929929            }
930930            embeddings = block_1;
931931        }
932-         else  if  (ctx->proj_type  == PROJECTOR_TYPE_LDPV2 )
932+         else  if  (ctx->proj_type  == ProjectorType::LDPV2 )
933933        {
934934            int  n_patch = 24 ;
935935            struct  ggml_tensor  * mlp_0 = ggml_mul_mat (ctx0, model.mm_model_mlp_0_w , embeddings);
@@ -960,7 +960,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
960960    //  minicpmv projector
961961    else  if  (ctx->has_minicpmv_projector )
962962    {
963-         if  (ctx->proj_type  == PROJECTOR_TYPE_RESAMPLER ) {
963+         if  (ctx->proj_type  == ProjectorType::RESAMPLER ) {
964964            struct  ggml_tensor  * q = model.mm_model_query ;
965965            { //  layernorm
966966                q = ggml_norm (ctx0, q, eps);
@@ -1139,12 +1139,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
11391139            const  std::string proj_type = gguf_get_val_str (ctx, idx);
11401140            new_clip->proj_type  = clip_projector_type_from_string (proj_type);
11411141        } else  {
1142-             new_clip->proj_type  = PROJECTOR_TYPE_MLP ;
1142+             new_clip->proj_type  = ProjectorType::MLP ;
11431143        }
11441144
1145-         if  (new_clip->proj_type  == PROJECTOR_TYPE_MLP ) {
1145+         if  (new_clip->proj_type  == ProjectorType::MLP ) {
11461146            if  (gguf_find_tensor (ctx, format (TN_LLAVA_PROJ, 3 , " weight"  ).c_str ()) != -1 ) {
1147-                 new_clip->proj_type  = PROJECTOR_TYPE_MLP_NORM ;
1147+                 new_clip->proj_type  = ProjectorType::MLP_NORM ;
11481148            }
11491149        }
11501150    }
@@ -1387,7 +1387,11 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
13871387        }
13881388
13891389        //  LLaVA projection
1390-         if  (new_clip->proj_type  == PROJECTOR_TYPE_MLP || new_clip->proj_type  == PROJECTOR_TYPE_MLP_NORM) {
1390+         switch  (new_clip->proj_type )
1391+         {
1392+         case  ProjectorType::MLP:
1393+         case  ProjectorType::MLP_NORM:
1394+         {
13911395            vision_model.mm_0_w               = get_tensor (new_clip->ctx_data , format (TN_LLAVA_PROJ, 0 , " weight"  ));
13921396            vision_model.mm_0_b               = get_tensor (new_clip->ctx_data , format (TN_LLAVA_PROJ, 0 , " bias"  ));
13931397            try  {
@@ -1414,7 +1418,10 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
14141418                vision_model.image_newline  = get_tensor (new_clip->ctx_data , TN_IMAGE_NEWLINE);
14151419                //  LOG_INF("%s: image_newline tensor (llava-1.6) found\n", __func__);
14161420            } catch  (std::runtime_error & /* e*/  ) { }
1417-         } else  if  (new_clip->proj_type  == PROJECTOR_TYPE_LDP) {
1421+             break ;
1422+         }
1423+         case  ProjectorType::LDP:
1424+         {
14181425            //  MobileVLM projection
14191426            vision_model.mm_model_mlp_1_w                = get_tensor (new_clip->ctx_data , format (TN_MVLM_PROJ_MLP, 1 , " weight"  ));
14201427            vision_model.mm_model_mlp_1_b                = get_tensor (new_clip->ctx_data , format (TN_MVLM_PROJ_MLP, 1 , " bias"  ));
@@ -1440,8 +1447,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
14401447            vision_model.mm_model_block_2_block_2_0_w    = get_tensor (new_clip->ctx_data , format (TN_MVLM_PROJ_BLOCK, 2 , 2 , " 0.weight"  ));
14411448            vision_model.mm_model_block_2_block_2_1_w    = get_tensor (new_clip->ctx_data , format (TN_MVLM_PROJ_BLOCK, 2 , 2 , " 1.weight"  ));
14421449            vision_model.mm_model_block_2_block_2_1_b    = get_tensor (new_clip->ctx_data , format (TN_MVLM_PROJ_BLOCK, 2 , 2 , " 1.bias"  ));
1450+             break ;
14431451        }
1444-         else   if  (new_clip-> proj_type  == PROJECTOR_TYPE_LDPV2) 
1452+         case  ProjectorType::LDPV2: 
14451453        {
14461454            //  MobilVLM_V2 projection
14471455            vision_model.mm_model_mlp_0_w  = get_tensor (new_clip->ctx_data , format (TN_MVLM_PROJ_MLP, 0 , " weight"  ));
@@ -1450,8 +1458,10 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
14501458            vision_model.mm_model_mlp_2_b  = get_tensor (new_clip->ctx_data , format (TN_MVLM_PROJ_MLP, 2 , " bias"  ));
14511459            vision_model.mm_model_peg_0_w  = get_tensor (new_clip->ctx_data , format (TN_MVLM_PROJ_PEG, 0 , " weight"  ));
14521460            vision_model.mm_model_peg_0_b  = get_tensor (new_clip->ctx_data , format (TN_MVLM_PROJ_PEG, 0 , " bias"  ));
1461+             break ;
14531462        }
1454-         else  if  (new_clip->proj_type  == PROJECTOR_TYPE_RESAMPLER) {
1463+         case  ProjectorType::RESAMPLER:
1464+         {
14551465            //  vision_model.mm_model_pos_embed = get_tensor(new_clip->ctx_data, TN_MINICPMV_POS_EMBD);
14561466            vision_model.mm_model_pos_embed_k  = get_tensor (new_clip->ctx_data , TN_MINICPMV_POS_EMBD_K);
14571467            vision_model.mm_model_query  = get_tensor (new_clip->ctx_data , TN_MINICPMV_QUERY);
@@ -1471,10 +1481,15 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
14711481            vision_model.mm_model_ln_kv_b  = get_tensor (new_clip->ctx_data , format (TN_MINICPMV_LN, " kv"  , " bias"  ));
14721482            vision_model.mm_model_ln_post_w  = get_tensor (new_clip->ctx_data , format (TN_MINICPMV_LN, " post"  , " weight"  ));
14731483            vision_model.mm_model_ln_post_b  = get_tensor (new_clip->ctx_data , format (TN_MINICPMV_LN, " post"  , " bias"  ));
1484+             break ;
1485+         }
1486+         case  ProjectorType::UNKNOWN:
1487+         {
1488+             LOG_ERR (" %s: ProjectorType\n "  , __func__);
1489+             clip_free (new_clip);
1490+             gguf_free (ctx);
1491+             std::terminate ();
14741492        }
1475-         else  {
1476-             std::string proj_type = PROJECTOR_TYPE_NAMES[new_clip->proj_type ];
1477-             throw  std::runtime_error (format (" %s: don't support projector with: %s currently\n "  , __func__, proj_type.c_str ()));
14781493        }
14791494
14801495        vision_model.layers .resize (hparams.n_layer );
@@ -2189,9 +2204,9 @@ int clip_n_patches(const struct clip_ctx * ctx) {
21892204
21902205    int  n_patches = (params.image_size  / params.patch_size ) * (params.image_size  / params.patch_size );
21912206
2192-     if  (ctx->proj_type  == PROJECTOR_TYPE_LDP  || ctx->proj_type  == PROJECTOR_TYPE_LDPV2 ) {
2207+     if  (ctx->proj_type  == ProjectorType::LDP  || ctx->proj_type  == ProjectorType::LDPV2 ) {
21932208        n_patches /= 4 ;
2194-     } else  if  (ctx->proj_type  == PROJECTOR_TYPE_RESAMPLER ) {
2209+     } else  if  (ctx->proj_type  == ProjectorType::RESAMPLER ) {
21952210        if  (ctx->minicpmv_version  == 2 ) {
21962211            n_patches = 96 ;
21972212        }
@@ -2597,29 +2612,31 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
25972612}
25982613
25992614int  clip_n_mmproj_embd (const  struct  clip_ctx  * ctx) {
2600-     if  (ctx->proj_type  == PROJECTOR_TYPE_LDP) {
2615+     switch  (ctx->proj_type )
2616+     {
2617+     case  ProjectorType::LDP:
26012618        return  ctx->vision_model .mm_model_block_1_block_2_1_b ->ne [0 ];
2602-     }
2603-     if  (ctx->proj_type  == PROJECTOR_TYPE_LDPV2) {
2619+     case  ProjectorType::LDPV2:
26042620        return  ctx->vision_model .mm_model_peg_0_b ->ne [0 ];
2605-     }
2606-     if  (ctx->proj_type  == PROJECTOR_TYPE_MLP) {
2621+     case  ProjectorType::MLP:
26072622        return  ctx->vision_model .mm_2_b ->ne [0 ];
2608-     }
2609-     if  (ctx->proj_type  == PROJECTOR_TYPE_MLP_NORM) {
2623+     case  ProjectorType::MLP_NORM:
26102624        return  ctx->vision_model .mm_3_b ->ne [0 ];
2611-     }
2612-     if  (ctx->proj_type  == PROJECTOR_TYPE_RESAMPLER) {
2625+     case  ProjectorType::RESAMPLER:
26132626        if  (ctx->minicpmv_version  == 2 ) {
26142627            return  4096 ;
26152628        }
26162629        else  if  (ctx->minicpmv_version  == 3 ) {
26172630            return  3584 ;
26182631        }
2632+         [[fallthrough]];
2633+     case  ProjectorType::UNKNOWN:
2634+         LOG_ERR (" %s: ProjectorType\n "  , __func__);
2635+         std::terminate ();
26192636    }
2620- 
2621-     std::string proj_type = PROJECTOR_TYPE_NAMES[ctx-> proj_type ] ;
2622-     throw   std::runtime_error  ( format ( " %s: don't support projector with: %s currently \n " , __func__, proj_type. c_str ()) );
2637+      //  Handle unexpected ProjectorType values explicitly since Enum Class switch should have no default case 
2638+     LOG_ERR ( " %s: Unhandled ProjectorType \n " , __func__) ;
2639+     std::terminate  ( );
26232640}
26242641
26252642int  clip_is_minicpmv (const  struct  clip_ctx  * ctx) {
0 commit comments