@@ -288,8 +288,6 @@ struct clip_model {
288288 // GLMV-Edge projection
289289 ggml_tensor * mm_model_adapter_conv_w = nullptr ;
290290 ggml_tensor * mm_model_adapter_conv_b = nullptr ;
291- ggml_tensor * mm_glm_tok_boi = nullptr ;
292- ggml_tensor * mm_glm_tok_eoi = nullptr ;
293291
294292 // MobileVLM projection
295293 ggml_tensor * mm_model_mlp_1_w = nullptr ;
@@ -1505,8 +1503,8 @@ struct clip_graph {
15051503 // note: these embeddings are not present in text model, hence we cannot process them as text tokens
15061504 // see: https://huggingface.co/THUDM/glm-edge-v-2b/blob/main/siglip.py#L53
15071505 {
1508- embeddings = ggml_concat (ctx0, model.mm_glm_tok_boi , embeddings, 1 ); // BOI
1509- embeddings = ggml_concat (ctx0, embeddings, model.mm_glm_tok_eoi , 1 ); // EOI
1506+ embeddings = ggml_concat (ctx0, model.mm_boi , embeddings, 1 ); // BOI
1507+ embeddings = ggml_concat (ctx0, embeddings, model.mm_eoi , 1 ); // EOI
15101508 }
15111509 }
15121510
@@ -2797,8 +2795,8 @@ struct clip_model_loader {
27972795 model.mm_model_mlp_1_w = get_tensor (string_format (TN_GLM_ADAPTER_D_H_2_4H, " weight" ));
27982796 model.mm_model_mlp_2_w = get_tensor (string_format (TN_GLM_ADAPTER_GATE, " weight" ));
27992797 model.mm_model_mlp_3_w = get_tensor (string_format (TN_GLM_ADAPTER_D_4H_2_H, " weight" ));
2800- model.mm_glm_tok_boi = get_tensor (string_format (TN_TOK_GLM_BOI, " weight" ));
2801- model.mm_glm_tok_eoi = get_tensor (string_format (TN_TOK_GLM_EOI, " weight" ));
2798+ model.mm_boi = get_tensor (string_format (TN_TOK_GLM_BOI, " weight" ));
2799+ model.mm_eoi = get_tensor (string_format (TN_TOK_GLM_EOI, " weight" ));
28022800 } break ;
28032801 case PROJECTOR_TYPE_QWEN2VL:
28042802 case PROJECTOR_TYPE_QWEN25VL:
@@ -2894,14 +2892,14 @@ struct clip_model_loader {
28942892 } break ;
28952893 case PROJECTOR_TYPE_COGVLM:
28962894 {
2897- model.mm_model_proj = get_tensor (TN_MM_PROJECTOR);
2895+ model.mm_model_proj = get_tensor (TN_MM_PROJECTOR);
28982896 model.mm_post_fc_norm_w = get_tensor (string_format (TN_MM_POST_FC_NORM, " weight" ));
28992897 model.mm_post_fc_norm_b = get_tensor (string_format (TN_MM_POST_FC_NORM, " bias" ));
2900- model.mm_h_to_4h_w = get_tensor (string_format (TN_MM_H_TO_4H, " weight" ));
2901- model.mm_gate_w = get_tensor (string_format (TN_MM_GATE, " weight" ));
2902- model.mm_4h_to_h_w = get_tensor (string_format (TN_MM_4H_TO_H, " weight" ));
2903- model.mm_boi = get_tensor (TN_TOK_BOI);
2904- model.mm_eoi = get_tensor (TN_TOK_EOI);
2898+ model.mm_h_to_4h_w = get_tensor (string_format (TN_MM_H_TO_4H, " weight" ));
2899+ model.mm_gate_w = get_tensor (string_format (TN_MM_GATE, " weight" ));
2900+ model.mm_4h_to_h_w = get_tensor (string_format (TN_MM_4H_TO_H, " weight" ));
2901+ model.mm_boi = get_tensor (TN_TOK_BOI);
2902+ model.mm_eoi = get_tensor (TN_TOK_EOI);
29052903 } break ;
29062904 default :
29072905 GGML_ASSERT (false && " unknown projector type" );
@@ -3951,7 +3949,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
39513949 case PROJECTOR_TYPE_GLM_EDGE:
39523950 {
39533951 n_patches /= 4 ;
3954- if (ctx->model .mm_glm_tok_boi ) {
3952+ if (ctx->model .mm_boi ) {
39553953 n_patches += 2 ; // for BOI and EOI token embeddings
39563954 }
39573955 } break ;
@@ -4043,7 +4041,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
40434041 } break ;
40444042 case PROJECTOR_TYPE_COGVLM:
40454043 {
4046- n_patches += 2 ;
4044+ n_patches += 2 ; // for BOI and EOI token embeddings
40474045 } break ;
40484046 default :
40494047 GGML_ABORT (" unsupported projector type" );
0 commit comments