@@ -244,8 +244,6 @@ struct clip_vision_model {
244244    // GLMV-Edge projection
245245    struct  ggml_tensor  * mm_model_adapter_conv_w = nullptr ;
246246    struct  ggml_tensor  * mm_model_adapter_conv_b = nullptr ;
247-     struct  ggml_tensor  * boi_w = nullptr ;
248-     struct  ggml_tensor  * eoi_w = nullptr ;
249247
250248    //  MobileVLM projection
251249    struct  ggml_tensor  * mm_model_mlp_1_w = nullptr ;
@@ -1697,8 +1695,6 @@ struct clip_model_loader {
16971695                    vision_model.mm_model_mlp_1_w  = get_tensor (string_format (TN_GLM_ADAPTER_D_H_2_4H," weight"  ));
16981696                    vision_model.mm_model_mlp_2_w  = get_tensor (string_format (TN_GLM_ADAPTER_GATE," weight"  ));
16991697                    vision_model.mm_model_mlp_3_w  = get_tensor (string_format (TN_GLM_ADAPTER_D_4H_2_H," weight"  ));
1700-                     vision_model.boi_w  = get_tensor (TN_GLM_BOI_W);
1701-                     vision_model.eoi_w  = get_tensor (TN_GLM_EOI_W);
17021698                } break ;
17031699            case  PROJECTOR_TYPE_MERGER:
17041700                {
@@ -2593,8 +2589,7 @@ void clip_free(clip_ctx * ctx) {
25932589}
25942590
25952591size_t  clip_embd_nbytes (const  struct  clip_ctx  * ctx) {
2596-     int  extra_tokens = ctx->has_glm_projector  ? 2  : 0 ;
2597-     return  (clip_n_patches (ctx) + extra_tokens) * clip_n_mmproj_embd (ctx) * sizeof (float );
2592+     return  clip_n_patches (ctx) * clip_n_mmproj_embd (ctx) * sizeof (float );
25982593}
25992594
26002595size_t  clip_embd_nbytes_by_img (const  struct  clip_ctx  * ctx, int  img_h, int  img_w) {
@@ -2790,9 +2785,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
27902785    }
27912786    if  (ctx->has_glm_projector ) {
27922787        GGML_ASSERT (batch_size == 1 );
2793-         ggml_tensor * boi = ctx->vision_model .boi_w ;
2794-         ggml_backend_tensor_get (boi,vec,0 ,ggml_nbytes (boi));
2795-         vec = (float *)(vec+ggml_nelements (boi)); // offset for boi
27962788    }
27972789
27982790    //  build the inference graph
@@ -3001,13 +2993,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
30012993    //  copy the embeddings to the location passed by the user
30022994    ggml_backend_tensor_get (embeddings, vec, 0 , ggml_nbytes (embeddings));
30032995
3004-     if  (ctx->has_glm_projector ) {
3005-         // eoi
3006-         ggml_tensor * eoi = ctx->vision_model .eoi_w ;
3007-         int  offset = ggml_nelements (embeddings);
3008-         ggml_backend_tensor_get (eoi, vec+offset, 0 , ggml_nbytes (eoi));
3009-     }
3010- 
30112996    return  true ;
30122997}
30132998
0 commit comments