@@ -1718,7 +1718,8 @@ struct clip_model_loader {
17181718
17191719                if  (ctx_clip.proj_type  == PROJECTOR_TYPE_MINICPMV
17201720                        || ctx_clip.proj_type  == PROJECTOR_TYPE_GLM_EDGE
1721-                         || ctx_clip.proj_type  == PROJECTOR_TYPE_QWEN2VL) {
1721+                         || ctx_clip.proj_type  == PROJECTOR_TYPE_QWEN2VL
1722+                         || ctx_clip.proj_type  == PROJECTOR_TYPE_QWEN25VL) {
17221723                    n_layer += 1 ;
17231724                }
17241725
@@ -2744,7 +2745,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
27442745        }
27452746        return  true ;
27462747    }
2747-     else  if  (ctx->proj_type  == PROJECTOR_TYPE_QWEN2VL) {
2748+     else  if  (ctx->proj_type  == PROJECTOR_TYPE_QWEN2VL || ctx-> proj_type  == PROJECTOR_TYPE_QWEN25VL ) {
27482749        clip_image_u8 resized;
27492750        auto  patch_size = clip_get_patch_size (ctx) * 2 ;
27502751        int  nx = ceil ((float )img->nx  / patch_size) * patch_size;
@@ -3139,7 +3140,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
31393140    else  {
31403141        //  non-minicpmv models
31413142
3142-         if  (ctx->proj_type  == PROJECTOR_TYPE_QWEN2VL) {
3143+         if  (ctx->proj_type  == PROJECTOR_TYPE_QWEN2VL || ctx-> proj_type  == PROJECTOR_TYPE_QWEN25VL ) {
31433144            //  pw * ph = number of tokens output by ViT after apply patch merger
31443145            //  ipw * ipw = number of vision token been processed inside ViT
31453146            const  int  merge_ratio = 2 ;
@@ -3279,7 +3280,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
32793280        }
32803281    }
32813282
3282-     if  (use_window_attn && ctx->proj_type  == PROJECTOR_TYPE_QWEN25VL) {
3283+     if  (use_window_attn && ( ctx->proj_type  == PROJECTOR_TYPE_QWEN2VL || ctx-> proj_type  ==  PROJECTOR_TYPE_QWEN25VL) ) {
32833284        struct  ggml_tensor  * window_idx = ggml_graph_get_tensor (gf, " window_idx"  );
32843285        struct  ggml_tensor  * inv_window_idx = ggml_graph_get_tensor (gf, " inv_window_idx"  );
32853286        struct  ggml_tensor  * window_mask = ggml_graph_get_tensor (gf, " window_mask"  );
0 commit comments