@@ -331,7 +331,6 @@ struct clip_ctx {
331331 float image_std[3 ];
332332 bool use_gelu = false ;
333333 bool use_silu = false ;
334- int32_t ftype = 1 ;
335334
336335 struct gguf_context * ctx_gguf = nullptr ;
337336 struct ggml_context * ctx_data = nullptr ;
@@ -380,6 +379,7 @@ struct clip_ctx {
380379 if (backend_cpu != backend) {
381380 ggml_backend_free (backend_cpu);
382381 }
382+ clip_image_size_free (load_image_size);
383383 }
384384};
385385
@@ -1141,9 +1141,6 @@ struct clip_model_loader {
11411141
11421142 // print gguf info
11431143 {
1144- int ftype = -1 ;
1145- get_u32 (KEY_FTYPE, ftype, false );
1146- const std::string ftype_str = ggml_type_name (static_cast <ggml_type>(ftype));
11471144 std::string name;
11481145 get_string (KEY_NAME, name, false );
11491146 std::string description;
@@ -1154,7 +1151,6 @@ struct clip_model_loader {
11541151 LOG_INF (" %s: alignment: %zu\n " , __func__, gguf_get_alignment (ctx_gguf.get ()));
11551152 LOG_INF (" %s: n_tensors: %d\n " , __func__, n_tensors);
11561153 LOG_INF (" %s: n_kv: %d\n " , __func__, (int )gguf_get_n_kv (ctx_gguf.get ()));
1157- LOG_INF (" %s: ftype: %s\n " , __func__, ftype_str.c_str ());
11581154 LOG_INF (" \n " );
11591155 }
11601156
@@ -1618,6 +1614,12 @@ struct clip_image_f32 * clip_image_f32_init() {
16181614 return new clip_image_f32 ();
16191615}
16201616
1617+ void clip_image_size_free (struct clip_image_size * load_image_size) {
1618+ if (load_image_size == nullptr ) {
1619+ return ;
1620+ }
1621+ delete load_image_size;
1622+ }
16211623void clip_image_u8_free (struct clip_image_u8 * img) { delete img; }
16221624void clip_image_f32_free (struct clip_image_f32 * img) { delete img; }
16231625void clip_image_u8_batch_free (struct clip_image_u8_batch * batch) {
@@ -2270,6 +2272,9 @@ ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx) {
22702272}
22712273
22722274void clip_free (clip_ctx * ctx) {
2275+ if (ctx == nullptr ) {
2276+ return ;
2277+ }
22732278 delete ctx;
22742279}
22752280
@@ -2840,10 +2845,19 @@ int clip_is_minicpmv(const struct clip_ctx * ctx) {
28402845bool clip_is_glm (const struct clip_ctx * ctx) {
28412846 return ctx->has_glm_projector ;
28422847}
2848+
28432849bool clip_is_qwen2vl (const struct clip_ctx * ctx) {
28442850 return ctx->has_qwen2vl_merger ;
28452851}
28462852
2853+ bool clip_is_llava (const struct clip_ctx * ctx) {
2854+ return ctx->has_llava_projector ;
2855+ }
2856+
2857+ bool clip_is_gemma3 (const struct clip_ctx * ctx) {
2858+ return ctx->proj_type == PROJECTOR_TYPE_GEMMA3;
2859+ }
2860+
28472861// Determine the number of encoder layers to iterate over
28482862int get_deepest_feature_layer (const struct clip_ctx * ctx) {
28492863 // Get the index of the second to last layer; this is the
0 commit comments