@@ -1474,10 +1474,20 @@ struct clip_model_loader {
14741474
14751475 void alloc_compute_meta () {
14761476 ctx_clip.buf_compute_meta .resize (GGML_DEFAULT_GRAPH_SIZE * ggml_tensor_overhead () + ggml_graph_overhead ());
1477+
1478+ // create a fake batch
14771479 clip_image_f32_batch batch;
14781480 clip_image_f32_ptr img;
1481+ clip_image_size image_size;
1482+ image_size.width = clip_get_image_size (&ctx_clip);
1483+ image_size.height = clip_get_image_size (&ctx_clip);
1484+ int n_patches = clip_get_image_size (&ctx_clip) / image_size.width ;
1485+ img->nx = n_patches;
1486+ img->ny = n_patches;
1487+ img->buf .resize (n_patches * image_size.width * image_size.height * 3 );
14791488 batch.push_back (std::move (img));
1480- ggml_cgraph * gf = clip_image_build_graph (&ctx_clip, batch, clip_image_size{}, false );
1489+
1490+ ggml_cgraph * gf = clip_image_build_graph (&ctx_clip, batch, image_size, false );
14811491 ggml_backend_sched_reserve (ctx_clip.sched .get (), gf);
14821492 for (size_t i = 0 ; i < ctx_clip.backend_ptrs .size (); ++i) {
14831493 ggml_backend_t backend = ctx_clip.backend_ptrs [i];
@@ -2150,10 +2160,10 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
21502160 // bilinear_resize(*img, *image_original_resize, params.image_size, params.image_size); // in python this is "shortest_edge", but all CLIP are square
21512161 bicubic_resize (*img, *image_original_resize, params.image_size , params.image_size ); // in python this is "shortest_edge", but all CLIP are square
21522162 patches.insert (patches.begin (), std::move (image_original_resize));
2153- int num = 0 ;
21542163 for (auto & patch : patches) {
2155- normalize_image_u8_to_f32 (*patch, *res_imgs->at (num), ctx->image_mean , ctx->image_std );
2156- num++;
2164+ clip_image_f32_ptr res;
2165+ normalize_image_u8_to_f32 (*patch, *res, ctx->image_mean , ctx->image_std );
2166+ res_imgs->push_back (std::move (res));
21572167 }
21582168
21592169 return true ;
0 commit comments